From 0d769b3bb0fa07600a7d36d4e0b045e404f7e753 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20B=2E=20Brandenburg?= Date: Thu, 21 Oct 2010 16:08:46 -0400 Subject: Add 2010.2 release --- download/2010.2/SHA256SUMS | 3 + download/2010.2/ft_tools-2010.2.tgz | Bin 0 -> 5577 bytes download/2010.2/liblitmus-2010.2.tgz | Bin 0 -> 17962 bytes download/2010.2/litmus-rt-2010.2.patch | 11076 +++++++++++++++++++++++++++++++ index.html | 44 +- 5 files changed, 11119 insertions(+), 4 deletions(-) create mode 100644 download/2010.2/SHA256SUMS create mode 100644 download/2010.2/ft_tools-2010.2.tgz create mode 100644 download/2010.2/liblitmus-2010.2.tgz create mode 100644 download/2010.2/litmus-rt-2010.2.patch diff --git a/download/2010.2/SHA256SUMS b/download/2010.2/SHA256SUMS new file mode 100644 index 0000000..19f5f12 --- /dev/null +++ b/download/2010.2/SHA256SUMS @@ -0,0 +1,3 @@ +b911c0a77b0bfd4d73928404338f6a1d98279340d9288a32deb0c5c1e4281469 ft_tools-2010.2.tgz +d2b772cd6c3a03c1329b259ad4a2bfbf9f7268a5699b8f988b85aa1eafe7600a liblitmus-2010.2.tgz +c460952c4c91076392e889ef457cf231d5ecbcf7fbf72257ff84c0e63be7f9da litmus-rt-2010.2.patch diff --git a/download/2010.2/ft_tools-2010.2.tgz b/download/2010.2/ft_tools-2010.2.tgz new file mode 100644 index 0000000..4d95abb Binary files /dev/null and b/download/2010.2/ft_tools-2010.2.tgz differ diff --git a/download/2010.2/liblitmus-2010.2.tgz b/download/2010.2/liblitmus-2010.2.tgz new file mode 100644 index 0000000..abeb6c2 Binary files /dev/null and b/download/2010.2/liblitmus-2010.2.tgz differ diff --git a/download/2010.2/litmus-rt-2010.2.patch b/download/2010.2/litmus-rt-2010.2.patch new file mode 100644 index 0000000..6dcfc56 --- /dev/null +++ b/download/2010.2/litmus-rt-2010.2.patch @@ -0,0 +1,11076 @@ + Makefile | 4 +- + arch/x86/Kconfig | 8 + + arch/x86/include/asm/entry_arch.h | 1 + + arch/x86/include/asm/feather_trace.h | 17 + + arch/x86/include/asm/feather_trace_32.h | 79 +++ + arch/x86/include/asm/feather_trace_64.h | 67 +++ + arch/x86/include/asm/hw_irq.h | 3 + + arch/x86/include/asm/irq_vectors.h | 5 + + arch/x86/include/asm/processor.h | 4 + + arch/x86/include/asm/unistd_32.h | 6 +- + arch/x86/include/asm/unistd_64.h | 4 + + arch/x86/kernel/Makefile | 2 + + arch/x86/kernel/cpu/intel_cacheinfo.c | 17 + + arch/x86/kernel/entry_64.S | 2 + + arch/x86/kernel/ft_event.c | 118 ++++ + arch/x86/kernel/irqinit.c | 3 + + arch/x86/kernel/smp.c | 28 + + arch/x86/kernel/syscall_table_32.S | 14 + + fs/exec.c | 13 +- + fs/inode.c | 2 + + include/linux/completion.h | 1 + + include/linux/fs.h | 21 +- + include/linux/hrtimer.h | 32 ++ + include/linux/sched.h | 17 +- + include/linux/smp.h | 5 + + include/linux/tick.h | 5 + + include/litmus/bheap.h | 77 +++ + include/litmus/budget.h | 8 + + include/litmus/edf_common.h | 27 + + include/litmus/fdso.h | 70 +++ + include/litmus/feather_buffer.h | 94 ++++ + include/litmus/feather_trace.h | 65 +++ + include/litmus/ftdev.h | 49 ++ + include/litmus/jobs.h | 9 + + include/litmus/litmus.h | 267 +++++++++ + include/litmus/rt_domain.h | 182 +++++++ + include/litmus/rt_param.h | 196 +++++++ + include/litmus/sched_plugin.h | 162 ++++++ + include/litmus/sched_trace.h | 192 +++++++ + include/litmus/trace.h | 113 ++++ + include/litmus/unistd_32.h | 23 + + include/litmus/unistd_64.h | 37 ++ + kernel/exit.c | 4 + + kernel/fork.c | 7 + + kernel/hrtimer.c | 95 ++++ + kernel/printk.c | 14 +- + kernel/sched.c | 106 ++++- + kernel/sched_fair.c | 2 +- + kernel/sched_rt.c | 2 +- + kernel/time/tick-sched.c | 48 ++- + litmus/Kconfig | 134 
+++++ + litmus/Makefile | 25 + + litmus/bheap.c | 314 +++++++++++ + litmus/budget.c | 109 ++++ + litmus/ctrldev.c | 150 +++++ + litmus/edf_common.c | 102 ++++ + litmus/fdso.c | 281 ++++++++++ + litmus/fmlp.c | 268 +++++++++ + litmus/ft_event.c | 43 ++ + litmus/ftdev.c | 360 +++++++++++++ + litmus/jobs.c | 43 ++ + litmus/litmus.c | 799 +++++++++++++++++++++++++++ + litmus/rt_domain.c | 355 ++++++++++++ + litmus/sched_cedf.c | 773 ++++++++++++++++++++++++++ + litmus/sched_gsn_edf.c | 842 +++++++++++++++++++++++++++++ + litmus/sched_litmus.c | 315 +++++++++++ + litmus/sched_pfair.c | 897 +++++++++++++++++++++++++++++++ + litmus/sched_plugin.c | 265 +++++++++ + litmus/sched_psn_edf.c | 482 +++++++++++++++++ + litmus/sched_task_trace.c | 204 +++++++ + litmus/sched_trace.c | 378 +++++++++++++ + litmus/srp.c | 318 +++++++++++ + litmus/sync.c | 104 ++++ + litmus/trace.c | 103 ++++ + 74 files changed, 9954 insertions(+), 37 deletions(-) + +diff --git a/Makefile b/Makefile +index ebc8225..316557d 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 34 +-EXTRAVERSION = ++EXTRAVERSION =-litmus2010 + NAME = Sheep on Meth + + # *DOCUMENTATION* +@@ -650,7 +650,7 @@ export mod_strip_cmd + + + ifeq ($(KBUILD_EXTMOD),) +-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ ++core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/ + + vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ + $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 9458685..7b2c8db 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -2125,3 +2125,11 @@ source "crypto/Kconfig" + source "arch/x86/kvm/Kconfig" + + source "lib/Kconfig" ++ ++config ARCH_HAS_FEATHER_TRACE ++ def_bool y ++ ++config ARCH_HAS_SEND_PULL_TIMERS ++ def_bool y ++ ++source "litmus/Kconfig" +diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h +index 8e8ec66..5d07dea 100644 +--- a/arch/x86/include/asm/entry_arch.h ++++ b/arch/x86/include/asm/entry_arch.h +@@ -13,6 +13,7 @@ + BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) + BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) + BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) ++BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR) + BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) + BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) + +diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h +new file mode 100644 +index 0000000..4fd3163 +--- /dev/null ++++ b/arch/x86/include/asm/feather_trace.h +@@ -0,0 +1,17 @@ ++#ifndef _ARCH_FEATHER_TRACE_H ++#define _ARCH_FEATHER_TRACE_H ++ ++#include ++ ++static inline unsigned long long ft_timestamp(void) ++{ ++ return __native_read_tsc(); ++} ++ ++#ifdef CONFIG_X86_32 ++#include "feather_trace_32.h" ++#else ++#include "feather_trace_64.h" ++#endif ++ ++#endif +diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h +new file mode 100644 +index 0000000..70202f9 +--- /dev/null ++++ b/arch/x86/include/asm/feather_trace_32.h +@@ -0,0 +1,79 @@ ++/* Do not directly include this file. 
Include feather_trace.h instead */ ++ ++#define feather_callback __attribute__((regparm(0))) ++ ++/* ++ * make the compiler reload any register that is not saved in ++ * a cdecl function call ++ */ ++#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx" ++ ++#define ft_event(id, callback) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " call " #callback " \n\t" \ ++ ".section __event_table, \"aw\" \n\t" \ ++ ".long " #id ", 0, 1b, 2f \n\t" \ ++ ".previous \n\t" \ ++ "2: \n\t" \ ++ : : : CLOBBER_LIST) ++ ++#define ft_event0(id, callback) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " subl $4, %%esp \n\t" \ ++ " movl $" #id ", (%%esp) \n\t" \ ++ " call " #callback " \n\t" \ ++ " addl $4, %%esp \n\t" \ ++ ".section __event_table, \"aw\" \n\t" \ ++ ".long " #id ", 0, 1b, 2f \n\t" \ ++ ".previous \n\t" \ ++ "2: \n\t" \ ++ : : : CLOBBER_LIST) ++ ++#define ft_event1(id, callback, param) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " subl $8, %%esp \n\t" \ ++ " movl %0, 4(%%esp) \n\t" \ ++ " movl $" #id ", (%%esp) \n\t" \ ++ " call " #callback " \n\t" \ ++ " addl $8, %%esp \n\t" \ ++ ".section __event_table, \"aw\" \n\t" \ ++ ".long " #id ", 0, 1b, 2f \n\t" \ ++ ".previous \n\t" \ ++ "2: \n\t" \ ++ : : "r" (param) : CLOBBER_LIST) ++ ++#define ft_event2(id, callback, param, param2) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " subl $12, %%esp \n\t" \ ++ " movl %1, 8(%%esp) \n\t" \ ++ " movl %0, 4(%%esp) \n\t" \ ++ " movl $" #id ", (%%esp) \n\t" \ ++ " call " #callback " \n\t" \ ++ " addl $12, %%esp \n\t" \ ++ ".section __event_table, \"aw\" \n\t" \ ++ ".long " #id ", 0, 1b, 2f \n\t" \ ++ ".previous \n\t" \ ++ "2: \n\t" \ ++ : : "r" (param), "r" (param2) : CLOBBER_LIST) ++ ++ ++#define ft_event3(id, callback, p, p2, p3) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " subl $16, %%esp \n\t" \ ++ " movl %2, 12(%%esp) \n\t" \ ++ " movl %1, 8(%%esp) \n\t" \ ++ " movl %0, 4(%%esp) \n\t" \ ++ " movl $" #id ", (%%esp) \n\t" \ ++ " call " #callback " \n\t" \ ++ " addl $16, %%esp \n\t" \ ++ ".section __event_table, \"aw\" \n\t" \ ++ ".long " #id ", 0, 1b, 2f \n\t" \ ++ ".previous \n\t" \ ++ "2: \n\t" \ ++ : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST) ++ +diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h +new file mode 100644 +index 0000000..54ac2ae +--- /dev/null ++++ b/arch/x86/include/asm/feather_trace_64.h +@@ -0,0 +1,67 @@ ++/* Do not directly include this file. 
Include feather_trace.h instead */ ++ ++/* regparm is the default on x86_64 */ ++#define feather_callback ++ ++# define _EVENT_TABLE(id,from,to) \ ++ ".section __event_table, \"aw\"\n\t" \ ++ ".balign 8\n\t" \ ++ ".quad " #id ", 0, " #from ", " #to " \n\t" \ ++ ".previous \n\t" ++ ++/* ++ * x86_64 callee only owns rbp, rbx, r12 -> r15 ++ * the called can freely modify the others ++ */ ++#define CLOBBER_LIST "memory", "cc", "rdi", "rsi", "rdx", "rcx", \ ++ "r8", "r9", "r10", "r11", "rax" ++ ++#define ft_event(id, callback) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " call " #callback " \n\t" \ ++ _EVENT_TABLE(id,1b,2f) \ ++ "2: \n\t" \ ++ : : : CLOBBER_LIST) ++ ++#define ft_event0(id, callback) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " movq $" #id ", %%rdi \n\t" \ ++ " call " #callback " \n\t" \ ++ _EVENT_TABLE(id,1b,2f) \ ++ "2: \n\t" \ ++ : : : CLOBBER_LIST) ++ ++#define ft_event1(id, callback, param) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " movq %0, %%rsi \n\t" \ ++ " movq $" #id ", %%rdi \n\t" \ ++ " call " #callback " \n\t" \ ++ _EVENT_TABLE(id,1b,2f) \ ++ "2: \n\t" \ ++ : : "r" (param) : CLOBBER_LIST) ++ ++#define ft_event2(id, callback, param, param2) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " movq %1, %%rdx \n\t" \ ++ " movq %0, %%rsi \n\t" \ ++ " movq $" #id ", %%rdi \n\t" \ ++ " call " #callback " \n\t" \ ++ _EVENT_TABLE(id,1b,2f) \ ++ "2: \n\t" \ ++ : : "r" (param), "r" (param2) : CLOBBER_LIST) ++ ++#define ft_event3(id, callback, p, p2, p3) \ ++ __asm__ __volatile__( \ ++ "1: jmp 2f \n\t" \ ++ " movq %2, %%rcx \n\t" \ ++ " movq %1, %%rdx \n\t" \ ++ " movq %0, %%rsi \n\t" \ ++ " movq $" #id ", %%rdi \n\t" \ ++ " call " #callback " \n\t" \ ++ _EVENT_TABLE(id,1b,2f) \ ++ "2: \n\t" \ ++ : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST) +diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h +index 46c0fe0..c174115 100644 +--- a/arch/x86/include/asm/hw_irq.h ++++ b/arch/x86/include/asm/hw_irq.h +@@ -53,6 +53,8 @@ extern void threshold_interrupt(void); + extern void call_function_interrupt(void); + extern void call_function_single_interrupt(void); + ++extern void pull_timers_interrupt(void); ++ + /* IOAPIC */ + #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) + extern unsigned long io_apic_irqs; +@@ -122,6 +124,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void); + extern void smp_reschedule_interrupt(struct pt_regs *); + extern void smp_call_function_interrupt(struct pt_regs *); + extern void smp_call_function_single_interrupt(struct pt_regs *); ++extern void smp_pull_timers_interrupt(struct pt_regs *); + #ifdef CONFIG_X86_32 + extern void smp_invalidate_interrupt(struct pt_regs *); + #else +diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h +index 8767d99..bb5318b 100644 +--- a/arch/x86/include/asm/irq_vectors.h ++++ b/arch/x86/include/asm/irq_vectors.h +@@ -109,6 +109,11 @@ + #define LOCAL_TIMER_VECTOR 0xef + + /* ++ * LITMUS^RT pull timers IRQ vector ++ */ ++#define PULL_TIMERS_VECTOR 0xee ++ ++/* + * Generic system vector for platform specific use + */ + #define X86_PLATFORM_IPI_VECTOR 0xed +diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h +index b753ea5..48426f9 100644 +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -173,6 +173,10 @@ extern void print_cpu_info(struct cpuinfo_x86 *); + extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); + extern unsigned 
int init_intel_cacheinfo(struct cpuinfo_x86 *c); + extern unsigned short num_cache_leaves; ++#ifdef CONFIG_SYSFS ++extern int get_shared_cpu_map(cpumask_var_t mask, ++ unsigned int cpu, int index); ++#endif + + extern void detect_extended_topology(struct cpuinfo_x86 *c); + extern void detect_ht(struct cpuinfo_x86 *c); +diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h +index beb9b5f..987e523 100644 +--- a/arch/x86/include/asm/unistd_32.h ++++ b/arch/x86/include/asm/unistd_32.h +@@ -344,9 +344,13 @@ + #define __NR_perf_event_open 336 + #define __NR_recvmmsg 337 + ++#define __NR_LITMUS 338 ++ ++#include "litmus/unistd_32.h" ++ + #ifdef __KERNEL__ + +-#define NR_syscalls 338 ++#define NR_syscalls 338 + NR_litmus_syscalls + + #define __ARCH_WANT_IPC_PARSE_VERSION + #define __ARCH_WANT_OLD_READDIR +diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h +index ff4307b..b21c3b2 100644 +--- a/arch/x86/include/asm/unistd_64.h ++++ b/arch/x86/include/asm/unistd_64.h +@@ -664,6 +664,10 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) + #define __NR_recvmmsg 299 + __SYSCALL(__NR_recvmmsg, sys_recvmmsg) + ++#define __NR_LITMUS 299 ++ ++#include "litmus/unistd_64.h" ++ + #ifndef __NO_STUBS + #define __ARCH_WANT_OLD_READDIR + #define __ARCH_WANT_OLD_STAT +diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile +index 4c58352..d09934e 100644 +--- a/arch/x86/kernel/Makefile ++++ b/arch/x86/kernel/Makefile +@@ -117,6 +117,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o + + obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o + ++obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ++ + ### + # 64 bit specific files + ifeq ($(CONFIG_X86_64),y) +diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c +index 95962a9..94d8e47 100644 +--- a/arch/x86/kernel/cpu/intel_cacheinfo.c ++++ b/arch/x86/kernel/cpu/intel_cacheinfo.c +@@ -632,6 +632,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) + static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); + #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) + ++/* returns CPUs that share the index cache with cpu */ ++int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) ++{ ++ int ret = 0; ++ struct _cpuid4_info *this_leaf; ++ ++ if (index >= num_cache_leaves) { ++ index = num_cache_leaves - 1; ++ ret = index; ++ } ++ ++ this_leaf = CPUID4_INFO_IDX(cpu,index); ++ cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map)); ++ ++ return ret; ++} ++ + #ifdef CONFIG_SMP + static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) + { +diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S +index 0697ff1..b9ec6cd 100644 +--- a/arch/x86/kernel/entry_64.S ++++ b/arch/x86/kernel/entry_64.S +@@ -1016,6 +1016,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \ + call_function_interrupt smp_call_function_interrupt + apicinterrupt RESCHEDULE_VECTOR \ + reschedule_interrupt smp_reschedule_interrupt ++apicinterrupt PULL_TIMERS_VECTOR \ ++ pull_timers_interrupt smp_pull_timers_interrupt + #endif + + apicinterrupt ERROR_APIC_VECTOR \ +diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c +new file mode 100644 +index 0000000..37cc332 +--- /dev/null ++++ b/arch/x86/kernel/ft_event.c +@@ -0,0 +1,118 @@ ++#include ++ ++#include ++ ++/* the feather trace management functions assume ++ * exclusive access to the event table ++ */ ++ ++#ifndef CONFIG_DEBUG_RODATA ++ ++#define BYTE_JUMP 0xeb ++#define 
BYTE_JUMP_LEN 0x02 ++ ++/* for each event, there is an entry in the event table */ ++struct trace_event { ++ long id; ++ long count; ++ long start_addr; ++ long end_addr; ++}; ++ ++extern struct trace_event __start___event_table[]; ++extern struct trace_event __stop___event_table[]; ++ ++/* Workaround: if no events are defined, then the event_table section does not ++ * exist and the above references cause linker errors. This could probably be ++ * fixed by adjusting the linker script, but it is easier to maintain for us if ++ * we simply create a dummy symbol in the event table section. ++ */ ++int __event_table_dummy[0] __attribute__ ((section("__event_table"))); ++ ++int ft_enable_event(unsigned long id) ++{ ++ struct trace_event* te = __start___event_table; ++ int count = 0; ++ char* delta; ++ unsigned char* instr; ++ ++ while (te < __stop___event_table) { ++ if (te->id == id && ++te->count == 1) { ++ instr = (unsigned char*) te->start_addr; ++ /* make sure we don't clobber something wrong */ ++ if (*instr == BYTE_JUMP) { ++ delta = (((unsigned char*) te->start_addr) + 1); ++ *delta = 0; ++ } ++ } ++ if (te->id == id) ++ count++; ++ te++; ++ } ++ ++ printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count); ++ return count; ++} ++ ++int ft_disable_event(unsigned long id) ++{ ++ struct trace_event* te = __start___event_table; ++ int count = 0; ++ char* delta; ++ unsigned char* instr; ++ ++ while (te < __stop___event_table) { ++ if (te->id == id && --te->count == 0) { ++ instr = (unsigned char*) te->start_addr; ++ if (*instr == BYTE_JUMP) { ++ delta = (((unsigned char*) te->start_addr) + 1); ++ *delta = te->end_addr - te->start_addr - ++ BYTE_JUMP_LEN; ++ } ++ } ++ if (te->id == id) ++ count++; ++ te++; ++ } ++ ++ printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count); ++ return count; ++} ++ ++int ft_disable_all_events(void) ++{ ++ struct trace_event* te = __start___event_table; ++ int count = 0; ++ char* delta; ++ unsigned char* instr; ++ ++ while (te < __stop___event_table) { ++ if (te->count) { ++ instr = (unsigned char*) te->start_addr; ++ if (*instr == BYTE_JUMP) { ++ delta = (((unsigned char*) te->start_addr) ++ + 1); ++ *delta = te->end_addr - te->start_addr - ++ BYTE_JUMP_LEN; ++ te->count = 0; ++ count++; ++ } ++ } ++ te++; ++ } ++ return count; ++} ++ ++int ft_is_event_enabled(unsigned long id) ++{ ++ struct trace_event* te = __start___event_table; ++ ++ while (te < __stop___event_table) { ++ if (te->id == id) ++ return te->count; ++ te++; ++ } ++ return 0; ++} ++ ++#endif +diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c +index 0ed2d30..a760ce1 100644 +--- a/arch/x86/kernel/irqinit.c ++++ b/arch/x86/kernel/irqinit.c +@@ -189,6 +189,9 @@ static void __init smp_intr_init(void) + alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, + call_function_single_interrupt); + ++ /* IPI for hrtimer pulling on remote cpus */ ++ alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt); ++ + /* Low priority IPI to cleanup after moving an irq */ + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); +diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c +index d801210..97af589 100644 +--- a/arch/x86/kernel/smp.c ++++ b/arch/x86/kernel/smp.c +@@ -23,6 +23,9 @@ + #include + #include + ++#include ++#include ++ + #include + #include + #include +@@ -118,6 +121,7 @@ static void native_smp_send_reschedule(int cpu) + WARN_ON(1); + return; + } ++ TS_SEND_RESCHED_START(cpu); + 
apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + } + +@@ -147,6 +151,16 @@ void native_send_call_func_ipi(const struct cpumask *mask) + free_cpumask_var(allbutself); + } + ++/* trigger timers on remote cpu */ ++void smp_send_pull_timers(int cpu) ++{ ++ if (unlikely(cpu_is_offline(cpu))) { ++ WARN_ON(1); ++ return; ++ } ++ apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR); ++} ++ + /* + * this function calls the 'stop' function on all other CPUs in the system. + */ +@@ -198,7 +212,12 @@ static void native_smp_send_stop(void) + void smp_reschedule_interrupt(struct pt_regs *regs) + { + ack_APIC_irq(); ++ /* LITMUS^RT needs this interrupt to proper reschedule ++ * on this cpu ++ */ ++ set_tsk_need_resched(current); + inc_irq_stat(irq_resched_count); ++ TS_SEND_RESCHED_END; + /* + * KVM uses this interrupt to force a cpu out of guest mode + */ +@@ -222,6 +241,15 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) + irq_exit(); + } + ++extern void hrtimer_pull(void); ++ ++void smp_pull_timers_interrupt(struct pt_regs *regs) ++{ ++ ack_APIC_irq(); ++ TRACE("pull timer interrupt\n"); ++ hrtimer_pull(); ++} ++ + struct smp_ops smp_ops = { + .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, + .smp_prepare_cpus = native_smp_prepare_cpus, +diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S +index 8b37293..5da9a68 100644 +--- a/arch/x86/kernel/syscall_table_32.S ++++ b/arch/x86/kernel/syscall_table_32.S +@@ -337,3 +337,17 @@ ENTRY(sys_call_table) + .long sys_rt_tgsigqueueinfo /* 335 */ + .long sys_perf_event_open + .long sys_recvmmsg ++ .long sys_set_rt_task_param /* LITMUS^RT 338 */ ++ .long sys_get_rt_task_param ++ .long sys_complete_job ++ .long sys_od_open ++ .long sys_od_close ++ .long sys_fmlp_down ++ .long sys_fmlp_up ++ .long sys_srp_down ++ .long sys_srp_up ++ .long sys_query_job_no ++ .long sys_wait_for_job_release ++ .long sys_wait_for_ts_release ++ .long sys_release_ts ++ .long sys_null_call +diff --git a/fs/exec.c b/fs/exec.c +index e6e94c6..0293087 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -19,7 +19,7 @@ + * current->executable is only used by the procfs. This allows a dispatch + * table to check for several different types of binary formats. We keep + * trying until we recognize the file or we run out of supported binary +- * formats. ++ * formats. + */ + + #include +@@ -56,6 +56,8 @@ + #include + #include + ++#include ++ + #include + #include + #include +@@ -79,7 +81,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert) + insert ? list_add(&fmt->lh, &formats) : + list_add_tail(&fmt->lh, &formats); + write_unlock(&binfmt_lock); +- return 0; ++ return 0; + } + + EXPORT_SYMBOL(__register_binfmt); +@@ -1045,7 +1047,7 @@ void setup_new_exec(struct linux_binprm * bprm) + group */ + + current->self_exec_id++; +- ++ + flush_signal_handlers(current, 0); + flush_old_files(current->files); + } +@@ -1135,8 +1137,8 @@ int check_unsafe_exec(struct linux_binprm *bprm) + return res; + } + +-/* +- * Fill the binprm structure from the inode. ++/* ++ * Fill the binprm structure from the inode. + * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes + * + * This may be called multiple times for binary chains (scripts for example). 
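
The syscall table entries added above (arch/x86/kernel/syscall_table_32.S, starting at __NR_LITMUS = 338) are what liblitmus ultimately invokes. The sketch below is a minimal user-space illustration only, not part of the patch: it mirrors struct rt_task from include/litmus/rt_param.h (added later in this patch) and assumes the (pid, struct rt_task *) calling convention for sys_set_rt_task_param and a no-argument sys_complete_job, as used by liblitmus.

/* Minimal user-space sketch (not part of the patch): declare the calling
 * process as a periodic LITMUS^RT task and signal job completion.
 * Assumes the x86-32 syscall numbers from the table above and the
 * liblitmus calling conventions; enums are widened to unsigned int here.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

typedef unsigned long long lt_t;        /* mirrors include/litmus/rt_param.h */

struct rt_task {                        /* layout mirrors struct rt_task in the patch */
        lt_t exec_cost;
        lt_t period;
        lt_t phase;
        unsigned int cpu;
        unsigned int cls;               /* task_class_t */
        unsigned int budget_policy;     /* budget_policy_t */
};

#define NR_set_rt_task_param 338        /* __NR_LITMUS + 0 */
#define NR_complete_job      340        /* __NR_LITMUS + 2 */

int main(void)
{
        struct rt_task p = {
                .exec_cost = 10000000ULL,       /*  10 ms, in ns */
                .period    = 100000000ULL,      /* 100 ms, in ns */
                .cpu       = 0,                 /* partition; ignored by global plugins */
        };

        if (syscall(NR_set_rt_task_param, getpid(), &p) < 0) {
                perror("set_rt_task_param");
                return 1;
        }
        /* ...switch to SCHED_LITMUS via sched_setscheduler(), do one job's work... */
        syscall(NR_complete_job);               /* wait for the next job release */
        return 0;
}

Before the first call to sys_complete_job succeeds, the task would also have to switch to the new SCHED_LITMUS policy (value 6, added to include/linux/sched.h below) via sched_setscheduler().
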
+@@ -1348,6 +1350,7 @@ int do_execve(char * filename, + goto out_unmark; + + sched_exec(); ++ litmus_exec(); + + bprm->file = file; + bprm->filename = filename; +diff --git a/fs/inode.c b/fs/inode.c +index 407bf39..aaaaf09 100644 +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -271,6 +271,8 @@ void inode_init_once(struct inode *inode) + #ifdef CONFIG_FSNOTIFY + INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); + #endif ++ INIT_LIST_HEAD(&inode->i_obj_list); ++ mutex_init(&inode->i_obj_mutex); + } + EXPORT_SYMBOL(inode_init_once); + +diff --git a/include/linux/completion.h b/include/linux/completion.h +index 4a6b604..258bec1 100644 +--- a/include/linux/completion.h ++++ b/include/linux/completion.h +@@ -88,6 +88,7 @@ extern bool completion_done(struct completion *x); + + extern void complete(struct completion *); + extern void complete_all(struct completion *); ++extern void complete_n(struct completion *, int n); + + /** + * INIT_COMPLETION: - reinitialize a completion structure +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 44f35ae..8949184 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -15,8 +15,8 @@ + * nr_file rlimit, so it's safe to set up a ridiculously high absolute + * upper limit on files-per-process. + * +- * Some programs (notably those using select()) may have to be +- * recompiled to take full advantage of the new limits.. ++ * Some programs (notably those using select()) may have to be ++ * recompiled to take full advantage of the new limits.. + */ + + /* Fixed constants first: */ +@@ -173,7 +173,7 @@ struct inodes_stat_t { + #define SEL_EX 4 + + /* public flags for file_system_type */ +-#define FS_REQUIRES_DEV 1 ++#define FS_REQUIRES_DEV 1 + #define FS_BINARY_MOUNTDATA 2 + #define FS_HAS_SUBTYPE 4 + #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ +@@ -471,7 +471,7 @@ struct iattr { + */ + #include + +-/** ++/** + * enum positive_aop_returns - aop return codes with specific semantics + * + * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has +@@ -481,7 +481,7 @@ struct iattr { + * be a candidate for writeback again in the near + * future. Other callers must be careful to unlock + * the page if they get this return. Returned by +- * writepage(); ++ * writepage(); + * + * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has + * unlocked it and the page might have been truncated. +@@ -720,6 +720,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping) + + struct posix_acl; + #define ACL_NOT_CACHED ((void *)(-1)) ++struct inode_obj_id_table; + + struct inode { + struct hlist_node i_hash; +@@ -788,6 +789,8 @@ struct inode { + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; + #endif ++ struct list_head i_obj_list; ++ struct mutex i_obj_mutex; + void *i_private; /* fs or device private pointer */ + }; + +@@ -1000,10 +1003,10 @@ static inline int file_check_writeable(struct file *filp) + + #define MAX_NON_LFS ((1UL<<31) - 1) + +-/* Page cache limit. The filesystems should put that into their s_maxbytes +- limits, otherwise bad things can happen in VM. */ ++/* Page cache limit. The filesystems should put that into their s_maxbytes ++ limits, otherwise bad things can happen in VM. 
*/ + #if BITS_PER_LONG==32 +-#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) ++#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) + #elif BITS_PER_LONG==64 + #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL + #endif +@@ -2129,7 +2132,7 @@ extern int may_open(struct path *, int, int); + + extern int kernel_read(struct file *, loff_t, char *, unsigned long); + extern struct file * open_exec(const char *); +- ++ + /* fs/dcache.c -- generic fs support functions */ + extern int is_subdir(struct dentry *, struct dentry *); + extern int path_is_under(struct path *, struct path *); +diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h +index 5d86fb2..9470a9e 100644 +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -167,6 +167,7 @@ struct hrtimer_clock_base { + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt ++ * @to_pull: LITMUS^RT list of timers to be pulled on this cpu + */ + struct hrtimer_cpu_base { + raw_spinlock_t lock; +@@ -180,8 +181,32 @@ struct hrtimer_cpu_base { + unsigned long nr_hangs; + ktime_t max_hang_time; + #endif ++ struct list_head to_pull; + }; + ++#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS ++ ++#define HRTIMER_START_ON_INACTIVE 0 ++#define HRTIMER_START_ON_QUEUED 1 ++ ++/* ++ * struct hrtimer_start_on_info - save timer info on remote cpu ++ * @list: list of hrtimer_start_on_info on remote cpu (to_pull) ++ * @timer: timer to be triggered on remote cpu ++ * @time: time event ++ * @mode: timer mode ++ * @state: activity flag ++ */ ++struct hrtimer_start_on_info { ++ struct list_head list; ++ struct hrtimer *timer; ++ ktime_t time; ++ enum hrtimer_mode mode; ++ atomic_t state; ++}; ++ ++#endif ++ + static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) + { + timer->_expires = time; +@@ -348,6 +373,13 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long delta_ns, + const enum hrtimer_mode mode, int wakeup); + ++#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS ++extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info); ++extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info, ++ struct hrtimer *timer, ktime_t time, ++ const enum hrtimer_mode mode); ++#endif ++ + extern int hrtimer_cancel(struct hrtimer *timer); + extern int hrtimer_try_to_cancel(struct hrtimer *timer); + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 2b7b81d..225347d 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -38,6 +38,7 @@ + #define SCHED_BATCH 3 + /* SCHED_ISO: reserved but not implemented yet */ + #define SCHED_IDLE 5 ++#define SCHED_LITMUS 6 + /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ + #define SCHED_RESET_ON_FORK 0x40000000 + +@@ -94,6 +95,8 @@ struct sched_param { + + #include + ++#include ++ + struct exec_domain; + struct futex_pi_state; + struct robust_list_head; +@@ -1166,6 +1169,7 @@ struct sched_rt_entity { + }; + + struct rcu_node; ++struct od_table_entry; + + struct task_struct { + volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ +@@ -1250,9 +1254,9 @@ struct task_struct { + unsigned long stack_canary; + #endif + +- /* ++ /* + * pointers to (original) parent process, youngest child, younger sibling, +- * older sibling, respectively. (p->father can be replaced with ++ * older sibling, respectively. 
(p->father can be replaced with + * p->real_parent->pid) + */ + struct task_struct *real_parent; /* real parent process */ +@@ -1464,6 +1468,13 @@ struct task_struct { + int make_it_fail; + #endif + struct prop_local_single dirties; ++ ++ /* LITMUS RT parameters and state */ ++ struct rt_param rt_param; ++ ++ /* references to PI semaphores, etc. */ ++ struct od_table_entry *od_table; ++ + #ifdef CONFIG_LATENCYTOP + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; +@@ -2018,7 +2029,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s + spin_unlock_irqrestore(&tsk->sighand->siglock, flags); + + return ret; +-} ++} + + extern void block_all_signals(int (*notifier)(void *priv), void *priv, + sigset_t *mask); +diff --git a/include/linux/smp.h b/include/linux/smp.h +index cfa2d20..f86d407 100644 +--- a/include/linux/smp.h ++++ b/include/linux/smp.h +@@ -80,6 +80,11 @@ int smp_call_function_any(const struct cpumask *mask, + void (*func)(void *info), void *info, int wait); + + /* ++ * sends a 'pull timer' event to a remote CPU ++ */ ++extern void smp_send_pull_timers(int cpu); ++ ++/* + * Generic and arch helpers + */ + #ifdef CONFIG_USE_GENERIC_SMP_HELPERS +diff --git a/include/linux/tick.h b/include/linux/tick.h +index d2ae79e..25d0cf4 100644 +--- a/include/linux/tick.h ++++ b/include/linux/tick.h +@@ -73,6 +73,11 @@ extern int tick_is_oneshot_available(void); + extern struct tick_device *tick_get_device(int cpu); + + # ifdef CONFIG_HIGH_RES_TIMERS ++/* LITMUS^RT tick alignment */ ++#define LINUX_DEFAULT_TICKS 0 ++#define LITMUS_ALIGNED_TICKS 1 ++#define LITMUS_STAGGERED_TICKS 2 ++ + extern int tick_init_highres(void); + extern int tick_program_event(ktime_t expires, int force); + extern void tick_setup_sched_timer(void); +diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h +new file mode 100644 +index 0000000..cf4864a +--- /dev/null ++++ b/include/litmus/bheap.h +@@ -0,0 +1,77 @@ ++/* bheaps.h -- Binomial Heaps ++ * ++ * (c) 2008, 2009 Bjoern Brandenburg ++ */ ++ ++#ifndef BHEAP_H ++#define BHEAP_H ++ ++#define NOT_IN_HEAP UINT_MAX ++ ++struct bheap_node { ++ struct bheap_node* parent; ++ struct bheap_node* next; ++ struct bheap_node* child; ++ ++ unsigned int degree; ++ void* value; ++ struct bheap_node** ref; ++}; ++ ++struct bheap { ++ struct bheap_node* head; ++ /* We cache the minimum of the heap. ++ * This speeds up repeated peek operations. 
++ */ ++ struct bheap_node* min; ++}; ++ ++typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b); ++ ++void bheap_init(struct bheap* heap); ++void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value); ++ ++static inline int bheap_node_in_heap(struct bheap_node* h) ++{ ++ return h->degree != NOT_IN_HEAP; ++} ++ ++static inline int bheap_empty(struct bheap* heap) ++{ ++ return heap->head == NULL && heap->min == NULL; ++} ++ ++/* insert (and reinitialize) a node into the heap */ ++void bheap_insert(bheap_prio_t higher_prio, ++ struct bheap* heap, ++ struct bheap_node* node); ++ ++/* merge addition into target */ ++void bheap_union(bheap_prio_t higher_prio, ++ struct bheap* target, ++ struct bheap* addition); ++ ++struct bheap_node* bheap_peek(bheap_prio_t higher_prio, ++ struct bheap* heap); ++ ++struct bheap_node* bheap_take(bheap_prio_t higher_prio, ++ struct bheap* heap); ++ ++void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap); ++int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node); ++ ++void bheap_delete(bheap_prio_t higher_prio, ++ struct bheap* heap, ++ struct bheap_node* node); ++ ++/* allocate from memcache */ ++struct bheap_node* bheap_node_alloc(int gfp_flags); ++void bheap_node_free(struct bheap_node* hn); ++ ++/* allocate a heap node for value and insert into the heap */ ++int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, ++ void* value, int gfp_flags); ++ ++void* bheap_take_del(bheap_prio_t higher_prio, ++ struct bheap* heap); ++#endif +diff --git a/include/litmus/budget.h b/include/litmus/budget.h +new file mode 100644 +index 0000000..732530e +--- /dev/null ++++ b/include/litmus/budget.h +@@ -0,0 +1,8 @@ ++#ifndef _LITMUS_BUDGET_H_ ++#define _LITMUS_BUDGET_H_ ++ ++/* Update the per-processor enforcement timer (arm/reproram/cancel) for ++ * the next task. */ ++void update_enforcement_timer(struct task_struct* t); ++ ++#endif +diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h +new file mode 100644 +index 0000000..80d4321 +--- /dev/null ++++ b/include/litmus/edf_common.h +@@ -0,0 +1,27 @@ ++/* ++ * EDF common data structures and utility functions shared by all EDF ++ * based scheduler plugins ++ */ ++ ++/* CLEANUP: Add comments and make it less messy. ++ * ++ */ ++ ++#ifndef __UNC_EDF_COMMON_H__ ++#define __UNC_EDF_COMMON_H__ ++ ++#include ++ ++void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, ++ release_jobs_t release); ++ ++int edf_higher_prio(struct task_struct* first, ++ struct task_struct* second); ++ ++int edf_ready_order(struct bheap_node* a, struct bheap_node* b); ++ ++int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t); ++ ++int edf_set_hp_task(struct pi_semaphore *sem); ++int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu); ++#endif +diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h +new file mode 100644 +index 0000000..61f1b5b +--- /dev/null ++++ b/include/litmus/fdso.h +@@ -0,0 +1,70 @@ ++/* fdso.h - file descriptor attached shared objects ++ * ++ * (c) 2007 B. 
Brandenburg, LITMUS^RT project ++ */ ++ ++#ifndef _LINUX_FDSO_H_ ++#define _LINUX_FDSO_H_ ++ ++#include ++#include ++ ++#include ++#include ++ ++#define MAX_OBJECT_DESCRIPTORS 32 ++ ++typedef enum { ++ MIN_OBJ_TYPE = 0, ++ ++ FMLP_SEM = 0, ++ SRP_SEM = 1, ++ ++ MAX_OBJ_TYPE = 1 ++} obj_type_t; ++ ++struct inode_obj_id { ++ struct list_head list; ++ atomic_t count; ++ struct inode* inode; ++ ++ obj_type_t type; ++ void* obj; ++ unsigned int id; ++}; ++ ++ ++struct od_table_entry { ++ unsigned int used; ++ ++ struct inode_obj_id* obj; ++ void* extra; ++}; ++ ++struct fdso_ops { ++ void* (*create) (void); ++ void (*destroy)(void*); ++ int (*open) (struct od_table_entry*, void* __user); ++ int (*close) (struct od_table_entry*); ++}; ++ ++/* translate a userspace supplied od into the raw table entry ++ * returns NULL if od is invalid ++ */ ++struct od_table_entry* __od_lookup(int od); ++ ++/* translate a userspace supplied od into the associated object ++ * returns NULL if od is invalid ++ */ ++static inline void* od_lookup(int od, obj_type_t type) ++{ ++ struct od_table_entry* e = __od_lookup(od); ++ return e && e->obj->type == type ? e->obj->obj : NULL; ++} ++ ++#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) ++#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) ++#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) ++ ++ ++#endif +diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h +new file mode 100644 +index 0000000..6c18277 +--- /dev/null ++++ b/include/litmus/feather_buffer.h +@@ -0,0 +1,94 @@ ++#ifndef _FEATHER_BUFFER_H_ ++#define _FEATHER_BUFFER_H_ ++ ++/* requires UINT_MAX and memcpy */ ++ ++#define SLOT_FREE 0 ++#define SLOT_BUSY 1 ++#define SLOT_READY 2 ++ ++struct ft_buffer { ++ unsigned int slot_count; ++ unsigned int slot_size; ++ ++ int free_count; ++ unsigned int write_idx; ++ unsigned int read_idx; ++ ++ char* slots; ++ void* buffer_mem; ++ unsigned int failed_writes; ++}; ++ ++static inline int init_ft_buffer(struct ft_buffer* buf, ++ unsigned int slot_count, ++ unsigned int slot_size, ++ char* slots, ++ void* buffer_mem) ++{ ++ int i = 0; ++ if (!slot_count || UINT_MAX % slot_count != slot_count - 1) { ++ /* The slot count must divide UNIT_MAX + 1 so that when it ++ * wraps around the index correctly points to 0. 
++ */ ++ return 0; ++ } else { ++ buf->slot_count = slot_count; ++ buf->slot_size = slot_size; ++ buf->slots = slots; ++ buf->buffer_mem = buffer_mem; ++ buf->free_count = slot_count; ++ buf->write_idx = 0; ++ buf->read_idx = 0; ++ buf->failed_writes = 0; ++ for (i = 0; i < slot_count; i++) ++ buf->slots[i] = SLOT_FREE; ++ return 1; ++ } ++} ++ ++static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr) ++{ ++ int free = fetch_and_dec(&buf->free_count); ++ unsigned int idx; ++ if (free <= 0) { ++ fetch_and_inc(&buf->free_count); ++ *ptr = 0; ++ fetch_and_inc(&buf->failed_writes); ++ return 0; ++ } else { ++ idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count; ++ buf->slots[idx] = SLOT_BUSY; ++ *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size; ++ return 1; ++ } ++} ++ ++static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr) ++{ ++ unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size; ++ buf->slots[idx] = SLOT_READY; ++} ++ ++ ++/* exclusive reader access is assumed */ ++static inline int ft_buffer_read(struct ft_buffer* buf, void* dest) ++{ ++ unsigned int idx; ++ if (buf->free_count == buf->slot_count) ++ /* nothing available */ ++ return 0; ++ idx = buf->read_idx % buf->slot_count; ++ if (buf->slots[idx] == SLOT_READY) { ++ memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size, ++ buf->slot_size); ++ buf->slots[idx] = SLOT_FREE; ++ buf->read_idx++; ++ fetch_and_inc(&buf->free_count); ++ return 1; ++ } else ++ return 0; ++} ++ ++ ++#endif +diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h +new file mode 100644 +index 0000000..028dfb2 +--- /dev/null ++++ b/include/litmus/feather_trace.h +@@ -0,0 +1,65 @@ ++#ifndef _FEATHER_TRACE_H_ ++#define _FEATHER_TRACE_H_ ++ ++#include ++ ++int ft_enable_event(unsigned long id); ++int ft_disable_event(unsigned long id); ++int ft_is_event_enabled(unsigned long id); ++int ft_disable_all_events(void); ++ ++/* atomic_* funcitons are inline anyway */ ++static inline int fetch_and_inc(int *val) ++{ ++ return atomic_add_return(1, (atomic_t*) val) - 1; ++} ++ ++static inline int fetch_and_dec(int *val) ++{ ++ return atomic_sub_return(1, (atomic_t*) val) + 1; ++} ++ ++/* Don't use rewriting implementation if kernel text pages are read-only. ++ * Ftrace gets around this by using the identity mapping, but that's more ++ * effort that is warrented right now for Feather-Trace. ++ * Eventually, it may make sense to replace Feather-Trace with ftrace. 
++ */ ++#if defined(CONFIG_ARCH_HAS_FEATHER_TRACE) && !defined(CONFIG_DEBUG_RODATA) ++ ++#include ++ ++#else /* !__ARCH_HAS_FEATHER_TRACE */ ++ ++/* provide default implementation */ ++ ++#include /* for get_cycles() */ ++ ++static inline unsigned long long ft_timestamp(void) ++{ ++ return get_cycles(); ++} ++ ++#define feather_callback ++ ++#define MAX_EVENTS 1024 ++ ++extern int ft_events[MAX_EVENTS]; ++ ++#define ft_event(id, callback) \ ++ if (ft_events[id]) callback(); ++ ++#define ft_event0(id, callback) \ ++ if (ft_events[id]) callback(id); ++ ++#define ft_event1(id, callback, param) \ ++ if (ft_events[id]) callback(id, param); ++ ++#define ft_event2(id, callback, param, param2) \ ++ if (ft_events[id]) callback(id, param, param2); ++ ++#define ft_event3(id, callback, p, p2, p3) \ ++ if (ft_events[id]) callback(id, p, p2, p3); ++ ++#endif /* __ARCH_HAS_FEATHER_TRACE */ ++ ++#endif +diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h +new file mode 100644 +index 0000000..7697b46 +--- /dev/null ++++ b/include/litmus/ftdev.h +@@ -0,0 +1,49 @@ ++#ifndef _LITMUS_FTDEV_H_ ++#define _LITMUS_FTDEV_H_ ++ ++#include ++#include ++#include ++#include ++ ++#define MAX_FTDEV_MINORS NR_CPUS ++ ++#define FTDEV_ENABLE_CMD 0 ++#define FTDEV_DISABLE_CMD 1 ++ ++struct ftdev; ++ ++/* return 0 if buffer can be opened, otherwise -$REASON */ ++typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no); ++/* return 0 on success, otherwise -$REASON */ ++typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no); ++typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no); ++ ++ ++struct ftdev_event; ++ ++struct ftdev_minor { ++ struct ft_buffer* buf; ++ unsigned int readers; ++ struct mutex lock; ++ /* FIXME: filter for authorized events */ ++ struct ftdev_event* events; ++}; ++ ++struct ftdev { ++ struct cdev cdev; ++ /* FIXME: don't waste memory, allocate dynamically */ ++ struct ftdev_minor minor[MAX_FTDEV_MINORS]; ++ unsigned int minor_cnt; ++ ftdev_alloc_t alloc; ++ ftdev_free_t free; ++ ftdev_can_open_t can_open; ++}; ++ ++struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size); ++void free_ft_buffer(struct ft_buffer* buf); ++ ++void ftdev_init(struct ftdev* ftdev, struct module* owner); ++int register_ftdev(struct ftdev* ftdev, const char* name, int major); ++ ++#endif +diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h +new file mode 100644 +index 0000000..9bd361e +--- /dev/null ++++ b/include/litmus/jobs.h +@@ -0,0 +1,9 @@ ++#ifndef __LITMUS_JOBS_H__ ++#define __LITMUS_JOBS_H__ ++ ++void prepare_for_next_period(struct task_struct *t); ++void release_at(struct task_struct *t, lt_t start); ++long complete_job(void); ++ ++#endif ++ +diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h +new file mode 100644 +index 0000000..5d20276 +--- /dev/null ++++ b/include/litmus/litmus.h +@@ -0,0 +1,267 @@ ++/* ++ * Constant definitions related to ++ * scheduling policy. ++ */ ++ ++#ifndef _LINUX_LITMUS_H_ ++#define _LINUX_LITMUS_H_ ++ ++#include ++#include ++ ++#ifdef CONFIG_RELEASE_MASTER ++extern atomic_t release_master_cpu; ++#endif ++ ++extern atomic_t __log_seq_no; ++ ++#define TRACE(fmt, args...) \ ++ sched_trace_log_message("%d P%d: " fmt, atomic_add_return(1, &__log_seq_no), \ ++ raw_smp_processor_id(), ## args) ++ ++#define TRACE_TASK(t, fmt, args...) \ ++ TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args) ++ ++#define TRACE_CUR(fmt, args...) 
\ ++ TRACE_TASK(current, fmt, ## args) ++ ++#define TRACE_BUG_ON(cond) \ ++ do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \ ++ "called from %p current=%s/%d state=%d " \ ++ "flags=%x partition=%d cpu=%d rtflags=%d"\ ++ " job=%u timeslice=%u\n", \ ++ #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \ ++ current->pid, current->state, current->flags, \ ++ get_partition(current), smp_processor_id(), get_rt_flags(current), \ ++ current->rt_param.job_params.job_no, \ ++ current->rt.time_slice\ ++ ); } while(0); ++ ++ ++/* in_list - is a given list_head queued on some list? ++ */ ++static inline int in_list(struct list_head* list) ++{ ++ return !( /* case 1: deleted */ ++ (list->next == LIST_POISON1 && ++ list->prev == LIST_POISON2) ++ || ++ /* case 2: initialized */ ++ (list->next == list && ++ list->prev == list) ++ ); ++} ++ ++#define NO_CPU 0xffffffff ++ ++void litmus_fork(struct task_struct *tsk); ++void litmus_exec(void); ++/* clean up real-time state of a task */ ++void exit_litmus(struct task_struct *dead_tsk); ++ ++long litmus_admit_task(struct task_struct *tsk); ++void litmus_exit_task(struct task_struct *tsk); ++ ++#define is_realtime(t) ((t)->policy == SCHED_LITMUS) ++#define rt_transition_pending(t) \ ++ ((t)->rt_param.transition_pending) ++ ++#define tsk_rt(t) (&(t)->rt_param) ++ ++/* Realtime utility macros */ ++#define get_rt_flags(t) (tsk_rt(t)->flags) ++#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f)) ++#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost) ++#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) ++#define get_rt_period(t) (tsk_rt(t)->task_params.period) ++#define get_rt_phase(t) (tsk_rt(t)->task_params.phase) ++#define get_partition(t) (tsk_rt(t)->task_params.cpu) ++#define get_deadline(t) (tsk_rt(t)->job_params.deadline) ++#define get_release(t) (tsk_rt(t)->job_params.release) ++#define get_class(t) (tsk_rt(t)->task_params.cls) ++ ++inline static int budget_exhausted(struct task_struct* t) ++{ ++ return get_exec_time(t) >= get_exec_cost(t); ++} ++ ++inline static lt_t budget_remaining(struct task_struct* t) ++{ ++ if (!budget_exhausted(t)) ++ return get_exec_time(t) - get_exec_cost(t); ++ else ++ /* avoid overflow */ ++ return 0; ++} ++ ++#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) ++ ++#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \ ++ == PRECISE_ENFORCEMENT) ++ ++#define is_hrt(t) \ ++ (tsk_rt(t)->task_params.class == RT_CLASS_HARD) ++#define is_srt(t) \ ++ (tsk_rt(t)->task_params.class == RT_CLASS_SOFT) ++#define is_be(t) \ ++ (tsk_rt(t)->task_params.class == RT_CLASS_BEST_EFFORT) ++ ++/* Our notion of time within LITMUS: kernel monotonic time. */ ++static inline lt_t litmus_clock(void) ++{ ++ return ktime_to_ns(ktime_get()); ++} ++ ++/* A macro to convert from nanoseconds to ktime_t. */ ++#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t) ++ ++#define get_domain(t) (tsk_rt(t)->domain) ++ ++/* Honor the flag in the preempt_count variable that is set ++ * when scheduling is in progress. 
++ */ ++#define is_running(t) \ ++ ((t)->state == TASK_RUNNING || \ ++ task_thread_info(t)->preempt_count & PREEMPT_ACTIVE) ++ ++#define is_blocked(t) \ ++ (!is_running(t)) ++#define is_released(t, now) \ ++ (lt_before_eq(get_release(t), now)) ++#define is_tardy(t, now) \ ++ (lt_before_eq(tsk_rt(t)->job_params.deadline, now)) ++ ++/* real-time comparison macros */ ++#define earlier_deadline(a, b) (lt_before(\ ++ (a)->rt_param.job_params.deadline,\ ++ (b)->rt_param.job_params.deadline)) ++#define earlier_release(a, b) (lt_before(\ ++ (a)->rt_param.job_params.release,\ ++ (b)->rt_param.job_params.release)) ++ ++void preempt_if_preemptable(struct task_struct* t, int on_cpu); ++ ++#ifdef CONFIG_SRP ++void srp_ceiling_block(void); ++#else ++#define srp_ceiling_block() /* nothing */ ++#endif ++ ++#define bheap2task(hn) ((struct task_struct*) hn->value) ++ ++#ifdef CONFIG_NP_SECTION ++ ++static inline int is_kernel_np(struct task_struct *t) ++{ ++ return tsk_rt(t)->kernel_np; ++} ++ ++static inline int is_user_np(struct task_struct *t) ++{ ++ return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->np_flag : 0; ++} ++ ++static inline void request_exit_np(struct task_struct *t) ++{ ++ if (is_user_np(t)) { ++ /* Set the flag that tells user space to call ++ * into the kernel at the end of a critical section. */ ++ if (likely(tsk_rt(t)->ctrl_page)) { ++ TRACE_TASK(t, "setting delayed_preemption flag\n"); ++ tsk_rt(t)->ctrl_page->delayed_preemption = 1; ++ } ++ } ++} ++ ++static inline void clear_exit_np(struct task_struct *t) ++{ ++ if (likely(tsk_rt(t)->ctrl_page)) ++ tsk_rt(t)->ctrl_page->delayed_preemption = 0; ++} ++ ++static inline void make_np(struct task_struct *t) ++{ ++ tsk_rt(t)->kernel_np++; ++} ++ ++/* Caller should check if preemption is necessary when ++ * the function return 0. ++ */ ++static inline int take_np(struct task_struct *t) ++{ ++ return --tsk_rt(t)->kernel_np; ++} ++ ++#else ++ ++static inline int is_kernel_np(struct task_struct* t) ++{ ++ return 0; ++} ++ ++static inline int is_user_np(struct task_struct* t) ++{ ++ return 0; ++} ++ ++static inline void request_exit_np(struct task_struct *t) ++{ ++ /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */ ++ BUG(); ++} ++ ++static inline void clear_exit_np(struct task_struct* t) ++{ ++} ++ ++#endif ++ ++static inline int is_np(struct task_struct *t) ++{ ++#ifdef CONFIG_SCHED_DEBUG_TRACE ++ int kernel, user; ++ kernel = is_kernel_np(t); ++ user = is_user_np(t); ++ if (kernel || user) ++ TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n", ++ ++ kernel, user); ++ return kernel || user; ++#else ++ return unlikely(is_kernel_np(t) || is_user_np(t)); ++#endif ++} ++ ++static inline int is_present(struct task_struct* t) ++{ ++ return t && tsk_rt(t)->present; ++} ++ ++ ++/* make the unit explicit */ ++typedef unsigned long quanta_t; ++ ++enum round { ++ FLOOR, ++ CEIL ++}; ++ ++ ++/* Tick period is used to convert ns-specified execution ++ * costs and periods into tick-based equivalents. ++ */ ++extern ktime_t tick_period; ++ ++static inline quanta_t time2quanta(lt_t time, enum round round) ++{ ++ s64 quantum_length = ktime_to_ns(tick_period); ++ ++ if (do_div(time, quantum_length) && round == CEIL) ++ time++; ++ return (quanta_t) time; ++} ++ ++/* By how much is cpu staggered behind CPU 0? 
*/ ++u64 cpu_stagger_offset(int cpu); ++ ++#endif +diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h +new file mode 100644 +index 0000000..ac24929 +--- /dev/null ++++ b/include/litmus/rt_domain.h +@@ -0,0 +1,182 @@ ++/* CLEANUP: Add comments and make it less messy. ++ * ++ */ ++ ++#ifndef __UNC_RT_DOMAIN_H__ ++#define __UNC_RT_DOMAIN_H__ ++ ++#include ++ ++#define RELEASE_QUEUE_SLOTS 127 /* prime */ ++ ++struct _rt_domain; ++ ++typedef int (*check_resched_needed_t)(struct _rt_domain *rt); ++typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks); ++ ++struct release_queue { ++ /* each slot maintains a list of release heaps sorted ++ * by release time */ ++ struct list_head slot[RELEASE_QUEUE_SLOTS]; ++}; ++ ++typedef struct _rt_domain { ++ /* runnable rt tasks are in here */ ++ raw_spinlock_t ready_lock; ++ struct bheap ready_queue; ++ ++ /* real-time tasks waiting for release are in here */ ++ raw_spinlock_t release_lock; ++ struct release_queue release_queue; ++ ++#ifdef CONFIG_RELEASE_MASTER ++ int release_master; ++#endif ++ ++ /* for moving tasks to the release queue */ ++ raw_spinlock_t tobe_lock; ++ struct list_head tobe_released; ++ ++ /* how do we check if we need to kick another CPU? */ ++ check_resched_needed_t check_resched; ++ ++ /* how do we release jobs? */ ++ release_jobs_t release_jobs; ++ ++ /* how are tasks ordered in the ready queue? */ ++ bheap_prio_t order; ++} rt_domain_t; ++ ++struct release_heap { ++ /* list_head for per-time-slot list */ ++ struct list_head list; ++ lt_t release_time; ++ /* all tasks to be released at release_time */ ++ struct bheap heap; ++ /* used to trigger the release */ ++ struct hrtimer timer; ++ ++#ifdef CONFIG_RELEASE_MASTER ++ /* used to delegate releases */ ++ struct hrtimer_start_on_info info; ++#endif ++ /* required for the timer callback */ ++ rt_domain_t* dom; ++}; ++ ++ ++static inline struct task_struct* __next_ready(rt_domain_t* rt) ++{ ++ struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue); ++ if (hn) ++ return bheap2task(hn); ++ else ++ return NULL; ++} ++ ++void rt_domain_init(rt_domain_t *rt, bheap_prio_t order, ++ check_resched_needed_t check, ++ release_jobs_t relase); ++ ++void __add_ready(rt_domain_t* rt, struct task_struct *new); ++void __merge_ready(rt_domain_t* rt, struct bheap *tasks); ++void __add_release(rt_domain_t* rt, struct task_struct *task); ++ ++static inline struct task_struct* __take_ready(rt_domain_t* rt) ++{ ++ struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue); ++ if (hn) ++ return bheap2task(hn); ++ else ++ return NULL; ++} ++ ++static inline struct task_struct* __peek_ready(rt_domain_t* rt) ++{ ++ struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue); ++ if (hn) ++ return bheap2task(hn); ++ else ++ return NULL; ++} ++ ++static inline int is_queued(struct task_struct *t) ++{ ++ BUG_ON(!tsk_rt(t)->heap_node); ++ return bheap_node_in_heap(tsk_rt(t)->heap_node); ++} ++ ++static inline void remove(rt_domain_t* rt, struct task_struct *t) ++{ ++ bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node); ++} ++ ++static inline void add_ready(rt_domain_t* rt, struct task_struct *new) ++{ ++ unsigned long flags; ++ /* first we need the write lock for rt_ready_queue */ ++ raw_spin_lock_irqsave(&rt->ready_lock, flags); ++ __add_ready(rt, new); ++ raw_spin_unlock_irqrestore(&rt->ready_lock, flags); ++} ++ ++static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks) ++{ ++ unsigned long flags; ++ 
raw_spin_lock_irqsave(&rt->ready_lock, flags); ++ __merge_ready(rt, tasks); ++ raw_spin_unlock_irqrestore(&rt->ready_lock, flags); ++} ++ ++static inline struct task_struct* take_ready(rt_domain_t* rt) ++{ ++ unsigned long flags; ++ struct task_struct* ret; ++ /* first we need the write lock for rt_ready_queue */ ++ raw_spin_lock_irqsave(&rt->ready_lock, flags); ++ ret = __take_ready(rt); ++ raw_spin_unlock_irqrestore(&rt->ready_lock, flags); ++ return ret; ++} ++ ++ ++static inline void add_release(rt_domain_t* rt, struct task_struct *task) ++{ ++ unsigned long flags; ++ raw_spin_lock_irqsave(&rt->tobe_lock, flags); ++ __add_release(rt, task); ++ raw_spin_unlock_irqrestore(&rt->tobe_lock, flags); ++} ++ ++#ifdef CONFIG_RELEASE_MASTER ++void __add_release_on(rt_domain_t* rt, struct task_struct *task, ++ int target_cpu); ++ ++static inline void add_release_on(rt_domain_t* rt, ++ struct task_struct *task, ++ int target_cpu) ++{ ++ unsigned long flags; ++ raw_spin_lock_irqsave(&rt->tobe_lock, flags); ++ __add_release_on(rt, task, target_cpu); ++ raw_spin_unlock_irqrestore(&rt->tobe_lock, flags); ++} ++#endif ++ ++static inline int __jobs_pending(rt_domain_t* rt) ++{ ++ return !bheap_empty(&rt->ready_queue); ++} ++ ++static inline int jobs_pending(rt_domain_t* rt) ++{ ++ unsigned long flags; ++ int ret; ++ /* first we need the write lock for rt_ready_queue */ ++ raw_spin_lock_irqsave(&rt->ready_lock, flags); ++ ret = !bheap_empty(&rt->ready_queue); ++ raw_spin_unlock_irqrestore(&rt->ready_lock, flags); ++ return ret; ++} ++ ++#endif +diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h +new file mode 100644 +index 0000000..a7a183f +--- /dev/null ++++ b/include/litmus/rt_param.h +@@ -0,0 +1,196 @@ ++/* ++ * Definition of the scheduler plugin interface. ++ * ++ */ ++#ifndef _LINUX_RT_PARAM_H_ ++#define _LINUX_RT_PARAM_H_ ++ ++/* Litmus time type. */ ++typedef unsigned long long lt_t; ++ ++static inline int lt_after(lt_t a, lt_t b) ++{ ++ return ((long long) b) - ((long long) a) < 0; ++} ++#define lt_before(a, b) lt_after(b, a) ++ ++static inline int lt_after_eq(lt_t a, lt_t b) ++{ ++ return ((long long) a) - ((long long) b) >= 0; ++} ++#define lt_before_eq(a, b) lt_after_eq(b, a) ++ ++/* different types of clients */ ++typedef enum { ++ RT_CLASS_HARD, ++ RT_CLASS_SOFT, ++ RT_CLASS_BEST_EFFORT ++} task_class_t; ++ ++typedef enum { ++ NO_ENFORCEMENT, /* job may overrun unhindered */ ++ QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */ ++ PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */ ++} budget_policy_t; ++ ++struct rt_task { ++ lt_t exec_cost; ++ lt_t period; ++ lt_t phase; ++ unsigned int cpu; ++ task_class_t cls; ++ budget_policy_t budget_policy; /* ignored by pfair */ ++}; ++ ++/* The definition of the data that is shared between the kernel and real-time ++ * tasks via a shared page (see litmus/ctrldev.c). ++ * ++ * WARNING: User space can write to this, so don't trust ++ * the correctness of the fields! ++ * ++ * This servees two purposes: to enable efficient signaling ++ * of non-preemptive sections (user->kernel) and ++ * delayed preemptions (kernel->user), and to export ++ * some real-time relevant statistics such as preemption and ++ * migration data to user space. We can't use a device to export ++ * statistics because we want to avoid system call overhead when ++ * determining preemption/migration overheads). ++ */ ++struct control_page { ++ /* Is the task currently in a non-preemptive section? 
*/ ++ int np_flag; ++ /* Should the task call into the kernel when it leaves ++ * its non-preemptive section? */ ++ int delayed_preemption; ++ ++ /* to be extended */ ++}; ++ ++/* don't export internal data structures to user space (liblitmus) */ ++#ifdef __KERNEL__ ++ ++struct _rt_domain; ++struct bheap_node; ++struct release_heap; ++ ++struct rt_job { ++ /* Time instant the the job was or will be released. */ ++ lt_t release; ++ /* What is the current deadline? */ ++ lt_t deadline; ++ ++ /* How much service has this job received so far? */ ++ lt_t exec_time; ++ ++ /* Which job is this. This is used to let user space ++ * specify which job to wait for, which is important if jobs ++ * overrun. If we just call sys_sleep_next_period() then we ++ * will unintentionally miss jobs after an overrun. ++ * ++ * Increase this sequence number when a job is released. ++ */ ++ unsigned int job_no; ++}; ++ ++struct pfair_param; ++ ++/* RT task parameters for scheduling extensions ++ * These parameters are inherited during clone and therefore must ++ * be explicitly set up before the task set is launched. ++ */ ++struct rt_param { ++ /* is the task sleeping? */ ++ unsigned int flags:8; ++ ++ /* do we need to check for srp blocking? */ ++ unsigned int srp_non_recurse:1; ++ ++ /* is the task present? (true if it can be scheduled) */ ++ unsigned int present:1; ++ ++ /* user controlled parameters */ ++ struct rt_task task_params; ++ ++ /* timing parameters */ ++ struct rt_job job_params; ++ ++ /* task representing the current "inherited" task ++ * priority, assigned by inherit_priority and ++ * return priority in the scheduler plugins. ++ * could point to self if PI does not result in ++ * an increased task priority. ++ */ ++ struct task_struct* inh_task; ++ ++#ifdef CONFIG_NP_SECTION ++ /* For the FMLP under PSN-EDF, it is required to make the task ++ * non-preemptive from kernel space. In order not to interfere with ++ * user space, this counter indicates the kernel space np setting. ++ * kernel_np > 0 => task is non-preemptive ++ */ ++ unsigned int kernel_np; ++#endif ++ ++ /* This field can be used by plugins to store where the task ++ * is currently scheduled. It is the responsibility of the ++ * plugin to avoid race conditions. ++ * ++ * This used by GSN-EDF and PFAIR. ++ */ ++ volatile int scheduled_on; ++ ++ /* Is the stack of the task currently in use? This is updated by ++ * the LITMUS core. ++ * ++ * Be careful to avoid deadlocks! ++ */ ++ volatile int stack_in_use; ++ ++ /* This field can be used by plugins to store where the task ++ * is currently linked. It is the responsibility of the plugin ++ * to avoid race conditions. ++ * ++ * Used by GSN-EDF. ++ */ ++ volatile int linked_on; ++ ++ /* PFAIR/PD^2 state. Allocated on demand. */ ++ struct pfair_param* pfair; ++ ++ /* Fields saved before BE->RT transition. ++ */ ++ int old_policy; ++ int old_prio; ++ ++ /* ready queue for this task */ ++ struct _rt_domain* domain; ++ ++ /* heap element for this task ++ * ++ * Warning: Don't statically allocate this node. The heap ++ * implementation swaps these between tasks, thus after ++ * dequeuing from a heap you may end up with a different node ++ * then the one you had when enqueuing the task. For the same ++ * reason, don't obtain and store references to this node ++ * other than this pointer (which is updated by the heap ++ * implementation). ++ */ ++ struct bheap_node* heap_node; ++ struct release_heap* rel_heap; ++ ++ /* Used by rt_domain to queue task in release list. 
++ */ ++ struct list_head list; ++ ++ /* Pointer to the page shared between userspace and kernel. */ ++ struct control_page * ctrl_page; ++}; ++ ++/* Possible RT flags */ ++#define RT_F_RUNNING 0x00000000 ++#define RT_F_SLEEP 0x00000001 ++#define RT_F_EXIT_SEM 0x00000008 ++ ++#endif ++ ++#endif +diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h +new file mode 100644 +index 0000000..9c1c9f2 +--- /dev/null ++++ b/include/litmus/sched_plugin.h +@@ -0,0 +1,162 @@ ++/* ++ * Definition of the scheduler plugin interface. ++ * ++ */ ++#ifndef _LINUX_SCHED_PLUGIN_H_ ++#define _LINUX_SCHED_PLUGIN_H_ ++ ++#include ++ ++/* struct for semaphore with priority inheritance */ ++struct pi_semaphore { ++ atomic_t count; ++ int sleepers; ++ wait_queue_head_t wait; ++ struct { ++ /* highest-prio holder/waiter */ ++ struct task_struct *task; ++ struct task_struct* cpu_task[NR_CPUS]; ++ } hp; ++ /* current lock holder */ ++ struct task_struct *holder; ++}; ++ ++/************************ setup/tear down ********************/ ++ ++typedef long (*activate_plugin_t) (void); ++typedef long (*deactivate_plugin_t) (void); ++ ++ ++ ++/********************* scheduler invocation ******************/ ++ ++/* Plugin-specific realtime tick handler */ ++typedef void (*scheduler_tick_t) (struct task_struct *cur); ++/* Novell make sched decision function */ ++typedef struct task_struct* (*schedule_t)(struct task_struct * prev); ++/* Clean up after the task switch has occured. ++ * This function is called after every (even non-rt) task switch. ++ */ ++typedef void (*finish_switch_t)(struct task_struct *prev); ++ ++ ++/********************* task state changes ********************/ ++ ++/* Called to setup a new real-time task. ++ * Release the first job, enqueue, etc. ++ * Task may already be running. ++ */ ++typedef void (*task_new_t) (struct task_struct *task, ++ int on_rq, ++ int running); ++ ++/* Called to re-introduce a task after blocking. ++ * Can potentially be called multiple times. ++ */ ++typedef void (*task_wake_up_t) (struct task_struct *task); ++/* called to notify the plugin of a blocking real-time task ++ * it will only be called for real-time tasks and before schedule is called */ ++typedef void (*task_block_t) (struct task_struct *task); ++/* Called when a real-time task exits or changes to a different scheduling ++ * class. ++ * Free any allocated resources ++ */ ++typedef void (*task_exit_t) (struct task_struct *); ++ ++/* Called when the new_owner is released from the wait queue ++ * it should now inherit the priority from sem, _before_ it gets readded ++ * to any queue ++ */ ++typedef long (*inherit_priority_t) (struct pi_semaphore *sem, ++ struct task_struct *new_owner); ++ ++/* Called when the current task releases a semahpore where it might have ++ * inherited a piority from ++ */ ++typedef long (*return_priority_t) (struct pi_semaphore *sem); ++ ++/* Called when a task tries to acquire a semaphore and fails. Check if its ++ * priority is higher than that of the current holder. 
++ */ ++typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t); ++ ++ ++ ++ ++/********************* sys call backends ********************/ ++/* This function causes the caller to sleep until the next release */ ++typedef long (*complete_job_t) (void); ++ ++typedef long (*admit_task_t)(struct task_struct* tsk); ++ ++typedef void (*release_at_t)(struct task_struct *t, lt_t start); ++ ++struct sched_plugin { ++ struct list_head list; ++ /* basic info */ ++ char *plugin_name; ++ ++ /* setup */ ++ activate_plugin_t activate_plugin; ++ deactivate_plugin_t deactivate_plugin; ++ ++#ifdef CONFIG_SRP ++ unsigned int srp_active; ++#endif ++ ++ /* scheduler invocation */ ++ scheduler_tick_t tick; ++ schedule_t schedule; ++ finish_switch_t finish_switch; ++ ++ /* syscall backend */ ++ complete_job_t complete_job; ++ release_at_t release_at; ++ ++ /* task state changes */ ++ admit_task_t admit_task; ++ ++ task_new_t task_new; ++ task_wake_up_t task_wake_up; ++ task_block_t task_block; ++ task_exit_t task_exit; ++ ++#ifdef CONFIG_FMLP ++ /* priority inheritance */ ++ unsigned int fmlp_active; ++ inherit_priority_t inherit_priority; ++ return_priority_t return_priority; ++ pi_block_t pi_block; ++#endif ++} __attribute__ ((__aligned__(SMP_CACHE_BYTES))); ++ ++ ++extern struct sched_plugin *litmus; ++ ++/* cluster size: cache_index = 2 L2, cache_index = 3 L3 */ ++extern int cluster_cache_index; ++ ++int register_sched_plugin(struct sched_plugin* plugin); ++struct sched_plugin* find_sched_plugin(const char* name); ++int print_sched_plugins(char* buf, int max); ++ ++static inline int srp_active(void) ++{ ++#ifdef CONFIG_SRP ++ return litmus->srp_active; ++#else ++ return 0; ++#endif ++} ++static inline int fmlp_active(void) ++{ ++#ifdef CONFIG_FMLP ++ return litmus->fmlp_active; ++#else ++ return 0; ++#endif ++} ++ ++extern struct sched_plugin linux_sched_plugin; ++ ++#endif +diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h +new file mode 100644 +index 0000000..e1b0c97 +--- /dev/null ++++ b/include/litmus/sched_trace.h +@@ -0,0 +1,192 @@ ++/* ++ * sched_trace.h -- record scheduler events to a byte stream for offline analysis. ++ */ ++#ifndef _LINUX_SCHED_TRACE_H_ ++#define _LINUX_SCHED_TRACE_H_ ++ ++/* all times in nanoseconds */ ++ ++struct st_trace_header { ++ u8 type; /* Of what type is this record? */ ++ u8 cpu; /* On which CPU was it recorded? */ ++ u16 pid; /* PID of the task. */ ++ u32 job; /* The job sequence number. */ ++}; ++ ++#define ST_NAME_LEN 16 ++struct st_name_data { ++ char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ ++}; ++ ++struct st_param_data { /* regular params */ ++ u32 wcet; ++ u32 period; ++ u32 phase; ++ u8 partition; ++ u8 __unused[3]; ++}; ++ ++struct st_release_data { /* A job is was/is going to be released. */ ++ u64 release; /* What's the release time? */ ++ u64 deadline; /* By when must it finish? */ ++}; ++ ++struct st_assigned_data { /* A job was asigned to a CPU. */ ++ u64 when; ++ u8 target; /* Where should it execute? */ ++ u8 __unused[3]; ++}; ++ ++struct st_switch_to_data { /* A process was switched to on a given CPU. */ ++ u64 when; /* When did this occur? */ ++ u32 exec_time; /* Time the current job has executed. */ ++ ++}; ++ ++struct st_switch_away_data { /* A process was switched away from on a given CPU. */ ++ u64 when; ++ u64 exec_time; ++}; ++ ++struct st_completion_data { /* A job completed. 
*/ ++ u64 when; ++ u8 forced:1; /* Set to 1 if job overran and kernel advanced to the ++ * next task automatically; set to 0 otherwise. ++ */ ++ u8 __uflags:7; ++ u8 __unused[3]; ++}; ++ ++struct st_block_data { /* A task blocks. */ ++ u64 when; ++ u64 __unused; ++}; ++ ++struct st_resume_data { /* A task resumes. */ ++ u64 when; ++ u64 __unused; ++}; ++ ++struct st_sys_release_data { ++ u64 when; ++ u64 release; ++}; ++ ++#define DATA(x) struct st_ ## x ## _data x; ++ ++typedef enum { ++ ST_NAME = 1, /* Start at one, so that we can spot ++ * uninitialized records. */ ++ ST_PARAM, ++ ST_RELEASE, ++ ST_ASSIGNED, ++ ST_SWITCH_TO, ++ ST_SWITCH_AWAY, ++ ST_COMPLETION, ++ ST_BLOCK, ++ ST_RESUME, ++ ST_SYS_RELEASE, ++} st_event_record_type_t; ++ ++struct st_event_record { ++ struct st_trace_header hdr; ++ union { ++ u64 raw[2]; ++ ++ DATA(name); ++ DATA(param); ++ DATA(release); ++ DATA(assigned); ++ DATA(switch_to); ++ DATA(switch_away); ++ DATA(completion); ++ DATA(block); ++ DATA(resume); ++ DATA(sys_release); ++ ++ } data; ++}; ++ ++#undef DATA ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++ ++#ifdef CONFIG_SCHED_TASK_TRACE ++ ++#define SCHED_TRACE(id, callback, task) \ ++ ft_event1(id, callback, task) ++#define SCHED_TRACE2(id, callback, task, xtra) \ ++ ft_event2(id, callback, task, xtra) ++ ++/* provide prototypes; needed on sparc64 */ ++#ifndef NO_TASK_TRACE_DECLS ++feather_callback void do_sched_trace_task_name(unsigned long id, ++ struct task_struct* task); ++feather_callback void do_sched_trace_task_param(unsigned long id, ++ struct task_struct* task); ++feather_callback void do_sched_trace_task_release(unsigned long id, ++ struct task_struct* task); ++feather_callback void do_sched_trace_task_switch_to(unsigned long id, ++ struct task_struct* task); ++feather_callback void do_sched_trace_task_switch_away(unsigned long id, ++ struct task_struct* task); ++feather_callback void do_sched_trace_task_completion(unsigned long id, ++ struct task_struct* task, ++ unsigned long forced); ++feather_callback void do_sched_trace_task_block(unsigned long id, ++ struct task_struct* task); ++feather_callback void do_sched_trace_task_resume(unsigned long id, ++ struct task_struct* task); ++feather_callback void do_sched_trace_sys_release(unsigned long id, ++ lt_t* start); ++#endif ++ ++#else ++ ++#define SCHED_TRACE(id, callback, task) /* no tracing */ ++#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ ++ ++#endif ++ ++ ++#define SCHED_TRACE_BASE_ID 500 ++ ++ ++#define sched_trace_task_name(t) \ ++ SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t) ++#define sched_trace_task_param(t) \ ++ SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t) ++#define sched_trace_task_release(t) \ ++ SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t) ++#define sched_trace_task_switch_to(t) \ ++ SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t) ++#define sched_trace_task_switch_away(t) \ ++ SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t) ++#define sched_trace_task_completion(t, forced) \ ++ SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \ ++ (unsigned long) forced) ++#define sched_trace_task_block(t) \ ++ SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, do_sched_trace_task_block, t) ++#define sched_trace_task_resume(t) \ ++ SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t) ++/* when is a pointer, it does not need an explicit cast to unsigned long */ ++#define 
sched_trace_sys_release(when) \ ++ SCHED_TRACE(SCHED_TRACE_BASE_ID + 9, do_sched_trace_sys_release, when) ++ ++#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ ++ ++#ifdef CONFIG_SCHED_DEBUG_TRACE ++void sched_trace_log_message(const char* fmt, ...); ++void dump_trace_buffer(int max); ++#else ++ ++#define sched_trace_log_message(fmt, ...) ++ ++#endif ++ ++#endif /* __KERNEL__ */ ++ ++#endif +diff --git a/include/litmus/trace.h b/include/litmus/trace.h +new file mode 100644 +index 0000000..b32c711 +--- /dev/null ++++ b/include/litmus/trace.h +@@ -0,0 +1,113 @@ ++#ifndef _SYS_TRACE_H_ ++#define _SYS_TRACE_H_ ++ ++#ifdef CONFIG_SCHED_OVERHEAD_TRACE ++ ++#include ++#include ++ ++ ++/*********************** TIMESTAMPS ************************/ ++ ++enum task_type_marker { ++ TSK_BE, ++ TSK_RT, ++ TSK_UNKNOWN ++}; ++ ++struct timestamp { ++ uint64_t timestamp; ++ uint32_t seq_no; ++ uint8_t cpu; ++ uint8_t event; ++ uint8_t task_type; ++}; ++ ++/* tracing callbacks */ ++feather_callback void save_timestamp(unsigned long event); ++feather_callback void save_timestamp_def(unsigned long event, unsigned long type); ++feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr); ++feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu); ++ ++ ++#define TIMESTAMP(id) ft_event0(id, save_timestamp) ++ ++#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def) ++ ++#define TTIMESTAMP(id, task) \ ++ ft_event1(id, save_timestamp_task, (unsigned long) task) ++ ++#define CTIMESTAMP(id, cpu) \ ++ ft_event1(id, save_timestamp_cpu, (unsigned long) cpu) ++ ++#else /* !CONFIG_SCHED_OVERHEAD_TRACE */ ++ ++#define TIMESTAMP(id) /* no tracing */ ++ ++#define DTIMESTAMP(id, def) /* no tracing */ ++ ++#define TTIMESTAMP(id, task) /* no tracing */ ++ ++#define CTIMESTAMP(id, cpu) /* no tracing */ ++ ++#endif ++ ++ ++/* Convention for timestamps ++ * ========================= ++ * ++ * In order to process the trace files with a common tool, we use the following ++ * convention to measure execution times: The end time id of a code segment is ++ * always the next number after the start time event id. 
++ */ ++ ++#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only ++ * care ++ * about ++ * next */ ++#define TS_SCHED_END(t) TTIMESTAMP(101, t) ++#define TS_SCHED2_START(t) TTIMESTAMP(102, t) ++#define TS_SCHED2_END(t) TTIMESTAMP(103, t) ++ ++#define TS_CXS_START(t) TTIMESTAMP(104, t) ++#define TS_CXS_END(t) TTIMESTAMP(105, t) ++ ++#define TS_RELEASE_START DTIMESTAMP(106, TSK_RT) ++#define TS_RELEASE_END DTIMESTAMP(107, TSK_RT) ++ ++#define TS_TICK_START(t) TTIMESTAMP(110, t) ++#define TS_TICK_END(t) TTIMESTAMP(111, t) ++ ++ ++#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ ++#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ ++ ++#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */ ++#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */ ++ ++#define TS_ENTER_NP_START TIMESTAMP(140) ++#define TS_ENTER_NP_END TIMESTAMP(141) ++ ++#define TS_EXIT_NP_START TIMESTAMP(150) ++#define TS_EXIT_NP_END TIMESTAMP(151) ++ ++#define TS_SRP_UP_START TIMESTAMP(160) ++#define TS_SRP_UP_END TIMESTAMP(161) ++#define TS_SRP_DOWN_START TIMESTAMP(162) ++#define TS_SRP_DOWN_END TIMESTAMP(163) ++ ++#define TS_PI_UP_START TIMESTAMP(170) ++#define TS_PI_UP_END TIMESTAMP(171) ++#define TS_PI_DOWN_START TIMESTAMP(172) ++#define TS_PI_DOWN_END TIMESTAMP(173) ++ ++#define TS_FIFO_UP_START TIMESTAMP(180) ++#define TS_FIFO_UP_END TIMESTAMP(181) ++#define TS_FIFO_DOWN_START TIMESTAMP(182) ++#define TS_FIFO_DOWN_END TIMESTAMP(183) ++ ++#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) ++#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) ++ ++ ++#endif /* !_SYS_TRACE_H_ */ +diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h +new file mode 100644 +index 0000000..dbddc65 +--- /dev/null ++++ b/include/litmus/unistd_32.h +@@ -0,0 +1,23 @@ ++/* ++ * included from arch/x86/include/asm/unistd_32.h ++ * ++ * LITMUS^RT syscalls with "relative" numbers ++ */ ++#define __LSC(x) (__NR_LITMUS + x) ++ ++#define __NR_set_rt_task_param __LSC(0) ++#define __NR_get_rt_task_param __LSC(1) ++#define __NR_complete_job __LSC(2) ++#define __NR_od_open __LSC(3) ++#define __NR_od_close __LSC(4) ++#define __NR_fmlp_down __LSC(5) ++#define __NR_fmlp_up __LSC(6) ++#define __NR_srp_down __LSC(7) ++#define __NR_srp_up __LSC(8) ++#define __NR_query_job_no __LSC(9) ++#define __NR_wait_for_job_release __LSC(10) ++#define __NR_wait_for_ts_release __LSC(11) ++#define __NR_release_ts __LSC(12) ++#define __NR_null_call __LSC(13) ++ ++#define NR_litmus_syscalls 14 +diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h +new file mode 100644 +index 0000000..f0618e7 +--- /dev/null ++++ b/include/litmus/unistd_64.h +@@ -0,0 +1,37 @@ ++/* ++ * included from arch/x86/include/asm/unistd_64.h ++ * ++ * LITMUS^RT syscalls with "relative" numbers ++ */ ++#define __LSC(x) (__NR_LITMUS + x) ++ ++#define __NR_set_rt_task_param __LSC(0) ++__SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param) ++#define __NR_get_rt_task_param __LSC(1) ++__SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param) ++#define __NR_complete_job __LSC(2) ++__SYSCALL(__NR_complete_job, sys_complete_job) ++#define __NR_od_open __LSC(3) ++__SYSCALL(__NR_od_open, sys_od_open) ++#define __NR_od_close __LSC(4) ++__SYSCALL(__NR_od_close, sys_od_close) ++#define __NR_fmlp_down __LSC(5) ++__SYSCALL(__NR_fmlp_down, sys_fmlp_down) ++#define __NR_fmlp_up __LSC(6) ++__SYSCALL(__NR_fmlp_up, sys_fmlp_up) ++#define __NR_srp_down __LSC(7) ++__SYSCALL(__NR_srp_down, sys_srp_down) ++#define __NR_srp_up __LSC(8) ++__SYSCALL(__NR_srp_up, sys_srp_up) 
++#define __NR_query_job_no __LSC(9) ++__SYSCALL(__NR_query_job_no, sys_query_job_no) ++#define __NR_wait_for_job_release __LSC(10) ++__SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release) ++#define __NR_wait_for_ts_release __LSC(11) ++__SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) ++#define __NR_release_ts __LSC(12) ++__SYSCALL(__NR_release_ts, sys_release_ts) ++#define __NR_null_call __LSC(13) ++__SYSCALL(__NR_null_call, sys_null_call) ++ ++#define NR_litmus_syscalls 14 +diff --git a/kernel/exit.c b/kernel/exit.c +index 7f2683a..256ce8c 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -57,6 +57,8 @@ + #include + #include "cred-internals.h" + ++extern void exit_od_table(struct task_struct *t); ++ + static void exit_mm(struct task_struct * tsk); + + static void __unhash_process(struct task_struct *p) +@@ -968,6 +970,8 @@ NORET_TYPE void do_exit(long code) + if (unlikely(tsk->audit_context)) + audit_free(tsk); + ++ exit_od_table(tsk); ++ + tsk->exit_code = code; + taskstats_exit(tsk, group_dead); + +diff --git a/kernel/fork.c b/kernel/fork.c +index 4c14942..166eb78 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -75,6 +75,9 @@ + + #include + ++#include ++#include ++ + /* + * Protected counters by write_lock_irq(&tasklist_lock) + */ +@@ -171,6 +174,7 @@ void __put_task_struct(struct task_struct *tsk) + WARN_ON(atomic_read(&tsk->usage)); + WARN_ON(tsk == current); + ++ exit_litmus(tsk); + exit_creds(tsk); + delayacct_tsk_free(tsk); + +@@ -253,6 +257,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) + + tsk->stack = ti; + ++ /* Don't let the new task be a real-time task. */ ++ litmus_fork(tsk); ++ + err = prop_local_init_single(&tsk->dirties); + if (err) + goto out; +diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c +index 0086628..fdf9596 100644 +--- a/kernel/hrtimer.c ++++ b/kernel/hrtimer.c +@@ -46,6 +46,8 @@ + #include + #include + ++#include ++ + #include + + #include +@@ -1041,6 +1043,98 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) + } + EXPORT_SYMBOL_GPL(hrtimer_start); + ++#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS ++ ++/** ++ * hrtimer_start_on_info_init - Initialize hrtimer_start_on_info ++ */ ++void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info) ++{ ++ memset(info, 0, sizeof(struct hrtimer_start_on_info)); ++ atomic_set(&info->state, HRTIMER_START_ON_INACTIVE); ++} ++ ++/** ++ * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu ++ */ ++void hrtimer_pull(void) ++{ ++ struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); ++ struct hrtimer_start_on_info *info; ++ struct list_head *pos, *safe, list; ++ ++ raw_spin_lock(&base->lock); ++ list_replace_init(&base->to_pull, &list); ++ raw_spin_unlock(&base->lock); ++ ++ list_for_each_safe(pos, safe, &list) { ++ info = list_entry(pos, struct hrtimer_start_on_info, list); ++ TRACE("pulled timer 0x%x\n", info->timer); ++ list_del(pos); ++ hrtimer_start(info->timer, info->time, info->mode); ++ } ++} ++ ++/** ++ * hrtimer_start_on - trigger timer arming on remote cpu ++ * @cpu: remote cpu ++ * @info: save timer information for enqueuing on remote cpu ++ * @timer: timer to be pulled ++ * @time: expire time ++ * @mode: timer mode ++ */ ++int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info, ++ struct hrtimer *timer, ktime_t time, ++ const enum hrtimer_mode mode) ++{ ++ unsigned long flags; ++ struct hrtimer_cpu_base* base; ++ int in_use = 0, was_empty; ++ ++ /* serialize access to info through the timer base */ ++ 
lock_hrtimer_base(timer, &flags); ++ ++ in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE); ++ if (!in_use) { ++ INIT_LIST_HEAD(&info->list); ++ info->timer = timer; ++ info->time = time; ++ info->mode = mode; ++ /* mark as in use */ ++ atomic_set(&info->state, HRTIMER_START_ON_QUEUED); ++ } ++ ++ unlock_hrtimer_base(timer, &flags); ++ ++ if (!in_use) { ++ /* initiate pull */ ++ preempt_disable(); ++ if (cpu == smp_processor_id()) { ++ /* start timer locally; we may get called ++ * with rq->lock held, do not wake up anything ++ */ ++ TRACE("hrtimer_start_on: starting on local CPU\n"); ++ __hrtimer_start_range_ns(info->timer, info->time, ++ 0, info->mode, 0); ++ } else { ++ TRACE("hrtimer_start_on: pulling to remote CPU\n"); ++ base = &per_cpu(hrtimer_bases, cpu); ++ raw_spin_lock_irqsave(&base->lock, flags); ++ was_empty = list_empty(&base->to_pull); ++ list_add(&info->list, &base->to_pull); ++ raw_spin_unlock_irqrestore(&base->lock, flags); ++ if (was_empty) ++ /* only send IPI if other no else ++ * has done so already ++ */ ++ smp_send_pull_timers(cpu); ++ } ++ preempt_enable(); ++ } ++ return in_use; ++} ++ ++#endif + + /** + * hrtimer_try_to_cancel - try to deactivate a timer +@@ -1631,6 +1725,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) + cpu_base->clock_base[i].cpu_base = cpu_base; + + hrtimer_init_hres(cpu_base); ++ INIT_LIST_HEAD(&cpu_base->to_pull); + } + + #ifdef CONFIG_HOTPLUG_CPU +diff --git a/kernel/printk.c b/kernel/printk.c +index 75077ad..ee54355 100644 +--- a/kernel/printk.c ++++ b/kernel/printk.c +@@ -71,6 +71,13 @@ int console_printk[4] = { + }; + + /* ++ * divert printk() messages when there is a LITMUS^RT debug listener ++ */ ++#include ++int trace_override = 0; ++int trace_recurse = 0; ++ ++/* + * Low level drivers may need that to know if they can schedule in + * their unblank() callback or not. So let's export it. + */ +@@ -708,6 +715,9 @@ asmlinkage int vprintk(const char *fmt, va_list args) + /* Emit the output into the temporary buffer */ + printed_len += vscnprintf(printk_buf + printed_len, + sizeof(printk_buf) - printed_len, fmt, args); ++ /* if LITMUS^RT tracer is active divert printk() msgs */ ++ if (trace_override && !trace_recurse) ++ TRACE("%s", printk_buf); + + + p = printk_buf; +@@ -777,7 +787,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) + * Try to acquire and then immediately release the + * console semaphore. The release will do all the + * actual magic (print out buffers, wake up klogd, +- * etc). ++ * etc). 
+ * + * The acquire_console_semaphore_for_printk() function + * will release 'logbuf_lock' regardless of whether it +@@ -1014,7 +1024,7 @@ int printk_needs_cpu(int cpu) + + void wake_up_klogd(void) + { +- if (waitqueue_active(&log_wait)) ++ if (!trace_override && waitqueue_active(&log_wait)) + __raw_get_cpu_var(printk_pending) = 1; + } + +diff --git a/kernel/sched.c b/kernel/sched.c +index 3c2a54f..5e3c509 100644 +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -78,6 +78,9 @@ + + #include "sched_cpupri.h" + ++#include ++#include ++ + #define CREATE_TRACE_POINTS + #include + +@@ -450,6 +453,12 @@ struct rt_rq { + #endif + }; + ++/* Litmus related fields in a runqueue */ ++struct litmus_rq { ++ unsigned long nr_running; ++ struct task_struct *prev; ++}; ++ + #ifdef CONFIG_SMP + + /* +@@ -512,6 +521,7 @@ struct rq { + + struct cfs_rq cfs; + struct rt_rq rt; ++ struct litmus_rq litmus; + + #ifdef CONFIG_FAIR_GROUP_SCHED + /* list of leaf cfs_rq on this cpu: */ +@@ -1833,7 +1843,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) + + static const struct sched_class rt_sched_class; + +-#define sched_class_highest (&rt_sched_class) ++#define sched_class_highest (&litmus_sched_class) + #define for_each_class(class) \ + for (class = sched_class_highest; class; class = class->next) + +@@ -1932,6 +1942,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) + #include "sched_idletask.c" + #include "sched_fair.c" + #include "sched_rt.c" ++#include "../litmus/sched_litmus.c" + #ifdef CONFIG_SCHED_DEBUG + # include "sched_debug.c" + #endif +@@ -2372,6 +2383,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + unsigned long flags; + struct rq *rq; + ++ if (is_realtime(p)) ++ TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); ++ + if (!sched_feat(SYNC_WAKEUPS)) + wake_flags &= ~WF_SYNC; + +@@ -2390,7 +2404,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, + orig_cpu = cpu; + + #ifdef CONFIG_SMP +- if (unlikely(task_running(rq, p))) ++ if (unlikely(task_running(rq, p)) || is_realtime(p)) + goto out_activate; + + /* +@@ -2497,6 +2511,8 @@ out_running: + } + #endif + out: ++ if (is_realtime(p)) ++ TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state); + task_rq_unlock(rq, &flags); + put_cpu(); + +@@ -2814,6 +2830,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) + */ + prev_state = prev->state; + finish_arch_switch(prev); ++ litmus->finish_switch(prev); ++ prev->rt_param.stack_in_use = NO_CPU; + #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW + local_irq_disable(); + #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ +@@ -2843,6 +2861,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev) + { + if (prev->sched_class->pre_schedule) + prev->sched_class->pre_schedule(rq, prev); ++ ++ /* LITMUS^RT not very clean hack: we need to save the prev task ++ * as our scheduling decision rely on it (as we drop the rq lock ++ * something in prev can change...); there is no way to escape ++ * this ack apart from modifying pick_nex_task(rq, _prev_) or ++ * falling back on the previous solution of decoupling ++ * scheduling decisions ++ */ ++ rq->litmus.prev = prev; + } + + /* rq->lock is NOT held, but preemption is disabled */ +@@ -3520,18 +3547,26 @@ void scheduler_tick(void) + + sched_clock_tick(); + ++ TS_TICK_START(current); ++ + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + update_cpu_load(rq); + curr->sched_class->task_tick(rq, curr, 0); ++ ++ /* litmus_tick may force 
current to resched */ ++ litmus_tick(rq, curr); ++ + raw_spin_unlock(&rq->lock); + + perf_event_task_tick(curr); + + #ifdef CONFIG_SMP + rq->idle_at_tick = idle_cpu(cpu); +- trigger_load_balance(rq, cpu); ++ if (!is_realtime(current)) ++ trigger_load_balance(rq, cpu); + #endif ++ TS_TICK_END(current); + } + + notrace unsigned long get_parent_ip(unsigned long addr) +@@ -3672,12 +3707,20 @@ pick_next_task(struct rq *rq) + /* + * Optimization: we know that if all tasks are in + * the fair class we can call that function directly: +- */ +- if (likely(rq->nr_running == rq->cfs.nr_running)) { ++ ++ * NOT IN LITMUS^RT! ++ ++ * This breaks many assumptions in the plugins. ++ * Do not uncomment without thinking long and hard ++ * about how this affects global plugins such as GSN-EDF. ++ ++ if (rq->nr_running == rq->cfs.nr_running) { ++ TRACE("taking shortcut in pick_next_task()\n"); + p = fair_sched_class.pick_next_task(rq); + if (likely(p)) + return p; + } ++ */ + + class = sched_class_highest; + for ( ; ; ) { +@@ -3712,6 +3755,8 @@ need_resched: + + release_kernel_lock(prev); + need_resched_nonpreemptible: ++ TS_SCHED_START; ++ sched_trace_task_switch_away(prev); + + schedule_debug(prev); + +@@ -3746,15 +3791,22 @@ need_resched_nonpreemptible: + rq->curr = next; + ++*switch_count; + ++ TS_SCHED_END(next); ++ TS_CXS_START(next); + context_switch(rq, prev, next); /* unlocks the rq */ ++ TS_CXS_END(current); + /* + * the context switch might have flipped the stack from under + * us, hence refresh the local variables. + */ + cpu = smp_processor_id(); + rq = cpu_rq(cpu); +- } else ++ } else { ++ TS_SCHED_END(prev); + raw_spin_unlock_irq(&rq->lock); ++ } ++ ++ sched_trace_task_switch_to(current); + + post_schedule(rq); + +@@ -3767,6 +3819,9 @@ need_resched_nonpreemptible: + preempt_enable_no_resched(); + if (need_resched()) + goto need_resched; ++ ++ if (srp_active()) ++ srp_ceiling_block(); + } + EXPORT_SYMBOL(schedule); + +@@ -4043,6 +4098,17 @@ void complete_all(struct completion *x) + } + EXPORT_SYMBOL(complete_all); + ++void complete_n(struct completion *x, int n) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&x->wait.lock, flags); ++ x->done += n; ++ __wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL); ++ spin_unlock_irqrestore(&x->wait.lock, flags); ++} ++EXPORT_SYMBOL(complete_n); ++ + static inline long __sched + do_wait_for_common(struct completion *x, long timeout, int state) + { +@@ -4471,7 +4537,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) + p->normal_prio = normal_prio(p); + /* we are holding p->pi_lock already */ + p->prio = rt_mutex_getprio(p); +- if (rt_prio(p->prio)) ++ if (p->policy == SCHED_LITMUS) ++ p->sched_class = &litmus_sched_class; ++ else if (rt_prio(p->prio)) + p->sched_class = &rt_sched_class; + else + p->sched_class = &fair_sched_class; +@@ -4516,7 +4584,7 @@ recheck: + + if (policy != SCHED_FIFO && policy != SCHED_RR && + policy != SCHED_NORMAL && policy != SCHED_BATCH && +- policy != SCHED_IDLE) ++ policy != SCHED_IDLE && policy != SCHED_LITMUS) + return -EINVAL; + } + +@@ -4531,6 +4599,8 @@ recheck: + return -EINVAL; + if (rt_policy(policy) != (param->sched_priority != 0)) + return -EINVAL; ++ if (policy == SCHED_LITMUS && policy == p->policy) ++ return -EINVAL; + + /* + * Allow unprivileged RT tasks to decrease priority: +@@ -4585,6 +4655,12 @@ recheck: + return retval; + } + ++ if (policy == SCHED_LITMUS) { ++ retval = litmus_admit_task(p); ++ if (retval) ++ return retval; ++ } ++ + /* + * make sure no PI-waiters arrive (or 
leave) while we are + * changing the priority of the task: +@@ -4612,10 +4688,19 @@ recheck: + + p->sched_reset_on_fork = reset_on_fork; + ++ if (p->policy == SCHED_LITMUS) ++ litmus_exit_task(p); ++ + oldprio = p->prio; + prev_class = p->sched_class; + __setscheduler(rq, p, policy, param->sched_priority); + ++ if (policy == SCHED_LITMUS) { ++ p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU; ++ p->rt_param.present = running; ++ litmus->task_new(p, on_rq, running); ++ } ++ + if (running) + p->sched_class->set_curr_task(rq); + if (on_rq) { +@@ -4785,10 +4870,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) + rcu_read_lock(); + + p = find_process_by_pid(pid); +- if (!p) { ++ /* Don't set affinity if task not found and for LITMUS tasks */ ++ if (!p || is_realtime(p)) { + rcu_read_unlock(); + put_online_cpus(); +- return -ESRCH; ++ return p ? -EPERM : -ESRCH; + } + + /* Prevent p going away */ +diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c +index 5a5ea2c..b1af6d4 100644 +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1708,7 +1708,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + int sync = wake_flags & WF_SYNC; + int scale = cfs_rq->nr_running >= sched_nr_latency; + +- if (unlikely(rt_prio(p->prio))) ++ if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS) + goto preempt; + + if (unlikely(p->sched_class != &fair_sched_class)) +diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c +index b5b920a..c2fbb02 100644 +--- a/kernel/sched_rt.c ++++ b/kernel/sched_rt.c +@@ -1014,7 +1014,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) + */ + static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) + { +- if (p->prio < rq->curr->prio) { ++ if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) { + resched_task(rq->curr); + return; + } +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index f992762..0adc54b 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -721,6 +721,46 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) + } + + /** ++ * tick_set_quanta_type - get the quanta type as a boot option ++ * Default is standard setup with ticks staggered over first ++ * half of tick period. 
++ */ ++int quanta_type = LINUX_DEFAULT_TICKS; ++static int __init tick_set_quanta_type(char *str) ++{ ++ if (strcmp("aligned", str) == 0) { ++ quanta_type = LITMUS_ALIGNED_TICKS; ++ printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n"); ++ } ++ else if (strcmp("staggered", str) == 0) { ++ quanta_type = LITMUS_STAGGERED_TICKS; ++ printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n"); ++ } ++ return 1; ++} ++__setup("quanta=", tick_set_quanta_type); ++ ++u64 cpu_stagger_offset(int cpu) ++{ ++ u64 offset = 0; ++ switch (quanta_type) { ++ case LITMUS_ALIGNED_TICKS: ++ offset = 0; ++ break; ++ case LITMUS_STAGGERED_TICKS: ++ offset = ktime_to_ns(tick_period); ++ do_div(offset, num_possible_cpus()); ++ offset *= cpu; ++ break; ++ default: ++ offset = ktime_to_ns(tick_period) >> 1; ++ do_div(offset, num_possible_cpus()); ++ offset *= cpu; ++ } ++ return offset; ++} ++ ++/** + * tick_setup_sched_timer - setup the tick emulation timer + */ + void tick_setup_sched_timer(void) +@@ -737,9 +777,11 @@ void tick_setup_sched_timer(void) + + /* Get the next period (per cpu) */ + hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); +- offset = ktime_to_ns(tick_period) >> 1; +- do_div(offset, num_possible_cpus()); +- offset *= smp_processor_id(); ++ ++ /* Offset must be set correctly to achieve desired quanta type. */ ++ offset = cpu_stagger_offset(smp_processor_id()); ++ ++ /* Add the correct offset to expiration time */ + hrtimer_add_expires_ns(&ts->sched_timer, offset); + + for (;;) { +diff --git a/litmus/Kconfig b/litmus/Kconfig +new file mode 100644 +index 0000000..9888589 +--- /dev/null ++++ b/litmus/Kconfig +@@ -0,0 +1,134 @@ ++menu "LITMUS^RT" ++ ++menu "Scheduling" ++ ++config PLUGIN_CEDF ++ bool "Clustered-EDF" ++ depends on X86 && SYSFS ++ default y ++ help ++ Include the Clustered EDF (C-EDF) plugin in the kernel. ++ This is appropriate for large platforms with shared caches. ++ On smaller platforms (e.g., ARM PB11MPCore), using C-EDF ++ makes little sense since there aren't any shared caches. ++ ++config PLUGIN_PFAIR ++ bool "PFAIR" ++ depends on HIGH_RES_TIMERS && !NO_HZ ++ default y ++ help ++ Include the PFAIR plugin (i.e., the PD^2 scheduler) in the kernel. ++ The PFAIR plugin requires high resolution timers (for staggered quanta) ++ and does not support NO_HZ (quanta could be missed when the system is idle). ++ ++ If unsure, say Yes. ++ ++config RELEASE_MASTER ++ bool "Release-master Support" ++ depends on ARCH_HAS_SEND_PULL_TIMERS ++ default n ++ help ++ Allow one processor to act as a dedicated interrupt processor ++ that services all timer interrupts, but that does not schedule ++ real-time tasks. See RTSS'09 paper for details ++ (http://www.cs.unc.edu/~anderson/papers.html). ++ Currently only supported by GSN-EDF. ++ ++endmenu ++ ++menu "Real-Time Synchronization" ++ ++config NP_SECTION ++ bool "Non-preemptive section support" ++ default n ++ help ++ Allow tasks to become non-preemptable. ++ Note that plugins still need to explicitly support non-preemptivity. ++ Currently, only GSN-EDF and PSN-EDF have such support. ++ ++ This is required to support the FMLP. ++ If disabled, all tasks will be considered preemptable at all times. ++ ++config SRP ++ bool "Stack Resource Policy (SRP)" ++ default n ++ help ++ Include support for Baker's Stack Resource Policy. ++ ++ Say Yes if you want FMLP local long critical section ++ synchronization support. 
++ ++config FMLP ++ bool "FMLP support" ++ depends on NP_SECTION ++ default n ++ help ++ Include support for deterministic multiprocessor real-time ++ synchronization support. ++ ++ Say Yes if you want FMLP long critical section ++ synchronization support. ++ ++endmenu ++ ++menu "Tracing" ++ ++config FEATHER_TRACE ++ bool "Feather-Trace Infrastructure" ++ default y ++ help ++ Feather-Trace basic tracing infrastructure. Includes device file ++ driver and instrumentation point support. ++ ++ There are actually two implementations of Feather-Trace. ++ 1) A slower, but portable, default implementation. ++ 2) Architecture-specific implementations that rewrite kernel .text at runtime. ++ ++ If enabled, Feather-Trace will be based on 2) if available (currently only for x86). ++ However, if DEBUG_RODATA=y, then Feather-Trace will choose option 1) in any case ++ to avoid problems with write-protected .text pages. ++ ++ Bottom line: to avoid increased overheads, choose DEBUG_RODATA=n. ++ ++ Note that this option only enables the basic Feather-Trace infrastructure; ++ you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to ++ actually enable any events. ++ ++config SCHED_TASK_TRACE ++ bool "Trace real-time tasks" ++ depends on FEATHER_TRACE ++ default y ++ help ++ Include support for the sched_trace_XXX() tracing functions. This ++ allows the collection of real-time task events such as job ++ completions, job releases, early completions, etc. This results in a ++ small overhead in the scheduling code. Disable if the overhead is not ++ acceptable (e.g., benchmarking). ++ ++ Say Yes for debugging. ++ Say No for overhead tracing. ++ ++config SCHED_OVERHEAD_TRACE ++ bool "Record timestamps for overhead measurements" ++ depends on FEATHER_TRACE ++ default n ++ help ++ Export event stream for overhead tracing. ++ Say Yes for overhead tracing. ++ ++config SCHED_DEBUG_TRACE ++ bool "TRACE() debugging" ++ default y ++ help ++ Include support for sched_trace_log_messageg(), which is used to ++ implement TRACE(). If disabled, no TRACE() messages will be included ++ in the kernel, and no overheads due to debugging statements will be ++ incurred by the scheduler. Disable if the overhead is not acceptable ++ (e.g. benchmarking). ++ ++ Say Yes for debugging. ++ Say No for overhead tracing. 
++ ++endmenu ++ ++endmenu +diff --git a/litmus/Makefile b/litmus/Makefile +new file mode 100644 +index 0000000..f301d28 +--- /dev/null ++++ b/litmus/Makefile +@@ -0,0 +1,25 @@ ++# ++# Makefile for LITMUS^RT ++# ++ ++obj-y = sched_plugin.o litmus.o \ ++ budget.o \ ++ jobs.o \ ++ sync.o \ ++ rt_domain.o \ ++ edf_common.o \ ++ fdso.o \ ++ srp.o \ ++ fmlp.o \ ++ bheap.o \ ++ ctrldev.o \ ++ sched_gsn_edf.o \ ++ sched_psn_edf.o ++ ++obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o ++obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o ++ ++obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o ++obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o ++obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o ++obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o +diff --git a/litmus/bheap.c b/litmus/bheap.c +new file mode 100644 +index 0000000..528af97 +--- /dev/null ++++ b/litmus/bheap.c +@@ -0,0 +1,314 @@ ++#include "linux/kernel.h" ++#include "litmus/bheap.h" ++ ++void bheap_init(struct bheap* heap) ++{ ++ heap->head = NULL; ++ heap->min = NULL; ++} ++ ++void bheap_node_init(struct bheap_node** _h, void* value) ++{ ++ struct bheap_node* h = *_h; ++ h->parent = NULL; ++ h->next = NULL; ++ h->child = NULL; ++ h->degree = NOT_IN_HEAP; ++ h->value = value; ++ h->ref = _h; ++} ++ ++ ++/* make child a subtree of root */ ++static void __bheap_link(struct bheap_node* root, ++ struct bheap_node* child) ++{ ++ child->parent = root; ++ child->next = root->child; ++ root->child = child; ++ root->degree++; ++} ++ ++/* merge root lists */ ++static struct bheap_node* __bheap_merge(struct bheap_node* a, ++ struct bheap_node* b) ++{ ++ struct bheap_node* head = NULL; ++ struct bheap_node** pos = &head; ++ ++ while (a && b) { ++ if (a->degree < b->degree) { ++ *pos = a; ++ a = a->next; ++ } else { ++ *pos = b; ++ b = b->next; ++ } ++ pos = &(*pos)->next; ++ } ++ if (a) ++ *pos = a; ++ else ++ *pos = b; ++ return head; ++} ++ ++/* reverse a linked list of nodes. 
also clears parent pointer */ ++static struct bheap_node* __bheap_reverse(struct bheap_node* h) ++{ ++ struct bheap_node* tail = NULL; ++ struct bheap_node* next; ++ ++ if (!h) ++ return h; ++ ++ h->parent = NULL; ++ while (h->next) { ++ next = h->next; ++ h->next = tail; ++ tail = h; ++ h = next; ++ h->parent = NULL; ++ } ++ h->next = tail; ++ return h; ++} ++ ++static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap, ++ struct bheap_node** prev, struct bheap_node** node) ++{ ++ struct bheap_node *_prev, *cur; ++ *prev = NULL; ++ ++ if (!heap->head) { ++ *node = NULL; ++ return; ++ } ++ ++ *node = heap->head; ++ _prev = heap->head; ++ cur = heap->head->next; ++ while (cur) { ++ if (higher_prio(cur, *node)) { ++ *node = cur; ++ *prev = _prev; ++ } ++ _prev = cur; ++ cur = cur->next; ++ } ++} ++ ++static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap, ++ struct bheap_node* h2) ++{ ++ struct bheap_node* h1; ++ struct bheap_node *prev, *x, *next; ++ if (!h2) ++ return; ++ h1 = heap->head; ++ if (!h1) { ++ heap->head = h2; ++ return; ++ } ++ h1 = __bheap_merge(h1, h2); ++ prev = NULL; ++ x = h1; ++ next = x->next; ++ while (next) { ++ if (x->degree != next->degree || ++ (next->next && next->next->degree == x->degree)) { ++ /* nothing to do, advance */ ++ prev = x; ++ x = next; ++ } else if (higher_prio(x, next)) { ++ /* x becomes the root of next */ ++ x->next = next->next; ++ __bheap_link(x, next); ++ } else { ++ /* next becomes the root of x */ ++ if (prev) ++ prev->next = next; ++ else ++ h1 = next; ++ __bheap_link(next, x); ++ x = next; ++ } ++ next = x->next; ++ } ++ heap->head = h1; ++} ++ ++static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio, ++ struct bheap* heap) ++{ ++ struct bheap_node *prev, *node; ++ __bheap_min(higher_prio, heap, &prev, &node); ++ if (!node) ++ return NULL; ++ if (prev) ++ prev->next = node->next; ++ else ++ heap->head = node->next; ++ __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); ++ return node; ++} ++ ++/* insert (and reinitialize) a node into the heap */ ++void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap, ++ struct bheap_node* node) ++{ ++ struct bheap_node *min; ++ node->child = NULL; ++ node->parent = NULL; ++ node->next = NULL; ++ node->degree = 0; ++ if (heap->min && higher_prio(node, heap->min)) { ++ /* swap min cache */ ++ min = heap->min; ++ min->child = NULL; ++ min->parent = NULL; ++ min->next = NULL; ++ min->degree = 0; ++ __bheap_union(higher_prio, heap, min); ++ heap->min = node; ++ } else ++ __bheap_union(higher_prio, heap, node); ++} ++ ++void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap) ++{ ++ struct bheap_node* min; ++ if (heap->min) { ++ min = heap->min; ++ heap->min = NULL; ++ bheap_insert(higher_prio, heap, min); ++ } ++} ++ ++/* merge addition into target */ ++void bheap_union(bheap_prio_t higher_prio, ++ struct bheap* target, struct bheap* addition) ++{ ++ /* first insert any cached minima, if necessary */ ++ bheap_uncache_min(higher_prio, target); ++ bheap_uncache_min(higher_prio, addition); ++ __bheap_union(higher_prio, target, addition->head); ++ /* this is a destructive merge */ ++ addition->head = NULL; ++} ++ ++struct bheap_node* bheap_peek(bheap_prio_t higher_prio, ++ struct bheap* heap) ++{ ++ if (!heap->min) ++ heap->min = __bheap_extract_min(higher_prio, heap); ++ return heap->min; ++} ++ ++struct bheap_node* bheap_take(bheap_prio_t higher_prio, ++ struct bheap* heap) ++{ ++ struct bheap_node *node; ++ if (!heap->min) ++ heap->min 
= __bheap_extract_min(higher_prio, heap); ++ node = heap->min; ++ heap->min = NULL; ++ if (node) ++ node->degree = NOT_IN_HEAP; ++ return node; ++} ++ ++int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node) ++{ ++ struct bheap_node *parent; ++ struct bheap_node** tmp_ref; ++ void* tmp; ++ ++ /* bubble up */ ++ parent = node->parent; ++ while (parent && higher_prio(node, parent)) { ++ /* swap parent and node */ ++ tmp = parent->value; ++ parent->value = node->value; ++ node->value = tmp; ++ /* swap references */ ++ *(parent->ref) = node; ++ *(node->ref) = parent; ++ tmp_ref = parent->ref; ++ parent->ref = node->ref; ++ node->ref = tmp_ref; ++ /* step up */ ++ node = parent; ++ parent = node->parent; ++ } ++ ++ return parent != NULL; ++} ++ ++void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, ++ struct bheap_node* node) ++{ ++ struct bheap_node *parent, *prev, *pos; ++ struct bheap_node** tmp_ref; ++ void* tmp; ++ ++ if (heap->min != node) { ++ /* bubble up */ ++ parent = node->parent; ++ while (parent) { ++ /* swap parent and node */ ++ tmp = parent->value; ++ parent->value = node->value; ++ node->value = tmp; ++ /* swap references */ ++ *(parent->ref) = node; ++ *(node->ref) = parent; ++ tmp_ref = parent->ref; ++ parent->ref = node->ref; ++ node->ref = tmp_ref; ++ /* step up */ ++ node = parent; ++ parent = node->parent; ++ } ++ /* now delete: ++ * first find prev */ ++ prev = NULL; ++ pos = heap->head; ++ while (pos != node) { ++ prev = pos; ++ pos = pos->next; ++ } ++ /* we have prev, now remove node */ ++ if (prev) ++ prev->next = node->next; ++ else ++ heap->head = node->next; ++ __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); ++ } else ++ heap->min = NULL; ++ node->degree = NOT_IN_HEAP; ++} ++ ++/* allocate a heap node for value and insert into the heap */ ++int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, ++ void* value, int gfp_flags) ++{ ++ struct bheap_node* hn = bheap_node_alloc(gfp_flags); ++ if (likely(hn)) { ++ bheap_node_init(&hn, value); ++ bheap_insert(higher_prio, heap, hn); ++ } ++ return hn != NULL; ++} ++ ++void* bheap_take_del(bheap_prio_t higher_prio, ++ struct bheap* heap) ++{ ++ struct bheap_node* hn = bheap_take(higher_prio, heap); ++ void* ret = NULL; ++ if (hn) { ++ ret = hn->value; ++ bheap_node_free(hn); ++ } ++ return ret; ++} +diff --git a/litmus/budget.c b/litmus/budget.c +new file mode 100644 +index 0000000..b99177a +--- /dev/null ++++ b/litmus/budget.c +@@ -0,0 +1,109 @@ ++#include ++#include ++ ++#include ++ ++struct enforcement_timer { ++ /* The enforcement timer is used to accurately police ++ * slice budgets. */ ++ struct hrtimer timer; ++ int armed; ++}; ++ ++DEFINE_PER_CPU(struct enforcement_timer, budget_timer); ++ ++static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer) ++{ ++ struct enforcement_timer* et = container_of(timer, ++ struct enforcement_timer, ++ timer); ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ TRACE("enforcement timer fired.\n"); ++ et->armed = 0; ++ /* activate scheduler */ ++ set_tsk_need_resched(current); ++ local_irq_restore(flags); ++ ++ return HRTIMER_NORESTART; ++} ++ ++/* assumes called with IRQs off */ ++static void cancel_enforcement_timer(struct enforcement_timer* et) ++{ ++ int ret; ++ ++ TRACE("cancelling enforcement timer.\n"); ++ ++ /* Since interrupts are disabled and et->armed is only ++ * modified locally, we do not need any locks. 
++ */ ++ ++ if (et->armed) { ++ ret = hrtimer_try_to_cancel(&et->timer); ++ /* Should never be inactive. */ ++ BUG_ON(ret == 0); ++ /* Should never be running concurrently. */ ++ BUG_ON(ret == -1); ++ ++ et->armed = 0; ++ } ++} ++ ++/* assumes called with IRQs off */ ++static void arm_enforcement_timer(struct enforcement_timer* et, ++ struct task_struct* t) ++{ ++ lt_t when_to_fire; ++ TRACE_TASK(t, "arming enforcement timer.\n"); ++ ++ /* Calling this when there is no budget left for the task ++ * makes no sense, unless the task is non-preemptive. */ ++ BUG_ON(budget_exhausted(t) && (!is_np(t))); ++ ++ /* __hrtimer_start_range_ns() cancels the timer ++ * anyway, so we don't have to check whether it is still armed */ ++ ++ if (likely(!is_np(t))) { ++ when_to_fire = litmus_clock() + budget_remaining(t); ++ __hrtimer_start_range_ns(&et->timer, ++ ns_to_ktime(when_to_fire), ++ 0 /* delta */, ++ HRTIMER_MODE_ABS_PINNED, ++ 0 /* no wakeup */); ++ et->armed = 1; ++ } ++} ++ ++ ++/* expects to be called with IRQs off */ ++void update_enforcement_timer(struct task_struct* t) ++{ ++ struct enforcement_timer* et = &__get_cpu_var(budget_timer); ++ ++ if (t && budget_precisely_enforced(t)) { ++ /* Make sure we call into the scheduler when this budget ++ * expires. */ ++ arm_enforcement_timer(et, t); ++ } else if (et->armed) { ++ /* Make sure we don't cause unnecessary interrupts. */ ++ cancel_enforcement_timer(et); ++ } ++} ++ ++ ++static int __init init_budget_enforcement(void) ++{ ++ int cpu; ++ struct enforcement_timer* et; ++ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ et = &per_cpu(budget_timer, cpu); ++ hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ++ et->timer.function = on_enforcement_timeout; ++ } ++ return 0; ++} ++ ++module_init(init_budget_enforcement); +diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c +new file mode 100644 +index 0000000..6677a67 +--- /dev/null ++++ b/litmus/ctrldev.c +@@ -0,0 +1,150 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* only one page for now, but we might want to add a RO version at some point */ ++ ++#define CTRL_NAME "litmus/ctrl" ++ ++/* allocate t->rt_param.ctrl_page*/ ++static int alloc_ctrl_page(struct task_struct *t) ++{ ++ int err = 0; ++ ++ /* only allocate if the task doesn't have one yet */ ++ if (!tsk_rt(t)->ctrl_page) { ++ tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL); ++ if (!tsk_rt(t)->ctrl_page) ++ err = -ENOMEM; ++ /* will get de-allocated in task teardown */ ++ TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__, ++ tsk_rt(t)->ctrl_page); ++ } ++ return err; ++} ++ ++static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma) ++{ ++ int err; ++ unsigned long pfn; ++ ++ struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page); ++ ++ /* Increase ref count. Is decreased when vma is destroyed. */ ++ get_page(ctrl); ++ ++ /* compute page frame number */ ++ pfn = page_to_pfn(ctrl); ++ ++ TRACE_CUR(CTRL_NAME ++ ": mapping %p (pfn:%lx, %lx) to 0x%lx (prot:%lx)\n", ++ tsk_rt(t)->ctrl_page, pfn, page_to_pfn(ctrl), vma->vm_start, ++ vma->vm_page_prot); ++ ++ /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise ++ * userspace actually gets a copy-on-write page. 
*/ ++ err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED); ++ ++ if (err) ++ TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err); ++ ++ return err; ++} ++ ++static void litmus_ctrl_vm_close(struct vm_area_struct* vma) ++{ ++ TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__, ++ vma->vm_flags, vma->vm_page_prot); ++ ++ TRACE_CUR(CTRL_NAME ++ ": %p:%p vma:%p vma->vm_private_data:%p closed.\n", ++ (void*) vma->vm_start, (void*) vma->vm_end, vma, ++ vma->vm_private_data, current->comm, ++ current->pid); ++} ++ ++static int litmus_ctrl_vm_fault(struct vm_area_struct* vma, ++ struct vm_fault* vmf) ++{ ++ /* This function should never be called, since ++ * all pages should have been mapped by mmap() ++ * already. */ ++ TRACE_CUR("%s flags=0x%x\n", __FUNCTION__, vma->vm_flags); ++ ++ /* nope, you only get one page */ ++ return VM_FAULT_SIGBUS; ++} ++ ++static struct vm_operations_struct litmus_ctrl_vm_ops = { ++ .close = litmus_ctrl_vm_close, ++ .fault = litmus_ctrl_vm_fault, ++}; ++ ++static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma) ++{ ++ int err = 0; ++ ++ /* first make sure mapper knows what he's doing */ ++ ++ /* you can only get one page */ ++ if (vma->vm_end - vma->vm_start != PAGE_SIZE) ++ return -EINVAL; ++ ++ /* you can only map the "first" page */ ++ if (vma->vm_pgoff != 0) ++ return -EINVAL; ++ ++ /* you can't share it with anyone */ ++ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) ++ return -EINVAL; ++ ++ vma->vm_ops = &litmus_ctrl_vm_ops; ++ /* this mapping should not be kept across forks, ++ * and cannot be expanded */ ++ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; ++ ++ err = alloc_ctrl_page(current); ++ if (!err) ++ err = map_ctrl_page(current, vma); ++ ++ TRACE_CUR("%s flags=0x%x prot=0x%lx\n", ++ __FUNCTION__, vma->vm_flags, vma->vm_page_prot); ++ ++ return err; ++} ++ ++static struct file_operations litmus_ctrl_fops = { ++ .owner = THIS_MODULE, ++ .mmap = litmus_ctrl_mmap, ++}; ++ ++static struct miscdevice litmus_ctrl_dev = { ++ .name = CTRL_NAME, ++ .minor = MISC_DYNAMIC_MINOR, ++ .fops = &litmus_ctrl_fops, ++}; ++ ++static int __init init_litmus_ctrl_dev(void) ++{ ++ int err; ++ ++ BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE); ++ ++ printk("Initializing LITMUS^RT control device.\n"); ++ err = misc_register(&litmus_ctrl_dev); ++ if (err) ++ printk("Could not allocate %s device (%d).\n", CTRL_NAME, err); ++ return err; ++} ++ ++static void __exit exit_litmus_ctrl_dev(void) ++{ ++ misc_deregister(&litmus_ctrl_dev); ++} ++ ++module_init(init_litmus_ctrl_dev); ++module_exit(exit_litmus_ctrl_dev); +diff --git a/litmus/edf_common.c b/litmus/edf_common.c +new file mode 100644 +index 0000000..06daec6 +--- /dev/null ++++ b/litmus/edf_common.c +@@ -0,0 +1,102 @@ ++/* ++ * kernel/edf_common.c ++ * ++ * Common functions for EDF based scheduler. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++ ++/* edf_higher_prio - returns true if first has a higher EDF priority ++ * than second. Deadline ties are broken by PID. ++ * ++ * both first and second may be NULL ++ */ ++int edf_higher_prio(struct task_struct* first, ++ struct task_struct* second) ++{ ++ struct task_struct *first_task = first; ++ struct task_struct *second_task = second; ++ ++ /* There is no point in comparing a task to itself. */ ++ if (first && first == second) { ++ TRACE_TASK(first, ++ "WARNING: pointless edf priority comparison.\n"); ++ return 0; ++ } ++ ++ ++ /* Check for inherited priorities. 
Change task ++ * used for comparison in such a case. ++ */ ++ if (first && first->rt_param.inh_task) ++ first_task = first->rt_param.inh_task; ++ if (second && second->rt_param.inh_task) ++ second_task = second->rt_param.inh_task; ++ ++ return ++ /* it has to exist in order to have higher priority */ ++ first_task && ( ++ /* does the second task exist and is it a real-time task? If ++ * not, the first task (which is a RT task) has higher ++ * priority. ++ */ ++ !second_task || !is_realtime(second_task) || ++ ++ /* is the deadline of the first task earlier? ++ * Then it has higher priority. ++ */ ++ earlier_deadline(first_task, second_task) || ++ ++ /* Do we have a deadline tie? ++ * Then break by PID. ++ */ ++ (get_deadline(first_task) == get_deadline(second_task) && ++ (first_task->pid < second_task->pid || ++ ++ /* If the PIDs are the same then the task with the inherited ++ * priority wins. ++ */ ++ (first_task->pid == second_task->pid && ++ !second->rt_param.inh_task)))); ++} ++ ++int edf_ready_order(struct bheap_node* a, struct bheap_node* b) ++{ ++ return edf_higher_prio(bheap2task(a), bheap2task(b)); ++} ++ ++void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, ++ release_jobs_t release) ++{ ++ rt_domain_init(rt, edf_ready_order, resched, release); ++} ++ ++/* need_to_preempt - check whether the task t needs to be preempted ++ * call only with irqs disabled and with ready_lock acquired ++ * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! ++ */ ++int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t) ++{ ++ /* we need the read lock for edf_ready_queue */ ++ /* no need to preempt if there is nothing pending */ ++ if (!__jobs_pending(rt)) ++ return 0; ++ /* we need to reschedule if t doesn't exist */ ++ if (!t) ++ return 1; ++ ++ /* NOTE: We cannot check for non-preemptibility since we ++ * don't know what address space we're currently in. ++ */ ++ ++ /* make sure to get non-rt stuff out of the way */ ++ return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t); ++} +diff --git a/litmus/fdso.c b/litmus/fdso.c +new file mode 100644 +index 0000000..85be716 +--- /dev/null ++++ b/litmus/fdso.c +@@ -0,0 +1,281 @@ ++/* fdso.c - file descriptor attached shared objects ++ * ++ * (c) 2007 B. Brandenburg, LITMUS^RT project ++ * ++ * Notes: ++ * - objects descriptor (OD) tables are not cloned during a fork. ++ * - objects are created on-demand, and freed after the last reference ++ * is dropped. ++ * - for now, object types are hard coded. ++ * - As long as we have live objects, we keep a reference to the inode. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++extern struct fdso_ops fmlp_sem_ops; ++extern struct fdso_ops srp_sem_ops; ++ ++static const struct fdso_ops* fdso_ops[] = { ++ &fmlp_sem_ops, ++ &srp_sem_ops, ++}; ++ ++static void* fdso_create(obj_type_t type) ++{ ++ if (fdso_ops[type]->create) ++ return fdso_ops[type]->create(); ++ else ++ return NULL; ++} ++ ++static void fdso_destroy(obj_type_t type, void* obj) ++{ ++ fdso_ops[type]->destroy(obj); ++} ++ ++static int fdso_open(struct od_table_entry* entry, void* __user config) ++{ ++ if (fdso_ops[entry->obj->type]->open) ++ return fdso_ops[entry->obj->type]->open(entry, config); ++ else ++ return 0; ++} ++ ++static int fdso_close(struct od_table_entry* entry) ++{ ++ if (fdso_ops[entry->obj->type]->close) ++ return fdso_ops[entry->obj->type]->close(entry); ++ else ++ return 0; ++} ++ ++/* inode must be locked already */ ++static struct inode_obj_id* alloc_inode_obj(struct inode* inode, ++ obj_type_t type, ++ unsigned int id) ++{ ++ struct inode_obj_id* obj; ++ void* raw_obj; ++ ++ raw_obj = fdso_create(type); ++ if (!raw_obj) ++ return NULL; ++ ++ obj = kmalloc(sizeof(*obj), GFP_KERNEL); ++ if (!obj) ++ return NULL; ++ INIT_LIST_HEAD(&obj->list); ++ atomic_set(&obj->count, 1); ++ obj->type = type; ++ obj->id = id; ++ obj->obj = raw_obj; ++ obj->inode = inode; ++ ++ list_add(&obj->list, &inode->i_obj_list); ++ atomic_inc(&inode->i_count); ++ ++ printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id); ++ return obj; ++} ++ ++/* inode must be locked already */ ++static struct inode_obj_id* get_inode_obj(struct inode* inode, ++ obj_type_t type, ++ unsigned int id) ++{ ++ struct list_head* pos; ++ struct inode_obj_id* obj = NULL; ++ ++ list_for_each(pos, &inode->i_obj_list) { ++ obj = list_entry(pos, struct inode_obj_id, list); ++ if (obj->id == id && obj->type == type) { ++ atomic_inc(&obj->count); ++ return obj; ++ } ++ } ++ printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id); ++ return NULL; ++} ++ ++ ++static void put_inode_obj(struct inode_obj_id* obj) ++{ ++ struct inode* inode; ++ int let_go = 0; ++ ++ inode = obj->inode; ++ if (atomic_dec_and_test(&obj->count)) { ++ ++ mutex_lock(&inode->i_obj_mutex); ++ /* no new references can be obtained */ ++ if (!atomic_read(&obj->count)) { ++ list_del(&obj->list); ++ fdso_destroy(obj->type, obj->obj); ++ kfree(obj); ++ let_go = 1; ++ } ++ mutex_unlock(&inode->i_obj_mutex); ++ if (let_go) ++ iput(inode); ++ } ++} ++ ++static struct od_table_entry* get_od_entry(struct task_struct* t) ++{ ++ struct od_table_entry* table; ++ int i; ++ ++ ++ table = t->od_table; ++ if (!table) { ++ table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS, ++ GFP_KERNEL); ++ t->od_table = table; ++ } ++ ++ for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++) ++ if (!table[i].used) { ++ table[i].used = 1; ++ return table + i; ++ } ++ return NULL; ++} ++ ++static int put_od_entry(struct od_table_entry* od) ++{ ++ put_inode_obj(od->obj); ++ od->used = 0; ++ return 0; ++} ++ ++void exit_od_table(struct task_struct* t) ++{ ++ int i; ++ ++ if (t->od_table) { ++ for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) ++ if (t->od_table[i].used) ++ put_od_entry(t->od_table + i); ++ kfree(t->od_table); ++ t->od_table = NULL; ++ } ++} ++ ++static int do_sys_od_open(struct file* file, obj_type_t type, int id, ++ void* __user config) ++{ ++ int idx = 0, err; ++ struct inode* inode; ++ struct inode_obj_id* obj = NULL; ++ struct od_table_entry* 
entry; ++ ++ inode = file->f_dentry->d_inode; ++ ++ entry = get_od_entry(current); ++ if (!entry) ++ return -ENOMEM; ++ ++ mutex_lock(&inode->i_obj_mutex); ++ obj = get_inode_obj(inode, type, id); ++ if (!obj) ++ obj = alloc_inode_obj(inode, type, id); ++ if (!obj) { ++ idx = -ENOMEM; ++ entry->used = 0; ++ } else { ++ entry->obj = obj; ++ entry->extra = NULL; ++ idx = entry - current->od_table; ++ } ++ ++ mutex_unlock(&inode->i_obj_mutex); ++ ++ err = fdso_open(entry, config); ++ if (err < 0) { ++ /* The class rejected the open call. ++ * We need to clean up and tell user space. ++ */ ++ put_od_entry(entry); ++ idx = err; ++ } ++ ++ return idx; ++} ++ ++ ++struct od_table_entry* __od_lookup(int od) ++{ ++ struct task_struct *t = current; ++ ++ if (!t->od_table) ++ return NULL; ++ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) ++ return NULL; ++ if (!t->od_table[od].used) ++ return NULL; ++ return t->od_table + od; ++} ++ ++ ++asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config) ++{ ++ int ret = 0; ++ struct file* file; ++ ++ /* ++ 1) get file from fd, get inode from file ++ 2) lock inode ++ 3) try to lookup object ++ 4) if not present create and enqueue object, inc inode refcnt ++ 5) increment refcnt of object ++ 6) alloc od_table_entry, setup ptrs ++ 7) unlock inode ++ 8) return offset in od_table as OD ++ */ ++ ++ if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ file = fget(fd); ++ if (!file) { ++ ret = -EBADF; ++ goto out; ++ } ++ ++ ret = do_sys_od_open(file, type, obj_id, config); ++ ++ fput(file); ++ ++out: ++ return ret; ++} ++ ++ ++asmlinkage long sys_od_close(int od) ++{ ++ int ret = -EINVAL; ++ struct task_struct *t = current; ++ ++ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) ++ return ret; ++ ++ if (!t->od_table || !t->od_table[od].used) ++ return ret; ++ ++ ++ /* give the class a chance to reject the close ++ */ ++ ret = fdso_close(t->od_table + od); ++ if (ret == 0) ++ ret = put_od_entry(t->od_table + od); ++ ++ return ret; ++} +diff --git a/litmus/fmlp.c b/litmus/fmlp.c +new file mode 100644 +index 0000000..03fa735 +--- /dev/null ++++ b/litmus/fmlp.c +@@ -0,0 +1,268 @@ ++/* ++ * FMLP implementation. 
++ * Much of the code here is borrowed from include/asm-i386/semaphore.h ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++#ifdef CONFIG_FMLP ++ ++static void* create_fmlp_semaphore(void) ++{ ++ struct pi_semaphore* sem; ++ int i; ++ ++ sem = kmalloc(sizeof(*sem), GFP_KERNEL); ++ if (!sem) ++ return NULL; ++ atomic_set(&sem->count, 1); ++ sem->sleepers = 0; ++ init_waitqueue_head(&sem->wait); ++ sem->hp.task = NULL; ++ sem->holder = NULL; ++ for (i = 0; i < NR_CPUS; i++) ++ sem->hp.cpu_task[i] = NULL; ++ return sem; ++} ++ ++static int open_fmlp_semaphore(struct od_table_entry* entry, void* __user arg) ++{ ++ if (!fmlp_active()) ++ return -EBUSY; ++ return 0; ++} ++ ++static void destroy_fmlp_semaphore(void* sem) ++{ ++ /* XXX assert invariants */ ++ kfree(sem); ++} ++ ++struct fdso_ops fmlp_sem_ops = { ++ .create = create_fmlp_semaphore, ++ .open = open_fmlp_semaphore, ++ .destroy = destroy_fmlp_semaphore ++}; ++ ++struct wq_pair { ++ struct task_struct* tsk; ++ struct pi_semaphore* sem; ++}; ++ ++static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync, ++ void *key) ++{ ++ struct wq_pair* wqp = (struct wq_pair*) wait->private; ++ set_rt_flags(wqp->tsk, RT_F_EXIT_SEM); ++ litmus->inherit_priority(wqp->sem, wqp->tsk); ++ TRACE_TASK(wqp->tsk, ++ "woken up by rt_pi_wake_up() (RT_F_SEM_EXIT, PI)\n"); ++ /* point to task for default_wake_function() */ ++ wait->private = wqp->tsk; ++ default_wake_function(wait, mode, sync, key); ++ ++ /* Always return true since we know that if we encountered a task ++ * that was already running the wake_up raced with the schedule in ++ * rt_pi_down(). In that case the task in rt_pi_down() will be scheduled ++ * immediately and own the lock. We must not wake up another task in ++ * any case. ++ */ ++ return 1; ++} ++ ++/* caller is responsible for locking */ ++int edf_set_hp_task(struct pi_semaphore *sem) ++{ ++ struct list_head *tmp, *next; ++ struct task_struct *queued; ++ int ret = 0; ++ ++ sem->hp.task = NULL; ++ list_for_each_safe(tmp, next, &sem->wait.task_list) { ++ queued = ((struct wq_pair*) ++ list_entry(tmp, wait_queue_t, ++ task_list)->private)->tsk; ++ ++ /* Compare task prios, find high prio task. */ ++ if (edf_higher_prio(queued, sem->hp.task)) { ++ sem->hp.task = queued; ++ ret = 1; ++ } ++ } ++ return ret; ++} ++ ++/* caller is responsible for locking */ ++int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu) ++{ ++ struct list_head *tmp, *next; ++ struct task_struct *queued; ++ int ret = 0; ++ ++ sem->hp.cpu_task[cpu] = NULL; ++ list_for_each_safe(tmp, next, &sem->wait.task_list) { ++ queued = ((struct wq_pair*) ++ list_entry(tmp, wait_queue_t, ++ task_list)->private)->tsk; ++ ++ /* Compare task prios, find high prio task. 
*/ ++ if (get_partition(queued) == cpu && ++ edf_higher_prio(queued, sem->hp.cpu_task[cpu])) { ++ sem->hp.cpu_task[cpu] = queued; ++ ret = 1; ++ } ++ } ++ return ret; ++} ++ ++static int do_fmlp_down(struct pi_semaphore* sem) ++{ ++ unsigned long flags; ++ struct task_struct *tsk = current; ++ struct wq_pair pair; ++ int suspended = 1; ++ wait_queue_t wait = { ++ .private = &pair, ++ .func = rt_pi_wake_up, ++ .task_list = {NULL, NULL} ++ }; ++ ++ pair.tsk = tsk; ++ pair.sem = sem; ++ spin_lock_irqsave(&sem->wait.lock, flags); ++ ++ if (atomic_dec_return(&sem->count) < 0 || ++ waitqueue_active(&sem->wait)) { ++ /* we need to suspend */ ++ tsk->state = TASK_UNINTERRUPTIBLE; ++ add_wait_queue_exclusive_locked(&sem->wait, &wait); ++ ++ TRACE_CUR("suspends on PI lock %p\n", sem); ++ litmus->pi_block(sem, tsk); ++ ++ /* release lock before sleeping */ ++ spin_unlock_irqrestore(&sem->wait.lock, flags); ++ ++ TS_PI_DOWN_END; ++ preempt_enable_no_resched(); ++ ++ ++ /* we depend on the FIFO order ++ * Thus, we don't need to recheck when we wake up, we ++ * are guaranteed to have the lock since there is only one ++ * wake up per release ++ */ ++ schedule(); ++ ++ TRACE_CUR("woke up, now owns PI lock %p\n", sem); ++ ++ /* try_to_wake_up() set our state to TASK_RUNNING, ++ * all we need to do is to remove our wait queue entry ++ */ ++ remove_wait_queue(&sem->wait, &wait); ++ } else { ++ /* no priority inheritance necessary, since there are no queued ++ * tasks. ++ */ ++ suspended = 0; ++ TRACE_CUR("acquired PI lock %p, no contention\n", sem); ++ sem->holder = tsk; ++ ++ /* don't know if we're global or partitioned. */ ++ sem->hp.task = tsk; ++ sem->hp.cpu_task[get_partition(tsk)] = tsk; ++ ++ litmus->inherit_priority(sem, tsk); ++ spin_unlock_irqrestore(&sem->wait.lock, flags); ++ } ++ return suspended; ++} ++ ++static void do_fmlp_up(struct pi_semaphore* sem) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&sem->wait.lock, flags); ++ ++ TRACE_CUR("releases PI lock %p\n", sem); ++ litmus->return_priority(sem); ++ sem->holder = NULL; ++ if (atomic_inc_return(&sem->count) < 1) ++ /* there is a task queued */ ++ wake_up_locked(&sem->wait); ++ ++ spin_unlock_irqrestore(&sem->wait.lock, flags); ++} ++ ++asmlinkage long sys_fmlp_down(int sem_od) ++{ ++ long ret = 0; ++ struct pi_semaphore * sem; ++ int suspended = 0; ++ ++ preempt_disable(); ++ TS_PI_DOWN_START; ++ ++ sem = lookup_fmlp_sem(sem_od); ++ if (sem) ++ suspended = do_fmlp_down(sem); ++ else ++ ret = -EINVAL; ++ ++ if (!suspended) { ++ TS_PI_DOWN_END; ++ preempt_enable(); ++ } ++ ++ return ret; ++} ++ ++asmlinkage long sys_fmlp_up(int sem_od) ++{ ++ long ret = 0; ++ struct pi_semaphore * sem; ++ ++ preempt_disable(); ++ TS_PI_UP_START; ++ ++ sem = lookup_fmlp_sem(sem_od); ++ if (sem) ++ do_fmlp_up(sem); ++ else ++ ret = -EINVAL; ++ ++ ++ TS_PI_UP_END; ++ preempt_enable(); ++ ++ return ret; ++} ++ ++#else ++ ++struct fdso_ops fmlp_sem_ops = {}; ++ ++asmlinkage long sys_fmlp_down(int sem_od) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage long sys_fmlp_up(int sem_od) ++{ ++ return -ENOSYS; ++} ++ ++#endif +diff --git a/litmus/ft_event.c b/litmus/ft_event.c +new file mode 100644 +index 0000000..399a07b +--- /dev/null ++++ b/litmus/ft_event.c +@@ -0,0 +1,43 @@ ++#include ++ ++#include ++ ++#if !defined(CONFIG_ARCH_HAS_FEATHER_TRACE) || defined(CONFIG_DEBUG_RODATA) ++/* provide dummy implementation */ ++ ++int ft_events[MAX_EVENTS]; ++ ++int ft_enable_event(unsigned long id) ++{ ++ if (id < MAX_EVENTS) { ++ ft_events[id]++; ++ return 1; ++ } else ++ 
return 0; ++} ++ ++int ft_disable_event(unsigned long id) ++{ ++ if (id < MAX_EVENTS && ft_events[id]) { ++ ft_events[id]--; ++ return 1; ++ } else ++ return 0; ++} ++ ++int ft_disable_all_events(void) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_EVENTS; i++) ++ ft_events[i] = 0; ++ ++ return MAX_EVENTS; ++} ++ ++int ft_is_event_enabled(unsigned long id) ++{ ++ return id < MAX_EVENTS && ft_events[id]; ++} ++ ++#endif +diff --git a/litmus/ftdev.c b/litmus/ftdev.c +new file mode 100644 +index 0000000..51dafae +--- /dev/null ++++ b/litmus/ftdev.c +@@ -0,0 +1,360 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size) ++{ ++ struct ft_buffer* buf; ++ size_t total = (size + 1) * count; ++ char* mem; ++ int order = 0, pages = 1; ++ ++ buf = kmalloc(sizeof(*buf), GFP_KERNEL); ++ if (!buf) ++ return NULL; ++ ++ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0); ++ while (pages < total) { ++ order++; ++ pages *= 2; ++ } ++ ++ mem = (char*) __get_free_pages(GFP_KERNEL, order); ++ if (!mem) { ++ kfree(buf); ++ return NULL; ++ } ++ ++ if (!init_ft_buffer(buf, count, size, ++ mem + (count * size), /* markers at the end */ ++ mem)) { /* buffer objects */ ++ free_pages((unsigned long) mem, order); ++ kfree(buf); ++ return NULL; ++ } ++ return buf; ++} ++ ++void free_ft_buffer(struct ft_buffer* buf) ++{ ++ int order = 0, pages = 1; ++ size_t total; ++ ++ if (buf) { ++ total = (buf->slot_size + 1) * buf->slot_count; ++ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0); ++ while (pages < total) { ++ order++; ++ pages *= 2; ++ } ++ free_pages((unsigned long) buf->buffer_mem, order); ++ kfree(buf); ++ } ++} ++ ++struct ftdev_event { ++ int id; ++ struct ftdev_event* next; ++}; ++ ++static int activate(struct ftdev_event** chain, int id) ++{ ++ struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL); ++ if (ev) { ++ printk(KERN_INFO ++ "Enabling feather-trace event %d.\n", (int) id); ++ ft_enable_event(id); ++ ev->id = id; ++ ev->next = *chain; ++ *chain = ev; ++ } ++ return ev ? 
0 : -ENOMEM; ++} ++ ++static void deactivate(struct ftdev_event** chain, int id) ++{ ++ struct ftdev_event **cur = chain; ++ struct ftdev_event *nxt; ++ while (*cur) { ++ if ((*cur)->id == id) { ++ nxt = (*cur)->next; ++ kfree(*cur); ++ *cur = nxt; ++ printk(KERN_INFO ++ "Disabling feather-trace event %d.\n", (int) id); ++ ft_disable_event(id); ++ break; ++ } ++ cur = &(*cur)->next; ++ } ++} ++ ++static int ftdev_open(struct inode *in, struct file *filp) ++{ ++ struct ftdev* ftdev; ++ struct ftdev_minor* ftdm; ++ unsigned int buf_idx = iminor(in); ++ int err = 0; ++ ++ ftdev = container_of(in->i_cdev, struct ftdev, cdev); ++ ++ if (buf_idx >= ftdev->minor_cnt) { ++ err = -ENODEV; ++ goto out; ++ } ++ if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx))) ++ goto out; ++ ++ ftdm = ftdev->minor + buf_idx; ++ filp->private_data = ftdm; ++ ++ if (mutex_lock_interruptible(&ftdm->lock)) { ++ err = -ERESTARTSYS; ++ goto out; ++ } ++ ++ if (!ftdm->readers && ftdev->alloc) ++ err = ftdev->alloc(ftdev, buf_idx); ++ if (0 == err) ++ ftdm->readers++; ++ ++ mutex_unlock(&ftdm->lock); ++out: ++ return err; ++} ++ ++static int ftdev_release(struct inode *in, struct file *filp) ++{ ++ struct ftdev* ftdev; ++ struct ftdev_minor* ftdm; ++ unsigned int buf_idx = iminor(in); ++ int err = 0; ++ ++ ftdev = container_of(in->i_cdev, struct ftdev, cdev); ++ ++ if (buf_idx >= ftdev->minor_cnt) { ++ err = -ENODEV; ++ goto out; ++ } ++ ftdm = ftdev->minor + buf_idx; ++ ++ if (mutex_lock_interruptible(&ftdm->lock)) { ++ err = -ERESTARTSYS; ++ goto out; ++ } ++ ++ if (ftdm->readers == 1) { ++ while (ftdm->events) ++ deactivate(&ftdm->events, ftdm->events->id); ++ ++ /* wait for any pending events to complete */ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(HZ); ++ ++ printk(KERN_ALERT "Failed trace writes: %u\n", ++ ftdm->buf->failed_writes); ++ ++ if (ftdev->free) ++ ftdev->free(ftdev, buf_idx); ++ } ++ ++ ftdm->readers--; ++ mutex_unlock(&ftdm->lock); ++out: ++ return err; ++} ++ ++/* based on ft_buffer_read ++ * @returns < 0 : page fault ++ * = 0 : no data available ++ * = 1 : one slot copied ++ */ ++static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest) ++{ ++ unsigned int idx; ++ int err = 0; ++ if (buf->free_count != buf->slot_count) { ++ /* data available */ ++ idx = buf->read_idx % buf->slot_count; ++ if (buf->slots[idx] == SLOT_READY) { ++ err = copy_to_user(dest, ((char*) buf->buffer_mem) + ++ idx * buf->slot_size, ++ buf->slot_size); ++ if (err == 0) { ++ /* copy ok */ ++ buf->slots[idx] = SLOT_FREE; ++ buf->read_idx++; ++ fetch_and_inc(&buf->free_count); ++ err = 1; ++ } ++ } ++ } ++ return err; ++} ++ ++static ssize_t ftdev_read(struct file *filp, ++ char __user *to, size_t len, loff_t *f_pos) ++{ ++ /* we ignore f_pos, this is strictly sequential */ ++ ++ ssize_t err = 0; ++ size_t chunk; ++ int copied; ++ struct ftdev_minor* ftdm = filp->private_data; ++ ++ if (mutex_lock_interruptible(&ftdm->lock)) { ++ err = -ERESTARTSYS; ++ goto out; ++ } ++ ++ ++ chunk = ftdm->buf->slot_size; ++ while (len >= chunk) { ++ copied = ft_buffer_copy_to_user(ftdm->buf, to); ++ if (copied == 1) { ++ len -= chunk; ++ to += chunk; ++ err += chunk; ++ } else if (err == 0 && copied == 0 && ftdm->events) { ++ /* Only wait if there are any events enabled and only ++ * if we haven't copied some data yet. We cannot wait ++ * here with copied data because that data would get ++ * lost if the task is interrupted (e.g., killed). 
++ */ ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(50); ++ if (signal_pending(current)) { ++ if (err == 0) ++ /* nothing read yet, signal problem */ ++ err = -ERESTARTSYS; ++ break; ++ } ++ } else if (copied < 0) { ++ /* page fault */ ++ err = copied; ++ break; ++ } else ++ /* nothing left to get, return to user space */ ++ break; ++ } ++ mutex_unlock(&ftdm->lock); ++out: ++ return err; ++} ++ ++typedef uint32_t cmd_t; ++ ++static ssize_t ftdev_write(struct file *filp, const char __user *from, ++ size_t len, loff_t *f_pos) ++{ ++ struct ftdev_minor* ftdm = filp->private_data; ++ ssize_t err = -EINVAL; ++ cmd_t cmd; ++ cmd_t id; ++ ++ if (len % sizeof(cmd) || len < 2 * sizeof(cmd)) ++ goto out; ++ ++ if (copy_from_user(&cmd, from, sizeof(cmd))) { ++ err = -EFAULT; ++ goto out; ++ } ++ len -= sizeof(cmd); ++ from += sizeof(cmd); ++ ++ if (cmd != FTDEV_ENABLE_CMD && cmd != FTDEV_DISABLE_CMD) ++ goto out; ++ ++ if (mutex_lock_interruptible(&ftdm->lock)) { ++ err = -ERESTARTSYS; ++ goto out; ++ } ++ ++ err = sizeof(cmd); ++ while (len) { ++ if (copy_from_user(&id, from, sizeof(cmd))) { ++ err = -EFAULT; ++ goto out_unlock; ++ } ++ /* FIXME: check id against list of acceptable events */ ++ len -= sizeof(cmd); ++ from += sizeof(cmd); ++ if (cmd == FTDEV_DISABLE_CMD) ++ deactivate(&ftdm->events, id); ++ else if (activate(&ftdm->events, id) != 0) { ++ err = -ENOMEM; ++ goto out_unlock; ++ } ++ err += sizeof(cmd); ++ } ++ ++out_unlock: ++ mutex_unlock(&ftdm->lock); ++out: ++ return err; ++} ++ ++struct file_operations ftdev_fops = { ++ .owner = THIS_MODULE, ++ .open = ftdev_open, ++ .release = ftdev_release, ++ .write = ftdev_write, ++ .read = ftdev_read, ++}; ++ ++ ++void ftdev_init(struct ftdev* ftdev, struct module* owner) ++{ ++ int i; ++ cdev_init(&ftdev->cdev, &ftdev_fops); ++ ftdev->cdev.owner = owner; ++ ftdev->cdev.ops = &ftdev_fops; ++ ftdev->minor_cnt = 0; ++ for (i = 0; i < MAX_FTDEV_MINORS; i++) { ++ mutex_init(&ftdev->minor[i].lock); ++ ftdev->minor[i].readers = 0; ++ ftdev->minor[i].buf = NULL; ++ ftdev->minor[i].events = NULL; ++ } ++ ftdev->alloc = NULL; ++ ftdev->free = NULL; ++ ftdev->can_open = NULL; ++} ++ ++int register_ftdev(struct ftdev* ftdev, const char* name, int major) ++{ ++ dev_t trace_dev; ++ int error = 0; ++ ++ if(major) { ++ trace_dev = MKDEV(major, 0); ++ error = register_chrdev_region(trace_dev, ftdev->minor_cnt, ++ name); ++ } else { ++ error = alloc_chrdev_region(&trace_dev, 0, ftdev->minor_cnt, ++ name); ++ major = MAJOR(trace_dev); ++ } ++ if (error) ++ { ++ printk(KERN_WARNING "ftdev(%s): " ++ "Could not register major/minor number %d/%u\n", ++ name, major, ftdev->minor_cnt); ++ return error; ++ } ++ error = cdev_add(&ftdev->cdev, trace_dev, ftdev->minor_cnt); ++ if (error) { ++ printk(KERN_WARNING "ftdev(%s): " ++ "Could not add cdev for major/minor = %d/%u.\n", ++ name, major, ftdev->minor_cnt); ++ return error; ++ } ++ return error; ++} +diff --git a/litmus/jobs.c b/litmus/jobs.c +new file mode 100644 +index 0000000..36e3146 +--- /dev/null ++++ b/litmus/jobs.c +@@ -0,0 +1,43 @@ ++/* litmus/jobs.c - common job control code ++ */ ++ ++#include ++ ++#include ++#include ++ ++void prepare_for_next_period(struct task_struct *t) ++{ ++ BUG_ON(!t); ++ /* prepare next release */ ++ t->rt_param.job_params.release = t->rt_param.job_params.deadline; ++ t->rt_param.job_params.deadline += get_rt_period(t); ++ t->rt_param.job_params.exec_time = 0; ++ /* update job sequence number */ ++ t->rt_param.job_params.job_no++; ++ ++ /* don't confuse Linux */ ++ 
t->rt.time_slice = 1; ++} ++ ++void release_at(struct task_struct *t, lt_t start) ++{ ++ t->rt_param.job_params.deadline = start; ++ prepare_for_next_period(t); ++ set_rt_flags(t, RT_F_RUNNING); ++} ++ ++ ++/* ++ * Deactivate current task until the beginning of the next period. ++ */ ++long complete_job(void) ++{ ++ /* Mark that we do not excute anymore */ ++ set_rt_flags(current, RT_F_SLEEP); ++ /* call schedule, this will return when a new job arrives ++ * it also takes care of preparing for the next release ++ */ ++ schedule(); ++ return 0; ++} +diff --git a/litmus/litmus.c b/litmus/litmus.c +new file mode 100644 +index 0000000..b04a42b +--- /dev/null ++++ b/litmus/litmus.c +@@ -0,0 +1,799 @@ ++/* ++ * litmus.c -- Implementation of the LITMUS syscalls, ++ * the LITMUS intialization code, ++ * and the procfs interface.. ++ */ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++#include ++ ++/* Number of RT tasks that exist in the system */ ++atomic_t rt_task_count = ATOMIC_INIT(0); ++static DEFINE_RAW_SPINLOCK(task_transition_lock); ++/* synchronize plugin switching */ ++atomic_t cannot_use_plugin = ATOMIC_INIT(0); ++ ++/* Give log messages sequential IDs. */ ++atomic_t __log_seq_no = ATOMIC_INIT(0); ++ ++#ifdef CONFIG_RELEASE_MASTER ++/* current master CPU for handling timer IRQs */ ++atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); ++#endif ++ ++static struct kmem_cache * bheap_node_cache; ++extern struct kmem_cache * release_heap_cache; ++ ++struct bheap_node* bheap_node_alloc(int gfp_flags) ++{ ++ return kmem_cache_alloc(bheap_node_cache, gfp_flags); ++} ++ ++void bheap_node_free(struct bheap_node* hn) ++{ ++ kmem_cache_free(bheap_node_cache, hn); ++} ++ ++struct release_heap* release_heap_alloc(int gfp_flags); ++void release_heap_free(struct release_heap* rh); ++ ++/* ++ * sys_set_task_rt_param ++ * @pid: Pid of the task which scheduling parameters must be changed ++ * @param: New real-time extension parameters such as the execution cost and ++ * period ++ * Syscall for manipulating with task rt extension params ++ * Returns EFAULT if param is NULL. ++ * ESRCH if pid is not corrsponding ++ * to a valid task. ++ * EINVAL if either period or execution cost is <=0 ++ * EPERM if pid is a real-time task ++ * 0 if success ++ * ++ * Only non-real-time tasks may be configured with this system call ++ * to avoid races with the scheduler. In practice, this means that a ++ * task's parameters must be set _before_ calling sys_prepare_rt_task() ++ * ++ * find_task_by_vpid() assumes that we are in the same namespace of the ++ * target. ++ */ ++asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) ++{ ++ struct rt_task tp; ++ struct task_struct *target; ++ int retval = -EINVAL; ++ ++ printk("Setting up rt task parameters for process %d.\n", pid); ++ ++ if (pid < 0 || param == 0) { ++ goto out; ++ } ++ if (copy_from_user(&tp, param, sizeof(tp))) { ++ retval = -EFAULT; ++ goto out; ++ } ++ ++ /* Task search and manipulation must be protected */ ++ read_lock_irq(&tasklist_lock); ++ if (!(target = find_task_by_vpid(pid))) { ++ retval = -ESRCH; ++ goto out_unlock; ++ } ++ ++ if (is_realtime(target)) { ++ /* The task is already a real-time task. ++ * We cannot not allow parameter changes at this point. 
++ */ ++ retval = -EBUSY; ++ goto out_unlock; ++ } ++ ++ if (tp.exec_cost <= 0) ++ goto out_unlock; ++ if (tp.period <= 0) ++ goto out_unlock; ++ if (!cpu_online(tp.cpu)) ++ goto out_unlock; ++ if (tp.period < tp.exec_cost) ++ { ++ printk(KERN_INFO "litmus: real-time task %d rejected " ++ "because wcet > period\n", pid); ++ goto out_unlock; ++ } ++ if (tp.budget_policy != NO_ENFORCEMENT && ++ tp.budget_policy != QUANTUM_ENFORCEMENT && ++ tp.budget_policy != PRECISE_ENFORCEMENT) ++ { ++ printk(KERN_INFO "litmus: real-time task %d rejected " ++ "because unsupported budget enforcement policy " ++ "specified (%d)\n", ++ pid, tp.budget_policy); ++ goto out_unlock; ++ } ++ ++ target->rt_param.task_params = tp; ++ ++ retval = 0; ++ out_unlock: ++ read_unlock_irq(&tasklist_lock); ++ out: ++ return retval; ++} ++ ++/* ++ * Getter of task's RT params ++ * returns EINVAL if param or pid is NULL ++ * returns ESRCH if pid does not correspond to a valid task ++ * returns EFAULT if copying of parameters has failed. ++ * ++ * find_task_by_vpid() assumes that we are in the same namespace of the ++ * target. ++ */ ++asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) ++{ ++ int retval = -EINVAL; ++ struct task_struct *source; ++ struct rt_task lp; ++ if (param == 0 || pid < 0) ++ goto out; ++ read_lock(&tasklist_lock); ++ if (!(source = find_task_by_vpid(pid))) { ++ retval = -ESRCH; ++ goto out_unlock; ++ } ++ lp = source->rt_param.task_params; ++ read_unlock(&tasklist_lock); ++ /* Do copying outside the lock */ ++ retval = ++ copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0; ++ return retval; ++ out_unlock: ++ read_unlock(&tasklist_lock); ++ out: ++ return retval; ++ ++} ++ ++/* ++ * This is the crucial function for periodic task implementation, ++ * It checks if a task is periodic, checks if such kind of sleep ++ * is permitted and calls plugin-specific sleep, which puts the ++ * task into a wait array. ++ * returns 0 on successful wakeup ++ * returns EPERM if current conditions do not permit such sleep ++ * returns EINVAL if current task is not able to go to sleep ++ */ ++asmlinkage long sys_complete_job(void) ++{ ++ int retval = -EPERM; ++ if (!is_realtime(current)) { ++ retval = -EINVAL; ++ goto out; ++ } ++ /* Task with negative or zero period cannot sleep */ ++ if (get_rt_period(current) <= 0) { ++ retval = -EINVAL; ++ goto out; ++ } ++ /* The plugin has to put the task into an ++ * appropriate queue and call schedule ++ */ ++ retval = litmus->complete_job(); ++ out: ++ return retval; ++} ++ ++/* This is an "improved" version of sys_complete_job that ++ * addresses the problem of unintentionally missing a job after ++ * an overrun. ++ * ++ * returns 0 on successful wakeup ++ * returns EPERM if current conditions do not permit such sleep ++ * returns EINVAL if current task is not able to go to sleep ++ */ ++asmlinkage long sys_wait_for_job_release(unsigned int job) ++{ ++ int retval = -EPERM; ++ if (!is_realtime(current)) { ++ retval = -EINVAL; ++ goto out; ++ } ++ ++ /* Task with negative or zero period cannot sleep */ ++ if (get_rt_period(current) <= 0) { ++ retval = -EINVAL; ++ goto out; ++ } ++ ++ retval = 0; ++ ++ /* first wait until we have "reached" the desired job ++ * ++ * This implementation has at least two problems: ++ * ++ * 1) It doesn't gracefully handle the wrap around of ++ * job_no. Since LITMUS is a prototype, this is not much ++ * of a problem right now. 
++ * ++ * 2) It is theoretically racy if a job release occurs ++ * between checking job_no and calling sleep_next_period(). ++ * A proper solution would requiring adding another callback ++ * in the plugin structure and testing the condition with ++ * interrupts disabled. ++ * ++ * FIXME: At least problem 2 should be taken care of eventually. ++ */ ++ while (!retval && job > current->rt_param.job_params.job_no) ++ /* If the last job overran then job <= job_no and we ++ * don't send the task to sleep. ++ */ ++ retval = litmus->complete_job(); ++ out: ++ return retval; ++} ++ ++/* This is a helper syscall to query the current job sequence number. ++ * ++ * returns 0 on successful query ++ * returns EPERM if task is not a real-time task. ++ * returns EFAULT if &job is not a valid pointer. ++ */ ++asmlinkage long sys_query_job_no(unsigned int __user *job) ++{ ++ int retval = -EPERM; ++ if (is_realtime(current)) ++ retval = put_user(current->rt_param.job_params.job_no, job); ++ ++ return retval; ++} ++ ++/* sys_null_call() is only used for determining raw system call ++ * overheads (kernel entry, kernel exit). It has no useful side effects. ++ * If ts is non-NULL, then the current Feather-Trace time is recorded. ++ */ ++asmlinkage long sys_null_call(cycles_t __user *ts) ++{ ++ long ret = 0; ++ cycles_t now; ++ ++ if (ts) { ++ now = get_cycles(); ++ ret = put_user(now, ts); ++ } ++ ++ return ret; ++} ++ ++/* p is a real-time task. Re-init its state as a best-effort task. */ ++static void reinit_litmus_state(struct task_struct* p, int restore) ++{ ++ struct rt_task user_config = {}; ++ void* ctrl_page = NULL; ++ ++ if (restore) { ++ /* Safe user-space provided configuration data. ++ * and allocated page. */ ++ user_config = p->rt_param.task_params; ++ ctrl_page = p->rt_param.ctrl_page; ++ } ++ ++ /* We probably should not be inheriting any task's priority ++ * at this point in time. ++ */ ++ WARN_ON(p->rt_param.inh_task); ++ ++ /* We need to restore the priority of the task. */ ++// __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio); XXX why is this commented? ++ ++ /* Cleanup everything else. */ ++ memset(&p->rt_param, 0, sizeof(p->rt_param)); ++ ++ /* Restore preserved fields. 
*/ ++ if (restore) { ++ p->rt_param.task_params = user_config; ++ p->rt_param.ctrl_page = ctrl_page; ++ } ++} ++ ++long litmus_admit_task(struct task_struct* tsk) ++{ ++ long retval = 0; ++ unsigned long flags; ++ ++ BUG_ON(is_realtime(tsk)); ++ ++ if (get_rt_period(tsk) == 0 || ++ get_exec_cost(tsk) > get_rt_period(tsk)) { ++ TRACE_TASK(tsk, "litmus admit: invalid task parameters " ++ "(%lu, %lu)\n", ++ get_exec_cost(tsk), get_rt_period(tsk)); ++ retval = -EINVAL; ++ goto out; ++ } ++ ++ if (!cpu_online(get_partition(tsk))) { ++ TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", ++ get_partition(tsk)); ++ retval = -EINVAL; ++ goto out; ++ } ++ ++ INIT_LIST_HEAD(&tsk_rt(tsk)->list); ++ ++ /* avoid scheduler plugin changing underneath us */ ++ raw_spin_lock_irqsave(&task_transition_lock, flags); ++ ++ /* allocate heap node for this task */ ++ tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC); ++ tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC); ++ ++ if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) { ++ printk(KERN_WARNING "litmus: no more heap node memory!?\n"); ++ ++ bheap_node_free(tsk_rt(tsk)->heap_node); ++ release_heap_free(tsk_rt(tsk)->rel_heap); ++ ++ retval = -ENOMEM; ++ goto out_unlock; ++ } else { ++ bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); ++ } ++ ++ retval = litmus->admit_task(tsk); ++ ++ if (!retval) { ++ sched_trace_task_name(tsk); ++ sched_trace_task_param(tsk); ++ atomic_inc(&rt_task_count); ++ } ++ ++out_unlock: ++ raw_spin_unlock_irqrestore(&task_transition_lock, flags); ++out: ++ return retval; ++} ++ ++void litmus_exit_task(struct task_struct* tsk) ++{ ++ if (is_realtime(tsk)) { ++ sched_trace_task_completion(tsk, 1); ++ ++ litmus->task_exit(tsk); ++ ++ BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); ++ bheap_node_free(tsk_rt(tsk)->heap_node); ++ release_heap_free(tsk_rt(tsk)->rel_heap); ++ ++ atomic_dec(&rt_task_count); ++ reinit_litmus_state(tsk, 1); ++ } ++} ++ ++/* IPI callback to synchronize plugin switching */ ++static void synch_on_plugin_switch(void* info) ++{ ++ while (atomic_read(&cannot_use_plugin)) ++ cpu_relax(); ++} ++ ++/* Switching a plugin in use is tricky. ++ * We must watch out that no real-time tasks exists ++ * (and that none is created in parallel) and that the plugin is not ++ * currently in use on any processor (in theory). ++ */ ++int switch_sched_plugin(struct sched_plugin* plugin) ++{ ++ unsigned long flags; ++ int ret = 0; ++ ++ BUG_ON(!plugin); ++ ++ /* forbid other cpus to use the plugin */ ++ atomic_set(&cannot_use_plugin, 1); ++ /* send IPI to force other CPUs to synch with us */ ++ smp_call_function(synch_on_plugin_switch, NULL, 0); ++ ++ /* stop task transitions */ ++ raw_spin_lock_irqsave(&task_transition_lock, flags); ++ ++ /* don't switch if there are active real-time tasks */ ++ if (atomic_read(&rt_task_count) == 0) { ++ ret = litmus->deactivate_plugin(); ++ if (0 != ret) ++ goto out; ++ ret = plugin->activate_plugin(); ++ if (0 != ret) { ++ printk(KERN_INFO "Can't activate %s (%d).\n", ++ plugin->plugin_name, ret); ++ plugin = &linux_sched_plugin; ++ } ++ printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); ++ litmus = plugin; ++ } else ++ ret = -EBUSY; ++out: ++ raw_spin_unlock_irqrestore(&task_transition_lock, flags); ++ atomic_set(&cannot_use_plugin, 0); ++ return ret; ++} ++ ++/* Called upon fork. ++ * p is the newly forked task. 
++ */ ++void litmus_fork(struct task_struct* p) ++{ ++ if (is_realtime(p)) ++ /* clean out any litmus related state, don't preserve anything */ ++ reinit_litmus_state(p, 0); ++ else ++ /* non-rt tasks might have ctrl_page set */ ++ tsk_rt(p)->ctrl_page = NULL; ++ ++ /* od tables are never inherited across a fork */ ++ p->od_table = NULL; ++} ++ ++/* Called upon execve(). ++ * current is doing the exec. ++ * Don't let address space specific stuff leak. ++ */ ++void litmus_exec(void) ++{ ++ struct task_struct* p = current; ++ ++ if (is_realtime(p)) { ++ WARN_ON(p->rt_param.inh_task); ++ if (tsk_rt(p)->ctrl_page) { ++ free_page((unsigned long) tsk_rt(p)->ctrl_page); ++ tsk_rt(p)->ctrl_page = NULL; ++ } ++ } ++} ++ ++void exit_litmus(struct task_struct *dead_tsk) ++{ ++ /* We also allow non-RT tasks to ++ * allocate control pages to allow ++ * measurements with non-RT tasks. ++ * So check if we need to free the page ++ * in any case. ++ */ ++ if (tsk_rt(dead_tsk)->ctrl_page) { ++ TRACE_TASK(dead_tsk, ++ "freeing ctrl_page %p\n", ++ tsk_rt(dead_tsk)->ctrl_page); ++ free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); ++ } ++ ++ /* main cleanup only for RT tasks */ ++ if (is_realtime(dead_tsk)) ++ litmus_exit_task(dead_tsk); ++} ++ ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++int sys_kill(int pid, int sig); ++ ++static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty) ++{ ++ struct task_struct *t; ++ read_lock(&tasklist_lock); ++ for_each_process(t) { ++ if (is_realtime(t)) { ++ sys_kill(t->pid, SIGKILL); ++ } ++ } ++ read_unlock(&tasklist_lock); ++} ++ ++static struct sysrq_key_op sysrq_kill_rt_tasks_op = { ++ .handler = sysrq_handle_kill_rt_tasks, ++ .help_msg = "quit-rt-tasks(X)", ++ .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks", ++}; ++#endif ++ ++/* in litmus/sync.c */ ++int count_tasks_waiting_for_release(void); ++ ++static int proc_read_stats(char *page, char **start, ++ off_t off, int count, ++ int *eof, void *data) ++{ ++ int len; ++ ++ len = snprintf(page, PAGE_SIZE, ++ "real-time tasks = %d\n" ++ "ready for release = %d\n", ++ atomic_read(&rt_task_count), ++ count_tasks_waiting_for_release()); ++ return len; ++} ++ ++static int proc_read_plugins(char *page, char **start, ++ off_t off, int count, ++ int *eof, void *data) ++{ ++ int len; ++ ++ len = print_sched_plugins(page, PAGE_SIZE); ++ return len; ++} ++ ++static int proc_read_curr(char *page, char **start, ++ off_t off, int count, ++ int *eof, void *data) ++{ ++ int len; ++ ++ len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name); ++ return len; ++} ++ ++static int proc_write_curr(struct file *file, ++ const char *buffer, ++ unsigned long count, ++ void *data) ++{ ++ int len, ret; ++ char name[65]; ++ struct sched_plugin* found; ++ ++ if(count > 64) ++ len = 64; ++ else ++ len = count; ++ ++ if(copy_from_user(name, buffer, len)) ++ return -EFAULT; ++ ++ name[len] = '\0'; ++ /* chomp name */ ++ if (len > 1 && name[len - 1] == '\n') ++ name[len - 1] = '\0'; ++ ++ found = find_sched_plugin(name); ++ ++ if (found) { ++ ret = switch_sched_plugin(found); ++ if (ret != 0) ++ printk(KERN_INFO "Could not switch plugin: %d\n", ret); ++ } else ++ printk(KERN_INFO "Plugin '%s' is unknown.\n", name); ++ ++ return len; ++} ++ ++static int proc_read_cluster_size(char *page, char **start, ++ off_t off, int count, ++ int *eof, void *data) ++{ ++ int len; ++ if (cluster_cache_index == 2) ++ len = snprintf(page, PAGE_SIZE, "L2\n"); ++ else if (cluster_cache_index == 3) ++ len = snprintf(page, PAGE_SIZE, "L3\n"); ++ else 
if (cluster_cache_index == 1) ++ len = snprintf(page, PAGE_SIZE, "L1\n"); ++ else ++ len = snprintf(page, PAGE_SIZE, "ALL\n"); ++ ++ return len; ++} ++ ++static int proc_write_cluster_size(struct file *file, ++ const char *buffer, ++ unsigned long count, ++ void *data) ++{ ++ int len; ++ /* L2, L3 */ ++ char cache_name[33]; ++ ++ if(count > 32) ++ len = 32; ++ else ++ len = count; ++ ++ if(copy_from_user(cache_name, buffer, len)) ++ return -EFAULT; ++ ++ cache_name[len] = '\0'; ++ /* chomp name */ ++ if (len > 1 && cache_name[len - 1] == '\n') ++ cache_name[len - 1] = '\0'; ++ ++ /* do a quick and dirty comparison to find the cluster size */ ++ if (!strcmp(cache_name, "L2")) ++ cluster_cache_index = 2; ++ else if (!strcmp(cache_name, "L3")) ++ cluster_cache_index = 3; ++ else if (!strcmp(cache_name, "L1")) ++ cluster_cache_index = 1; ++ else if (!strcmp(cache_name, "ALL")) ++ cluster_cache_index = num_online_cpus(); ++ else ++ printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name); ++ ++ return len; ++} ++ ++#ifdef CONFIG_RELEASE_MASTER ++static int proc_read_release_master(char *page, char **start, ++ off_t off, int count, ++ int *eof, void *data) ++{ ++ int len, master; ++ master = atomic_read(&release_master_cpu); ++ if (master == NO_CPU) ++ len = snprintf(page, PAGE_SIZE, "NO_CPU\n"); ++ else ++ len = snprintf(page, PAGE_SIZE, "%d\n", master); ++ return len; ++} ++ ++static int proc_write_release_master(struct file *file, ++ const char *buffer, ++ unsigned long count, ++ void *data) ++{ ++ int cpu, err, online = 0; ++ char msg[64]; ++ ++ if (count > 63) ++ return -EINVAL; ++ ++ if (copy_from_user(msg, buffer, count)) ++ return -EFAULT; ++ ++ /* terminate */ ++ msg[count] = '\0'; ++ /* chomp */ ++ if (count > 1 && msg[count - 1] == '\n') ++ msg[count - 1] = '\0'; ++ ++ if (strcmp(msg, "NO_CPU") == 0) { ++ atomic_set(&release_master_cpu, NO_CPU); ++ return count; ++ } else { ++ err = sscanf(msg, "%d", &cpu); ++ if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) { ++ atomic_set(&release_master_cpu, cpu); ++ return count; ++ } else { ++ TRACE("invalid release master: '%s' " ++ "(err:%d cpu:%d online:%d)\n", ++ msg, err, cpu, online); ++ return -EINVAL; ++ } ++ } ++} ++#endif ++ ++static struct proc_dir_entry *litmus_dir = NULL, ++ *curr_file = NULL, ++ *stat_file = NULL, ++ *plugs_file = NULL, ++#ifdef CONFIG_RELEASE_MASTER ++ *release_master_file = NULL, ++#endif ++ *clus_cache_idx_file = NULL; ++ ++static int __init init_litmus_proc(void) ++{ ++ litmus_dir = proc_mkdir("litmus", NULL); ++ if (!litmus_dir) { ++ printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); ++ return -ENOMEM; ++ } ++ ++ curr_file = create_proc_entry("active_plugin", ++ 0644, litmus_dir); ++ if (!curr_file) { ++ printk(KERN_ERR "Could not allocate active_plugin " ++ "procfs entry.\n"); ++ return -ENOMEM; ++ } ++ curr_file->read_proc = proc_read_curr; ++ curr_file->write_proc = proc_write_curr; ++ ++#ifdef CONFIG_RELEASE_MASTER ++ release_master_file = create_proc_entry("release_master", ++ 0644, litmus_dir); ++ if (!release_master_file) { ++ printk(KERN_ERR "Could not allocate release_master " ++ "procfs entry.\n"); ++ return -ENOMEM; ++ } ++ release_master_file->read_proc = proc_read_release_master; ++ release_master_file->write_proc = proc_write_release_master; ++#endif ++ ++ clus_cache_idx_file = create_proc_entry("cluster_cache", ++ 0644, litmus_dir); ++ if (!clus_cache_idx_file) { ++ printk(KERN_ERR "Could not allocate cluster_cache " ++ "procfs entry.\n"); ++ return -ENOMEM; ++ } ++ 
clus_cache_idx_file->read_proc = proc_read_cluster_size; ++ clus_cache_idx_file->write_proc = proc_write_cluster_size; ++ ++ stat_file = create_proc_read_entry("stats", 0444, litmus_dir, ++ proc_read_stats, NULL); ++ ++ plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir, ++ proc_read_plugins, NULL); ++ ++ return 0; ++} ++ ++static void exit_litmus_proc(void) ++{ ++ if (plugs_file) ++ remove_proc_entry("plugins", litmus_dir); ++ if (stat_file) ++ remove_proc_entry("stats", litmus_dir); ++ if (curr_file) ++ remove_proc_entry("active_plugin", litmus_dir); ++ if (clus_cache_idx_file) ++ remove_proc_entry("cluster_cache", litmus_dir); ++#ifdef CONFIG_RELEASE_MASTER ++ if (release_master_file) ++ remove_proc_entry("release_master", litmus_dir); ++#endif ++ if (litmus_dir) ++ remove_proc_entry("litmus", NULL); ++} ++ ++extern struct sched_plugin linux_sched_plugin; ++ ++static int __init _init_litmus(void) ++{ ++ /* Common initializers, ++ * mode change lock is used to enforce single mode change ++ * operation. ++ */ ++ printk("Starting LITMUS^RT kernel\n"); ++ ++ register_sched_plugin(&linux_sched_plugin); ++ ++ bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); ++ release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC); ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++ /* offer some debugging help */ ++ if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op)) ++ printk("Registered kill rt tasks magic sysrq.\n"); ++ else ++ printk("Could not register kill rt tasks magic sysrq.\n"); ++#endif ++ ++ init_litmus_proc(); ++ ++ return 0; ++} ++ ++static void _exit_litmus(void) ++{ ++ exit_litmus_proc(); ++ kmem_cache_destroy(bheap_node_cache); ++ kmem_cache_destroy(release_heap_cache); ++} ++ ++module_init(_init_litmus); ++module_exit(_exit_litmus); +diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c +new file mode 100644 +index 0000000..81a5ac1 +--- /dev/null ++++ b/litmus/rt_domain.c +@@ -0,0 +1,355 @@ ++/* ++ * litmus/rt_domain.c ++ * ++ * LITMUS real-time infrastructure. This file contains the ++ * functions that manipulate RT domains. RT domains are an abstraction ++ * of a ready queue and a release queue. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++#include ++ ++/* Uncomment when debugging timer races... */ ++#if 0 ++#define VTRACE_TASK TRACE_TASK ++#define VTRACE TRACE ++#else ++#define VTRACE_TASK(t, fmt, args...) /* shut up */ ++#define VTRACE(fmt, args...) 
/* be quiet already */ ++#endif ++ ++static int dummy_resched(rt_domain_t *rt) ++{ ++ return 0; ++} ++ ++static int dummy_order(struct bheap_node* a, struct bheap_node* b) ++{ ++ return 0; ++} ++ ++/* default implementation: use default lock */ ++static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks) ++{ ++ merge_ready(rt, tasks); ++} ++ ++static unsigned int time2slot(lt_t time) ++{ ++ return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS; ++} ++ ++static enum hrtimer_restart on_release_timer(struct hrtimer *timer) ++{ ++ unsigned long flags; ++ struct release_heap* rh; ++ ++ VTRACE("on_release_timer(0x%p) starts.\n", timer); ++ ++ TS_RELEASE_START; ++ ++ rh = container_of(timer, struct release_heap, timer); ++ ++ raw_spin_lock_irqsave(&rh->dom->release_lock, flags); ++ VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); ++ /* remove from release queue */ ++ list_del(&rh->list); ++ raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags); ++ VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock); ++ ++ /* call release callback */ ++ rh->dom->release_jobs(rh->dom, &rh->heap); ++ /* WARNING: rh can be referenced from other CPUs from now on. */ ++ ++ TS_RELEASE_END; ++ ++ VTRACE("on_release_timer(0x%p) ends.\n", timer); ++ ++ return HRTIMER_NORESTART; ++} ++ ++/* allocated in litmus.c */ ++struct kmem_cache * release_heap_cache; ++ ++struct release_heap* release_heap_alloc(int gfp_flags) ++{ ++ struct release_heap* rh; ++ rh= kmem_cache_alloc(release_heap_cache, gfp_flags); ++ if (rh) { ++ /* initialize timer */ ++ hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ++ rh->timer.function = on_release_timer; ++ } ++ return rh; ++} ++ ++void release_heap_free(struct release_heap* rh) ++{ ++ /* make sure timer is no longer in use */ ++ hrtimer_cancel(&rh->timer); ++ kmem_cache_free(release_heap_cache, rh); ++} ++ ++/* Caller must hold release lock. ++ * Will return heap for given time. If no such heap exists prior to ++ * the invocation it will be created. ++ */ ++static struct release_heap* get_release_heap(rt_domain_t *rt, ++ struct task_struct* t, ++ int use_task_heap) ++{ ++ struct list_head* pos; ++ struct release_heap* heap = NULL; ++ struct release_heap* rh; ++ lt_t release_time = get_release(t); ++ unsigned int slot = time2slot(release_time); ++ ++ /* initialize pos for the case that the list is empty */ ++ pos = rt->release_queue.slot[slot].next; ++ list_for_each(pos, &rt->release_queue.slot[slot]) { ++ rh = list_entry(pos, struct release_heap, list); ++ if (release_time == rh->release_time) { ++ /* perfect match -- this happens on hyperperiod ++ * boundaries ++ */ ++ heap = rh; ++ break; ++ } else if (lt_before(release_time, rh->release_time)) { ++ /* we need to insert a new node since rh is ++ * already in the future ++ */ ++ break; ++ } ++ } ++ if (!heap && use_task_heap) { ++ /* use pre-allocated release heap */ ++ rh = tsk_rt(t)->rel_heap; ++ ++ rh->dom = rt; ++ rh->release_time = release_time; ++ ++ /* add to release queue */ ++ list_add(&rh->list, pos->prev); ++ heap = rh; ++ } ++ return heap; ++} ++ ++static void reinit_release_heap(struct task_struct* t) ++{ ++ struct release_heap* rh; ++ ++ /* use pre-allocated release heap */ ++ rh = tsk_rt(t)->rel_heap; ++ ++ /* Make sure it is safe to use. The timer callback could still ++ * be executing on another CPU; hrtimer_cancel() will wait ++ * until the timer callback has completed. 
However, under no ++ * circumstances should the timer be active (= yet to be ++ * triggered). ++ * ++ * WARNING: If the CPU still holds the release_lock at this point, ++ * deadlock may occur! ++ */ ++ BUG_ON(hrtimer_cancel(&rh->timer)); ++ ++ /* initialize */ ++ bheap_init(&rh->heap); ++#ifdef CONFIG_RELEASE_MASTER ++ atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE); ++#endif ++} ++/* arm_release_timer() - start local release timer or trigger ++ * remote timer (pull timer) ++ * ++ * Called by add_release() with: ++ * - tobe_lock taken ++ * - IRQ disabled ++ */ ++#ifdef CONFIG_RELEASE_MASTER ++#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU) ++static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu) ++#else ++static void arm_release_timer(rt_domain_t *_rt) ++#endif ++{ ++ rt_domain_t *rt = _rt; ++ struct list_head list; ++ struct list_head *pos, *safe; ++ struct task_struct* t; ++ struct release_heap* rh; ++ ++ VTRACE("arm_release_timer() at %llu\n", litmus_clock()); ++ list_replace_init(&rt->tobe_released, &list); ++ ++ list_for_each_safe(pos, safe, &list) { ++ /* pick task of work list */ ++ t = list_entry(pos, struct task_struct, rt_param.list); ++ sched_trace_task_release(t); ++ list_del(pos); ++ ++ /* put into release heap while holding release_lock */ ++ raw_spin_lock(&rt->release_lock); ++ VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock); ++ ++ rh = get_release_heap(rt, t, 0); ++ if (!rh) { ++ /* need to use our own, but drop lock first */ ++ raw_spin_unlock(&rt->release_lock); ++ VTRACE_TASK(t, "Dropped release_lock 0x%p\n", ++ &rt->release_lock); ++ ++ reinit_release_heap(t); ++ VTRACE_TASK(t, "release_heap ready\n"); ++ ++ raw_spin_lock(&rt->release_lock); ++ VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n", ++ &rt->release_lock); ++ ++ rh = get_release_heap(rt, t, 1); ++ } ++ bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node); ++ VTRACE_TASK(t, "arm_release_timer(): added to release heap\n"); ++ ++ raw_spin_unlock(&rt->release_lock); ++ VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock); ++ ++ /* To avoid arming the timer multiple times, we only let the ++ * owner do the arming (which is the "first" task to reference ++ * this release_heap anyway). ++ */ ++ if (rh == tsk_rt(t)->rel_heap) { ++ VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer); ++ /* we cannot arm the timer using hrtimer_start() ++ * as it may deadlock on rq->lock ++ * ++ * PINNED mode is ok on both local and remote CPU ++ */ ++#ifdef CONFIG_RELEASE_MASTER ++ if (rt->release_master == NO_CPU && ++ target_cpu == NO_CPU) ++#endif ++ __hrtimer_start_range_ns(&rh->timer, ++ ns_to_ktime(rh->release_time), ++ 0, HRTIMER_MODE_ABS_PINNED, 0); ++#ifdef CONFIG_RELEASE_MASTER ++ else ++ hrtimer_start_on( ++ /* target_cpu overrides release master */ ++ (target_cpu != NO_CPU ? 
++ target_cpu : rt->release_master), ++ &rh->info, &rh->timer, ++ ns_to_ktime(rh->release_time), ++ HRTIMER_MODE_ABS_PINNED); ++#endif ++ } else ++ VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer); ++ } ++} ++ ++void rt_domain_init(rt_domain_t *rt, ++ bheap_prio_t order, ++ check_resched_needed_t check, ++ release_jobs_t release ++ ) ++{ ++ int i; ++ ++ BUG_ON(!rt); ++ if (!check) ++ check = dummy_resched; ++ if (!release) ++ release = default_release_jobs; ++ if (!order) ++ order = dummy_order; ++ ++#ifdef CONFIG_RELEASE_MASTER ++ rt->release_master = NO_CPU; ++#endif ++ ++ bheap_init(&rt->ready_queue); ++ INIT_LIST_HEAD(&rt->tobe_released); ++ for (i = 0; i < RELEASE_QUEUE_SLOTS; i++) ++ INIT_LIST_HEAD(&rt->release_queue.slot[i]); ++ ++ raw_spin_lock_init(&rt->ready_lock); ++ raw_spin_lock_init(&rt->release_lock); ++ raw_spin_lock_init(&rt->tobe_lock); ++ ++ rt->check_resched = check; ++ rt->release_jobs = release; ++ rt->order = order; ++} ++ ++/* add_ready - add a real-time task to the rt ready queue. It must be runnable. ++ * @new: the newly released task ++ */ ++void __add_ready(rt_domain_t* rt, struct task_struct *new) ++{ ++ TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n", ++ new->comm, new->pid, get_exec_cost(new), get_rt_period(new), ++ get_release(new), litmus_clock()); ++ ++ BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); ++ ++ bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node); ++ rt->check_resched(rt); ++} ++ ++/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable. ++ * @tasks - the newly released tasks ++ */ ++void __merge_ready(rt_domain_t* rt, struct bheap* tasks) ++{ ++ bheap_union(rt->order, &rt->ready_queue, tasks); ++ rt->check_resched(rt); ++} ++ ++ ++#ifdef CONFIG_RELEASE_MASTER ++void __add_release_on(rt_domain_t* rt, struct task_struct *task, ++ int target_cpu) ++{ ++ TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n", ++ get_release(task), target_cpu); ++ list_add(&tsk_rt(task)->list, &rt->tobe_released); ++ task->rt_param.domain = rt; ++ ++ /* start release timer */ ++ TS_SCHED2_START(task); ++ ++ arm_release_timer_on(rt, target_cpu); ++ ++ TS_SCHED2_END(task); ++} ++#endif ++ ++/* add_release - add a real-time task to the rt release queue. ++ * @task: the sleeping task ++ */ ++void __add_release(rt_domain_t* rt, struct task_struct *task) ++{ ++ TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task)); ++ list_add(&tsk_rt(task)->list, &rt->tobe_released); ++ task->rt_param.domain = rt; ++ ++ /* start release timer */ ++ TS_SCHED2_START(task); ++ ++ arm_release_timer(rt); ++ ++ TS_SCHED2_END(task); ++} ++ +diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c +new file mode 100644 +index 0000000..f5b7708 +--- /dev/null ++++ b/litmus/sched_cedf.c +@@ -0,0 +1,773 @@ ++/* ++ * litmus/sched_cedf.c ++ * ++ * Implementation of the C-EDF scheduling algorithm. ++ * ++ * This implementation is based on G-EDF: ++ * - CPUs are clustered around L2 or L3 caches. ++ * - Clusters topology is automatically detected (this is arch dependent ++ * and is working only on x86 at the moment --- and only with modern ++ * cpus that exports cpuid4 information) ++ * - The plugins _does not_ attempt to put tasks in the right cluster i.e. 
++ * the programmer needs to be aware of the topology to place tasks ++ * in the desired cluster ++ * - default clustering is around L2 cache (cache index = 2) ++ * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all ++ * online_cpus are placed in a single cluster). ++ * ++ * For details on functions, take a look at sched_gsn_edf.c ++ * ++ * Currently, we do not support changes in the number of online cpus. ++ * If the num_online_cpus() dynamically changes, the plugin is broken. ++ * ++ * This version uses the simple approach and serializes all scheduling ++ * decisions by the use of a queue lock. This is probably not the ++ * best way to do it, but it should suffice for now. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++/* forward declaration... a funny thing with C ;) */ ++struct clusterdomain; ++ ++/* cpu_entry_t - maintain the linked and scheduled state ++ * ++ * A cpu also contains a pointer to the cedf_domain_t cluster ++ * that owns it (struct clusterdomain*) ++ */ ++typedef struct { ++ int cpu; ++ struct clusterdomain* cluster; /* owning cluster */ ++ struct task_struct* linked; /* only RT tasks */ ++ struct task_struct* scheduled; /* only RT tasks */ ++ atomic_t will_schedule; /* prevent unneeded IPIs */ ++ struct bheap_node* hn; ++} cpu_entry_t; ++ ++/* one cpu_entry_t per CPU */ ++DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); ++ ++#define set_will_schedule() \ ++ (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1)) ++#define clear_will_schedule() \ ++ (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0)) ++#define test_will_schedule(cpu) \ ++ (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) ++ ++/* ++ * In C-EDF there is a cedf domain _per_ cluster ++ * The number of clusters is dynamically determined accordingly to the ++ * total cpu number and the cluster size ++ */ ++typedef struct clusterdomain { ++ /* rt_domain for this cluster */ ++ rt_domain_t domain; ++ /* cpus in this cluster */ ++ cpu_entry_t* *cpus; ++ /* map of this cluster cpus */ ++ cpumask_var_t cpu_map; ++ /* the cpus queue themselves according to priority in here */ ++ struct bheap_node *heap_node; ++ struct bheap cpu_heap; ++ /* lock for this cluster */ ++#define lock domain.ready_lock ++} cedf_domain_t; ++ ++/* a cedf_domain per cluster; allocation is done at init/activation time */ ++cedf_domain_t *cedf; ++ ++#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) ++#define task_cpu_cluster(task) remote_cluster(get_partition(task)) ++ ++/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling ++ * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose ++ * information during the initialization of the plugin (e.g., topology) ++#define WANT_ALL_SCHED_EVENTS ++ */ ++#define VERBOSE_INIT ++ ++static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) ++{ ++ cpu_entry_t *a, *b; ++ a = _a->value; ++ b = _b->value; ++ /* Note that a and b are inverted: we want the lowest-priority CPU at ++ * the top of the heap. ++ */ ++ return edf_higher_prio(b->linked, a->linked); ++} ++ ++/* update_cpu_position - Move the cpu entry to the correct place to maintain ++ * order in the cpu queue. Caller must hold cedf lock. 
++ */ ++static void update_cpu_position(cpu_entry_t *entry) ++{ ++ cedf_domain_t *cluster = entry->cluster; ++ ++ if (likely(bheap_node_in_heap(entry->hn))) ++ bheap_delete(cpu_lower_prio, ++ &cluster->cpu_heap, ++ entry->hn); ++ ++ bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); ++} ++ ++/* caller must hold cedf lock */ ++static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) ++{ ++ struct bheap_node* hn; ++ hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); ++ return hn->value; ++} ++ ++ ++/* link_task_to_cpu - Update the link of a CPU. ++ * Handles the case where the to-be-linked task is already ++ * scheduled on a different CPU. ++ */ ++static noinline void link_task_to_cpu(struct task_struct* linked, ++ cpu_entry_t *entry) ++{ ++ cpu_entry_t *sched; ++ struct task_struct* tmp; ++ int on_cpu; ++ ++ BUG_ON(linked && !is_realtime(linked)); ++ ++ /* Currently linked task is set to be unlinked. */ ++ if (entry->linked) { ++ entry->linked->rt_param.linked_on = NO_CPU; ++ } ++ ++ /* Link new task to CPU. */ ++ if (linked) { ++ set_rt_flags(linked, RT_F_RUNNING); ++ /* handle task is already scheduled somewhere! */ ++ on_cpu = linked->rt_param.scheduled_on; ++ if (on_cpu != NO_CPU) { ++ sched = &per_cpu(cedf_cpu_entries, on_cpu); ++ /* this should only happen if not linked already */ ++ BUG_ON(sched->linked == linked); ++ ++ /* If we are already scheduled on the CPU to which we ++ * wanted to link, we don't need to do the swap -- ++ * we just link ourselves to the CPU and depend on ++ * the caller to get things right. ++ */ ++ if (entry != sched) { ++ TRACE_TASK(linked, ++ "already scheduled on %d, updating link.\n", ++ sched->cpu); ++ tmp = sched->linked; ++ linked->rt_param.linked_on = sched->cpu; ++ sched->linked = linked; ++ update_cpu_position(sched); ++ linked = tmp; ++ } ++ } ++ if (linked) /* might be NULL due to swap */ ++ linked->rt_param.linked_on = entry->cpu; ++ } ++ entry->linked = linked; ++#ifdef WANT_ALL_SCHED_EVENTS ++ if (linked) ++ TRACE_TASK(linked, "linked to %d.\n", entry->cpu); ++ else ++ TRACE("NULL linked to %d.\n", entry->cpu); ++#endif ++ update_cpu_position(entry); ++} ++ ++/* unlink - Make sure a task is not linked any longer to an entry ++ * where it was linked before. Must hold cedf_lock. ++ */ ++static noinline void unlink(struct task_struct* t) ++{ ++ cpu_entry_t *entry; ++ ++ if (unlikely(!t)) { ++ TRACE_BUG_ON(!t); ++ return; ++ } ++ ++ ++ if (t->rt_param.linked_on != NO_CPU) { ++ /* unlink */ ++ entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on); ++ t->rt_param.linked_on = NO_CPU; ++ link_task_to_cpu(NULL, entry); ++ } else if (is_queued(t)) { ++ /* This is an interesting situation: t is scheduled, ++ * but was just recently unlinked. It cannot be ++ * linked anywhere else (because then it would have ++ * been relinked to this CPU), thus it must be in some ++ * queue. We must remove it from the list in this ++ * case. ++ * ++ * in C-EDF case is should be somewhere in the queue for ++ * its domain, therefore and we can get the domain using ++ * task_cpu_cluster ++ */ ++ remove(&(task_cpu_cluster(t))->domain, t); ++ } ++} ++ ++ ++/* preempt - force a CPU to reschedule ++ */ ++static void preempt(cpu_entry_t *entry) ++{ ++ preempt_if_preemptable(entry->scheduled, entry->cpu); ++} ++ ++/* requeue - Put an unlinked task into gsn-edf domain. ++ * Caller must hold cedf_lock. 
++ */ ++static noinline void requeue(struct task_struct* task) ++{ ++ cedf_domain_t *cluster = task_cpu_cluster(task); ++ BUG_ON(!task); ++ /* sanity check before insertion */ ++ BUG_ON(is_queued(task)); ++ ++ if (is_released(task, litmus_clock())) ++ __add_ready(&cluster->domain, task); ++ else { ++ /* it has got to wait */ ++ add_release(&cluster->domain, task); ++ } ++} ++ ++/* check for any necessary preemptions */ ++static void check_for_preemptions(cedf_domain_t *cluster) ++{ ++ struct task_struct *task; ++ cpu_entry_t* last; ++ ++ for(last = lowest_prio_cpu(cluster); ++ edf_preemption_needed(&cluster->domain, last->linked); ++ last = lowest_prio_cpu(cluster)) { ++ /* preemption necessary */ ++ task = __take_ready(&cluster->domain); ++ TRACE("check_for_preemptions: attempting to link task %d to %d\n", ++ task->pid, last->cpu); ++ if (last->linked) ++ requeue(last->linked); ++ link_task_to_cpu(task, last); ++ preempt(last); ++ } ++} ++ ++/* cedf_job_arrival: task is either resumed or released */ ++static noinline void cedf_job_arrival(struct task_struct* task) ++{ ++ cedf_domain_t *cluster = task_cpu_cluster(task); ++ BUG_ON(!task); ++ ++ requeue(task); ++ check_for_preemptions(cluster); ++} ++ ++static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) ++{ ++ cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&cluster->lock, flags); ++ ++ __merge_ready(&cluster->domain, tasks); ++ check_for_preemptions(cluster); ++ ++ raw_spin_unlock_irqrestore(&cluster->lock, flags); ++} ++ ++/* caller holds cedf_lock */ ++static noinline void job_completion(struct task_struct *t, int forced) ++{ ++ BUG_ON(!t); ++ ++ sched_trace_task_completion(t, forced); ++ ++ TRACE_TASK(t, "job_completion().\n"); ++ ++ /* set flags */ ++ set_rt_flags(t, RT_F_SLEEP); ++ /* prepare for next period */ ++ prepare_for_next_period(t); ++ if (is_released(t, litmus_clock())) ++ sched_trace_task_release(t); ++ /* unlink */ ++ unlink(t); ++ /* requeue ++ * But don't requeue a blocking task. */ ++ if (is_running(t)) ++ cedf_job_arrival(t); ++} ++ ++/* cedf_tick - this function is called for every local timer ++ * interrupt. ++ * ++ * checks whether the current task has expired and checks ++ * whether we need to preempt it if it has not expired ++ */ ++static void cedf_tick(struct task_struct* t) ++{ ++ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { ++ if (!is_np(t)) { ++ /* np tasks will be preempted when they become ++ * preemptable again ++ */ ++ set_tsk_need_resched(t); ++ set_will_schedule(); ++ TRACE("cedf_scheduler_tick: " ++ "%d is preemptable " ++ " => FORCE_RESCHED\n", t->pid); ++ } else if (is_user_np(t)) { ++ TRACE("cedf_scheduler_tick: " ++ "%d is non-preemptable, " ++ "preemption delayed.\n", t->pid); ++ request_exit_np(t); ++ } ++ } ++} ++ ++/* Getting schedule() right is a bit tricky. schedule() may not make any ++ * assumptions on the state of the current task since it may be called for a ++ * number of reasons. The reasons include a scheduler_tick() determined that it ++ * was necessary, because sys_exit_np() was called, because some Linux ++ * subsystem determined so, or even (in the worst case) because there is a bug ++ * hidden somewhere. Thus, we must take extreme care to determine what the ++ * current state is. ++ * ++ * The CPU could currently be scheduling a task (or not), be linked (or not). 
++ * ++ * The following assertions for the scheduled task could hold: ++ * ++ * - !is_running(scheduled) // the job blocks ++ * - scheduled->timeslice == 0 // the job completed (forcefully) ++ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) ++ * - linked != scheduled // we need to reschedule (for any reason) ++ * - is_np(scheduled) // rescheduling must be delayed, ++ * sys_exit_np must be requested ++ * ++ * Any of these can occur together. ++ */ ++static struct task_struct* cedf_schedule(struct task_struct * prev) ++{ ++ cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); ++ cedf_domain_t *cluster = entry->cluster; ++ int out_of_time, sleep, preempt, np, exists, blocks; ++ struct task_struct* next = NULL; ++ ++ raw_spin_lock(&cluster->lock); ++ clear_will_schedule(); ++ ++ /* sanity checking */ ++ BUG_ON(entry->scheduled && entry->scheduled != prev); ++ BUG_ON(entry->scheduled && !is_realtime(prev)); ++ BUG_ON(is_realtime(prev) && !entry->scheduled); ++ ++ /* (0) Determine state */ ++ exists = entry->scheduled != NULL; ++ blocks = exists && !is_running(entry->scheduled); ++ out_of_time = exists && ++ budget_enforced(entry->scheduled) && ++ budget_exhausted(entry->scheduled); ++ np = exists && is_np(entry->scheduled); ++ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; ++ preempt = entry->scheduled != entry->linked; ++ ++#ifdef WANT_ALL_SCHED_EVENTS ++ TRACE_TASK(prev, "invoked cedf_schedule.\n"); ++#endif ++ ++ if (exists) ++ TRACE_TASK(prev, ++ "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " ++ "state:%d sig:%d\n", ++ blocks, out_of_time, np, sleep, preempt, ++ prev->state, signal_pending(prev)); ++ if (entry->linked && preempt) ++ TRACE_TASK(prev, "will be preempted by %s/%d\n", ++ entry->linked->comm, entry->linked->pid); ++ ++ ++ /* If a task blocks we have no choice but to reschedule. ++ */ ++ if (blocks) ++ unlink(entry->scheduled); ++ ++ /* Request a sys_exit_np() call if we would like to preempt but cannot. ++ * We need to make sure to update the link structure anyway in case ++ * that we are still linked. Multiple calls to request_exit_np() don't ++ * hurt. ++ */ ++ if (np && (out_of_time || preempt || sleep)) { ++ unlink(entry->scheduled); ++ request_exit_np(entry->scheduled); ++ } ++ ++ /* Any task that is preemptable and either exhausts its execution ++ * budget or wants to sleep completes. We may have to reschedule after ++ * this. Don't do a job completion if we block (can't have timers running ++ * for blocked jobs). Preemption go first for the same reason. ++ */ ++ if (!np && (out_of_time || sleep) && !blocks && !preempt) ++ job_completion(entry->scheduled, !sleep); ++ ++ /* Link pending task if we became unlinked. ++ */ ++ if (!entry->linked) ++ link_task_to_cpu(__take_ready(&cluster->domain), entry); ++ ++ /* The final scheduling decision. Do we need to switch for some reason? ++ * If linked is different from scheduled, then select linked as next. ++ */ ++ if ((!np || blocks) && ++ entry->linked != entry->scheduled) { ++ /* Schedule a linked job? */ ++ if (entry->linked) { ++ entry->linked->rt_param.scheduled_on = entry->cpu; ++ next = entry->linked; ++ } ++ if (entry->scheduled) { ++ /* not gonna be scheduled soon */ ++ entry->scheduled->rt_param.scheduled_on = NO_CPU; ++ TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); ++ } ++ } else ++ /* Only override Linux scheduler if we have a real-time task ++ * scheduled that needs to continue. 
++ */ ++ if (exists) ++ next = prev; ++ ++ raw_spin_unlock(&cluster->lock); ++ ++#ifdef WANT_ALL_SCHED_EVENTS ++ TRACE("cedf_lock released, next=0x%p\n", next); ++ ++ if (next) ++ TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); ++ else if (exists && !next) ++ TRACE("becomes idle at %llu.\n", litmus_clock()); ++#endif ++ ++ ++ return next; ++} ++ ++ ++/* _finish_switch - we just finished the switch away from prev ++ */ ++static void cedf_finish_switch(struct task_struct *prev) ++{ ++ cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); ++ ++ entry->scheduled = is_realtime(current) ? current : NULL; ++#ifdef WANT_ALL_SCHED_EVENTS ++ TRACE_TASK(prev, "switched away from\n"); ++#endif ++} ++ ++ ++/* Prepare a task for running in RT mode ++ */ ++static void cedf_task_new(struct task_struct * t, int on_rq, int running) ++{ ++ unsigned long flags; ++ cpu_entry_t* entry; ++ cedf_domain_t* cluster; ++ ++ TRACE("gsn edf: task new %d\n", t->pid); ++ ++ /* the cluster doesn't change even if t is running */ ++ cluster = task_cpu_cluster(t); ++ ++ raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags); ++ ++ /* setup job params */ ++ release_at(t, litmus_clock()); ++ ++ if (running) { ++ entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); ++ BUG_ON(entry->scheduled); ++ ++ entry->scheduled = t; ++ tsk_rt(t)->scheduled_on = task_cpu(t); ++ } else { ++ t->rt_param.scheduled_on = NO_CPU; ++ } ++ t->rt_param.linked_on = NO_CPU; ++ ++ cedf_job_arrival(t); ++ raw_spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags); ++} ++ ++static void cedf_task_wake_up(struct task_struct *task) ++{ ++ unsigned long flags; ++ lt_t now; ++ cedf_domain_t *cluster; ++ ++ TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); ++ ++ cluster = task_cpu_cluster(task); ++ ++ raw_spin_lock_irqsave(&cluster->lock, flags); ++ /* We need to take suspensions because of semaphores into ++ * account! If a job resumes after being suspended due to acquiring ++ * a semaphore, it should never be treated as a new job release. ++ */ ++ if (get_rt_flags(task) == RT_F_EXIT_SEM) { ++ set_rt_flags(task, RT_F_RUNNING); ++ } else { ++ now = litmus_clock(); ++ if (is_tardy(task, now)) { ++ /* new sporadic release */ ++ release_at(task, now); ++ sched_trace_task_release(task); ++ } ++ else { ++ if (task->rt.time_slice) { ++ /* came back in time before deadline ++ */ ++ set_rt_flags(task, RT_F_RUNNING); ++ } ++ } ++ } ++ cedf_job_arrival(task); ++ raw_spin_unlock_irqrestore(&cluster->lock, flags); ++} ++ ++static void cedf_task_block(struct task_struct *t) ++{ ++ unsigned long flags; ++ cedf_domain_t *cluster; ++ ++ TRACE_TASK(t, "block at %llu\n", litmus_clock()); ++ ++ cluster = task_cpu_cluster(t); ++ ++ /* unlink if necessary */ ++ raw_spin_lock_irqsave(&cluster->lock, flags); ++ unlink(t); ++ raw_spin_unlock_irqrestore(&cluster->lock, flags); ++ ++ BUG_ON(!is_realtime(t)); ++} ++ ++ ++static void cedf_task_exit(struct task_struct * t) ++{ ++ unsigned long flags; ++ cedf_domain_t *cluster = task_cpu_cluster(t); ++ ++ /* unlink if necessary */ ++ raw_spin_lock_irqsave(&cluster->lock, flags); ++ unlink(t); ++ if (tsk_rt(t)->scheduled_on != NO_CPU) { ++ cluster->cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; ++ tsk_rt(t)->scheduled_on = NO_CPU; ++ } ++ raw_spin_unlock_irqrestore(&cluster->lock, flags); ++ ++ BUG_ON(!is_realtime(t)); ++ TRACE_TASK(t, "RIP\n"); ++} ++ ++static long cedf_admit_task(struct task_struct* tsk) ++{ ++ return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 
0 : -EINVAL; ++} ++ ++/* total number of cluster */ ++static int num_clusters; ++/* we do not support cluster of different sizes */ ++static unsigned int cluster_size; ++ ++#ifdef VERBOSE_INIT ++static void print_cluster_topology(cpumask_var_t mask, int cpu) ++{ ++ int chk; ++ char buf[255]; ++ ++ chk = cpulist_scnprintf(buf, 254, mask); ++ buf[chk] = '\0'; ++ printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); ++ ++} ++#endif ++ ++static int clusters_allocated = 0; ++ ++static void cleanup_cedf(void) ++{ ++ int i; ++ ++ if (clusters_allocated) { ++ for (i = 0; i < num_clusters; i++) { ++ kfree(cedf[i].cpus); ++ kfree(cedf[i].heap_node); ++ free_cpumask_var(cedf[i].cpu_map); ++ } ++ ++ kfree(cedf); ++ } ++} ++ ++static long cedf_activate_plugin(void) ++{ ++ int i, j, cpu, ccpu, cpu_count; ++ cpu_entry_t *entry; ++ ++ cpumask_var_t mask; ++ int chk = 0; ++ ++ /* de-allocate old clusters, if any */ ++ cleanup_cedf(); ++ ++ printk(KERN_INFO "C-EDF: Activate Plugin, cache index = %d\n", ++ cluster_cache_index); ++ ++ /* need to get cluster_size first */ ++ if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) ++ return -ENOMEM; ++ ++ if (unlikely(cluster_cache_index == num_online_cpus())) { ++ ++ cluster_size = num_online_cpus(); ++ } else { ++ ++ chk = get_shared_cpu_map(mask, 0, cluster_cache_index); ++ if (chk) { ++ /* if chk != 0 then it is the max allowed index */ ++ printk(KERN_INFO "C-EDF: Cannot support cache index = %d\n", ++ cluster_cache_index); ++ printk(KERN_INFO "C-EDF: Using cache index = %d\n", ++ chk); ++ cluster_cache_index = chk; ++ } ++ ++ cluster_size = cpumask_weight(mask); ++ } ++ ++ if ((num_online_cpus() % cluster_size) != 0) { ++ /* this can't be right, some cpus are left out */ ++ printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n", ++ num_online_cpus(), cluster_size); ++ return -1; ++ } ++ ++ num_clusters = num_online_cpus() / cluster_size; ++ printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", ++ num_clusters, cluster_size); ++ ++ /* initialize clusters */ ++ cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); ++ for (i = 0; i < num_clusters; i++) { ++ ++ cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), ++ GFP_ATOMIC); ++ cedf[i].heap_node = kmalloc( ++ cluster_size * sizeof(struct bheap_node), ++ GFP_ATOMIC); ++ bheap_init(&(cedf[i].cpu_heap)); ++ edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); ++ ++ if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) ++ return -ENOMEM; ++ } ++ ++ /* cycle through cluster and add cpus to them */ ++ for (i = 0; i < num_clusters; i++) { ++ ++ for_each_online_cpu(cpu) { ++ /* check if the cpu is already in a cluster */ ++ for (j = 0; j < num_clusters; j++) ++ if (cpumask_test_cpu(cpu, cedf[j].cpu_map)) ++ break; ++ /* if it is in a cluster go to next cpu */ ++ if (cpumask_test_cpu(cpu, cedf[j].cpu_map)) ++ continue; ++ ++ /* this cpu isn't in any cluster */ ++ /* get the shared cpus */ ++ if (unlikely(cluster_cache_index == num_online_cpus())) ++ cpumask_copy(mask, cpu_online_mask); ++ else ++ get_shared_cpu_map(mask, cpu, cluster_cache_index); ++ ++ cpumask_copy(cedf[i].cpu_map, mask); ++#ifdef VERBOSE_INIT ++ print_cluster_topology(mask, cpu); ++#endif ++ /* add cpus to current cluster and init cpu_entry_t */ ++ cpu_count = 0; ++ for_each_cpu(ccpu, cedf[i].cpu_map) { ++ ++ entry = &per_cpu(cedf_cpu_entries, ccpu); ++ cedf[i].cpus[cpu_count] = entry; ++ atomic_set(&entry->will_schedule, 0); ++ entry->cpu = ccpu; ++ entry->cluster = &cedf[i]; ++ entry->hn = 
&(cedf[i].heap_node[cpu_count]); ++ bheap_node_init(&entry->hn, entry); ++ ++ cpu_count++; ++ ++ entry->linked = NULL; ++ entry->scheduled = NULL; ++ update_cpu_position(entry); ++ } ++ /* done with this cluster */ ++ break; ++ } ++ } ++ ++ free_cpumask_var(mask); ++ clusters_allocated = 1; ++ return 0; ++} ++ ++/* Plugin object */ ++static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { ++ .plugin_name = "C-EDF", ++ .finish_switch = cedf_finish_switch, ++ .tick = cedf_tick, ++ .task_new = cedf_task_new, ++ .complete_job = complete_job, ++ .task_exit = cedf_task_exit, ++ .schedule = cedf_schedule, ++ .task_wake_up = cedf_task_wake_up, ++ .task_block = cedf_task_block, ++ .admit_task = cedf_admit_task, ++ .activate_plugin = cedf_activate_plugin, ++}; ++ ++ ++static int __init init_cedf(void) ++{ ++ return register_sched_plugin(&cedf_plugin); ++} ++ ++static void clean_cedf(void) ++{ ++ cleanup_cedf(); ++} ++ ++module_init(init_cedf); ++module_exit(clean_cedf); +diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c +new file mode 100644 +index 0000000..e101768 +--- /dev/null ++++ b/litmus/sched_gsn_edf.c +@@ -0,0 +1,842 @@ ++/* ++ * litmus/sched_gsn_edf.c ++ * ++ * Implementation of the GSN-EDF scheduling algorithm. ++ * ++ * This version uses the simple approach and serializes all scheduling ++ * decisions by the use of a queue lock. This is probably not the ++ * best way to do it, but it should suffice for now. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++/* Overview of GSN-EDF operations. ++ * ++ * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This ++ * description only covers how the individual operations are implemented in ++ * LITMUS. ++ * ++ * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage ++ * structure (NOT the actually scheduled ++ * task). If there is another linked task To ++ * already it will set To->linked_on = NO_CPU ++ * (thereby removing its association with this ++ * CPU). However, it will not requeue the ++ * previously linked task (if any). It will set ++ * T's state to RT_F_RUNNING and check whether ++ * it is already running somewhere else. If T ++ * is scheduled somewhere else it will link ++ * it to that CPU instead (and pull the linked ++ * task to cpu). T may be NULL. ++ * ++ * unlink(T) - Unlink removes T from all scheduler data ++ * structures. If it is linked to some CPU it ++ * will link NULL to that CPU. If it is ++ * currently queued in the gsnedf queue it will ++ * be removed from the rt_domain. It is safe to ++ * call unlink(T) if T is not linked. T may not ++ * be NULL. ++ * ++ * requeue(T) - Requeue will insert T into the appropriate ++ * queue. If the system is in real-time mode and ++ * the T is released already, it will go into the ++ * ready queue. If the system is not in ++ * real-time mode is T, then T will go into the ++ * release queue. If T's release time is in the ++ * future, it will go into the release ++ * queue. That means that T's release time/job ++ * no/etc. has to be updated before requeu(T) is ++ * called. It is not safe to call requeue(T) ++ * when T is already queued. T may not be NULL. ++ * ++ * gsnedf_job_arrival(T) - This is the catch all function when T enters ++ * the system after either a suspension or at a ++ * job release. 
It will queue T (which means it ++ * is not safe to call gsnedf_job_arrival(T) if ++ * T is already queued) and then check whether a ++ * preemption is necessary. If a preemption is ++ * necessary it will update the linkage ++ * accordingly and cause scheduled to be called ++ * (either with an IPI or need_resched). It is ++ * safe to call gsnedf_job_arrival(T) if T's ++ * next job has not been actually released yet ++ * (releast time in the future). T will be put ++ * on the release queue in that case. ++ * ++ * job_completion(T) - Take care of everything that needs to be done ++ * to prepare T for its next release and place ++ * it in the right queue with ++ * gsnedf_job_arrival(). ++ * ++ * ++ * When we now that T is linked to CPU then link_task_to_cpu(NULL, CPU) is ++ * equivalent to unlink(T). Note that if you unlink a task from a CPU none of ++ * the functions will automatically propagate pending task from the ready queue ++ * to a linked task. This is the job of the calling function ( by means of ++ * __take_ready). ++ */ ++ ++ ++/* cpu_entry_t - maintain the linked and scheduled state ++ */ ++typedef struct { ++ int cpu; ++ struct task_struct* linked; /* only RT tasks */ ++ struct task_struct* scheduled; /* only RT tasks */ ++ atomic_t will_schedule; /* prevent unneeded IPIs */ ++ struct bheap_node* hn; ++} cpu_entry_t; ++DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); ++ ++cpu_entry_t* gsnedf_cpus[NR_CPUS]; ++ ++#define set_will_schedule() \ ++ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1)) ++#define clear_will_schedule() \ ++ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0)) ++#define test_will_schedule(cpu) \ ++ (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule)) ++ ++ ++/* the cpus queue themselves according to priority in here */ ++static struct bheap_node gsnedf_heap_node[NR_CPUS]; ++static struct bheap gsnedf_cpu_heap; ++ ++static rt_domain_t gsnedf; ++#define gsnedf_lock (gsnedf.ready_lock) ++ ++ ++/* Uncomment this if you want to see all scheduling decisions in the ++ * TRACE() log. ++#define WANT_ALL_SCHED_EVENTS ++ */ ++ ++static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) ++{ ++ cpu_entry_t *a, *b; ++ a = _a->value; ++ b = _b->value; ++ /* Note that a and b are inverted: we want the lowest-priority CPU at ++ * the top of the heap. ++ */ ++ return edf_higher_prio(b->linked, a->linked); ++} ++ ++/* update_cpu_position - Move the cpu entry to the correct place to maintain ++ * order in the cpu queue. Caller must hold gsnedf lock. ++ */ ++static void update_cpu_position(cpu_entry_t *entry) ++{ ++ if (likely(bheap_node_in_heap(entry->hn))) ++ bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); ++ bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); ++} ++ ++/* caller must hold gsnedf lock */ ++static cpu_entry_t* lowest_prio_cpu(void) ++{ ++ struct bheap_node* hn; ++ hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap); ++ return hn->value; ++} ++ ++ ++/* link_task_to_cpu - Update the link of a CPU. ++ * Handles the case where the to-be-linked task is already ++ * scheduled on a different CPU. ++ */ ++static noinline void link_task_to_cpu(struct task_struct* linked, ++ cpu_entry_t *entry) ++{ ++ cpu_entry_t *sched; ++ struct task_struct* tmp; ++ int on_cpu; ++ ++ BUG_ON(linked && !is_realtime(linked)); ++ ++ /* Currently linked task is set to be unlinked. */ ++ if (entry->linked) { ++ entry->linked->rt_param.linked_on = NO_CPU; ++ } ++ ++ /* Link new task to CPU. 
*/ ++ if (linked) { ++ set_rt_flags(linked, RT_F_RUNNING); ++ /* handle task is already scheduled somewhere! */ ++ on_cpu = linked->rt_param.scheduled_on; ++ if (on_cpu != NO_CPU) { ++ sched = &per_cpu(gsnedf_cpu_entries, on_cpu); ++ /* this should only happen if not linked already */ ++ BUG_ON(sched->linked == linked); ++ ++ /* If we are already scheduled on the CPU to which we ++ * wanted to link, we don't need to do the swap -- ++ * we just link ourselves to the CPU and depend on ++ * the caller to get things right. ++ */ ++ if (entry != sched) { ++ TRACE_TASK(linked, ++ "already scheduled on %d, updating link.\n", ++ sched->cpu); ++ tmp = sched->linked; ++ linked->rt_param.linked_on = sched->cpu; ++ sched->linked = linked; ++ update_cpu_position(sched); ++ linked = tmp; ++ } ++ } ++ if (linked) /* might be NULL due to swap */ ++ linked->rt_param.linked_on = entry->cpu; ++ } ++ entry->linked = linked; ++#ifdef WANT_ALL_SCHED_EVENTS ++ if (linked) ++ TRACE_TASK(linked, "linked to %d.\n", entry->cpu); ++ else ++ TRACE("NULL linked to %d.\n", entry->cpu); ++#endif ++ update_cpu_position(entry); ++} ++ ++/* unlink - Make sure a task is not linked any longer to an entry ++ * where it was linked before. Must hold gsnedf_lock. ++ */ ++static noinline void unlink(struct task_struct* t) ++{ ++ cpu_entry_t *entry; ++ ++ if (unlikely(!t)) { ++ TRACE_BUG_ON(!t); ++ return; ++ } ++ ++ if (t->rt_param.linked_on != NO_CPU) { ++ /* unlink */ ++ entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on); ++ t->rt_param.linked_on = NO_CPU; ++ link_task_to_cpu(NULL, entry); ++ } else if (is_queued(t)) { ++ /* This is an interesting situation: t is scheduled, ++ * but was just recently unlinked. It cannot be ++ * linked anywhere else (because then it would have ++ * been relinked to this CPU), thus it must be in some ++ * queue. We must remove it from the list in this ++ * case. ++ */ ++ remove(&gsnedf, t); ++ } ++} ++ ++ ++/* preempt - force a CPU to reschedule ++ */ ++static void preempt(cpu_entry_t *entry) ++{ ++ preempt_if_preemptable(entry->scheduled, entry->cpu); ++} ++ ++/* requeue - Put an unlinked task into gsn-edf domain. ++ * Caller must hold gsnedf_lock. 
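/* Editorial sketch -- not part of litmus-rt-2010.2.patch. requeue() below
 * decides between the ready queue and the release queue purely from the
 * job's release time: already-released jobs are merged into the ready queue,
 * future releases are armed on the release queue. A toy version of that
 * predicate; lt_t and toy_is_released() are invented stand-ins.
 */
#include <stdio.h>

typedef unsigned long long lt_t;	/* nanosecond timestamps, LITMUS-style */

/* a job counts as released once its release time is no later than now */
static int toy_is_released(lt_t release_time, lt_t now)
{
	return release_time <= now;
}

int main(void)
{
	lt_t now = 1000;

	printf("release=900  -> %s\n",
	       toy_is_released(900, now) ? "ready queue" : "release queue");
	printf("release=1500 -> %s\n",
	       toy_is_released(1500, now) ? "ready queue" : "release queue");
	return 0;
}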
++ */ ++static noinline void requeue(struct task_struct* task) ++{ ++ BUG_ON(!task); ++ /* sanity check before insertion */ ++ BUG_ON(is_queued(task)); ++ ++ if (is_released(task, litmus_clock())) ++ __add_ready(&gsnedf, task); ++ else { ++ /* it has got to wait */ ++ add_release(&gsnedf, task); ++ } ++} ++ ++/* check for any necessary preemptions */ ++static void check_for_preemptions(void) ++{ ++ struct task_struct *task; ++ cpu_entry_t* last; ++ ++ for(last = lowest_prio_cpu(); ++ edf_preemption_needed(&gsnedf, last->linked); ++ last = lowest_prio_cpu()) { ++ /* preemption necessary */ ++ task = __take_ready(&gsnedf); ++ TRACE("check_for_preemptions: attempting to link task %d to %d\n", ++ task->pid, last->cpu); ++ if (last->linked) ++ requeue(last->linked); ++ link_task_to_cpu(task, last); ++ preempt(last); ++ } ++} ++ ++/* gsnedf_job_arrival: task is either resumed or released */ ++static noinline void gsnedf_job_arrival(struct task_struct* task) ++{ ++ BUG_ON(!task); ++ ++ requeue(task); ++ check_for_preemptions(); ++} ++ ++static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) ++{ ++ unsigned long flags; ++ ++ raw_spin_lock_irqsave(&gsnedf_lock, flags); ++ ++ __merge_ready(rt, tasks); ++ check_for_preemptions(); ++ ++ raw_spin_unlock_irqrestore(&gsnedf_lock, flags); ++} ++ ++/* caller holds gsnedf_lock */ ++static noinline void job_completion(struct task_struct *t, int forced) ++{ ++ BUG_ON(!t); ++ ++ sched_trace_task_completion(t, forced); ++ ++ TRACE_TASK(t, "job_completion().\n"); ++ ++ /* set flags */ ++ set_rt_flags(t, RT_F_SLEEP); ++ /* prepare for next period */ ++ prepare_for_next_period(t); ++ if (is_released(t, litmus_clock())) ++ sched_trace_task_release(t); ++ /* unlink */ ++ unlink(t); ++ /* requeue ++ * But don't requeue a blocking task. */ ++ if (is_running(t)) ++ gsnedf_job_arrival(t); ++} ++ ++/* gsnedf_tick - this function is called for every local timer ++ * interrupt. ++ * ++ * checks whether the current task has expired and checks ++ * whether we need to preempt it if it has not expired ++ */ ++static void gsnedf_tick(struct task_struct* t) ++{ ++ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { ++ if (!is_np(t)) { ++ /* np tasks will be preempted when they become ++ * preemptable again ++ */ ++ set_tsk_need_resched(t); ++ set_will_schedule(); ++ TRACE("gsnedf_scheduler_tick: " ++ "%d is preemptable " ++ " => FORCE_RESCHED\n", t->pid); ++ } else if (is_user_np(t)) { ++ TRACE("gsnedf_scheduler_tick: " ++ "%d is non-preemptable, " ++ "preemption delayed.\n", t->pid); ++ request_exit_np(t); ++ } ++ } ++} ++ ++/* Getting schedule() right is a bit tricky. schedule() may not make any ++ * assumptions on the state of the current task since it may be called for a ++ * number of reasons. The reasons include a scheduler_tick() determined that it ++ * was necessary, because sys_exit_np() was called, because some Linux ++ * subsystem determined so, or even (in the worst case) because there is a bug ++ * hidden somewhere. Thus, we must take extreme care to determine what the ++ * current state is. ++ * ++ * The CPU could currently be scheduling a task (or not), be linked (or not). 
++ * ++ * The following assertions for the scheduled task could hold: ++ * ++ * - !is_running(scheduled) // the job blocks ++ * - scheduled->timeslice == 0 // the job completed (forcefully) ++ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) ++ * - linked != scheduled // we need to reschedule (for any reason) ++ * - is_np(scheduled) // rescheduling must be delayed, ++ * sys_exit_np must be requested ++ * ++ * Any of these can occur together. ++ */ ++static struct task_struct* gsnedf_schedule(struct task_struct * prev) ++{ ++ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); ++ int out_of_time, sleep, preempt, np, exists, blocks; ++ struct task_struct* next = NULL; ++ ++#ifdef CONFIG_RELEASE_MASTER ++ /* Bail out early if we are the release master. ++ * The release master never schedules any real-time tasks. ++ */ ++ if (gsnedf.release_master == entry->cpu) ++ return NULL; ++#endif ++ ++ raw_spin_lock(&gsnedf_lock); ++ clear_will_schedule(); ++ ++ /* sanity checking */ ++ BUG_ON(entry->scheduled && entry->scheduled != prev); ++ BUG_ON(entry->scheduled && !is_realtime(prev)); ++ BUG_ON(is_realtime(prev) && !entry->scheduled); ++ ++ /* (0) Determine state */ ++ exists = entry->scheduled != NULL; ++ blocks = exists && !is_running(entry->scheduled); ++ out_of_time = exists && ++ budget_enforced(entry->scheduled) && ++ budget_exhausted(entry->scheduled); ++ np = exists && is_np(entry->scheduled); ++ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; ++ preempt = entry->scheduled != entry->linked; ++ ++#ifdef WANT_ALL_SCHED_EVENTS ++ TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); ++#endif ++ ++ if (exists) ++ TRACE_TASK(prev, ++ "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " ++ "state:%d sig:%d\n", ++ blocks, out_of_time, np, sleep, preempt, ++ prev->state, signal_pending(prev)); ++ if (entry->linked && preempt) ++ TRACE_TASK(prev, "will be preempted by %s/%d\n", ++ entry->linked->comm, entry->linked->pid); ++ ++ ++ /* If a task blocks we have no choice but to reschedule. ++ */ ++ if (blocks) ++ unlink(entry->scheduled); ++ ++ /* Request a sys_exit_np() call if we would like to preempt but cannot. ++ * We need to make sure to update the link structure anyway in case ++ * that we are still linked. Multiple calls to request_exit_np() don't ++ * hurt. ++ */ ++ if (np && (out_of_time || preempt || sleep)) { ++ unlink(entry->scheduled); ++ request_exit_np(entry->scheduled); ++ } ++ ++ /* Any task that is preemptable and either exhausts its execution ++ * budget or wants to sleep completes. We may have to reschedule after ++ * this. Don't do a job completion if we block (can't have timers running ++ * for blocked jobs). Preemption go first for the same reason. ++ */ ++ if (!np && (out_of_time || sleep) && !blocks && !preempt) ++ job_completion(entry->scheduled, !sleep); ++ ++ /* Link pending task if we became unlinked. ++ */ ++ if (!entry->linked) ++ link_task_to_cpu(__take_ready(&gsnedf), entry); ++ ++ /* The final scheduling decision. Do we need to switch for some reason? ++ * If linked is different from scheduled, then select linked as next. ++ */ ++ if ((!np || blocks) && ++ entry->linked != entry->scheduled) { ++ /* Schedule a linked job? 
*/ ++ if (entry->linked) { ++ entry->linked->rt_param.scheduled_on = entry->cpu; ++ next = entry->linked; ++ } ++ if (entry->scheduled) { ++ /* not gonna be scheduled soon */ ++ entry->scheduled->rt_param.scheduled_on = NO_CPU; ++ TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); ++ } ++ } else ++ /* Only override Linux scheduler if we have a real-time task ++ * scheduled that needs to continue. ++ */ ++ if (exists) ++ next = prev; ++ ++ raw_spin_unlock(&gsnedf_lock); ++ ++#ifdef WANT_ALL_SCHED_EVENTS ++ TRACE("gsnedf_lock released, next=0x%p\n", next); ++ ++ if (next) ++ TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); ++ else if (exists && !next) ++ TRACE("becomes idle at %llu.\n", litmus_clock()); ++#endif ++ ++ ++ return next; ++} ++ ++ ++/* _finish_switch - we just finished the switch away from prev ++ */ ++static void gsnedf_finish_switch(struct task_struct *prev) ++{ ++ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); ++ ++ entry->scheduled = is_realtime(current) ? current : NULL; ++#ifdef WANT_ALL_SCHED_EVENTS ++ TRACE_TASK(prev, "switched away from\n"); ++#endif ++} ++ ++ ++/* Prepare a task for running in RT mode ++ */ ++static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) ++{ ++ unsigned long flags; ++ cpu_entry_t* entry; ++ ++ TRACE("gsn edf: task new %d\n", t->pid); ++ ++ raw_spin_lock_irqsave(&gsnedf_lock, flags); ++ ++ /* setup job params */ ++ release_at(t, litmus_clock()); ++ ++ if (running) { ++ entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t)); ++ BUG_ON(entry->scheduled); ++ ++#ifdef CONFIG_RELEASE_MASTER ++ if (entry->cpu != gsnedf.release_master) { ++#endif ++ entry->scheduled = t; ++ tsk_rt(t)->scheduled_on = task_cpu(t); ++#ifdef CONFIG_RELEASE_MASTER ++ } else { ++ /* do not schedule on release master */ ++ preempt(entry); /* force resched */ ++ tsk_rt(t)->scheduled_on = NO_CPU; ++ } ++#endif ++ } else { ++ t->rt_param.scheduled_on = NO_CPU; ++ } ++ t->rt_param.linked_on = NO_CPU; ++ ++ gsnedf_job_arrival(t); ++ raw_spin_unlock_irqrestore(&gsnedf_lock, flags); ++} ++ ++static void gsnedf_task_wake_up(struct task_struct *task) ++{ ++ unsigned long flags; ++ lt_t now; ++ ++ TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); ++ ++ raw_spin_lock_irqsave(&gsnedf_lock, flags); ++ /* We need to take suspensions because of semaphores into ++ * account! If a job resumes after being suspended due to acquiring ++ * a semaphore, it should never be treated as a new job release. 
++ */ ++ if (get_rt_flags(task) == RT_F_EXIT_SEM) { ++ set_rt_flags(task, RT_F_RUNNING); ++ } else { ++ now = litmus_clock(); ++ if (is_tardy(task, now)) { ++ /* new sporadic release */ ++ release_at(task, now); ++ sched_trace_task_release(task); ++ } ++ else { ++ if (task->rt.time_slice) { ++ /* came back in time before deadline ++ */ ++ set_rt_flags(task, RT_F_RUNNING); ++ } ++ } ++ } ++ gsnedf_job_arrival(task); ++ raw_spin_unlock_irqrestore(&gsnedf_lock, flags); ++} ++ ++static void gsnedf_task_block(struct task_struct *t) ++{ ++ unsigned long flags; ++ ++ TRACE_TASK(t, "block at %llu\n", litmus_clock()); ++ ++ /* unlink if necessary */ ++ raw_spin_lock_irqsave(&gsnedf_lock, flags); ++ unlink(t); ++ raw_spin_unlock_irqrestore(&gsnedf_lock, flags); ++ ++ BUG_ON(!is_realtime(t)); ++} ++ ++ ++static void gsnedf_task_exit(struct task_struct * t) ++{ ++ unsigned long flags; ++ ++ /* unlink if necessary */ ++ raw_spin_lock_irqsave(&gsnedf_lock, flags); ++ unlink(t); ++ if (tsk_rt(t)->scheduled_on != NO_CPU) { ++ gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; ++ tsk_rt(t)->scheduled_on = NO_CPU; ++ } ++ raw_spin_unlock_irqrestore(&gsnedf_lock, flags); ++ ++ BUG_ON(!is_realtime(t)); ++ TRACE_TASK(t, "RIP\n"); ++} ++ ++#ifdef CONFIG_FMLP ++ ++/* Update the queue position of a task that got it's priority boosted via ++ * priority inheritance. */ ++static void update_queue_position(struct task_struct *holder) ++{ ++ /* We don't know whether holder is in the ready queue. It should, but ++ * on a budget overrun it may already be in a release queue. Hence, ++ * calling unlink() is not possible since it assumes that the task is ++ * not in a release queue. However, we can safely check whether ++ * sem->holder is currently in a queue or scheduled after locking both ++ * the release and the ready queue lock. */ ++ ++ /* Assumption: caller holds gsnedf_lock */ ++ ++ int check_preempt = 0; ++ ++ if (tsk_rt(holder)->linked_on != NO_CPU) { ++ TRACE_TASK(holder, "%s: linked on %d\n", ++ __FUNCTION__, tsk_rt(holder)->linked_on); ++ /* Holder is scheduled; need to re-order CPUs. ++ * We can't use heap_decrease() here since ++ * the cpu_heap is ordered in reverse direction, so ++ * it is actually an increase. */ ++ bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, ++ gsnedf_cpus[tsk_rt(holder)->linked_on]->hn); ++ bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, ++ gsnedf_cpus[tsk_rt(holder)->linked_on]->hn); ++ } else { ++ /* holder may be queued: first stop queue changes */ ++ raw_spin_lock(&gsnedf.release_lock); ++ if (is_queued(holder)) { ++ TRACE_TASK(holder, "%s: is queued\n", ++ __FUNCTION__); ++ /* We need to update the position ++ * of holder in some heap. Note that this ++ * may be a release heap. */ ++ check_preempt = ++ !bheap_decrease(edf_ready_order, ++ tsk_rt(holder)->heap_node); ++ } else { ++ /* Nothing to do: if it is not queued and not linked ++ * then it is currently being moved by other code ++ * (e.g., a timer interrupt handler) that will use the ++ * correct priority when enqueuing the task. */ ++ TRACE_TASK(holder, "%s: is NOT queued => Done.\n", ++ __FUNCTION__); ++ } ++ raw_spin_unlock(&gsnedf.release_lock); ++ ++ /* If holder was enqueued in a release heap, then the following ++ * preemption check is pointless, but we can't easily detect ++ * that case. If you want to fix this, then consider that ++ * simply adding a state flag requires O(n) time to update when ++ * releasing n tasks, which conflicts with the goal to have ++ * O(log n) merges. 
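/* Editorial sketch -- not part of litmus-rt-2010.2.patch. The FMLP path in
 * this file only records *who* a lock holder inherits from
 * (rt_param.inh_task); the priority comparison elsewhere then lets the
 * holder compete with the waiter's earlier deadline. A simplified userspace
 * model of that effect; toy_task and eff_deadline() are invented and
 * deliberately reduce "inheritance" to taking the earlier of the two deadlines.
 */
#include <stdio.h>

struct toy_task {
	unsigned long long deadline;		/* own absolute deadline */
	const struct toy_task *inh_task;	/* inherited-from task, or NULL */
};

/* deadline the task effectively competes with under inheritance */
static unsigned long long eff_deadline(const struct toy_task *t)
{
	if (t->inh_task && t->inh_task->deadline < t->deadline)
		return t->inh_task->deadline;
	return t->deadline;
}

int main(void)
{
	struct toy_task waiter = { .deadline = 20, .inh_task = NULL };
	struct toy_task holder = { .deadline = 90, .inh_task = &waiter };

	/* while the waiter blocks on the holder's semaphore, the holder is
	 * scheduled as if its deadline were 20 -- prints 20 */
	printf("holder effective deadline: %llu\n", eff_deadline(&holder));
	return 0;
}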
*/ ++ if (check_preempt) { ++ /* heap_decrease() hit the top level of the heap: make ++ * sure preemption checks get the right task, not the ++ * potentially stale cache. */ ++ bheap_uncache_min(edf_ready_order, ++ &gsnedf.ready_queue); ++ check_for_preemptions(); ++ } ++ } ++} ++ ++static long gsnedf_pi_block(struct pi_semaphore *sem, ++ struct task_struct *new_waiter) ++{ ++ /* This callback has to handle the situation where a new waiter is ++ * added to the wait queue of the semaphore. ++ * ++ * We must check if has a higher priority than the currently ++ * highest-priority task, and then potentially reschedule. ++ */ ++ ++ BUG_ON(!new_waiter); ++ ++ if (edf_higher_prio(new_waiter, sem->hp.task)) { ++ TRACE_TASK(new_waiter, " boosts priority via %p\n", sem); ++ /* called with IRQs disabled */ ++ raw_spin_lock(&gsnedf_lock); ++ /* store new highest-priority task */ ++ sem->hp.task = new_waiter; ++ if (sem->holder) { ++ TRACE_TASK(sem->holder, ++ " holds %p and will inherit from %s/%d\n", ++ sem, ++ new_waiter->comm, new_waiter->pid); ++ /* let holder inherit */ ++ sem->holder->rt_param.inh_task = new_waiter; ++ update_queue_position(sem->holder); ++ } ++ raw_spin_unlock(&gsnedf_lock); ++ } ++ ++ return 0; ++} ++ ++static long gsnedf_inherit_priority(struct pi_semaphore *sem, ++ struct task_struct *new_owner) ++{ ++ /* We don't need to acquire the gsnedf_lock since at the time of this ++ * call new_owner isn't actually scheduled yet (it's still sleeping) ++ * and since the calling function already holds sem->wait.lock, which ++ * prevents concurrent sem->hp.task changes. ++ */ ++ ++ if (sem->hp.task && sem->hp.task != new_owner) { ++ new_owner->rt_param.inh_task = sem->hp.task; ++ TRACE_TASK(new_owner, "inherited priority from %s/%d\n", ++ sem->hp.task->comm, sem->hp.task->pid); ++ } else ++ TRACE_TASK(new_owner, ++ "cannot inherit priority, " ++ "no higher priority job waits.\n"); ++ return 0; ++} ++ ++/* This function is called on a semaphore release, and assumes that ++ * the current task is also the semaphore holder. ++ */ ++static long gsnedf_return_priority(struct pi_semaphore *sem) ++{ ++ struct task_struct* t = current; ++ int ret = 0; ++ ++ /* Find new highest-priority semaphore task ++ * if holder task is the current hp.task. ++ * ++ * Calling function holds sem->wait.lock. ++ */ ++ if (t == sem->hp.task) ++ edf_set_hp_task(sem); ++ ++ TRACE_CUR("gsnedf_return_priority for lock %p\n", sem); ++ ++ if (t->rt_param.inh_task) { ++ /* interrupts already disabled by PI code */ ++ raw_spin_lock(&gsnedf_lock); ++ ++ /* Reset inh_task to NULL. 
*/ ++ t->rt_param.inh_task = NULL; ++ ++ /* Check if rescheduling is necessary */ ++ unlink(t); ++ gsnedf_job_arrival(t); ++ raw_spin_unlock(&gsnedf_lock); ++ } ++ ++ return ret; ++} ++ ++#endif ++ ++static long gsnedf_admit_task(struct task_struct* tsk) ++{ ++ return 0; ++} ++ ++static long gsnedf_activate_plugin(void) ++{ ++ int cpu; ++ cpu_entry_t *entry; ++ ++ bheap_init(&gsnedf_cpu_heap); ++#ifdef CONFIG_RELEASE_MASTER ++ gsnedf.release_master = atomic_read(&release_master_cpu); ++#endif ++ ++ for_each_online_cpu(cpu) { ++ entry = &per_cpu(gsnedf_cpu_entries, cpu); ++ bheap_node_init(&entry->hn, entry); ++ atomic_set(&entry->will_schedule, 0); ++ entry->linked = NULL; ++ entry->scheduled = NULL; ++#ifdef CONFIG_RELEASE_MASTER ++ if (cpu != gsnedf.release_master) { ++#endif ++ TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu); ++ update_cpu_position(entry); ++#ifdef CONFIG_RELEASE_MASTER ++ } else { ++ TRACE("GSN-EDF: CPU %d is release master.\n", cpu); ++ } ++#endif ++ } ++ return 0; ++} ++ ++/* Plugin object */ ++static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { ++ .plugin_name = "GSN-EDF", ++ .finish_switch = gsnedf_finish_switch, ++ .tick = gsnedf_tick, ++ .task_new = gsnedf_task_new, ++ .complete_job = complete_job, ++ .task_exit = gsnedf_task_exit, ++ .schedule = gsnedf_schedule, ++ .task_wake_up = gsnedf_task_wake_up, ++ .task_block = gsnedf_task_block, ++#ifdef CONFIG_FMLP ++ .fmlp_active = 1, ++ .pi_block = gsnedf_pi_block, ++ .inherit_priority = gsnedf_inherit_priority, ++ .return_priority = gsnedf_return_priority, ++#endif ++ .admit_task = gsnedf_admit_task, ++ .activate_plugin = gsnedf_activate_plugin, ++}; ++ ++ ++static int __init init_gsn_edf(void) ++{ ++ int cpu; ++ cpu_entry_t *entry; ++ ++ bheap_init(&gsnedf_cpu_heap); ++ /* initialize CPU state */ ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ entry = &per_cpu(gsnedf_cpu_entries, cpu); ++ gsnedf_cpus[cpu] = entry; ++ atomic_set(&entry->will_schedule, 0); ++ entry->cpu = cpu; ++ entry->hn = &gsnedf_heap_node[cpu]; ++ bheap_node_init(&entry->hn, entry); ++ } ++ edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); ++ return register_sched_plugin(&gsn_edf_plugin); ++} ++ ++ ++module_init(init_gsn_edf); +diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c +new file mode 100644 +index 0000000..3ed713b +--- /dev/null ++++ b/litmus/sched_litmus.c +@@ -0,0 +1,315 @@ ++/* This file is included from kernel/sched.c */ ++ ++#include ++#include ++#include ++ ++static void update_time_litmus(struct rq *rq, struct task_struct *p) ++{ ++ u64 delta = rq->clock - p->se.exec_start; ++ if (unlikely((s64)delta < 0)) ++ delta = 0; ++ /* per job counter */ ++ p->rt_param.job_params.exec_time += delta; ++ /* task counter */ ++ p->se.sum_exec_runtime += delta; ++ /* sched_clock() */ ++ p->se.exec_start = rq->clock; ++ cpuacct_charge(p, delta); ++} ++ ++static void double_rq_lock(struct rq *rq1, struct rq *rq2); ++static void double_rq_unlock(struct rq *rq1, struct rq *rq2); ++ ++/* ++ * litmus_tick gets called by scheduler_tick() with HZ freq ++ * Interrupts are disabled ++ */ ++static void litmus_tick(struct rq *rq, struct task_struct *p) ++{ ++ TS_PLUGIN_TICK_START; ++ ++ if (is_realtime(p)) ++ update_time_litmus(rq, p); ++ ++ /* plugin tick */ ++ litmus->tick(p); ++ ++ TS_PLUGIN_TICK_END; ++ ++ return; ++} ++ ++static struct task_struct * ++litmus_schedule(struct rq *rq, struct task_struct *prev) ++{ ++ struct rq* other_rq; ++ struct task_struct *next; ++ ++ long was_running; ++ lt_t _maybe_deadlock = 0; ++ ++ /* 
let the plugin schedule */ ++ next = litmus->schedule(prev); ++ ++ /* check if a global plugin pulled a task from a different RQ */ ++ if (next && task_rq(next) != rq) { ++ /* we need to migrate the task */ ++ other_rq = task_rq(next); ++ TRACE_TASK(next, "migrate from %d\n", other_rq->cpu); ++ ++ /* while we drop the lock, the prev task could change its ++ * state ++ */ ++ was_running = is_running(prev); ++ mb(); ++ raw_spin_unlock(&rq->lock); ++ ++ /* Don't race with a concurrent switch. This could deadlock in ++ * the case of cross or circular migrations. It's the job of ++ * the plugin to make sure that doesn't happen. ++ */ ++ TRACE_TASK(next, "stack_in_use=%d\n", ++ next->rt_param.stack_in_use); ++ if (next->rt_param.stack_in_use != NO_CPU) { ++ TRACE_TASK(next, "waiting to deschedule\n"); ++ _maybe_deadlock = litmus_clock(); ++ } ++ while (next->rt_param.stack_in_use != NO_CPU) { ++ cpu_relax(); ++ mb(); ++ if (next->rt_param.stack_in_use == NO_CPU) ++ TRACE_TASK(next,"descheduled. Proceeding.\n"); ++ ++ if (lt_before(_maybe_deadlock + 10000000, ++ litmus_clock())) { ++ /* We've been spinning for 10ms. ++ * Something can't be right! ++ * Let's abandon the task and bail out; at least ++ * we will have debug info instead of a hard ++ * deadlock. ++ */ ++ TRACE_TASK(next,"stack too long in use. " ++ "Deadlock?\n"); ++ next = NULL; ++ ++ /* bail out */ ++ raw_spin_lock(&rq->lock); ++ return next; ++ } ++ } ++#ifdef __ARCH_WANT_UNLOCKED_CTXSW ++ if (next->oncpu) ++ TRACE_TASK(next, "waiting for !oncpu"); ++ while (next->oncpu) { ++ cpu_relax(); ++ mb(); ++ } ++#endif ++ double_rq_lock(rq, other_rq); ++ mb(); ++ if (is_realtime(prev) && is_running(prev) != was_running) { ++ TRACE_TASK(prev, ++ "state changed while we dropped" ++ " the lock: is_running=%d, was_running=%d\n", ++ is_running(prev), was_running); ++ if (is_running(prev) && !was_running) { ++ /* prev task became unblocked ++ * we need to simulate normal sequence of events ++ * to scheduler plugins. ++ */ ++ litmus->task_block(prev); ++ litmus->task_wake_up(prev); ++ } ++ } ++ ++ set_task_cpu(next, smp_processor_id()); ++ ++ /* DEBUG: now that we have the lock we need to make sure a ++ * couple of things still hold: ++ * - it is still a real-time task ++ * - it is still runnable (could have been stopped) ++ * If either is violated, then the active plugin is ++ * doing something wrong. ++ */ ++ if (!is_realtime(next) || !is_running(next)) { ++ /* BAD BAD BAD */ ++ TRACE_TASK(next,"BAD: migration invariant FAILED: " ++ "rt=%d running=%d\n", ++ is_realtime(next), ++ is_running(next)); ++ /* drop the task */ ++ next = NULL; ++ } ++ /* release the other CPU's runqueue, but keep ours */ ++ raw_spin_unlock(&other_rq->lock); ++ } ++ if (next) { ++ next->rt_param.stack_in_use = rq->cpu; ++ next->se.exec_start = rq->clock; ++ } ++ ++ update_enforcement_timer(next); ++ return next; ++} ++ ++static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, ++ int wakeup, bool head) ++{ ++ if (wakeup) { ++ sched_trace_task_resume(p); ++ tsk_rt(p)->present = 1; ++ /* LITMUS^RT plugins need to update the state ++ * _before_ making it available in global structures. ++ * Linux gets away with being lazy about the task state ++ * update. We can't do that, hence we update the task ++ * state already here. ++ * ++ * WARNING: this needs to be re-evaluated when porting ++ * to newer kernel versions. 
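/* Editorial sketch -- not part of litmus-rt-2010.2.patch. litmus_schedule()
 * above spins until the migrating task's stack is free, but bails out after
 * 10ms so that a plugin bug shows up as a traced error instead of a silent
 * hard deadlock. A userspace rendition of that "spin with an escape hatch"
 * pattern using C11 atomics and CLOCK_MONOTONIC; every name here is invented.
 */
#define _POSIX_C_SOURCE 199309L
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* spin until *in_use clears; give up after budget_ns and report failure */
static int wait_for_clear(atomic_int *in_use, unsigned long long budget_ns)
{
	unsigned long long start = now_ns();

	while (atomic_load(in_use)) {
		if (now_ns() - start > budget_ns) {
			fprintf(stderr, "still in use after %llu ns, bailing\n",
				budget_ns);
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	atomic_int stack_in_use = 1;	/* never cleared: forces the timeout */

	/* same 10ms budget as the patch uses for stack_in_use */
	return wait_for_clear(&stack_in_use, 10000000ull) ? 1 : 0;
}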
++ */ ++ p->state = TASK_RUNNING; ++ litmus->task_wake_up(p); ++ ++ rq->litmus.nr_running++; ++ } else ++ TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); ++} ++ ++static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, int sleep) ++{ ++ if (sleep) { ++ litmus->task_block(p); ++ tsk_rt(p)->present = 0; ++ sched_trace_task_block(p); ++ ++ rq->litmus.nr_running--; ++ } else ++ TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n"); ++} ++ ++static void yield_task_litmus(struct rq *rq) ++{ ++ BUG_ON(rq->curr != current); ++ /* sched_yield() is called to trigger delayed preemptions. ++ * Thus, mark the current task as needing to be rescheduled. ++ * This will cause the scheduler plugin to be invoked, which can ++ * then determine if a preemption is still required. ++ */ ++ clear_exit_np(current); ++ set_tsk_need_resched(current); ++} ++ ++/* Plugins are responsible for this. ++ */ ++static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags) ++{ ++} ++ ++static void put_prev_task_litmus(struct rq *rq, struct task_struct *p) ++{ ++} ++ ++static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev) ++{ ++ update_time_litmus(rq, prev); ++ if (!is_running(prev)) ++ tsk_rt(prev)->present = 0; ++} ++ ++/* pick_next_task_litmus() - litmus_schedule() function ++ * ++ * return the next task to be scheduled ++ */ ++static struct task_struct *pick_next_task_litmus(struct rq *rq) ++{ ++ /* get the to-be-switched-out task (prev) */ ++ struct task_struct *prev = rq->litmus.prev; ++ struct task_struct *next; ++ ++ /* if not called from schedule() but from somewhere ++ * else (e.g., migration), return now! ++ */ ++ if(!rq->litmus.prev) ++ return NULL; ++ ++ rq->litmus.prev = NULL; ++ ++ TS_PLUGIN_SCHED_START; ++ next = litmus_schedule(rq, prev); ++ TS_PLUGIN_SCHED_END; ++ ++ return next; ++} ++ ++static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued) ++{ ++ /* nothing to do; tick related tasks are done by litmus_tick() */ ++ return; ++} ++ ++static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running) ++{ ++} ++ ++static void prio_changed_litmus(struct rq *rq, struct task_struct *p, ++ int oldprio, int running) ++{ ++} ++ ++unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p) ++{ ++ /* return infinity */ ++ return 0; ++} ++ ++/* This is called when a task became a real-time task, either due to a SCHED_* ++ * class transition or due to PI mutex inheritance. We don't handle Linux PI ++ * mutex inheritance yet (and probably never will). Use LITMUS provided ++ * synchronization primitives instead. ++ */ ++static void set_curr_task_litmus(struct rq *rq) ++{ ++ rq->curr->se.exec_start = rq->clock; ++} ++ ++ ++#ifdef CONFIG_SMP ++/* execve tries to rebalance task in this scheduling domain. ++ * We don't care about the scheduling domain; can gets called from ++ * exec, fork, wakeup. ++ */ ++static int select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags) ++{ ++ /* preemption is already disabled. 
++ * We don't want to change cpu here ++ */ ++ return task_cpu(p); ++} ++#endif ++ ++static const struct sched_class litmus_sched_class = { ++ .next = &rt_sched_class, ++ .enqueue_task = enqueue_task_litmus, ++ .dequeue_task = dequeue_task_litmus, ++ .yield_task = yield_task_litmus, ++ ++ .check_preempt_curr = check_preempt_curr_litmus, ++ ++ .pick_next_task = pick_next_task_litmus, ++ .put_prev_task = put_prev_task_litmus, ++ ++#ifdef CONFIG_SMP ++ .select_task_rq = select_task_rq_litmus, ++ ++ .pre_schedule = pre_schedule_litmus, ++#endif ++ ++ .set_curr_task = set_curr_task_litmus, ++ .task_tick = task_tick_litmus, ++ ++ .get_rr_interval = get_rr_interval_litmus, ++ ++ .prio_changed = prio_changed_litmus, ++ .switched_to = switched_to_litmus, ++}; +diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c +new file mode 100644 +index 0000000..ea77d32 +--- /dev/null ++++ b/litmus/sched_pfair.c +@@ -0,0 +1,897 @@ ++/* ++ * kernel/sched_pfair.c ++ * ++ * Implementation of the (global) Pfair scheduling algorithm. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++struct subtask { ++ /* measured in quanta relative to job release */ ++ quanta_t release; ++ quanta_t deadline; ++ quanta_t overlap; /* called "b bit" by PD^2 */ ++ quanta_t group_deadline; ++}; ++ ++struct pfair_param { ++ quanta_t quanta; /* number of subtasks */ ++ quanta_t cur; /* index of current subtask */ ++ ++ quanta_t release; /* in quanta */ ++ quanta_t period; /* in quanta */ ++ ++ quanta_t last_quantum; /* when scheduled last */ ++ int last_cpu; /* where scheduled last */ ++ ++ unsigned int sporadic_release; /* On wakeup, new sporadic release? */ ++ ++ struct subtask subtasks[0]; /* allocate together with pfair_param */ ++}; ++ ++#define tsk_pfair(tsk) ((tsk)->rt_param.pfair) ++ ++struct pfair_state { ++ int cpu; ++ volatile quanta_t cur_tick; /* updated by the CPU that is advancing ++ * the time */ ++ volatile quanta_t local_tick; /* What tick is the local CPU currently ++ * executing? Updated only by the local ++ * CPU. In QEMU, this may lag behind the ++ * current tick. In a real system, with ++ * proper timers and aligned quanta, ++ * that should only be the ++ * case for a very short time after the ++ * time advanced. With staggered quanta, ++ * it will lag for the duration of the ++ * offset. ++ */ ++ ++ struct task_struct* linked; /* the task that should be executing */ ++ struct task_struct* local; /* the local copy of linked */ ++ struct task_struct* scheduled; /* what is actually scheduled */ ++ ++ unsigned long missed_quanta; ++ lt_t offset; /* stagger offset */ ++}; ++ ++/* Currently, we limit the maximum period of any task to 2000 quanta. ++ * The reason is that it makes the implementation easier since we do not ++ * need to reallocate the release wheel on task arrivals. ++ * In the future ++ */ ++#define PFAIR_MAX_PERIOD 2000 ++ ++/* This is the release queue wheel. It is indexed by pfair_time % ++ * PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR priority, so that it can be ++ * merged with the ready queue. ++ */ ++static struct bheap release_queue[PFAIR_MAX_PERIOD]; ++ ++DEFINE_PER_CPU(struct pfair_state, pfair_state); ++struct pfair_state* *pstate; /* short cut */ ++ ++static quanta_t pfair_time = 0; /* the "official" PFAIR clock */ ++static quanta_t merge_time = 0; /* Updated after the release queue has been ++ * merged. Used by drop_all_references(). 
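/* Editorial sketch -- not part of litmus-rt-2010.2.patch. The release queue
 * above is a timing wheel: one bheap bucket per quantum, reused modulo
 * PFAIR_MAX_PERIOD, which is why periods beyond 2000 quanta are not supported
 * without reallocating the wheel. A toy version of the bucket index computed
 * by the relq() helper further down; TOY_MAX_PERIOD and toy_relq() are
 * invented names.
 */
#include <stdio.h>

#define TOY_MAX_PERIOD 2000	/* mirrors PFAIR_MAX_PERIOD */

static unsigned int toy_relq(unsigned long quantum)
{
	return quantum % TOY_MAX_PERIOD;
}

int main(void)
{
	/* quanta 5, 2005 and 4005 all map to the same bucket -- prints 5 5 5 */
	printf("%u %u %u\n", toy_relq(5), toy_relq(2005), toy_relq(4005));
	return 0;
}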
++ */ ++ ++static rt_domain_t pfair; ++ ++/* The pfair_lock is used to serialize all scheduling events. ++ */ ++#define pfair_lock pfair.ready_lock ++ ++/* Enable for lots of trace info. ++ * #define PFAIR_DEBUG ++ */ ++ ++#ifdef PFAIR_DEBUG ++#define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args) ++#define PTRACE(f, args...) TRACE(f, ## args) ++#else ++#define PTRACE_TASK(t, f, args...) ++#define PTRACE(f, args...) ++#endif ++ ++/* gcc will inline all of these accessor functions... */ ++static struct subtask* cur_subtask(struct task_struct* t) ++{ ++ return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur; ++} ++ ++static quanta_t cur_deadline(struct task_struct* t) ++{ ++ return cur_subtask(t)->deadline + tsk_pfair(t)->release; ++} ++ ++ ++static quanta_t cur_sub_release(struct task_struct* t) ++{ ++ return cur_subtask(t)->release + tsk_pfair(t)->release; ++} ++ ++static quanta_t cur_release(struct task_struct* t) ++{ ++#ifdef EARLY_RELEASE ++ /* only the release of the first subtask counts when we early ++ * release */ ++ return tsk_pfair(t)->release; ++#else ++ return cur_sub_release(t); ++#endif ++} ++ ++static quanta_t cur_overlap(struct task_struct* t) ++{ ++ return cur_subtask(t)->overlap; ++} ++ ++static quanta_t cur_group_deadline(struct task_struct* t) ++{ ++ quanta_t gdl = cur_subtask(t)->group_deadline; ++ if (gdl) ++ return gdl + tsk_pfair(t)->release; ++ else ++ return gdl; ++} ++ ++ ++static int pfair_higher_prio(struct task_struct* first, ++ struct task_struct* second) ++{ ++ return /* first task must exist */ ++ first && ( ++ /* Does the second task exist and is it a real-time task? If ++ * not, the first task (which is a RT task) has higher ++ * priority. ++ */ ++ !second || !is_realtime(second) || ++ ++ /* Is the (subtask) deadline of the first task earlier? ++ * Then it has higher priority. ++ */ ++ time_before(cur_deadline(first), cur_deadline(second)) || ++ ++ /* Do we have a deadline tie? ++ * Then break by B-bit. ++ */ ++ (cur_deadline(first) == cur_deadline(second) && ++ (cur_overlap(first) > cur_overlap(second) || ++ ++ /* Do we have a B-bit tie? ++ * Then break by group deadline. ++ */ ++ (cur_overlap(first) == cur_overlap(second) && ++ (time_after(cur_group_deadline(first), ++ cur_group_deadline(second)) || ++ ++ /* Do we have a group deadline tie? ++ * Then break by PID, which are unique. 
++ */ ++ (cur_group_deadline(first) == ++ cur_group_deadline(second) && ++ first->pid < second->pid)))))); ++} ++ ++int pfair_ready_order(struct bheap_node* a, struct bheap_node* b) ++{ ++ return pfair_higher_prio(bheap2task(a), bheap2task(b)); ++} ++ ++/* return the proper release queue for time t */ ++static struct bheap* relq(quanta_t t) ++{ ++ struct bheap* rq = &release_queue[t % PFAIR_MAX_PERIOD]; ++ return rq; ++} ++ ++static void prepare_release(struct task_struct* t, quanta_t at) ++{ ++ tsk_pfair(t)->release = at; ++ tsk_pfair(t)->cur = 0; ++} ++ ++static void __pfair_add_release(struct task_struct* t, struct bheap* queue) ++{ ++ bheap_insert(pfair_ready_order, queue, ++ tsk_rt(t)->heap_node); ++} ++ ++static void pfair_add_release(struct task_struct* t) ++{ ++ BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node)); ++ __pfair_add_release(t, relq(cur_release(t))); ++} ++ ++/* pull released tasks from the release queue */ ++static void poll_releases(quanta_t time) ++{ ++ __merge_ready(&pfair, relq(time)); ++ merge_time = time; ++} ++ ++static void check_preempt(struct task_struct* t) ++{ ++ int cpu = NO_CPU; ++ if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on && ++ tsk_rt(t)->present) { ++ /* the task can be scheduled and ++ * is not scheduled where it ought to be scheduled ++ */ ++ cpu = tsk_rt(t)->linked_on != NO_CPU ? ++ tsk_rt(t)->linked_on : ++ tsk_rt(t)->scheduled_on; ++ PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n", ++ tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on); ++ /* preempt */ ++ if (cpu == smp_processor_id()) ++ set_tsk_need_resched(current); ++ else { ++ smp_send_reschedule(cpu); ++ } ++ } ++} ++ ++/* caller must hold pfair_lock */ ++static void drop_all_references(struct task_struct *t) ++{ ++ int cpu; ++ struct pfair_state* s; ++ struct bheap* q; ++ if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { ++ /* figure out what queue the node is in */ ++ if (time_before_eq(cur_release(t), merge_time)) ++ q = &pfair.ready_queue; ++ else ++ q = relq(cur_release(t)); ++ bheap_delete(pfair_ready_order, q, ++ tsk_rt(t)->heap_node); ++ } ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ s = &per_cpu(pfair_state, cpu); ++ if (s->linked == t) ++ s->linked = NULL; ++ if (s->local == t) ++ s->local = NULL; ++ if (s->scheduled == t) ++ s->scheduled = NULL; ++ } ++} ++ ++/* returns 1 if the task needs to go the release queue */ ++static int advance_subtask(quanta_t time, struct task_struct* t, int cpu) ++{ ++ struct pfair_param* p = tsk_pfair(t); ++ int to_relq; ++ p->cur = (p->cur + 1) % p->quanta; ++ if (!p->cur) { ++ sched_trace_task_completion(t, 1); ++ if (tsk_rt(t)->present) { ++ /* we start a new job */ ++ prepare_for_next_period(t); ++ sched_trace_task_release(t); ++ get_rt_flags(t) = RT_F_RUNNING; ++ p->release += p->period; ++ } else { ++ /* remove task from system until it wakes */ ++ drop_all_references(t); ++ tsk_pfair(t)->sporadic_release = 1; ++ TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n", ++ cpu, p->cur); ++ return 0; ++ } ++ } ++ to_relq = time_after(cur_release(t), time); ++ TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n", ++ cpu, p->cur, to_relq); ++ return to_relq; ++} ++ ++static void advance_subtasks(quanta_t time) ++{ ++ int cpu, missed; ++ struct task_struct* l; ++ struct pfair_param* p; ++ ++ for_each_online_cpu(cpu) { ++ l = pstate[cpu]->linked; ++ missed = pstate[cpu]->linked != pstate[cpu]->local; ++ if (l) { ++ p = tsk_pfair(l); ++ p->last_quantum = time; ++ p->last_cpu = cpu; ++ if (advance_subtask(time, l, cpu)) { ++ 
pstate[cpu]->linked = NULL; ++ pfair_add_release(l); ++ } ++ } ++ } ++} ++ ++static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu) ++{ ++ int cpu; ++ if (tsk_rt(t)->scheduled_on != NO_CPU) { ++ /* always observe scheduled_on linkage */ ++ default_cpu = tsk_rt(t)->scheduled_on; ++ } else if (tsk_pfair(t)->last_quantum == time - 1) { ++ /* back2back quanta */ ++ /* Only observe last_quantum if no scheduled_on is in the way. ++ * This should only kick in if a CPU missed quanta, and that ++ * *should* only happen in QEMU. ++ */ ++ cpu = tsk_pfair(t)->last_cpu; ++ if (!pstate[cpu]->linked || ++ tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) { ++ default_cpu = cpu; ++ } ++ } ++ return default_cpu; ++} ++ ++/* returns one if linking was redirected */ ++static int pfair_link(quanta_t time, int cpu, ++ struct task_struct* t) ++{ ++ int target = target_cpu(time, t, cpu); ++ struct task_struct* prev = pstate[cpu]->linked; ++ struct task_struct* other; ++ ++ if (target != cpu) { ++ other = pstate[target]->linked; ++ pstate[target]->linked = t; ++ tsk_rt(t)->linked_on = target; ++ if (!other) ++ /* linked ok, but reschedule this CPU */ ++ return 1; ++ if (target < cpu) { ++ /* link other to cpu instead */ ++ tsk_rt(other)->linked_on = cpu; ++ pstate[cpu]->linked = other; ++ if (prev) { ++ /* prev got pushed back into the ready queue */ ++ tsk_rt(prev)->linked_on = NO_CPU; ++ __add_ready(&pfair, prev); ++ } ++ /* we are done with this cpu */ ++ return 0; ++ } else { ++ /* re-add other, it's original CPU was not considered yet */ ++ tsk_rt(other)->linked_on = NO_CPU; ++ __add_ready(&pfair, other); ++ /* reschedule this CPU */ ++ return 1; ++ } ++ } else { ++ pstate[cpu]->linked = t; ++ tsk_rt(t)->linked_on = cpu; ++ if (prev) { ++ /* prev got pushed back into the ready queue */ ++ tsk_rt(prev)->linked_on = NO_CPU; ++ __add_ready(&pfair, prev); ++ } ++ /* we are done with this CPU */ ++ return 0; ++ } ++} ++ ++static void schedule_subtasks(quanta_t time) ++{ ++ int cpu, retry; ++ ++ for_each_online_cpu(cpu) { ++ retry = 1; ++ while (retry) { ++ if (pfair_higher_prio(__peek_ready(&pfair), ++ pstate[cpu]->linked)) ++ retry = pfair_link(time, cpu, ++ __take_ready(&pfair)); ++ else ++ retry = 0; ++ } ++ } ++} ++ ++static void schedule_next_quantum(quanta_t time) ++{ ++ int cpu; ++ ++ /* called with interrupts disabled */ ++ PTRACE("--- Q %lu at %llu PRE-SPIN\n", ++ time, litmus_clock()); ++ raw_spin_lock(&pfair_lock); ++ PTRACE("<<< Q %lu at %llu\n", ++ time, litmus_clock()); ++ ++ sched_trace_quantum_boundary(); ++ ++ advance_subtasks(time); ++ poll_releases(time); ++ schedule_subtasks(time); ++ ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) ++ if (pstate[cpu]->linked) ++ PTRACE_TASK(pstate[cpu]->linked, ++ " linked on %d.\n", cpu); ++ else ++ PTRACE("(null) linked on %d.\n", cpu); ++ ++ /* We are done. Advance time. 
*/ ++ mb(); ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ if (pstate[cpu]->local_tick != pstate[cpu]->cur_tick) { ++ TRACE("BAD Quantum not acked on %d " ++ "(l:%lu c:%lu p:%lu)\n", ++ cpu, ++ pstate[cpu]->local_tick, ++ pstate[cpu]->cur_tick, ++ pfair_time); ++ pstate[cpu]->missed_quanta++; ++ } ++ pstate[cpu]->cur_tick = time; ++ } ++ PTRACE(">>> Q %lu at %llu\n", ++ time, litmus_clock()); ++ raw_spin_unlock(&pfair_lock); ++} ++ ++static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state) ++{ ++ quanta_t loc; ++ ++ goto first; /* skip mb() on first iteration */ ++ do { ++ cpu_relax(); ++ mb(); ++ first: loc = state->cur_tick; ++ /* FIXME: what if loc > cur? */ ++ } while (time_before(loc, q)); ++ PTRACE("observed cur_tick:%lu >= q:%lu\n", ++ loc, q); ++} ++ ++static quanta_t current_quantum(struct pfair_state* state) ++{ ++ lt_t t = litmus_clock() - state->offset; ++ return time2quanta(t, FLOOR); ++} ++ ++static void catchup_quanta(quanta_t from, quanta_t target, ++ struct pfair_state* state) ++{ ++ quanta_t cur = from, time; ++ TRACE("+++< BAD catching up quanta from %lu to %lu\n", ++ from, target); ++ while (time_before(cur, target)) { ++ wait_for_quantum(cur, state); ++ cur++; ++ time = cmpxchg(&pfair_time, ++ cur - 1, /* expected */ ++ cur /* next */ ++ ); ++ if (time == cur - 1) ++ schedule_next_quantum(cur); ++ } ++ TRACE("+++> catching up done\n"); ++} ++ ++/* pfair_tick - this function is called for every local timer ++ * interrupt. ++ */ ++static void pfair_tick(struct task_struct* t) ++{ ++ struct pfair_state* state = &__get_cpu_var(pfair_state); ++ quanta_t time, cur; ++ int retry = 10; ++ ++ do { ++ cur = current_quantum(state); ++ PTRACE("q %lu at %llu\n", cur, litmus_clock()); ++ ++ /* Attempt to advance time. First CPU to get here ++ * will prepare the next quantum. ++ */ ++ time = cmpxchg(&pfair_time, ++ cur - 1, /* expected */ ++ cur /* next */ ++ ); ++ if (time == cur - 1) { ++ /* exchange succeeded */ ++ wait_for_quantum(cur - 1, state); ++ schedule_next_quantum(cur); ++ retry = 0; ++ } else if (time_before(time, cur - 1)) { ++ /* the whole system missed a tick !? */ ++ catchup_quanta(time, cur, state); ++ retry--; ++ } else if (time_after(time, cur)) { ++ /* our timer lagging behind!? */ ++ TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur); ++ retry--; ++ } else { ++ /* Some other CPU already started scheduling ++ * this quantum. Let it do its job and then update. ++ */ ++ retry = 0; ++ } ++ } while (retry); ++ ++ /* Spin locally until time advances. */ ++ wait_for_quantum(cur, state); ++ ++ /* copy assignment */ ++ /* FIXME: what if we race with a future update? Corrupted state? 
*/ ++ state->local = state->linked; ++ /* signal that we are done */ ++ mb(); ++ state->local_tick = state->cur_tick; ++ ++ if (state->local != current ++ && (is_realtime(current) || is_present(state->local))) ++ set_tsk_need_resched(current); ++} ++ ++static int safe_to_schedule(struct task_struct* t, int cpu) ++{ ++ int where = tsk_rt(t)->scheduled_on; ++ if (where != NO_CPU && where != cpu) { ++ TRACE_TASK(t, "BAD: can't be scheduled on %d, " ++ "scheduled already on %d.\n", cpu, where); ++ return 0; ++ } else ++ return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING; ++} ++ ++static struct task_struct* pfair_schedule(struct task_struct * prev) ++{ ++ struct pfair_state* state = &__get_cpu_var(pfair_state); ++ int blocks; ++ struct task_struct* next = NULL; ++ ++ raw_spin_lock(&pfair_lock); ++ ++ blocks = is_realtime(prev) && !is_running(prev); ++ ++ if (state->local && safe_to_schedule(state->local, state->cpu)) ++ next = state->local; ++ ++ if (prev != next) { ++ tsk_rt(prev)->scheduled_on = NO_CPU; ++ if (next) ++ tsk_rt(next)->scheduled_on = state->cpu; ++ } ++ ++ raw_spin_unlock(&pfair_lock); ++ ++ if (next) ++ TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n", ++ tsk_pfair(next)->release, pfair_time, litmus_clock()); ++ else if (is_realtime(prev)) ++ TRACE("Becomes idle at %lu (%llu)\n", pfair_time, litmus_clock()); ++ ++ return next; ++} ++ ++static void pfair_task_new(struct task_struct * t, int on_rq, int running) ++{ ++ unsigned long flags; ++ ++ TRACE("pfair: task new %d state:%d\n", t->pid, t->state); ++ ++ raw_spin_lock_irqsave(&pfair_lock, flags); ++ if (running) ++ t->rt_param.scheduled_on = task_cpu(t); ++ else ++ t->rt_param.scheduled_on = NO_CPU; ++ ++ prepare_release(t, pfair_time + 1); ++ tsk_pfair(t)->sporadic_release = 0; ++ pfair_add_release(t); ++ check_preempt(t); ++ ++ raw_spin_unlock_irqrestore(&pfair_lock, flags); ++} ++ ++static void pfair_task_wake_up(struct task_struct *t) ++{ ++ unsigned long flags; ++ lt_t now; ++ ++ TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n", ++ litmus_clock(), cur_release(t), pfair_time); ++ ++ raw_spin_lock_irqsave(&pfair_lock, flags); ++ ++ /* It is a little unclear how to deal with Pfair ++ * tasks that block for a while and then wake. For now, ++ * if a task blocks and wakes before its next job release, ++ * then it may resume if it is currently linked somewhere ++ * (as if it never blocked at all). Otherwise, we have a ++ * new sporadic job release. ++ */ ++ if (tsk_pfair(t)->sporadic_release) { ++ now = litmus_clock(); ++ release_at(t, now); ++ prepare_release(t, time2quanta(now, CEIL)); ++ sched_trace_task_release(t); ++ /* FIXME: race with pfair_time advancing */ ++ pfair_add_release(t); ++ tsk_pfair(t)->sporadic_release = 0; ++ } ++ ++ check_preempt(t); ++ ++ raw_spin_unlock_irqrestore(&pfair_lock, flags); ++ TRACE_TASK(t, "wake up done at %llu\n", litmus_clock()); ++} ++ ++static void pfair_task_block(struct task_struct *t) ++{ ++ BUG_ON(!is_realtime(t)); ++ TRACE_TASK(t, "blocks at %llu, state:%d\n", ++ litmus_clock(), t->state); ++} ++ ++static void pfair_task_exit(struct task_struct * t) ++{ ++ unsigned long flags; ++ ++ BUG_ON(!is_realtime(t)); ++ ++ /* Remote task from release or ready queue, and ensure ++ * that it is not the scheduled task for ANY CPU. We ++ * do this blanket check because occassionally when ++ * tasks exit while blocked, the task_cpu of the task ++ * might not be the same as the CPU that the PFAIR scheduler ++ * has chosen for it. 
++ */ ++ raw_spin_lock_irqsave(&pfair_lock, flags); ++ ++ TRACE_TASK(t, "RIP, state:%d\n", t->state); ++ drop_all_references(t); ++ ++ raw_spin_unlock_irqrestore(&pfair_lock, flags); ++ ++ kfree(t->rt_param.pfair); ++ t->rt_param.pfair = NULL; ++} ++ ++ ++static void pfair_release_at(struct task_struct* task, lt_t start) ++{ ++ unsigned long flags; ++ quanta_t release; ++ ++ BUG_ON(!is_realtime(task)); ++ ++ raw_spin_lock_irqsave(&pfair_lock, flags); ++ release_at(task, start); ++ release = time2quanta(start, CEIL); ++ ++ if (release - pfair_time >= PFAIR_MAX_PERIOD) ++ release = pfair_time + PFAIR_MAX_PERIOD; ++ ++ TRACE_TASK(task, "sys release at %lu\n", release); ++ ++ drop_all_references(task); ++ prepare_release(task, release); ++ pfair_add_release(task); ++ ++ /* Clear sporadic release flag, since this release subsumes any ++ * sporadic release on wake. ++ */ ++ tsk_pfair(task)->sporadic_release = 0; ++ ++ raw_spin_unlock_irqrestore(&pfair_lock, flags); ++} ++ ++static void init_subtask(struct subtask* sub, unsigned long i, ++ lt_t quanta, lt_t period) ++{ ++ /* since i is zero-based, the formulas are shifted by one */ ++ lt_t tmp; ++ ++ /* release */ ++ tmp = period * i; ++ do_div(tmp, quanta); /* floor */ ++ sub->release = (quanta_t) tmp; ++ ++ /* deadline */ ++ tmp = period * (i + 1); ++ if (do_div(tmp, quanta)) /* ceil */ ++ tmp++; ++ sub->deadline = (quanta_t) tmp; ++ ++ /* next release */ ++ tmp = period * (i + 1); ++ do_div(tmp, quanta); /* floor */ ++ sub->overlap = sub->deadline - (quanta_t) tmp; ++ ++ /* Group deadline. ++ * Based on the formula given in Uma's thesis. ++ */ ++ if (2 * quanta >= period) { ++ /* heavy */ ++ tmp = (sub->deadline - (i + 1)) * period; ++ if (period > quanta && ++ do_div(tmp, (period - quanta))) /* ceil */ ++ tmp++; ++ sub->group_deadline = (quanta_t) tmp; ++ } else ++ sub->group_deadline = 0; ++} ++ ++static void dump_subtasks(struct task_struct* t) ++{ ++ unsigned long i; ++ for (i = 0; i < t->rt_param.pfair->quanta; i++) ++ TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n", ++ i + 1, ++ t->rt_param.pfair->subtasks[i].release, ++ t->rt_param.pfair->subtasks[i].deadline, ++ t->rt_param.pfair->subtasks[i].overlap, ++ t->rt_param.pfair->subtasks[i].group_deadline); ++} ++ ++static long pfair_admit_task(struct task_struct* t) ++{ ++ lt_t quanta; ++ lt_t period; ++ s64 quantum_length = ktime_to_ns(tick_period); ++ struct pfair_param* param; ++ unsigned long i; ++ ++ /* Pfair is a tick-based method, so the time ++ * of interest is jiffies. Calculate tick-based ++ * times for everything. ++ * (Ceiling of exec cost, floor of period.) ++ */ ++ ++ quanta = get_exec_cost(t); ++ period = get_rt_period(t); ++ ++ quanta = time2quanta(get_exec_cost(t), CEIL); ++ ++ if (do_div(period, quantum_length)) ++ printk(KERN_WARNING ++ "The period of %s/%d is not a multiple of %llu.\n", ++ t->comm, t->pid, (unsigned long long) quantum_length); ++ ++ if (period >= PFAIR_MAX_PERIOD) { ++ printk(KERN_WARNING ++ "PFAIR: Rejecting task %s/%d; its period is too long.\n", ++ t->comm, t->pid); ++ return -EINVAL; ++ } ++ ++ if (quanta == period) { ++ /* special case: task has weight 1.0 */ ++ printk(KERN_INFO ++ "Admitting weight 1.0 task. 
(%s/%d, %llu, %llu).\n", ++ t->comm, t->pid, quanta, period); ++ quanta = 1; ++ period = 1; ++ } ++ ++ param = kmalloc(sizeof(*param) + ++ quanta * sizeof(struct subtask), GFP_ATOMIC); ++ ++ if (!param) ++ return -ENOMEM; ++ ++ param->quanta = quanta; ++ param->cur = 0; ++ param->release = 0; ++ param->period = period; ++ ++ for (i = 0; i < quanta; i++) ++ init_subtask(param->subtasks + i, i, quanta, period); ++ ++ if (t->rt_param.pfair) ++ /* get rid of stale allocation */ ++ kfree(t->rt_param.pfair); ++ ++ t->rt_param.pfair = param; ++ ++ /* spew out some debug info */ ++ dump_subtasks(t); ++ ++ return 0; ++} ++ ++static long pfair_activate_plugin(void) ++{ ++ int cpu; ++ struct pfair_state* state; ++ ++ state = &__get_cpu_var(pfair_state); ++ pfair_time = current_quantum(state); ++ ++ TRACE("Activating PFAIR at q=%lu\n", pfair_time); ++ ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ state = &per_cpu(pfair_state, cpu); ++ state->cur_tick = pfair_time; ++ state->local_tick = pfair_time; ++ state->missed_quanta = 0; ++ state->offset = cpu_stagger_offset(cpu); ++ } ++ ++ return 0; ++} ++ ++/* Plugin object */ ++static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = { ++ .plugin_name = "PFAIR", ++ .tick = pfair_tick, ++ .task_new = pfair_task_new, ++ .task_exit = pfair_task_exit, ++ .schedule = pfair_schedule, ++ .task_wake_up = pfair_task_wake_up, ++ .task_block = pfair_task_block, ++ .admit_task = pfair_admit_task, ++ .release_at = pfair_release_at, ++ .complete_job = complete_job, ++ .activate_plugin = pfair_activate_plugin, ++}; ++ ++static int __init init_pfair(void) ++{ ++ int cpu, i; ++ struct pfair_state *state; ++ ++ ++ /* ++ * initialize short_cut for per-cpu pfair state; ++ * there may be a problem here if someone removes a cpu ++ * while we are doing this initialization... and if cpus ++ * are added / removed later... is it a _real_ problem? ++ */ ++ pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL); ++ ++ /* initialize release queue */ ++ for (i = 0; i < PFAIR_MAX_PERIOD; i++) ++ bheap_init(&release_queue[i]); ++ ++ /* initialize CPU state */ ++ for (cpu = 0; cpu < num_online_cpus(); cpu++) { ++ state = &per_cpu(pfair_state, cpu); ++ state->cpu = cpu; ++ state->cur_tick = 0; ++ state->local_tick = 0; ++ state->linked = NULL; ++ state->local = NULL; ++ state->scheduled = NULL; ++ state->missed_quanta = 0; ++ state->offset = cpu_stagger_offset(cpu); ++ pstate[cpu] = state; ++ } ++ ++ rt_domain_init(&pfair, pfair_ready_order, NULL, NULL); ++ return register_sched_plugin(&pfair_plugin); ++} ++ ++static void __exit clean_pfair(void) ++{ ++ kfree(pstate); ++} ++ ++module_init(init_pfair); ++module_exit(clean_pfair); +diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c +new file mode 100644 +index 0000000..3543b7b +--- /dev/null ++++ b/litmus/sched_plugin.c +@@ -0,0 +1,265 @@ ++/* sched_plugin.c -- core infrastructure for the scheduler plugin system ++ * ++ * This file includes the initialization of the plugin system, the no-op Linux ++ * scheduler plugin, some dummy functions, and some helper functions. ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++/* ++ * Generic function to trigger preemption on either local or remote cpu ++ * from scheduler plugins. The key feature is that this function is ++ * non-preemptive section aware and does not invoke the scheduler / send ++ * IPIs if the to-be-preempted task is actually non-preemptive. 
++ */ ++void preempt_if_preemptable(struct task_struct* t, int on_cpu) ++{ ++ /* t is the real-time task executing on CPU on_cpu If t is NULL, then ++ * on_cpu is currently scheduling background work. ++ */ ++ ++ int send_ipi; ++ ++ if (smp_processor_id() == on_cpu) { ++ /* local CPU case */ ++ if (t) { ++ /* check if we need to poke userspace */ ++ if (is_user_np(t)) ++ /* yes, poke it */ ++ request_exit_np(t); ++ else ++ /* no, see if we are allowed to preempt the ++ * currently-executing task */ ++ if (!is_kernel_np(t)) ++ set_tsk_need_resched(t); ++ } else ++ /* move non-real-time task out of the way */ ++ set_tsk_need_resched(current); ++ } else { ++ /* remote CPU case */ ++ if (!t) ++ /* currently schedules non-real-time work */ ++ send_ipi = 1; ++ else { ++ /* currently schedules real-time work */ ++ if (is_user_np(t)) { ++ /* need to notify user space of delayed ++ * preemption */ ++ ++ /* to avoid a race, set the flag, then test ++ * again */ ++ request_exit_np(t); ++ /* make sure it got written */ ++ mb(); ++ } ++ /* Only send an ipi if remote task might have raced our ++ * request, i.e., send an IPI to make sure if it exited ++ * its critical section. ++ */ ++ send_ipi = !is_np(t) && !is_kernel_np(t); ++ } ++ if (likely(send_ipi)) ++ smp_send_reschedule(on_cpu); ++ } ++} ++ ++ ++/************************************************************* ++ * Dummy plugin functions * ++ *************************************************************/ ++ ++static void litmus_dummy_finish_switch(struct task_struct * prev) ++{ ++} ++ ++static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) ++{ ++ return NULL; ++} ++ ++static void litmus_dummy_tick(struct task_struct* tsk) ++{ ++} ++ ++static long litmus_dummy_admit_task(struct task_struct* tsk) ++{ ++ printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", ++ tsk->comm, tsk->pid); ++ return -EINVAL; ++} ++ ++static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) ++{ ++} ++ ++static void litmus_dummy_task_wake_up(struct task_struct *task) ++{ ++} ++ ++static void litmus_dummy_task_block(struct task_struct *task) ++{ ++} ++ ++static void litmus_dummy_task_exit(struct task_struct *task) ++{ ++} ++ ++static long litmus_dummy_complete_job(void) ++{ ++ return -ENOSYS; ++} ++ ++static long litmus_dummy_activate_plugin(void) ++{ ++ return 0; ++} ++ ++static long litmus_dummy_deactivate_plugin(void) ++{ ++ return 0; ++} ++ ++#ifdef CONFIG_FMLP ++ ++static long litmus_dummy_inherit_priority(struct pi_semaphore *sem, ++ struct task_struct *new_owner) ++{ ++ return -ENOSYS; ++} ++ ++static long litmus_dummy_return_priority(struct pi_semaphore *sem) ++{ ++ return -ENOSYS; ++} ++ ++static long litmus_dummy_pi_block(struct pi_semaphore *sem, ++ struct task_struct *new_waiter) ++{ ++ return -ENOSYS; ++} ++ ++#endif ++ ++ ++/* The default scheduler plugin. It doesn't do anything and lets Linux do its ++ * job. 
++ */ ++struct sched_plugin linux_sched_plugin = { ++ .plugin_name = "Linux", ++ .tick = litmus_dummy_tick, ++ .task_new = litmus_dummy_task_new, ++ .task_exit = litmus_dummy_task_exit, ++ .task_wake_up = litmus_dummy_task_wake_up, ++ .task_block = litmus_dummy_task_block, ++ .complete_job = litmus_dummy_complete_job, ++ .schedule = litmus_dummy_schedule, ++ .finish_switch = litmus_dummy_finish_switch, ++ .activate_plugin = litmus_dummy_activate_plugin, ++ .deactivate_plugin = litmus_dummy_deactivate_plugin, ++#ifdef CONFIG_FMLP ++ .inherit_priority = litmus_dummy_inherit_priority, ++ .return_priority = litmus_dummy_return_priority, ++ .pi_block = litmus_dummy_pi_block, ++#endif ++ .admit_task = litmus_dummy_admit_task ++}; ++ ++/* ++ * The cluster size is needed in C-EDF: it makes sense only to cluster ++ * around L2 or L3, so if cluster_cache_index = 2 (default) we cluster ++ * all the CPUs that shares a L2 cache, while cluster_cache_index = 3 ++ * we cluster all CPs that shares a L3 cache ++ */ ++int cluster_cache_index = 2; ++ ++/* ++ * The reference to current plugin that is used to schedule tasks within ++ * the system. It stores references to actual function implementations ++ * Should be initialized by calling "init_***_plugin()" ++ */ ++struct sched_plugin *litmus = &linux_sched_plugin; ++ ++/* the list of registered scheduling plugins */ ++static LIST_HEAD(sched_plugins); ++static DEFINE_RAW_SPINLOCK(sched_plugins_lock); ++ ++#define CHECK(func) {\ ++ if (!plugin->func) \ ++ plugin->func = litmus_dummy_ ## func;} ++ ++/* FIXME: get reference to module */ ++int register_sched_plugin(struct sched_plugin* plugin) ++{ ++ printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", ++ plugin->plugin_name); ++ ++ /* make sure we don't trip over null pointers later */ ++ CHECK(finish_switch); ++ CHECK(schedule); ++ CHECK(tick); ++ CHECK(task_wake_up); ++ CHECK(task_exit); ++ CHECK(task_block); ++ CHECK(task_new); ++ CHECK(complete_job); ++ CHECK(activate_plugin); ++ CHECK(deactivate_plugin); ++#ifdef CONFIG_FMLP ++ CHECK(inherit_priority); ++ CHECK(return_priority); ++ CHECK(pi_block); ++#endif ++ CHECK(admit_task); ++ ++ if (!plugin->release_at) ++ plugin->release_at = release_at; ++ ++ raw_spin_lock(&sched_plugins_lock); ++ list_add(&plugin->list, &sched_plugins); ++ raw_spin_unlock(&sched_plugins_lock); ++ ++ return 0; ++} ++ ++ ++/* FIXME: reference counting, etc. */ ++struct sched_plugin* find_sched_plugin(const char* name) ++{ ++ struct list_head *pos; ++ struct sched_plugin *plugin; ++ ++ raw_spin_lock(&sched_plugins_lock); ++ list_for_each(pos, &sched_plugins) { ++ plugin = list_entry(pos, struct sched_plugin, list); ++ if (!strcmp(plugin->plugin_name, name)) ++ goto out_unlock; ++ } ++ plugin = NULL; ++ ++out_unlock: ++ raw_spin_unlock(&sched_plugins_lock); ++ return plugin; ++} ++ ++int print_sched_plugins(char* buf, int max) ++{ ++ int count = 0; ++ struct list_head *pos; ++ struct sched_plugin *plugin; ++ ++ raw_spin_lock(&sched_plugins_lock); ++ list_for_each(pos, &sched_plugins) { ++ plugin = list_entry(pos, struct sched_plugin, list); ++ count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name); ++ if (max - count <= 0) ++ break; ++ } ++ raw_spin_unlock(&sched_plugins_lock); ++ return count; ++} +diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c +new file mode 100644 +index 0000000..73f6473 +--- /dev/null ++++ b/litmus/sched_psn_edf.c +@@ -0,0 +1,482 @@ ++/* ++ * kernel/sched_psn_edf.c ++ * ++ * Implementation of the PSN-EDF scheduler plugin. 
++ * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c. ++ * ++ * Suspensions and non-preemptable sections are supported. ++ * Priority inheritance is not supported. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++ ++typedef struct { ++ rt_domain_t domain; ++ int cpu; ++ struct task_struct* scheduled; /* only RT tasks */ ++/* ++ * scheduling lock slock ++ * protects the domain and serializes scheduling decisions ++ */ ++#define slock domain.ready_lock ++ ++} psnedf_domain_t; ++ ++DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains); ++ ++#define local_edf (&__get_cpu_var(psnedf_domains).domain) ++#define local_pedf (&__get_cpu_var(psnedf_domains)) ++#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain) ++#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu)) ++#define task_edf(task) remote_edf(get_partition(task)) ++#define task_pedf(task) remote_pedf(get_partition(task)) ++ ++ ++static void psnedf_domain_init(psnedf_domain_t* pedf, ++ check_resched_needed_t check, ++ release_jobs_t release, ++ int cpu) ++{ ++ edf_domain_init(&pedf->domain, check, release); ++ pedf->cpu = cpu; ++ pedf->scheduled = NULL; ++} ++ ++static void requeue(struct task_struct* t, rt_domain_t *edf) ++{ ++ if (t->state != TASK_RUNNING) ++ TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); ++ ++ set_rt_flags(t, RT_F_RUNNING); ++ if (is_released(t, litmus_clock())) ++ __add_ready(edf, t); ++ else ++ add_release(edf, t); /* it has got to wait */ ++} ++ ++/* we assume the lock is being held */ ++static void preempt(psnedf_domain_t *pedf) ++{ ++ preempt_if_preemptable(pedf->scheduled, pedf->cpu); ++} ++ ++/* This check is trivial in partioned systems as we only have to consider ++ * the CPU of the partition. ++ */ ++static int psnedf_check_resched(rt_domain_t *edf) ++{ ++ psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain); ++ ++ /* because this is a callback from rt_domain_t we already hold ++ * the necessary lock for the ready queue ++ */ ++ if (edf_preemption_needed(edf, pedf->scheduled)) { ++ preempt(pedf); ++ return 1; ++ } else ++ return 0; ++} ++ ++static void job_completion(struct task_struct* t, int forced) ++{ ++ sched_trace_task_completion(t,forced); ++ TRACE_TASK(t, "job_completion().\n"); ++ ++ set_rt_flags(t, RT_F_SLEEP); ++ prepare_for_next_period(t); ++} ++ ++static void psnedf_tick(struct task_struct *t) ++{ ++ psnedf_domain_t *pedf = local_pedf; ++ ++ /* Check for inconsistency. 
We don't need the lock for this since ++ * ->scheduled is only changed in schedule, which obviously is not ++ * executing in parallel on this CPU ++ */ ++ BUG_ON(is_realtime(t) && t != pedf->scheduled); ++ ++ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { ++ if (!is_np(t)) { ++ set_tsk_need_resched(t); ++ TRACE("psnedf_scheduler_tick: " ++ "%d is preemptable " ++ " => FORCE_RESCHED\n", t->pid); ++ } else if (is_user_np(t)) { ++ TRACE("psnedf_scheduler_tick: " ++ "%d is non-preemptable, " ++ "preemption delayed.\n", t->pid); ++ request_exit_np(t); ++ } ++ } ++} ++ ++static struct task_struct* psnedf_schedule(struct task_struct * prev) ++{ ++ psnedf_domain_t* pedf = local_pedf; ++ rt_domain_t* edf = &pedf->domain; ++ struct task_struct* next; ++ ++ int out_of_time, sleep, preempt, ++ np, exists, blocks, resched; ++ ++ raw_spin_lock(&pedf->slock); ++ ++ /* sanity checking ++ * differently from gedf, when a task exits (dead) ++ * pedf->schedule may be null and prev _is_ realtime ++ */ ++ BUG_ON(pedf->scheduled && pedf->scheduled != prev); ++ BUG_ON(pedf->scheduled && !is_realtime(prev)); ++ ++ /* (0) Determine state */ ++ exists = pedf->scheduled != NULL; ++ blocks = exists && !is_running(pedf->scheduled); ++ out_of_time = exists && ++ budget_enforced(pedf->scheduled) && ++ budget_exhausted(pedf->scheduled); ++ np = exists && is_np(pedf->scheduled); ++ sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP; ++ preempt = edf_preemption_needed(edf, prev); ++ ++ /* If we need to preempt do so. ++ * The following checks set resched to 1 in case of special ++ * circumstances. ++ */ ++ resched = preempt; ++ ++ /* If a task blocks we have no choice but to reschedule. ++ */ ++ if (blocks) ++ resched = 1; ++ ++ /* Request a sys_exit_np() call if we would like to preempt but cannot. ++ * Multiple calls to request_exit_np() don't hurt. ++ */ ++ if (np && (out_of_time || preempt || sleep)) ++ request_exit_np(pedf->scheduled); ++ ++ /* Any task that is preemptable and either exhausts its execution ++ * budget or wants to sleep completes. We may have to reschedule after ++ * this. ++ */ ++ if (!np && (out_of_time || sleep) && !blocks) { ++ job_completion(pedf->scheduled, !sleep); ++ resched = 1; ++ } ++ ++ /* The final scheduling decision. Do we need to switch for some reason? ++ * Switch if we are in RT mode and have no task or if we need to ++ * resched. ++ */ ++ next = NULL; ++ if ((!np || blocks) && (resched || !exists)) { ++ /* When preempting a task that does not block, then ++ * re-insert it into either the ready queue or the ++ * release queue (if it completed). requeue() picks ++ * the appropriate queue. ++ */ ++ if (pedf->scheduled && !blocks) ++ requeue(pedf->scheduled, edf); ++ next = __take_ready(edf); ++ } else ++ /* Only override Linux scheduler if we have a real-time task ++ * scheduled that needs to continue. 
++ */ ++ if (exists) ++ next = prev; ++ ++ if (next) { ++ TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); ++ set_rt_flags(next, RT_F_RUNNING); ++ } else { ++ TRACE("becoming idle at %llu\n", litmus_clock()); ++ } ++ ++ pedf->scheduled = next; ++ raw_spin_unlock(&pedf->slock); ++ ++ return next; ++} ++ ++ ++/* Prepare a task for running in RT mode ++ */ ++static void psnedf_task_new(struct task_struct * t, int on_rq, int running) ++{ ++ rt_domain_t* edf = task_edf(t); ++ psnedf_domain_t* pedf = task_pedf(t); ++ unsigned long flags; ++ ++ TRACE_TASK(t, "psn edf: task new, cpu = %d\n", ++ t->rt_param.task_params.cpu); ++ ++ /* setup job parameters */ ++ release_at(t, litmus_clock()); ++ ++ /* The task should be running in the queue, otherwise signal ++ * code will try to wake it up with fatal consequences. ++ */ ++ raw_spin_lock_irqsave(&pedf->slock, flags); ++ if (running) { ++ /* there shouldn't be anything else running at the time */ ++ BUG_ON(pedf->scheduled); ++ pedf->scheduled = t; ++ } else { ++ requeue(t, edf); ++ /* maybe we have to reschedule */ ++ preempt(pedf); ++ } ++ raw_spin_unlock_irqrestore(&pedf->slock, flags); ++} ++ ++static void psnedf_task_wake_up(struct task_struct *task) ++{ ++ unsigned long flags; ++ psnedf_domain_t* pedf = task_pedf(task); ++ rt_domain_t* edf = task_edf(task); ++ lt_t now; ++ ++ TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); ++ raw_spin_lock_irqsave(&pedf->slock, flags); ++ BUG_ON(is_queued(task)); ++ /* We need to take suspensions because of semaphores into ++ * account! If a job resumes after being suspended due to acquiring ++ * a semaphore, it should never be treated as a new job release. ++ * ++ * FIXME: This should be done in some more predictable and userspace-controlled way. ++ */ ++ now = litmus_clock(); ++ if (is_tardy(task, now) && ++ get_rt_flags(task) != RT_F_EXIT_SEM) { ++ /* new sporadic release */ ++ release_at(task, now); ++ sched_trace_task_release(task); ++ } ++ ++ /* Only add to ready queue if it is not the currently-scheduled ++ * task. This could be the case if a task was woken up concurrently ++ * on a remote CPU before the executing CPU got around to actually ++ * de-scheduling the task, i.e., wake_up() raced with schedule() ++ * and won. 
++ */ ++ if (pedf->scheduled != task) ++ requeue(task, edf); ++ ++ raw_spin_unlock_irqrestore(&pedf->slock, flags); ++ TRACE_TASK(task, "wake up done\n"); ++} ++ ++static void psnedf_task_block(struct task_struct *t) ++{ ++ /* only running tasks can block, thus t is in no queue */ ++ TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); ++ ++ BUG_ON(!is_realtime(t)); ++ BUG_ON(is_queued(t)); ++} ++ ++static void psnedf_task_exit(struct task_struct * t) ++{ ++ unsigned long flags; ++ psnedf_domain_t* pedf = task_pedf(t); ++ rt_domain_t* edf; ++ ++ raw_spin_lock_irqsave(&pedf->slock, flags); ++ if (is_queued(t)) { ++ /* dequeue */ ++ edf = task_edf(t); ++ remove(edf, t); ++ } ++ if (pedf->scheduled == t) ++ pedf->scheduled = NULL; ++ ++ TRACE_TASK(t, "RIP, now reschedule\n"); ++ ++ preempt(pedf); ++ raw_spin_unlock_irqrestore(&pedf->slock, flags); ++} ++ ++#ifdef CONFIG_FMLP ++static long psnedf_pi_block(struct pi_semaphore *sem, ++ struct task_struct *new_waiter) ++{ ++ psnedf_domain_t* pedf; ++ rt_domain_t* edf; ++ struct task_struct* t; ++ int cpu = get_partition(new_waiter); ++ ++ BUG_ON(!new_waiter); ++ ++ if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) { ++ TRACE_TASK(new_waiter, " boosts priority\n"); ++ pedf = task_pedf(new_waiter); ++ edf = task_edf(new_waiter); ++ ++ /* interrupts already disabled */ ++ raw_spin_lock(&pedf->slock); ++ ++ /* store new highest-priority task */ ++ sem->hp.cpu_task[cpu] = new_waiter; ++ if (sem->holder && ++ get_partition(sem->holder) == get_partition(new_waiter)) { ++ /* let holder inherit */ ++ sem->holder->rt_param.inh_task = new_waiter; ++ t = sem->holder; ++ if (is_queued(t)) { ++ /* queued in domain*/ ++ remove(edf, t); ++ /* readd to make priority change take place */ ++ /* FIXME: this looks outdated */ ++ if (is_released(t, litmus_clock())) ++ __add_ready(edf, t); ++ else ++ add_release(edf, t); ++ } ++ } ++ ++ /* check if we need to reschedule */ ++ if (edf_preemption_needed(edf, current)) ++ preempt(pedf); ++ ++ raw_spin_unlock(&pedf->slock); ++ } ++ ++ return 0; ++} ++ ++static long psnedf_inherit_priority(struct pi_semaphore *sem, ++ struct task_struct *new_owner) ++{ ++ int cpu = get_partition(new_owner); ++ ++ new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu]; ++ if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) { ++ TRACE_TASK(new_owner, ++ "inherited priority from %s/%d\n", ++ sem->hp.cpu_task[cpu]->comm, ++ sem->hp.cpu_task[cpu]->pid); ++ } else ++ TRACE_TASK(new_owner, ++ "cannot inherit priority: " ++ "no higher priority job waits on this CPU!\n"); ++ /* make new owner non-preemptable as required by FMLP under ++ * PSN-EDF. ++ */ ++ make_np(new_owner); ++ return 0; ++} ++ ++ ++/* This function is called on a semaphore release, and assumes that ++ * the current task is also the semaphore holder. ++ */ ++static long psnedf_return_priority(struct pi_semaphore *sem) ++{ ++ struct task_struct* t = current; ++ psnedf_domain_t* pedf = task_pedf(t); ++ rt_domain_t* edf = task_edf(t); ++ int ret = 0; ++ int cpu = get_partition(current); ++ int still_np; ++ ++ ++ /* Find new highest-priority semaphore task ++ * if holder task is the current hp.cpu_task[cpu]. ++ * ++ * Calling function holds sem->wait.lock. 
++ */ ++ if (t == sem->hp.cpu_task[cpu]) ++ edf_set_hp_cpu_task(sem, cpu); ++ ++ still_np = take_np(current); ++ ++ /* Since we don't nest resources, this ++ * should always be zero */ ++ BUG_ON(still_np); ++ ++ if (current->rt_param.inh_task) { ++ TRACE_CUR("return priority of %s/%d\n", ++ current->rt_param.inh_task->comm, ++ current->rt_param.inh_task->pid); ++ } else ++ TRACE_CUR(" no priority to return %p\n", sem); ++ ++ ++ /* Always check for delayed preemptions that might have become ++ * necessary due to non-preemptive execution. ++ */ ++ raw_spin_lock(&pedf->slock); ++ ++ /* Reset inh_task to NULL. */ ++ current->rt_param.inh_task = NULL; ++ ++ /* check if we need to reschedule */ ++ if (edf_preemption_needed(edf, current)) ++ preempt(pedf); ++ ++ raw_spin_unlock(&pedf->slock); ++ ++ ++ return ret; ++} ++ ++#endif ++ ++static long psnedf_admit_task(struct task_struct* tsk) ++{ ++ return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; ++} ++ ++/* Plugin object */ ++static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = { ++ .plugin_name = "PSN-EDF", ++#ifdef CONFIG_SRP ++ .srp_active = 1, ++#endif ++ .tick = psnedf_tick, ++ .task_new = psnedf_task_new, ++ .complete_job = complete_job, ++ .task_exit = psnedf_task_exit, ++ .schedule = psnedf_schedule, ++ .task_wake_up = psnedf_task_wake_up, ++ .task_block = psnedf_task_block, ++#ifdef CONFIG_FMLP ++ .fmlp_active = 1, ++ .pi_block = psnedf_pi_block, ++ .inherit_priority = psnedf_inherit_priority, ++ .return_priority = psnedf_return_priority, ++#endif ++ .admit_task = psnedf_admit_task ++}; ++ ++ ++static int __init init_psn_edf(void) ++{ ++ int i; ++ ++ /* We do not really want to support cpu hotplug, do we? ;) ++ * However, if we are so crazy to do so, ++ * we cannot use num_online_cpu() ++ */ ++ for (i = 0; i < num_online_cpus(); i++) { ++ psnedf_domain_init(remote_pedf(i), ++ psnedf_check_resched, ++ NULL, i); ++ } ++ return register_sched_plugin(&psn_edf_plugin); ++} ++ ++module_init(init_psn_edf); ++ +diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c +new file mode 100644 +index 0000000..39a543e +--- /dev/null ++++ b/litmus/sched_task_trace.c +@@ -0,0 +1,204 @@ ++/* ++ * sched_task_trace.c -- record scheduling events to a byte stream ++ */ ++ ++#define NO_TASK_TRACE_DECLS ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#include ++#include ++ ++ ++/* set MAJOR to 0 to have it dynamically assigned */ ++#define FT_TASK_TRACE_MAJOR 253 ++#define NO_EVENTS 4096 /* this is a buffer of 12 4k pages per CPU */ ++ ++#define now() litmus_clock() ++ ++struct local_buffer { ++ struct st_event_record record[NO_EVENTS]; ++ char flag[NO_EVENTS]; ++ struct ft_buffer ftbuf; ++}; ++ ++DEFINE_PER_CPU(struct local_buffer, st_event_buffer); ++ ++static struct ftdev st_dev; ++ ++static int st_dev_can_open(struct ftdev *dev, unsigned int cpu) ++{ ++ return cpu_online(cpu) ? 
0 : -ENODEV; ++} ++ ++static int __init init_sched_task_trace(void) ++{ ++ struct local_buffer* buf; ++ int i, ok = 0; ++ ftdev_init(&st_dev, THIS_MODULE); ++ for (i = 0; i < NR_CPUS; i++) { ++ buf = &per_cpu(st_event_buffer, i); ++ ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, ++ sizeof(struct st_event_record), ++ buf->flag, ++ buf->record); ++ st_dev.minor[i].buf = &buf->ftbuf; ++ } ++ if (ok == NR_CPUS) { ++ st_dev.minor_cnt = NR_CPUS; ++ st_dev.can_open = st_dev_can_open; ++ return register_ftdev(&st_dev, "sched_trace", FT_TASK_TRACE_MAJOR); ++ } else { ++ return -EINVAL; ++ } ++} ++ ++module_init(init_sched_task_trace); ++ ++ ++static inline struct st_event_record* get_record(u8 type, struct task_struct* t) ++{ ++ struct st_event_record* rec = NULL; ++ struct local_buffer* buf; ++ ++ buf = &get_cpu_var(st_event_buffer); ++ if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) { ++ rec->hdr.type = type; ++ rec->hdr.cpu = smp_processor_id(); ++ rec->hdr.pid = t ? t->pid : 0; ++ rec->hdr.job = t ? t->rt_param.job_params.job_no : 0; ++ } else { ++ put_cpu_var(st_event_buffer); ++ } ++ /* rec will be NULL if it failed */ ++ return rec; ++} ++ ++static inline void put_record(struct st_event_record* rec) ++{ ++ struct local_buffer* buf; ++ buf = &__get_cpu_var(st_event_buffer); ++ ft_buffer_finish_write(&buf->ftbuf, rec); ++ put_cpu_var(st_event_buffer); ++} ++ ++feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task) ++{ ++ struct task_struct *t = (struct task_struct*) _task; ++ struct st_event_record* rec = get_record(ST_NAME, t); ++ int i; ++ if (rec) { ++ for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++) ++ rec->data.name.cmd[i] = t->comm[i]; ++ put_record(rec); ++ } ++} ++ ++feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task) ++{ ++ struct task_struct *t = (struct task_struct*) _task; ++ struct st_event_record* rec = get_record(ST_PARAM, t); ++ if (rec) { ++ rec->data.param.wcet = get_exec_cost(t); ++ rec->data.param.period = get_rt_period(t); ++ rec->data.param.phase = get_rt_phase(t); ++ rec->data.param.partition = get_partition(t); ++ put_record(rec); ++ } ++} ++ ++feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task) ++{ ++ struct task_struct *t = (struct task_struct*) _task; ++ struct st_event_record* rec = get_record(ST_RELEASE, t); ++ if (rec) { ++ rec->data.release.release = get_release(t); ++ rec->data.release.deadline = get_deadline(t); ++ put_record(rec); ++ } ++} ++ ++/* skipped: st_assigned_data, we don't use it atm */ ++ ++feather_callback void do_sched_trace_task_switch_to(unsigned long id, ++ unsigned long _task) ++{ ++ struct task_struct *t = (struct task_struct*) _task; ++ struct st_event_record* rec; ++ if (is_realtime(t)) { ++ rec = get_record(ST_SWITCH_TO, t); ++ if (rec) { ++ rec->data.switch_to.when = now(); ++ rec->data.switch_to.exec_time = get_exec_time(t); ++ put_record(rec); ++ } ++ } ++} ++ ++feather_callback void do_sched_trace_task_switch_away(unsigned long id, ++ unsigned long _task) ++{ ++ struct task_struct *t = (struct task_struct*) _task; ++ struct st_event_record* rec; ++ if (is_realtime(t)) { ++ rec = get_record(ST_SWITCH_AWAY, t); ++ if (rec) { ++ rec->data.switch_away.when = now(); ++ rec->data.switch_away.exec_time = get_exec_time(t); ++ put_record(rec); ++ } ++ } ++} ++ ++feather_callback void do_sched_trace_task_completion(unsigned long id, ++ unsigned long _task, ++ unsigned long forced) ++{ ++ struct task_struct *t = (struct task_struct*) 
_task; ++ struct st_event_record* rec = get_record(ST_COMPLETION, t); ++ if (rec) { ++ rec->data.completion.when = now(); ++ rec->data.completion.forced = forced; ++ put_record(rec); ++ } ++} ++ ++feather_callback void do_sched_trace_task_block(unsigned long id, ++ unsigned long _task) ++{ ++ struct task_struct *t = (struct task_struct*) _task; ++ struct st_event_record* rec = get_record(ST_BLOCK, t); ++ if (rec) { ++ rec->data.block.when = now(); ++ put_record(rec); ++ } ++} ++ ++feather_callback void do_sched_trace_task_resume(unsigned long id, ++ unsigned long _task) ++{ ++ struct task_struct *t = (struct task_struct*) _task; ++ struct st_event_record* rec = get_record(ST_RESUME, t); ++ if (rec) { ++ rec->data.resume.when = now(); ++ put_record(rec); ++ } ++} ++ ++feather_callback void do_sched_trace_sys_release(unsigned long id, ++ unsigned long _start) ++{ ++ lt_t *start = (lt_t*) _start; ++ struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL); ++ if (rec) { ++ rec->data.sys_release.when = now(); ++ rec->data.sys_release.release = *start; ++ put_record(rec); ++ } ++} +diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c +new file mode 100644 +index 0000000..1fa2094 +--- /dev/null ++++ b/litmus/sched_trace.c +@@ -0,0 +1,378 @@ ++/* ++ * sched_trace.c -- record scheduling events to a byte stream. ++ */ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++#define SCHED_TRACE_NAME "litmus/log" ++ ++/* Allocate a buffer of about 32k per CPU */ ++#define LITMUS_TRACE_BUF_PAGES 8 ++#define LITMUS_TRACE_BUF_SIZE (PAGE_SIZE * LITMUS_TRACE_BUF_PAGES * NR_CPUS) ++ ++/* Max length of one read from the buffer */ ++#define MAX_READ_LEN (64 * 1024) ++ ++/* Max length for one write --- from kernel --- to the buffer */ ++#define MSG_SIZE 255 ++ ++/* Inner ring buffer structure */ ++typedef struct { ++ rwlock_t del_lock; ++ ++ /* the buffer */ ++ struct kfifo kfifo; ++} ring_buffer_t; ++ ++/* Main buffer structure */ ++typedef struct { ++ ring_buffer_t buf; ++ atomic_t reader_cnt; ++ struct semaphore reader_mutex; ++} trace_buffer_t; ++ ++ ++/* ++ * Inner buffer management functions ++ */ ++void rb_init(ring_buffer_t* buf) ++{ ++ rwlock_init(&buf->del_lock); ++} ++ ++int rb_alloc_buf(ring_buffer_t* buf, unsigned int size) ++{ ++ unsigned long flags; ++ int ret = 0; ++ ++ write_lock_irqsave(&buf->del_lock, flags); ++ ++ /* kfifo size must be a power of 2 ++ * atm kfifo alloc is automatically rounding the size ++ */ ++ ret = kfifo_alloc(&buf->kfifo, size, GFP_ATOMIC); ++ ++ write_unlock_irqrestore(&buf->del_lock, flags); ++ ++ if(ret < 0) ++ printk(KERN_ERR "kfifo_alloc failed\n"); ++ ++ return ret; ++} ++ ++int rb_free_buf(ring_buffer_t* buf) ++{ ++ unsigned long flags; ++ ++ write_lock_irqsave(&buf->del_lock, flags); ++ ++ BUG_ON(!kfifo_initialized(&buf->kfifo)); ++ kfifo_free(&buf->kfifo); ++ ++ write_unlock_irqrestore(&buf->del_lock, flags); ++ ++ return 0; ++} ++ ++/* ++ * Assumption: concurrent writes are serialized externally ++ * ++ * Will only succeed if there is enough space for all len bytes. 
++ */ ++int rb_put(ring_buffer_t* buf, char* mem, size_t len) ++{ ++ unsigned long flags; ++ int error = 0; ++ ++ read_lock_irqsave(&buf->del_lock, flags); ++ ++ if (!kfifo_initialized(&buf->kfifo)) { ++ error = -ENODEV; ++ goto out; ++ } ++ ++ if((kfifo_in(&buf->kfifo, mem, len)) < len) { ++ error = -ENOMEM; ++ goto out; ++ } ++ ++ out: ++ read_unlock_irqrestore(&buf->del_lock, flags); ++ return error; ++} ++ ++/* Assumption: concurrent reads are serialized externally */ ++int rb_get(ring_buffer_t* buf, char* mem, size_t len) ++{ ++ unsigned long flags; ++ int error = 0; ++ ++ read_lock_irqsave(&buf->del_lock, flags); ++ if (!kfifo_initialized(&buf->kfifo)) { ++ error = -ENODEV; ++ goto out; ++ } ++ ++ error = kfifo_out(&buf->kfifo, (unsigned char*)mem, len); ++ ++ out: ++ read_unlock_irqrestore(&buf->del_lock, flags); ++ return error; ++} ++ ++/* ++ * Device Driver management ++ */ ++static DEFINE_RAW_SPINLOCK(log_buffer_lock); ++static trace_buffer_t log_buffer; ++ ++static void init_log_buffer(void) ++{ ++ rb_init(&log_buffer.buf); ++ atomic_set(&log_buffer.reader_cnt,0); ++ init_MUTEX(&log_buffer.reader_mutex); ++} ++ ++static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer); ++ ++/* ++ * sched_trace_log_message - Write to the trace buffer (log_buffer) ++ * ++ * This is the only function accessing the log_buffer from inside the ++ * kernel for writing. ++ * Concurrent access to sched_trace_log_message must be serialized using ++ * log_buffer_lock ++ * The maximum length of a formatted message is 255 ++ */ ++void sched_trace_log_message(const char* fmt, ...) ++{ ++ unsigned long flags; ++ va_list args; ++ size_t len; ++ char* buf; ++ ++ va_start(args, fmt); ++ local_irq_save(flags); ++ ++ /* format message */ ++ buf = __get_cpu_var(fmt_buffer); ++ len = vscnprintf(buf, MSG_SIZE, fmt, args); ++ ++ raw_spin_lock(&log_buffer_lock); ++ /* Don't copy the trailing null byte, we don't want null bytes ++ * in a text file. ++ */ ++ rb_put(&log_buffer.buf, buf, len); ++ raw_spin_unlock(&log_buffer_lock); ++ ++ local_irq_restore(flags); ++ va_end(args); ++} ++ ++/* ++ * log_read - Read the trace buffer ++ * ++ * This function is called as a file operation from userspace. ++ * Readers can sleep. Access is serialized through reader_mutex ++ */ ++static ssize_t log_read(struct file *filp, char __user *to, size_t len, ++ loff_t *f_pos) ++{ ++ /* we ignore f_pos, this is strictly sequential */ ++ ++ ssize_t error = -EINVAL; ++ char* mem; ++ trace_buffer_t *tbuf = filp->private_data; ++ ++ if (down_interruptible(&tbuf->reader_mutex)) { ++ error = -ERESTARTSYS; ++ goto out; ++ } ++ ++ if (len > MAX_READ_LEN) ++ len = MAX_READ_LEN; ++ ++ mem = kmalloc(len, GFP_KERNEL); ++ if (!mem) { ++ error = -ENOMEM; ++ goto out_unlock; ++ } ++ ++ error = rb_get(&tbuf->buf, mem, len); ++ while (!error) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(110); ++ if (signal_pending(current)) ++ error = -ERESTARTSYS; ++ else ++ error = rb_get(&tbuf->buf, mem, len); ++ } ++ ++ if (error > 0 && copy_to_user(to, mem, error)) ++ error = -EFAULT; ++ ++ kfree(mem); ++ out_unlock: ++ up(&tbuf->reader_mutex); ++ out: ++ return error; ++} ++ ++/* ++ * Enable redirection of printk() messages to the trace buffer. ++ * Defined in kernel/printk.c ++ */ ++extern int trace_override; ++extern int trace_recurse; ++ ++/* ++ * log_open - open the global log message ring buffer. 
++ */ ++static int log_open(struct inode *in, struct file *filp) ++{ ++ int error = -EINVAL; ++ trace_buffer_t* tbuf; ++ ++ tbuf = &log_buffer; ++ ++ if (down_interruptible(&tbuf->reader_mutex)) { ++ error = -ERESTARTSYS; ++ goto out; ++ } ++ ++ /* first open must allocate buffers */ ++ if (atomic_inc_return(&tbuf->reader_cnt) == 1) { ++ if ((error = rb_alloc_buf(&tbuf->buf, LITMUS_TRACE_BUF_SIZE))) ++ { ++ atomic_dec(&tbuf->reader_cnt); ++ goto out_unlock; ++ } ++ } ++ ++ error = 0; ++ filp->private_data = tbuf; ++ ++ printk(KERN_DEBUG ++ "sched_trace kfifo with buffer starting at: 0x%p\n", ++ (tbuf->buf.kfifo).buffer); ++ ++ /* override printk() */ ++ trace_override++; ++ ++ out_unlock: ++ up(&tbuf->reader_mutex); ++ out: ++ return error; ++} ++ ++static int log_release(struct inode *in, struct file *filp) ++{ ++ int error = -EINVAL; ++ trace_buffer_t* tbuf = filp->private_data; ++ ++ BUG_ON(!filp->private_data); ++ ++ if (down_interruptible(&tbuf->reader_mutex)) { ++ error = -ERESTARTSYS; ++ goto out; ++ } ++ ++ /* last release must deallocate buffers */ ++ if (atomic_dec_return(&tbuf->reader_cnt) == 0) { ++ error = rb_free_buf(&tbuf->buf); ++ } ++ ++ /* release printk() overriding */ ++ trace_override--; ++ ++ printk(KERN_DEBUG "sched_trace kfifo released\n"); ++ ++ up(&tbuf->reader_mutex); ++ out: ++ return error; ++} ++ ++/* ++ * log_fops - The file operations for accessing the global LITMUS log message ++ * buffer. ++ * ++ * Except for opening the device file it uses the same operations as trace_fops. ++ */ ++static struct file_operations log_fops = { ++ .owner = THIS_MODULE, ++ .open = log_open, ++ .release = log_release, ++ .read = log_read, ++}; ++ ++static struct miscdevice litmus_log_dev = { ++ .name = SCHED_TRACE_NAME, ++ .minor = MISC_DYNAMIC_MINOR, ++ .fops = &log_fops, ++}; ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++void dump_trace_buffer(int max) ++{ ++ char line[80]; ++ int len; ++ int count = 0; ++ ++ /* potential, but very unlikely, race... 
*/ ++ trace_recurse = 1; ++ while ((max == 0 || count++ < max) && ++ (len = rb_get(&log_buffer.buf, line, sizeof(line) - 1)) > 0) { ++ line[len] = '\0'; ++ printk("%s", line); ++ } ++ trace_recurse = 0; ++} ++ ++static void sysrq_dump_trace_buffer(int key, struct tty_struct *tty) ++{ ++ dump_trace_buffer(100); ++} ++ ++static struct sysrq_key_op sysrq_dump_trace_buffer_op = { ++ .handler = sysrq_dump_trace_buffer, ++ .help_msg = "dump-trace-buffer(Y)", ++ .action_msg = "writing content of TRACE() buffer", ++}; ++#endif ++ ++static int __init init_sched_trace(void) ++{ ++ printk("Initializing TRACE() device\n"); ++ init_log_buffer(); ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++ /* offer some debugging help */ ++ if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op)) ++ printk("Registered dump-trace-buffer(Y) magic sysrq.\n"); ++ else ++ printk("Could not register dump-trace-buffer(Y) magic sysrq.\n"); ++#endif ++ ++ ++ return misc_register(&litmus_log_dev); ++} ++ ++static void __exit exit_sched_trace(void) ++{ ++ misc_deregister(&litmus_log_dev); ++} ++ ++module_init(init_sched_trace); ++module_exit(exit_sched_trace); +diff --git a/litmus/srp.c b/litmus/srp.c +new file mode 100644 +index 0000000..71639b9 +--- /dev/null ++++ b/litmus/srp.c +@@ -0,0 +1,318 @@ ++/* ************************************************************************** */ ++/* STACK RESOURCE POLICY */ ++/* ************************************************************************** */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++ ++#ifdef CONFIG_SRP ++ ++struct srp_priority { ++ struct list_head list; ++ unsigned int period; ++ pid_t pid; ++}; ++ ++#define list2prio(l) list_entry(l, struct srp_priority, list) ++ ++/* SRP task priority comparison function. Smaller periods have highest ++ * priority, tie-break is PID. Special case: period == 0 <=> no priority ++ */ ++static int srp_higher_prio(struct srp_priority* first, ++ struct srp_priority* second) ++{ ++ if (!first->period) ++ return 0; ++ else ++ return !second->period || ++ first->period < second->period || ( ++ first->period == second->period && ++ first->pid < second->pid); ++} ++ ++struct srp { ++ struct list_head ceiling; ++ wait_queue_head_t ceiling_blocked; ++}; ++ ++ ++atomic_t srp_objects_in_use = ATOMIC_INIT(0); ++ ++DEFINE_PER_CPU(struct srp, srp); ++ ++ ++/* Initialize SRP semaphores at boot time. 
*/ ++static int __init srp_init(void) ++{ ++ int i; ++ ++ printk("Initializing SRP per-CPU ceilings..."); ++ for (i = 0; i < NR_CPUS; i++) { ++ init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked); ++ INIT_LIST_HEAD(&per_cpu(srp, i).ceiling); ++ } ++ printk(" done!\n"); ++ ++ return 0; ++} ++module_init(srp_init); ++ ++ ++#define system_ceiling(srp) list2prio(srp->ceiling.next) ++ ++ ++#define UNDEF_SEM -2 ++ ++ ++/* struct for uniprocessor SRP "semaphore" */ ++struct srp_semaphore { ++ struct srp_priority ceiling; ++ struct task_struct* owner; ++ int cpu; /* cpu associated with this "semaphore" and resource */ ++}; ++ ++#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling) ++ ++static int srp_exceeds_ceiling(struct task_struct* first, ++ struct srp* srp) ++{ ++ return list_empty(&srp->ceiling) || ++ get_rt_period(first) < system_ceiling(srp)->period || ++ (get_rt_period(first) == system_ceiling(srp)->period && ++ first->pid < system_ceiling(srp)->pid) || ++ ceiling2sem(system_ceiling(srp))->owner == first; ++} ++ ++static void srp_add_prio(struct srp* srp, struct srp_priority* prio) ++{ ++ struct list_head *pos; ++ if (in_list(&prio->list)) { ++ printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in " ++ "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio)); ++ return; ++ } ++ list_for_each(pos, &srp->ceiling) ++ if (unlikely(srp_higher_prio(prio, list2prio(pos)))) { ++ __list_add(&prio->list, pos->prev, pos); ++ return; ++ } ++ ++ list_add_tail(&prio->list, &srp->ceiling); ++} ++ ++ ++static void* create_srp_semaphore(void) ++{ ++ struct srp_semaphore* sem; ++ ++ sem = kmalloc(sizeof(*sem), GFP_KERNEL); ++ if (!sem) ++ return NULL; ++ ++ INIT_LIST_HEAD(&sem->ceiling.list); ++ sem->ceiling.period = 0; ++ sem->cpu = UNDEF_SEM; ++ sem->owner = NULL; ++ atomic_inc(&srp_objects_in_use); ++ return sem; ++} ++ ++static noinline int open_srp_semaphore(struct od_table_entry* entry, void* __user arg) ++{ ++ struct srp_semaphore* sem = (struct srp_semaphore*) entry->obj->obj; ++ int ret = 0; ++ struct task_struct* t = current; ++ struct srp_priority t_prio; ++ ++ TRACE("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu); ++ if (!srp_active()) ++ return -EBUSY; ++ ++ if (sem->cpu == UNDEF_SEM) ++ sem->cpu = get_partition(t); ++ else if (sem->cpu != get_partition(t)) ++ ret = -EPERM; ++ ++ if (ret == 0) { ++ t_prio.period = get_rt_period(t); ++ t_prio.pid = t->pid; ++ if (srp_higher_prio(&t_prio, &sem->ceiling)) { ++ sem->ceiling.period = t_prio.period; ++ sem->ceiling.pid = t_prio.pid; ++ } ++ } ++ ++ return ret; ++} ++ ++static void destroy_srp_semaphore(void* sem) ++{ ++ /* XXX invariants */ ++ atomic_dec(&srp_objects_in_use); ++ kfree(sem); ++} ++ ++struct fdso_ops srp_sem_ops = { ++ .create = create_srp_semaphore, ++ .open = open_srp_semaphore, ++ .destroy = destroy_srp_semaphore ++}; ++ ++ ++static void do_srp_down(struct srp_semaphore* sem) ++{ ++ /* Update ceiling. */ ++ srp_add_prio(&__get_cpu_var(srp), &sem->ceiling); ++ WARN_ON(sem->owner != NULL); ++ sem->owner = current; ++ TRACE_CUR("acquired srp 0x%p\n", sem); ++} ++ ++static void do_srp_up(struct srp_semaphore* sem) ++{ ++ /* Determine new system priority ceiling for this CPU. */ ++ WARN_ON(!in_list(&sem->ceiling.list)); ++ if (in_list(&sem->ceiling.list)) ++ list_del(&sem->ceiling.list); ++ ++ sem->owner = NULL; ++ ++ /* Wake tasks on this CPU, if they exceed current ceiling. 
*/ ++ TRACE_CUR("released srp 0x%p\n", sem); ++ wake_up_all(&__get_cpu_var(srp).ceiling_blocked); ++} ++ ++/* Adjust the system-wide priority ceiling if resource is claimed. */ ++asmlinkage long sys_srp_down(int sem_od) ++{ ++ int cpu; ++ int ret = -EINVAL; ++ struct srp_semaphore* sem; ++ ++ /* disabling preemptions is sufficient protection since ++ * SRP is strictly per CPU and we don't interfere with any ++ * interrupt handlers ++ */ ++ preempt_disable(); ++ TS_SRP_DOWN_START; ++ ++ cpu = smp_processor_id(); ++ sem = lookup_srp_sem(sem_od); ++ if (sem && sem->cpu == cpu) { ++ do_srp_down(sem); ++ ret = 0; ++ } ++ ++ TS_SRP_DOWN_END; ++ preempt_enable(); ++ return ret; ++} ++ ++/* Adjust the system-wide priority ceiling if resource is freed. */ ++asmlinkage long sys_srp_up(int sem_od) ++{ ++ int cpu; ++ int ret = -EINVAL; ++ struct srp_semaphore* sem; ++ ++ preempt_disable(); ++ TS_SRP_UP_START; ++ ++ cpu = smp_processor_id(); ++ sem = lookup_srp_sem(sem_od); ++ ++ if (sem && sem->cpu == cpu) { ++ do_srp_up(sem); ++ ret = 0; ++ } ++ ++ TS_SRP_UP_END; ++ preempt_enable(); ++ return ret; ++} ++ ++static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync, ++ void *key) ++{ ++ int cpu = smp_processor_id(); ++ struct task_struct *tsk = wait->private; ++ if (cpu != get_partition(tsk)) ++ TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\b", ++ get_partition(tsk)); ++ else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) ++ return default_wake_function(wait, mode, sync, key); ++ return 0; ++} ++ ++ ++ ++static void do_ceiling_block(struct task_struct *tsk) ++{ ++ wait_queue_t wait = { ++ .private = tsk, ++ .func = srp_wake_up, ++ .task_list = {NULL, NULL} ++ }; ++ ++ tsk->state = TASK_UNINTERRUPTIBLE; ++ add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); ++ tsk->rt_param.srp_non_recurse = 1; ++ preempt_enable_no_resched(); ++ schedule(); ++ preempt_disable(); ++ tsk->rt_param.srp_non_recurse = 0; ++ remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); ++} ++ ++/* Wait for current task priority to exceed system-wide priority ceiling. ++ */ ++void srp_ceiling_block(void) ++{ ++ struct task_struct *tsk = current; ++ ++ /* Only applies to real-time tasks, but optimize for RT tasks. */ ++ if (unlikely(!is_realtime(tsk))) ++ return; ++ ++ /* Avoid recursive ceiling blocking. */ ++ if (unlikely(tsk->rt_param.srp_non_recurse)) ++ return; ++ ++ /* Bail out early if there aren't any SRP resources around. */ ++ if (likely(!atomic_read(&srp_objects_in_use))) ++ return; ++ ++ preempt_disable(); ++ if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) { ++ TRACE_CUR("is priority ceiling blocked.\n"); ++ while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) ++ do_ceiling_block(tsk); ++ TRACE_CUR("finally exceeds system ceiling.\n"); ++ } else ++ TRACE_CUR("is not priority ceiling blocked\n"); ++ preempt_enable(); ++} ++ ++ ++#else ++ ++asmlinkage long sys_srp_down(int sem_od) ++{ ++ return -ENOSYS; ++} ++ ++asmlinkage long sys_srp_up(int sem_od) ++{ ++ return -ENOSYS; ++} ++ ++struct fdso_ops srp_sem_ops = {}; ++ ++#endif +diff --git a/litmus/sync.c b/litmus/sync.c +new file mode 100644 +index 0000000..bf75fde +--- /dev/null ++++ b/litmus/sync.c +@@ -0,0 +1,104 @@ ++/* litmus/sync.c - Support for synchronous and asynchronous task system releases. 
++ * ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++ ++static DECLARE_COMPLETION(ts_release); ++ ++static long do_wait_for_ts_release(void) ++{ ++ long ret = 0; ++ ++ /* If the interruption races with a release, the completion object ++ * may have a non-zero counter. To avoid this problem, this should ++ * be replaced by wait_for_completion(). ++ * ++ * For debugging purposes, this is interruptible for now. ++ */ ++ ret = wait_for_completion_interruptible(&ts_release); ++ ++ return ret; ++} ++ ++int count_tasks_waiting_for_release(void) ++{ ++ unsigned long flags; ++ int task_count = 0; ++ struct list_head *pos; ++ ++ spin_lock_irqsave(&ts_release.wait.lock, flags); ++ list_for_each(pos, &ts_release.wait.task_list) { ++ task_count++; ++ } ++ spin_unlock_irqrestore(&ts_release.wait.lock, flags); ++ ++ return task_count; ++} ++ ++static long do_release_ts(lt_t start) ++{ ++ int task_count = 0; ++ unsigned long flags; ++ struct list_head *pos; ++ struct task_struct *t; ++ ++ ++ spin_lock_irqsave(&ts_release.wait.lock, flags); ++ TRACE("<<<<<< synchronous task system release >>>>>>\n"); ++ ++ sched_trace_sys_release(&start); ++ list_for_each(pos, &ts_release.wait.task_list) { ++ t = (struct task_struct*) list_entry(pos, ++ struct __wait_queue, ++ task_list)->private; ++ task_count++; ++ litmus->release_at(t, start + t->rt_param.task_params.phase); ++ sched_trace_task_release(t); ++ } ++ ++ spin_unlock_irqrestore(&ts_release.wait.lock, flags); ++ ++ complete_n(&ts_release, task_count); ++ ++ return task_count; ++} ++ ++ ++asmlinkage long sys_wait_for_ts_release(void) ++{ ++ long ret = -EPERM; ++ struct task_struct *t = current; ++ ++ if (is_realtime(t)) ++ ret = do_wait_for_ts_release(); ++ ++ return ret; ++} ++ ++ ++asmlinkage long sys_release_ts(lt_t __user *__delay) ++{ ++ long ret; ++ lt_t delay; ++ ++ /* FIXME: check capabilities... 
*/ ++ ++ ret = copy_from_user(&delay, __delay, sizeof(delay)); ++ if (ret == 0) ++ ret = do_release_ts(litmus_clock() + delay); ++ ++ return ret; ++} +diff --git a/litmus/trace.c b/litmus/trace.c +new file mode 100644 +index 0000000..4403769 +--- /dev/null ++++ b/litmus/trace.c +@@ -0,0 +1,103 @@ ++#include ++ ++#include ++#include ++#include ++ ++/******************************************************************************/ ++/* Allocation */ ++/******************************************************************************/ ++ ++static struct ftdev overhead_dev; ++ ++#define trace_ts_buf overhead_dev.minor[0].buf ++ ++static unsigned int ts_seq_no = 0; ++ ++static inline void __save_timestamp_cpu(unsigned long event, ++ uint8_t type, uint8_t cpu) ++{ ++ unsigned int seq_no; ++ struct timestamp *ts; ++ seq_no = fetch_and_inc((int *) &ts_seq_no); ++ if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { ++ ts->event = event; ++ ts->timestamp = ft_timestamp(); ++ ts->seq_no = seq_no; ++ ts->cpu = cpu; ++ ts->task_type = type; ++ ft_buffer_finish_write(trace_ts_buf, ts); ++ } ++} ++ ++static inline void __save_timestamp(unsigned long event, ++ uint8_t type) ++{ ++ __save_timestamp_cpu(event, type, raw_smp_processor_id()); ++} ++ ++feather_callback void save_timestamp(unsigned long event) ++{ ++ __save_timestamp(event, TSK_UNKNOWN); ++} ++ ++feather_callback void save_timestamp_def(unsigned long event, ++ unsigned long type) ++{ ++ __save_timestamp(event, (uint8_t) type); ++} ++ ++feather_callback void save_timestamp_task(unsigned long event, ++ unsigned long t_ptr) ++{ ++ int rt = is_realtime((struct task_struct *) t_ptr); ++ __save_timestamp(event, rt ? TSK_RT : TSK_BE); ++} ++ ++feather_callback void save_timestamp_cpu(unsigned long event, ++ unsigned long cpu) ++{ ++ __save_timestamp_cpu(event, TSK_UNKNOWN, cpu); ++} ++ ++/******************************************************************************/ ++/* DEVICE FILE DRIVER */ ++/******************************************************************************/ ++ ++/* ++ * should be 8M; it is the max we can ask to buddy system allocator (MAX_ORDER) ++ * and we might not get as much ++ */ ++#define NO_TIMESTAMPS (2 << 11) ++ ++/* set MAJOR to 0 to have it dynamically assigned */ ++#define FT_TRACE_MAJOR 252 ++ ++static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) ++{ ++ unsigned int count = NO_TIMESTAMPS; ++ while (count && !trace_ts_buf) { ++ printk("time stamp buffer: trying to allocate %u time stamps.\n", count); ++ ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp)); ++ count /= 2; ++ } ++ return ftdev->minor[idx].buf ? 0 : -ENOMEM; ++} ++ ++static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) ++{ ++ free_ft_buffer(ftdev->minor[idx].buf); ++ ftdev->minor[idx].buf = NULL; ++} ++ ++static int __init init_ft_overhead_trace(void) ++{ ++ printk("Initializing Feather-Trace overhead tracing device.\n"); ++ ftdev_init(&overhead_dev, THIS_MODULE); ++ overhead_dev.minor_cnt = 1; /* only one buffer */ ++ overhead_dev.alloc = alloc_timestamp_buffer; ++ overhead_dev.free = free_timestamp_buffer; ++ return register_ftdev(&overhead_dev, "ft_trace", FT_TRACE_MAJOR); ++} ++ ++module_init(init_ft_overhead_trace); diff --git a/index.html b/index.html index ddb17b8..088c5f2 100644 --- a/index.html +++ b/index.html @@ -64,8 +64,8 @@ Have a look at our group's - The current version of LITMUSRT is 2010.1 and is based on Linux 2.6.32. 
- It was released on 05/19/2010 and includes plugins for the following
+ The current version of LITMUSRT is 2010.2 and is based on Linux 2.6.34.
+ It was released on 10/21/2010 and includes plugins for the following
  scheduling policies:

@@ -298,15 +298,51 @@ Technology and Applications Symposium, pp. 342-353, April 2008.
   it is also available as a git repository (see Development below).

-   The current release of LITMUSRT is 2010.1.
+   The current release of LITMUSRT is 2010.2.
    It consists of our Linux kernel modifications in the form of
-   a patch against Linux 2.6.32 and
+   a patch against Linux 2.6.34 and
    liblitmus, the user-space API for real-time tasks, as well as
    ft_tools, a collection of tools used for tracing with
    Feather-Trace (which is part of the LITMUSRT patch).
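
    As a concrete illustration of how the user-space pieces talk to the kernel additions above:
    litmus/sync.c implements a synchronous task-system release, where real-time tasks block in
    sys_wait_for_ts_release() and a launcher triggers sys_release_ts() to start them together.
    The sketch below shows the intended usage pattern; the wait_for_ts_release()/release_ts()
    wrappers, the litmus.h header, and the lt_t typedef are assumed to come from liblitmus and
    should be read as assumptions rather than a definitive API reference.

	/* Hedged sketch of a synchronous task-system release, assuming liblitmus
	 * provides thin wrappers around the syscalls added in litmus/sync.c above.
	 */
	#include <stdio.h>
	#include <litmus.h>   /* assumed: liblitmus header declaring lt_t,
	                       * wait_for_ts_release() and release_ts() */

	int main(int argc, char **argv)
	{
		if (argc > 1) {
			/* Launcher role: release every waiting task, with the common
			 * release time set delay nanoseconds into the future
			 * (sys_release_ts() adds the delay to litmus_clock()). */
			lt_t delay = 1000000; /* 1 ms */
			int released = release_ts(&delay);
			printf("released %d waiting task(s)\n", released);
		} else {
			/* Task role: the caller must already be a LITMUS^RT real-time
			 * task, otherwise the kernel returns -EPERM. The call blocks
			 * (interruptibly, in this release) until the launcher runs;
			 * each task's first job is then offset by its configured phase. */
			if (wait_for_ts_release() != 0)
				perror("wait_for_ts_release");
			/* periodic job body would start here */
		}
		return 0;
	}

    Because sys_release_ts() returns the number of tasks it woke, the launcher can cross-check
    that count against the expected task-set size before starting measurements
    (count_tasks_waiting_for_release() serves a similar purpose inside the kernel).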


    LITMUSRT 2010.2


    Based on Linux 2.6.34. Released in October 2010.


    Files:


    Major changes since LITMUSRT 2010.1:

    • Rebased LITMUSRT from Linux 2.6.32 to Linux 2.6.34.
    • Added support for configurable budget enforcement (no enforcement, coarse-grained enforcement on timer ticks, or precise enforcement using high-resolution timers); see the sketch following this list.
    • Added support for a single cluster containing all CPUs under C-EDF.
    • Made several features optional (C-EDF, PFair, and release-master mode).
    • Fixed several link and compile errors.
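
    The configurable budget enforcement mentioned above is selected per task when its real-time
    parameters are set. The following sketch shows how a task might request the new precise
    (high-resolution-timer) enforcement; the struct rt_task fields, the
    NO_ENFORCEMENT/QUANTUM_ENFORCEMENT/PRECISE_ENFORCEMENT constants, and the liblitmus calls
    (init_litmus(), set_rt_task_param(), task_mode(), gettid()) are assumptions based on the
    interfaces this patch introduces, not a verbatim API listing.

	/* Hedged sketch: choosing a budget-enforcement mode for one task.
	 * All names below are assumed to match the patched kernel's rt_param.h
	 * and the accompanying liblitmus; adjust to the real headers as needed.
	 */
	#include <string.h>
	#include <litmus.h>   /* assumed liblitmus header */

	static int become_precise_rt_task(void)
	{
		struct rt_task params;

		memset(&params, 0, sizeof(params));
		params.exec_cost = 10000000;   /* 10 ms budget per job (ns)  */
		params.period    = 100000000;  /* 100 ms period (ns)         */
		params.phase     = 0;
		params.cpu       = 0;          /* partition, for partitioned plugins */
		params.cls       = RT_CLASS_HARD;
		/* New in 2010.2: precise enforcement arms a high-resolution timer
		 * for the remaining budget; QUANTUM_ENFORCEMENT only checks on
		 * timer ticks, NO_ENFORCEMENT lets jobs overrun. */
		params.budget_policy = PRECISE_ENFORCEMENT;

		if (init_litmus() != 0)
			return -1;
		if (set_rt_task_param(gettid(), &params) != 0)
			return -1;
		/* Switch the calling thread into real-time mode. */
		return task_mode(LITMUS_RT_TASK);
	}

    Tasks whose worst-case execution time is trusted can keep NO_ENFORCEMENT; quantum-based
    enforcement is cheaper but can let a job overrun until the next tick, which is exactly the
    gap the precise, timer-based mode closes.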

    LITMUSRT 2010.1

    -- cgit v1.2.2