aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Bastoni <bastoni@cs.unc.edu>2010-05-21 09:25:41 -0400
committerAndrea Bastoni <bastoni@cs.unc.edu>2010-05-21 09:25:41 -0400
commit6e0c5c609ad02f49d7e3e5edc3d65db1e233d857 (patch)
tree62517b45689b743e9976721df89634267e165011
parente40152ee1e1c7a63f4777791863215e3faa37a86 (diff)
parent960145eb829ae7a7b2d029e987f99a6be7a78d6b (diff)
Merge branch 'master' into wip-2.6.34
Merge LitmusRT master and 2.6.34. This commit is just the plain merge with conflicts resolved. It won't compile. Conflicts solved: Makefile arch/x86/include/asm/hw_irq.h arch/x86/include/asm/unistd_32.h arch/x86/kernel/syscall_table_32.S include/linux/hrtimer.h kernel/sched.c kernel/sched_fair.c
-rw-r--r--Makefile4
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/entry_arch.h1
-rw-r--r--arch/x86/include/asm/feather_trace.h17
-rw-r--r--arch/x86/include/asm/feather_trace_32.h80
-rw-r--r--arch/x86/include/asm/feather_trace_64.h69
-rw-r--r--arch/x86/include/asm/hw_irq.h3
-rw-r--r--arch/x86/include/asm/irq_vectors.h5
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/unistd_32.h6
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c17
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/ft_event.c112
-rw-r--r--arch/x86/kernel/irqinit.c3
-rw-r--r--arch/x86/kernel/smp.c28
-rw-r--r--arch/x86/kernel/syscall_table_32.S14
-rw-r--r--fs/exec.c13
-rw-r--r--fs/inode.c2
-rw-r--r--include/linux/completion.h1
-rw-r--r--include/linux/fs.h21
-rw-r--r--include/linux/hrtimer.h25
-rw-r--r--include/linux/sched.h17
-rw-r--r--include/linux/smp.h5
-rw-r--r--include/linux/tick.h5
-rw-r--r--include/litmus/bheap.h77
-rw-r--r--include/litmus/edf_common.h27
-rw-r--r--include/litmus/fdso.h69
-rw-r--r--include/litmus/feather_buffer.h94
-rw-r--r--include/litmus/feather_trace.h49
-rw-r--r--include/litmus/ftdev.h49
-rw-r--r--include/litmus/jobs.h9
-rw-r--r--include/litmus/litmus.h254
-rw-r--r--include/litmus/rt_domain.h162
-rw-r--r--include/litmus/rt_param.h196
-rw-r--r--include/litmus/sched_plugin.h162
-rw-r--r--include/litmus/sched_trace.h192
-rw-r--r--include/litmus/trace.h113
-rw-r--r--include/litmus/unistd_32.h23
-rw-r--r--include/litmus/unistd_64.h37
-rw-r--r--kernel/exit.c4
-rw-r--r--kernel/fork.c7
-rw-r--r--kernel/hrtimer.c82
-rw-r--r--kernel/printk.c14
-rw-r--r--kernel/sched.c106
-rw-r--r--kernel/sched_fair.c2
-rw-r--r--kernel/sched_rt.c2
-rw-r--r--kernel/time/tick-sched.c48
-rw-r--r--litmus/Kconfig85
-rw-r--r--litmus/Makefile23
-rw-r--r--litmus/bheap.c314
-rw-r--r--litmus/ctrldev.c150
-rw-r--r--litmus/edf_common.c102
-rw-r--r--litmus/fdso.c281
-rw-r--r--litmus/fmlp.c268
-rw-r--r--litmus/ft_event.c43
-rw-r--r--litmus/ftdev.c359
-rw-r--r--litmus/jobs.c43
-rw-r--r--litmus/litmus.c786
-rw-r--r--litmus/rt_domain.c310
-rw-r--r--litmus/sched_cedf.c772
-rw-r--r--litmus/sched_gsn_edf.c830
-rw-r--r--litmus/sched_litmus.c318
-rw-r--r--litmus/sched_pfair.c896
-rw-r--r--litmus/sched_plugin.c265
-rw-r--r--litmus/sched_psn_edf.c480
-rw-r--r--litmus/sched_task_trace.c204
-rw-r--r--litmus/sched_trace.c378
-rw-r--r--litmus/srp.c318
-rw-r--r--litmus/sync.c104
-rw-r--r--litmus/trace.c103
72 files changed, 9633 insertions, 37 deletions
diff --git a/Makefile b/Makefile
index ebc8225f7a96..316557df634b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 34 3SUBLEVEL = 34
4EXTRAVERSION = 4EXTRAVERSION =-litmus2010
5NAME = Sheep on Meth 5NAME = Sheep on Meth
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
@@ -650,7 +650,7 @@ export mod_strip_cmd
650 650
651 651
652ifeq ($(KBUILD_EXTMOD),) 652ifeq ($(KBUILD_EXTMOD),)
653core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ 653core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
654 654
655vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ 655vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
656 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ 656 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9458685902bd..12fbd5b65f1f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2125,3 +2125,5 @@ source "crypto/Kconfig"
2125source "arch/x86/kvm/Kconfig" 2125source "arch/x86/kvm/Kconfig"
2126 2126
2127source "lib/Kconfig" 2127source "lib/Kconfig"
2128
2129source "litmus/Kconfig"
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 8e8ec663a98f..5d07dea2ebb8 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -13,6 +13,7 @@
13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) 13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) 14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) 15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
16BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR)
16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 17BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
17BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) 18BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
18 19
diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h
new file mode 100644
index 000000000000..4fd31633405d
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace.h
@@ -0,0 +1,17 @@
1#ifndef _ARCH_FEATHER_TRACE_H
2#define _ARCH_FEATHER_TRACE_H
3
4#include <asm/msr.h>
5
6static inline unsigned long long ft_timestamp(void)
7{
8 return __native_read_tsc();
9}
10
11#ifdef CONFIG_X86_32
12#include "feather_trace_32.h"
13#else
14#include "feather_trace_64.h"
15#endif
16
17#endif
diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h
new file mode 100644
index 000000000000..192cd09b7850
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace_32.h
@@ -0,0 +1,80 @@
1/* Do not directly include this file. Include feather_trace.h instead */
2
3#define feather_callback __attribute__((regparm(0)))
4
5/*
6 * make the compiler reload any register that is not saved in
7 * a cdecl function call
8 */
9#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx"
10
11#define ft_event(id, callback) \
12 __asm__ __volatile__( \
13 "1: jmp 2f \n\t" \
14 " call " #callback " \n\t" \
15 ".section __event_table, \"aw\" \n\t" \
16 ".long " #id ", 0, 1b, 2f \n\t" \
17 ".previous \n\t" \
18 "2: \n\t" \
19 : : : CLOBBER_LIST)
20
21#define ft_event0(id, callback) \
22 __asm__ __volatile__( \
23 "1: jmp 2f \n\t" \
24 " subl $4, %%esp \n\t" \
25 " movl $" #id ", (%%esp) \n\t" \
26 " call " #callback " \n\t" \
27 " addl $4, %%esp \n\t" \
28 ".section __event_table, \"aw\" \n\t" \
29 ".long " #id ", 0, 1b, 2f \n\t" \
30 ".previous \n\t" \
31 "2: \n\t" \
32 : : : CLOBBER_LIST)
33
34#define ft_event1(id, callback, param) \
35 __asm__ __volatile__( \
36 "1: jmp 2f \n\t" \
37 " subl $8, %%esp \n\t" \
38 " movl %0, 4(%%esp) \n\t" \
39 " movl $" #id ", (%%esp) \n\t" \
40 " call " #callback " \n\t" \
41 " addl $8, %%esp \n\t" \
42 ".section __event_table, \"aw\" \n\t" \
43 ".long " #id ", 0, 1b, 2f \n\t" \
44 ".previous \n\t" \
45 "2: \n\t" \
46 : : "r" (param) : CLOBBER_LIST)
47
48#define ft_event2(id, callback, param, param2) \
49 __asm__ __volatile__( \
50 "1: jmp 2f \n\t" \
51 " subl $12, %%esp \n\t" \
52 " movl %1, 8(%%esp) \n\t" \
53 " movl %0, 4(%%esp) \n\t" \
54 " movl $" #id ", (%%esp) \n\t" \
55 " call " #callback " \n\t" \
56 " addl $12, %%esp \n\t" \
57 ".section __event_table, \"aw\" \n\t" \
58 ".long " #id ", 0, 1b, 2f \n\t" \
59 ".previous \n\t" \
60 "2: \n\t" \
61 : : "r" (param), "r" (param2) : CLOBBER_LIST)
62
63
64#define ft_event3(id, callback, p, p2, p3) \
65 __asm__ __volatile__( \
66 "1: jmp 2f \n\t" \
67 " subl $16, %%esp \n\t" \
68 " movl %2, 12(%%esp) \n\t" \
69 " movl %1, 8(%%esp) \n\t" \
70 " movl %0, 4(%%esp) \n\t" \
71 " movl $" #id ", (%%esp) \n\t" \
72 " call " #callback " \n\t" \
73 " addl $16, %%esp \n\t" \
74 ".section __event_table, \"aw\" \n\t" \
75 ".long " #id ", 0, 1b, 2f \n\t" \
76 ".previous \n\t" \
77 "2: \n\t" \
78 : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
79
80#define __ARCH_HAS_FEATHER_TRACE
diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h
new file mode 100644
index 000000000000..1cffa4eec5f4
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace_64.h
@@ -0,0 +1,69 @@
1/* Do not directly include this file. Include feather_trace.h instead */
2
3/* regparm is the default on x86_64 */
4#define feather_callback
5
6# define _EVENT_TABLE(id,from,to) \
7 ".section __event_table, \"aw\"\n\t" \
8 ".balign 8\n\t" \
9 ".quad " #id ", 0, " #from ", " #to " \n\t" \
10 ".previous \n\t"
11
12/*
13 * x86_64 callee only owns rbp, rbx, r12 -> r15
14 * the called can freely modify the others
15 */
16#define CLOBBER_LIST "memory", "cc", "rdi", "rsi", "rdx", "rcx", \
17 "r8", "r9", "r10", "r11", "rax"
18
19#define ft_event(id, callback) \
20 __asm__ __volatile__( \
21 "1: jmp 2f \n\t" \
22 " call " #callback " \n\t" \
23 _EVENT_TABLE(id,1b,2f) \
24 "2: \n\t" \
25 : : : CLOBBER_LIST)
26
27#define ft_event0(id, callback) \
28 __asm__ __volatile__( \
29 "1: jmp 2f \n\t" \
30 " movq $" #id ", %%rdi \n\t" \
31 " call " #callback " \n\t" \
32 _EVENT_TABLE(id,1b,2f) \
33 "2: \n\t" \
34 : : : CLOBBER_LIST)
35
36#define ft_event1(id, callback, param) \
37 __asm__ __volatile__( \
38 "1: jmp 2f \n\t" \
39 " movq %0, %%rsi \n\t" \
40 " movq $" #id ", %%rdi \n\t" \
41 " call " #callback " \n\t" \
42 _EVENT_TABLE(id,1b,2f) \
43 "2: \n\t" \
44 : : "r" (param) : CLOBBER_LIST)
45
46#define ft_event2(id, callback, param, param2) \
47 __asm__ __volatile__( \
48 "1: jmp 2f \n\t" \
49 " movq %1, %%rdx \n\t" \
50 " movq %0, %%rsi \n\t" \
51 " movq $" #id ", %%rdi \n\t" \
52 " call " #callback " \n\t" \
53 _EVENT_TABLE(id,1b,2f) \
54 "2: \n\t" \
55 : : "r" (param), "r" (param2) : CLOBBER_LIST)
56
57#define ft_event3(id, callback, p, p2, p3) \
58 __asm__ __volatile__( \
59 "1: jmp 2f \n\t" \
60 " movq %2, %%rcx \n\t" \
61 " movq %1, %%rdx \n\t" \
62 " movq %0, %%rsi \n\t" \
63 " movq $" #id ", %%rdi \n\t" \
64 " call " #callback " \n\t" \
65 _EVENT_TABLE(id,1b,2f) \
66 "2: \n\t" \
67 : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
68
69#define __ARCH_HAS_FEATHER_TRACE
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 46c0fe05f230..c17411503f28 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -53,6 +53,8 @@ extern void threshold_interrupt(void);
53extern void call_function_interrupt(void); 53extern void call_function_interrupt(void);
54extern void call_function_single_interrupt(void); 54extern void call_function_single_interrupt(void);
55 55
56extern void pull_timers_interrupt(void);
57
56/* IOAPIC */ 58/* IOAPIC */
57#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) 59#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
58extern unsigned long io_apic_irqs; 60extern unsigned long io_apic_irqs;
@@ -122,6 +124,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
122extern void smp_reschedule_interrupt(struct pt_regs *); 124extern void smp_reschedule_interrupt(struct pt_regs *);
123extern void smp_call_function_interrupt(struct pt_regs *); 125extern void smp_call_function_interrupt(struct pt_regs *);
124extern void smp_call_function_single_interrupt(struct pt_regs *); 126extern void smp_call_function_single_interrupt(struct pt_regs *);
127extern void smp_pull_timers_interrupt(struct pt_regs *);
125#ifdef CONFIG_X86_32 128#ifdef CONFIG_X86_32
126extern void smp_invalidate_interrupt(struct pt_regs *); 129extern void smp_invalidate_interrupt(struct pt_regs *);
127#else 130#else
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 8767d99c4f64..bb5318bbe0e4 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -109,6 +109,11 @@
109#define LOCAL_TIMER_VECTOR 0xef 109#define LOCAL_TIMER_VECTOR 0xef
110 110
111/* 111/*
112 * LITMUS^RT pull timers IRQ vector
113 */
114#define PULL_TIMERS_VECTOR 0xee
115
116/*
112 * Generic system vector for platform specific use 117 * Generic system vector for platform specific use
113 */ 118 */
114#define X86_PLATFORM_IPI_VECTOR 0xed 119#define X86_PLATFORM_IPI_VECTOR 0xed
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b753ea59703a..91d323f47364 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -173,6 +173,8 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
173extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); 173extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
174extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 174extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
175extern unsigned short num_cache_leaves; 175extern unsigned short num_cache_leaves;
176extern int get_shared_cpu_map(cpumask_var_t mask,
177 unsigned int cpu, int index);
176 178
177extern void detect_extended_topology(struct cpuinfo_x86 *c); 179extern void detect_extended_topology(struct cpuinfo_x86 *c);
178extern void detect_ht(struct cpuinfo_x86 *c); 180extern void detect_ht(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index beb9b5f8f8a4..4f61e8b0715a 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -344,9 +344,13 @@
344#define __NR_perf_event_open 336 344#define __NR_perf_event_open 336
345#define __NR_recvmmsg 337 345#define __NR_recvmmsg 337
346 346
347#define __NR_LITMUS 338
348
349#include "litmus/unistd_32.h"
350
347#ifdef __KERNEL__ 351#ifdef __KERNEL__
348 352
349#define NR_syscalls 338 353#define NR_syscalls 339 + NR_litmus_syscalls
350 354
351#define __ARCH_WANT_IPC_PARSE_VERSION 355#define __ARCH_WANT_IPC_PARSE_VERSION
352#define __ARCH_WANT_OLD_READDIR 356#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index ff4307b0e81e..b21c3b269aac 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -664,6 +664,10 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
664#define __NR_recvmmsg 299 664#define __NR_recvmmsg 299
665__SYSCALL(__NR_recvmmsg, sys_recvmmsg) 665__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
666 666
667#define __NR_LITMUS 299
668
669#include "litmus/unistd_64.h"
670
667#ifndef __NO_STUBS 671#ifndef __NO_STUBS
668#define __ARCH_WANT_OLD_READDIR 672#define __ARCH_WANT_OLD_READDIR
669#define __ARCH_WANT_OLD_STAT 673#define __ARCH_WANT_OLD_STAT
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4c58352209e0..d09934e22ca5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -117,6 +117,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
117 117
118obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o 118obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
119 119
120obj-$(CONFIG_FEATHER_TRACE) += ft_event.o
121
120### 122###
121# 64 bit specific files 123# 64 bit specific files
122ifeq ($(CONFIG_X86_64),y) 124ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 95962a93f99a..94d8e475744c 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -632,6 +632,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
632static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); 632static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
633#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) 633#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
634 634
635/* returns CPUs that share the index cache with cpu */
636int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
637{
638 int ret = 0;
639 struct _cpuid4_info *this_leaf;
640
641 if (index >= num_cache_leaves) {
642 index = num_cache_leaves - 1;
643 ret = index;
644 }
645
646 this_leaf = CPUID4_INFO_IDX(cpu,index);
647 cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map));
648
649 return ret;
650}
651
635#ifdef CONFIG_SMP 652#ifdef CONFIG_SMP
636static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 653static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
637{ 654{
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0697ff139837..b9ec6cd7796f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1016,6 +1016,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \
1016 call_function_interrupt smp_call_function_interrupt 1016 call_function_interrupt smp_call_function_interrupt
1017apicinterrupt RESCHEDULE_VECTOR \ 1017apicinterrupt RESCHEDULE_VECTOR \
1018 reschedule_interrupt smp_reschedule_interrupt 1018 reschedule_interrupt smp_reschedule_interrupt
1019apicinterrupt PULL_TIMERS_VECTOR \
1020 pull_timers_interrupt smp_pull_timers_interrupt
1019#endif 1021#endif
1020 1022
1021apicinterrupt ERROR_APIC_VECTOR \ 1023apicinterrupt ERROR_APIC_VECTOR \
diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c
new file mode 100644
index 000000000000..e07ee30dfff9
--- /dev/null
+++ b/arch/x86/kernel/ft_event.c
@@ -0,0 +1,112 @@
1#include <linux/types.h>
2
3#include <litmus/feather_trace.h>
4
5#ifdef __ARCH_HAS_FEATHER_TRACE
6/* the feather trace management functions assume
7 * exclusive access to the event table
8 */
9
10
11#define BYTE_JUMP 0xeb
12#define BYTE_JUMP_LEN 0x02
13
14/* for each event, there is an entry in the event table */
15struct trace_event {
16 long id;
17 long count;
18 long start_addr;
19 long end_addr;
20};
21
22extern struct trace_event __start___event_table[];
23extern struct trace_event __stop___event_table[];
24
25int ft_enable_event(unsigned long id)
26{
27 struct trace_event* te = __start___event_table;
28 int count = 0;
29 char* delta;
30 unsigned char* instr;
31
32 while (te < __stop___event_table) {
33 if (te->id == id && ++te->count == 1) {
34 instr = (unsigned char*) te->start_addr;
35 /* make sure we don't clobber something wrong */
36 if (*instr == BYTE_JUMP) {
37 delta = (((unsigned char*) te->start_addr) + 1);
38 *delta = 0;
39 }
40 }
41 if (te->id == id)
42 count++;
43 te++;
44 }
45
46 printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count);
47 return count;
48}
49
50int ft_disable_event(unsigned long id)
51{
52 struct trace_event* te = __start___event_table;
53 int count = 0;
54 char* delta;
55 unsigned char* instr;
56
57 while (te < __stop___event_table) {
58 if (te->id == id && --te->count == 0) {
59 instr = (unsigned char*) te->start_addr;
60 if (*instr == BYTE_JUMP) {
61 delta = (((unsigned char*) te->start_addr) + 1);
62 *delta = te->end_addr - te->start_addr -
63 BYTE_JUMP_LEN;
64 }
65 }
66 if (te->id == id)
67 count++;
68 te++;
69 }
70
71 printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count);
72 return count;
73}
74
75int ft_disable_all_events(void)
76{
77 struct trace_event* te = __start___event_table;
78 int count = 0;
79 char* delta;
80 unsigned char* instr;
81
82 while (te < __stop___event_table) {
83 if (te->count) {
84 instr = (unsigned char*) te->start_addr;
85 if (*instr == BYTE_JUMP) {
86 delta = (((unsigned char*) te->start_addr)
87 + 1);
88 *delta = te->end_addr - te->start_addr -
89 BYTE_JUMP_LEN;
90 te->count = 0;
91 count++;
92 }
93 }
94 te++;
95 }
96 return count;
97}
98
99int ft_is_event_enabled(unsigned long id)
100{
101 struct trace_event* te = __start___event_table;
102
103 while (te < __stop___event_table) {
104 if (te->id == id)
105 return te->count;
106 te++;
107 }
108 return 0;
109}
110
111#endif
112
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 0ed2d300cd46..a760ce1a2c0d 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -189,6 +189,9 @@ static void __init smp_intr_init(void)
189 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, 189 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
190 call_function_single_interrupt); 190 call_function_single_interrupt);
191 191
192 /* IPI for hrtimer pulling on remote cpus */
193 alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt);
194
192 /* Low priority IPI to cleanup after moving an irq */ 195 /* Low priority IPI to cleanup after moving an irq */
193 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 196 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
194 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); 197 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d801210945d6..97af589a5c0c 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -23,6 +23,9 @@
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/gfp.h> 24#include <linux/gfp.h>
25 25
26#include <litmus/litmus.h>
27#include <litmus/trace.h>
28
26#include <asm/mtrr.h> 29#include <asm/mtrr.h>
27#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
28#include <asm/mmu_context.h> 31#include <asm/mmu_context.h>
@@ -118,6 +121,7 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 121 WARN_ON(1);
119 return; 122 return;
120 } 123 }
124 TS_SEND_RESCHED_START(cpu);
121 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); 125 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 126}
123 127
@@ -147,6 +151,16 @@ void native_send_call_func_ipi(const struct cpumask *mask)
147 free_cpumask_var(allbutself); 151 free_cpumask_var(allbutself);
148} 152}
149 153
154/* trigger timers on remote cpu */
155void smp_send_pull_timers(int cpu)
156{
157 if (unlikely(cpu_is_offline(cpu))) {
158 WARN_ON(1);
159 return;
160 }
161 apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR);
162}
163
150/* 164/*
151 * this function calls the 'stop' function on all other CPUs in the system. 165 * this function calls the 'stop' function on all other CPUs in the system.
152 */ 166 */
@@ -198,7 +212,12 @@ static void native_smp_send_stop(void)
198void smp_reschedule_interrupt(struct pt_regs *regs) 212void smp_reschedule_interrupt(struct pt_regs *regs)
199{ 213{
200 ack_APIC_irq(); 214 ack_APIC_irq();
215 /* LITMUS^RT needs this interrupt to proper reschedule
216 * on this cpu
217 */
218 set_tsk_need_resched(current);
201 inc_irq_stat(irq_resched_count); 219 inc_irq_stat(irq_resched_count);
220 TS_SEND_RESCHED_END;
202 /* 221 /*
203 * KVM uses this interrupt to force a cpu out of guest mode 222 * KVM uses this interrupt to force a cpu out of guest mode
204 */ 223 */
@@ -222,6 +241,15 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
222 irq_exit(); 241 irq_exit();
223} 242}
224 243
244extern void hrtimer_pull(void);
245
246void smp_pull_timers_interrupt(struct pt_regs *regs)
247{
248 ack_APIC_irq();
249 TRACE("pull timer interrupt\n");
250 hrtimer_pull();
251}
252
225struct smp_ops smp_ops = { 253struct smp_ops smp_ops = {
226 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, 254 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
227 .smp_prepare_cpus = native_smp_prepare_cpus, 255 .smp_prepare_cpus = native_smp_prepare_cpus,
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 8b3729341216..5da9a68546b7 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -337,3 +337,17 @@ ENTRY(sys_call_table)
337 .long sys_rt_tgsigqueueinfo /* 335 */ 337 .long sys_rt_tgsigqueueinfo /* 335 */
338 .long sys_perf_event_open 338 .long sys_perf_event_open
339 .long sys_recvmmsg 339 .long sys_recvmmsg
340 .long sys_set_rt_task_param /* LITMUS^RT 338 */
341 .long sys_get_rt_task_param
342 .long sys_complete_job
343 .long sys_od_open
344 .long sys_od_close
345 .long sys_fmlp_down
346 .long sys_fmlp_up
347 .long sys_srp_down
348 .long sys_srp_up
349 .long sys_query_job_no
350 .long sys_wait_for_job_release
351 .long sys_wait_for_ts_release
352 .long sys_release_ts
353 .long sys_null_call
diff --git a/fs/exec.c b/fs/exec.c
index e6e94c626c2c..029308754eea 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -19,7 +19,7 @@
19 * current->executable is only used by the procfs. This allows a dispatch 19 * current->executable is only used by the procfs. This allows a dispatch
20 * table to check for several different types of binary formats. We keep 20 * table to check for several different types of binary formats. We keep
21 * trying until we recognize the file or we run out of supported binary 21 * trying until we recognize the file or we run out of supported binary
22 * formats. 22 * formats.
23 */ 23 */
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
@@ -56,6 +56,8 @@
56#include <linux/fs_struct.h> 56#include <linux/fs_struct.h>
57#include <linux/pipe_fs_i.h> 57#include <linux/pipe_fs_i.h>
58 58
59#include <litmus/litmus.h>
60
59#include <asm/uaccess.h> 61#include <asm/uaccess.h>
60#include <asm/mmu_context.h> 62#include <asm/mmu_context.h>
61#include <asm/tlb.h> 63#include <asm/tlb.h>
@@ -79,7 +81,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert)
79 insert ? list_add(&fmt->lh, &formats) : 81 insert ? list_add(&fmt->lh, &formats) :
80 list_add_tail(&fmt->lh, &formats); 82 list_add_tail(&fmt->lh, &formats);
81 write_unlock(&binfmt_lock); 83 write_unlock(&binfmt_lock);
82 return 0; 84 return 0;
83} 85}
84 86
85EXPORT_SYMBOL(__register_binfmt); 87EXPORT_SYMBOL(__register_binfmt);
@@ -1045,7 +1047,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1045 group */ 1047 group */
1046 1048
1047 current->self_exec_id++; 1049 current->self_exec_id++;
1048 1050
1049 flush_signal_handlers(current, 0); 1051 flush_signal_handlers(current, 0);
1050 flush_old_files(current->files); 1052 flush_old_files(current->files);
1051} 1053}
@@ -1135,8 +1137,8 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1135 return res; 1137 return res;
1136} 1138}
1137 1139
1138/* 1140/*
1139 * Fill the binprm structure from the inode. 1141 * Fill the binprm structure from the inode.
1140 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes 1142 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
1141 * 1143 *
1142 * This may be called multiple times for binary chains (scripts for example). 1144 * This may be called multiple times for binary chains (scripts for example).
@@ -1348,6 +1350,7 @@ int do_execve(char * filename,
1348 goto out_unmark; 1350 goto out_unmark;
1349 1351
1350 sched_exec(); 1352 sched_exec();
1353 litmus_exec();
1351 1354
1352 bprm->file = file; 1355 bprm->file = file;
1353 bprm->filename = filename; 1356 bprm->filename = filename;
diff --git a/fs/inode.c b/fs/inode.c
index 407bf392e20a..aaaaf096aa8e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -271,6 +271,8 @@ void inode_init_once(struct inode *inode)
271#ifdef CONFIG_FSNOTIFY 271#ifdef CONFIG_FSNOTIFY
272 INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); 272 INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
273#endif 273#endif
274 INIT_LIST_HEAD(&inode->i_obj_list);
275 mutex_init(&inode->i_obj_mutex);
274} 276}
275EXPORT_SYMBOL(inode_init_once); 277EXPORT_SYMBOL(inode_init_once);
276 278
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 4a6b604ef7e4..258bec13d424 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -88,6 +88,7 @@ extern bool completion_done(struct completion *x);
88 88
89extern void complete(struct completion *); 89extern void complete(struct completion *);
90extern void complete_all(struct completion *); 90extern void complete_all(struct completion *);
91extern void complete_n(struct completion *, int n);
91 92
92/** 93/**
93 * INIT_COMPLETION: - reinitialize a completion structure 94 * INIT_COMPLETION: - reinitialize a completion structure
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 44f35aea2f1f..894918440bc8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -15,8 +15,8 @@
15 * nr_file rlimit, so it's safe to set up a ridiculously high absolute 15 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
16 * upper limit on files-per-process. 16 * upper limit on files-per-process.
17 * 17 *
18 * Some programs (notably those using select()) may have to be 18 * Some programs (notably those using select()) may have to be
19 * recompiled to take full advantage of the new limits.. 19 * recompiled to take full advantage of the new limits..
20 */ 20 */
21 21
22/* Fixed constants first: */ 22/* Fixed constants first: */
@@ -173,7 +173,7 @@ struct inodes_stat_t {
173#define SEL_EX 4 173#define SEL_EX 4
174 174
175/* public flags for file_system_type */ 175/* public flags for file_system_type */
176#define FS_REQUIRES_DEV 1 176#define FS_REQUIRES_DEV 1
177#define FS_BINARY_MOUNTDATA 2 177#define FS_BINARY_MOUNTDATA 2
178#define FS_HAS_SUBTYPE 4 178#define FS_HAS_SUBTYPE 4
179#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 179#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
@@ -471,7 +471,7 @@ struct iattr {
471 */ 471 */
472#include <linux/quota.h> 472#include <linux/quota.h>
473 473
474/** 474/**
475 * enum positive_aop_returns - aop return codes with specific semantics 475 * enum positive_aop_returns - aop return codes with specific semantics
476 * 476 *
477 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has 477 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
@@ -481,7 +481,7 @@ struct iattr {
481 * be a candidate for writeback again in the near 481 * be a candidate for writeback again in the near
482 * future. Other callers must be careful to unlock 482 * future. Other callers must be careful to unlock
483 * the page if they get this return. Returned by 483 * the page if they get this return. Returned by
484 * writepage(); 484 * writepage();
485 * 485 *
486 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has 486 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
487 * unlocked it and the page might have been truncated. 487 * unlocked it and the page might have been truncated.
@@ -720,6 +720,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
720 720
721struct posix_acl; 721struct posix_acl;
722#define ACL_NOT_CACHED ((void *)(-1)) 722#define ACL_NOT_CACHED ((void *)(-1))
723struct inode_obj_id_table;
723 724
724struct inode { 725struct inode {
725 struct hlist_node i_hash; 726 struct hlist_node i_hash;
@@ -788,6 +789,8 @@ struct inode {
788 struct posix_acl *i_acl; 789 struct posix_acl *i_acl;
789 struct posix_acl *i_default_acl; 790 struct posix_acl *i_default_acl;
790#endif 791#endif
792 struct list_head i_obj_list;
793 struct mutex i_obj_mutex;
791 void *i_private; /* fs or device private pointer */ 794 void *i_private; /* fs or device private pointer */
792}; 795};
793 796
@@ -1000,10 +1003,10 @@ static inline int file_check_writeable(struct file *filp)
1000 1003
1001#define MAX_NON_LFS ((1UL<<31) - 1) 1004#define MAX_NON_LFS ((1UL<<31) - 1)
1002 1005
1003/* Page cache limit. The filesystems should put that into their s_maxbytes 1006/* Page cache limit. The filesystems should put that into their s_maxbytes
1004 limits, otherwise bad things can happen in VM. */ 1007 limits, otherwise bad things can happen in VM. */
1005#if BITS_PER_LONG==32 1008#if BITS_PER_LONG==32
1006#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 1009#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
1007#elif BITS_PER_LONG==64 1010#elif BITS_PER_LONG==64
1008#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL 1011#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL
1009#endif 1012#endif
@@ -2129,7 +2132,7 @@ extern int may_open(struct path *, int, int);
2129 2132
2130extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2133extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2131extern struct file * open_exec(const char *); 2134extern struct file * open_exec(const char *);
2132 2135
2133/* fs/dcache.c -- generic fs support functions */ 2136/* fs/dcache.c -- generic fs support functions */
2134extern int is_subdir(struct dentry *, struct dentry *); 2137extern int is_subdir(struct dentry *, struct dentry *);
2135extern int path_is_under(struct path *, struct path *); 2138extern int path_is_under(struct path *, struct path *);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5d86fb2309d2..b34823755ee4 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -167,6 +167,7 @@ struct hrtimer_clock_base {
167 * @nr_retries: Total number of hrtimer interrupt retries 167 * @nr_retries: Total number of hrtimer interrupt retries
168 * @nr_hangs: Total number of hrtimer interrupt hangs 168 * @nr_hangs: Total number of hrtimer interrupt hangs
169 * @max_hang_time: Maximum time spent in hrtimer_interrupt 169 * @max_hang_time: Maximum time spent in hrtimer_interrupt
170 * @to_pull: LITMUS^RT list of timers to be pulled on this cpu
170 */ 171 */
171struct hrtimer_cpu_base { 172struct hrtimer_cpu_base {
172 raw_spinlock_t lock; 173 raw_spinlock_t lock;
@@ -180,6 +181,26 @@ struct hrtimer_cpu_base {
180 unsigned long nr_hangs; 181 unsigned long nr_hangs;
181 ktime_t max_hang_time; 182 ktime_t max_hang_time;
182#endif 183#endif
184 struct list_head to_pull;
185};
186
187#define HRTIMER_START_ON_INACTIVE 0
188#define HRTIMER_START_ON_QUEUED 1
189
190/*
191 * struct hrtimer_start_on_info - save timer info on remote cpu
192 * @list: list of hrtimer_start_on_info on remote cpu (to_pull)
193 * @timer: timer to be triggered on remote cpu
194 * @time: time event
195 * @mode: timer mode
196 * @state: activity flag
197 */
198struct hrtimer_start_on_info {
199 struct list_head list;
200 struct hrtimer *timer;
201 ktime_t time;
202 enum hrtimer_mode mode;
203 atomic_t state;
183}; 204};
184 205
185static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) 206static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
@@ -348,6 +369,10 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
348 unsigned long delta_ns, 369 unsigned long delta_ns,
349 const enum hrtimer_mode mode, int wakeup); 370 const enum hrtimer_mode mode, int wakeup);
350 371
372extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
373 struct hrtimer *timer, ktime_t time,
374 const enum hrtimer_mode mode);
375
351extern int hrtimer_cancel(struct hrtimer *timer); 376extern int hrtimer_cancel(struct hrtimer *timer);
352extern int hrtimer_try_to_cancel(struct hrtimer *timer); 377extern int hrtimer_try_to_cancel(struct hrtimer *timer);
353 378
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b7b81df78b3..225347d97d47 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -38,6 +38,7 @@
38#define SCHED_BATCH 3 38#define SCHED_BATCH 3
39/* SCHED_ISO: reserved but not implemented yet */ 39/* SCHED_ISO: reserved but not implemented yet */
40#define SCHED_IDLE 5 40#define SCHED_IDLE 5
41#define SCHED_LITMUS 6
41/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ 42/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
42#define SCHED_RESET_ON_FORK 0x40000000 43#define SCHED_RESET_ON_FORK 0x40000000
43 44
@@ -94,6 +95,8 @@ struct sched_param {
94 95
95#include <asm/processor.h> 96#include <asm/processor.h>
96 97
98#include <litmus/rt_param.h>
99
97struct exec_domain; 100struct exec_domain;
98struct futex_pi_state; 101struct futex_pi_state;
99struct robust_list_head; 102struct robust_list_head;
@@ -1166,6 +1169,7 @@ struct sched_rt_entity {
1166}; 1169};
1167 1170
1168struct rcu_node; 1171struct rcu_node;
1172struct od_table_entry;
1169 1173
1170struct task_struct { 1174struct task_struct {
1171 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 1175 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
@@ -1250,9 +1254,9 @@ struct task_struct {
1250 unsigned long stack_canary; 1254 unsigned long stack_canary;
1251#endif 1255#endif
1252 1256
1253 /* 1257 /*
1254 * pointers to (original) parent process, youngest child, younger sibling, 1258 * pointers to (original) parent process, youngest child, younger sibling,
1255 * older sibling, respectively. (p->father can be replaced with 1259 * older sibling, respectively. (p->father can be replaced with
1256 * p->real_parent->pid) 1260 * p->real_parent->pid)
1257 */ 1261 */
1258 struct task_struct *real_parent; /* real parent process */ 1262 struct task_struct *real_parent; /* real parent process */
@@ -1464,6 +1468,13 @@ struct task_struct {
1464 int make_it_fail; 1468 int make_it_fail;
1465#endif 1469#endif
1466 struct prop_local_single dirties; 1470 struct prop_local_single dirties;
1471
1472 /* LITMUS RT parameters and state */
1473 struct rt_param rt_param;
1474
1475 /* references to PI semaphores, etc. */
1476 struct od_table_entry *od_table;
1477
1467#ifdef CONFIG_LATENCYTOP 1478#ifdef CONFIG_LATENCYTOP
1468 int latency_record_count; 1479 int latency_record_count;
1469 struct latency_record latency_record[LT_SAVECOUNT]; 1480 struct latency_record latency_record[LT_SAVECOUNT];
@@ -2018,7 +2029,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s
2018 spin_unlock_irqrestore(&tsk->sighand->siglock, flags); 2029 spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
2019 2030
2020 return ret; 2031 return ret;
2021} 2032}
2022 2033
2023extern void block_all_signals(int (*notifier)(void *priv), void *priv, 2034extern void block_all_signals(int (*notifier)(void *priv), void *priv,
2024 sigset_t *mask); 2035 sigset_t *mask);
diff --git a/include/linux/smp.h b/include/linux/smp.h
index cfa2d20e35f1..f86d40768e7f 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -80,6 +80,11 @@ int smp_call_function_any(const struct cpumask *mask,
80 void (*func)(void *info), void *info, int wait); 80 void (*func)(void *info), void *info, int wait);
81 81
82/* 82/*
83 * sends a 'pull timer' event to a remote CPU
84 */
85extern void smp_send_pull_timers(int cpu);
86
87/*
83 * Generic and arch helpers 88 * Generic and arch helpers
84 */ 89 */
85#ifdef CONFIG_USE_GENERIC_SMP_HELPERS 90#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
diff --git a/include/linux/tick.h b/include/linux/tick.h
index d2ae79e21be3..25d0cf41d3fd 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -73,6 +73,11 @@ extern int tick_is_oneshot_available(void);
73extern struct tick_device *tick_get_device(int cpu); 73extern struct tick_device *tick_get_device(int cpu);
74 74
75# ifdef CONFIG_HIGH_RES_TIMERS 75# ifdef CONFIG_HIGH_RES_TIMERS
76/* LITMUS^RT tick alignment */
77#define LINUX_DEFAULT_TICKS 0
78#define LITMUS_ALIGNED_TICKS 1
79#define LITMUS_STAGGERED_TICKS 2
80
76extern int tick_init_highres(void); 81extern int tick_init_highres(void);
77extern int tick_program_event(ktime_t expires, int force); 82extern int tick_program_event(ktime_t expires, int force);
78extern void tick_setup_sched_timer(void); 83extern void tick_setup_sched_timer(void);
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
new file mode 100644
index 000000000000..cf4864a498d8
--- /dev/null
+++ b/include/litmus/bheap.h
@@ -0,0 +1,77 @@
1/* bheaps.h -- Binomial Heaps
2 *
3 * (c) 2008, 2009 Bjoern Brandenburg
4 */
5
6#ifndef BHEAP_H
7#define BHEAP_H
8
9#define NOT_IN_HEAP UINT_MAX
10
11struct bheap_node {
12 struct bheap_node* parent;
13 struct bheap_node* next;
14 struct bheap_node* child;
15
16 unsigned int degree;
17 void* value;
18 struct bheap_node** ref;
19};
20
21struct bheap {
22 struct bheap_node* head;
23 /* We cache the minimum of the heap.
24 * This speeds up repeated peek operations.
25 */
26 struct bheap_node* min;
27};
28
29typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
30
31void bheap_init(struct bheap* heap);
32void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
33
34static inline int bheap_node_in_heap(struct bheap_node* h)
35{
36 return h->degree != NOT_IN_HEAP;
37}
38
39static inline int bheap_empty(struct bheap* heap)
40{
41 return heap->head == NULL && heap->min == NULL;
42}
43
44/* insert (and reinitialize) a node into the heap */
45void bheap_insert(bheap_prio_t higher_prio,
46 struct bheap* heap,
47 struct bheap_node* node);
48
49/* merge addition into target */
50void bheap_union(bheap_prio_t higher_prio,
51 struct bheap* target,
52 struct bheap* addition);
53
54struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
55 struct bheap* heap);
56
57struct bheap_node* bheap_take(bheap_prio_t higher_prio,
58 struct bheap* heap);
59
60void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
61int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
62
63void bheap_delete(bheap_prio_t higher_prio,
64 struct bheap* heap,
65 struct bheap_node* node);
66
67/* allocate from memcache */
68struct bheap_node* bheap_node_alloc(int gfp_flags);
69void bheap_node_free(struct bheap_node* hn);
70
71/* allocate a heap node for value and insert into the heap */
72int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
73 void* value, int gfp_flags);
74
75void* bheap_take_del(bheap_prio_t higher_prio,
76 struct bheap* heap);
77#endif
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
new file mode 100644
index 000000000000..80d4321cc87e
--- /dev/null
+++ b/include/litmus/edf_common.h
@@ -0,0 +1,27 @@
1/*
2 * EDF common data structures and utility functions shared by all EDF
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_EDF_COMMON_H__
11#define __UNC_EDF_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int edf_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25int edf_set_hp_task(struct pi_semaphore *sem);
26int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu);
27#endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 000000000000..286e10f86de0
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,69 @@
1/* fdso.h - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 */
5
6#ifndef _LINUX_FDSO_H_
7#define _LINUX_FDSO_H_
8
9#include <linux/list.h>
10#include <asm/atomic.h>
11
12#include <linux/fs.h>
13
14#define MAX_OBJECT_DESCRIPTORS 32
15
16typedef enum {
17 MIN_OBJ_TYPE = 0,
18
19 FMLP_SEM = 0,
20 SRP_SEM = 1,
21
22 MAX_OBJ_TYPE = 1
23} obj_type_t;
24
25struct inode_obj_id {
26 struct list_head list;
27 atomic_t count;
28 struct inode* inode;
29
30 obj_type_t type;
31 void* obj;
32 unsigned int id;
33};
34
35
36struct od_table_entry {
37 unsigned int used;
38
39 struct inode_obj_id* obj;
40 void* extra;
41};
42
43struct fdso_ops {
44 void* (*create) (void);
45 void (*destroy)(void*);
46 int (*open) (struct od_table_entry*, void* __user);
47 int (*close) (struct od_table_entry*);
48};
49
50/* translate a userspace supplied od into the raw table entry
51 * returns NULL if od is invalid
52 */
53struct od_table_entry* __od_lookup(int od);
54
55/* translate a userspace supplied od into the associated object
56 * returns NULL if od is invalid
57 */
58static inline void* od_lookup(int od, obj_type_t type)
59{
60 struct od_table_entry* e = __od_lookup(od);
61 return e && e->obj->type == type ? e->obj->obj : NULL;
62}
63
64#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
65#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
66#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
67
68
69#endif
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
new file mode 100644
index 000000000000..6c18277fdfc9
--- /dev/null
+++ b/include/litmus/feather_buffer.h
@@ -0,0 +1,94 @@
1#ifndef _FEATHER_BUFFER_H_
2#define _FEATHER_BUFFER_H_
3
4/* requires UINT_MAX and memcpy */
5
6#define SLOT_FREE 0
7#define SLOT_BUSY 1
8#define SLOT_READY 2
9
10struct ft_buffer {
11 unsigned int slot_count;
12 unsigned int slot_size;
13
14 int free_count;
15 unsigned int write_idx;
16 unsigned int read_idx;
17
18 char* slots;
19 void* buffer_mem;
20 unsigned int failed_writes;
21};
22
23static inline int init_ft_buffer(struct ft_buffer* buf,
24 unsigned int slot_count,
25 unsigned int slot_size,
26 char* slots,
27 void* buffer_mem)
28{
29 int i = 0;
30 if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
31 /* The slot count must divide UNIT_MAX + 1 so that when it
32 * wraps around the index correctly points to 0.
33 */
34 return 0;
35 } else {
36 buf->slot_count = slot_count;
37 buf->slot_size = slot_size;
38 buf->slots = slots;
39 buf->buffer_mem = buffer_mem;
40 buf->free_count = slot_count;
41 buf->write_idx = 0;
42 buf->read_idx = 0;
43 buf->failed_writes = 0;
44 for (i = 0; i < slot_count; i++)
45 buf->slots[i] = SLOT_FREE;
46 return 1;
47 }
48}
49
50static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
51{
52 int free = fetch_and_dec(&buf->free_count);
53 unsigned int idx;
54 if (free <= 0) {
55 fetch_and_inc(&buf->free_count);
56 *ptr = 0;
57 fetch_and_inc(&buf->failed_writes);
58 return 0;
59 } else {
60 idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
61 buf->slots[idx] = SLOT_BUSY;
62 *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
63 return 1;
64 }
65}
66
67static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
68{
69 unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
70 buf->slots[idx] = SLOT_READY;
71}
72
73
74/* exclusive reader access is assumed */
75static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
76{
77 unsigned int idx;
78 if (buf->free_count == buf->slot_count)
79 /* nothing available */
80 return 0;
81 idx = buf->read_idx % buf->slot_count;
82 if (buf->slots[idx] == SLOT_READY) {
83 memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
84 buf->slot_size);
85 buf->slots[idx] = SLOT_FREE;
86 buf->read_idx++;
87 fetch_and_inc(&buf->free_count);
88 return 1;
89 } else
90 return 0;
91}
92
93
94#endif
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
new file mode 100644
index 000000000000..7d27e763406f
--- /dev/null
+++ b/include/litmus/feather_trace.h
@@ -0,0 +1,49 @@
1#ifndef _FEATHER_TRACE_H_
2#define _FEATHER_TRACE_H_
3
4#include <asm/atomic.h>
5#include <asm/feather_trace.h>
6
7int ft_enable_event(unsigned long id);
8int ft_disable_event(unsigned long id);
9int ft_is_event_enabled(unsigned long id);
10int ft_disable_all_events(void);
11
12/* atomic_* funcitons are inline anyway */
13static inline int fetch_and_inc(int *val)
14{
15 return atomic_add_return(1, (atomic_t*) val) - 1;
16}
17
18static inline int fetch_and_dec(int *val)
19{
20 return atomic_sub_return(1, (atomic_t*) val) + 1;
21}
22
23#ifndef __ARCH_HAS_FEATHER_TRACE
24/* provide default implementation */
25
26#define feather_callback
27
28#define MAX_EVENTS 1024
29
30extern int ft_events[MAX_EVENTS];
31
32#define ft_event(id, callback) \
33 if (ft_events[id]) callback();
34
35#define ft_event0(id, callback) \
36 if (ft_events[id]) callback(id);
37
38#define ft_event1(id, callback, param) \
39 if (ft_events[id]) callback(id, param);
40
41#define ft_event2(id, callback, param, param2) \
42 if (ft_events[id]) callback(id, param, param2);
43
44#define ft_event3(id, callback, p, p2, p3) \
45 if (ft_events[id]) callback(id, p, p2, p3);
46
47#endif
48
49#endif
diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h
new file mode 100644
index 000000000000..7697b4616699
--- /dev/null
+++ b/include/litmus/ftdev.h
@@ -0,0 +1,49 @@
1#ifndef _LITMUS_FTDEV_H_
2#define _LITMUS_FTDEV_H_
3
4#include <litmus/feather_trace.h>
5#include <litmus/feather_buffer.h>
6#include <linux/mutex.h>
7#include <linux/cdev.h>
8
9#define MAX_FTDEV_MINORS NR_CPUS
10
11#define FTDEV_ENABLE_CMD 0
12#define FTDEV_DISABLE_CMD 1
13
14struct ftdev;
15
16/* return 0 if buffer can be opened, otherwise -$REASON */
17typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no);
18/* return 0 on success, otherwise -$REASON */
19typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no);
20typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no);
21
22
23struct ftdev_event;
24
25struct ftdev_minor {
26 struct ft_buffer* buf;
27 unsigned int readers;
28 struct mutex lock;
29 /* FIXME: filter for authorized events */
30 struct ftdev_event* events;
31};
32
33struct ftdev {
34 struct cdev cdev;
35 /* FIXME: don't waste memory, allocate dynamically */
36 struct ftdev_minor minor[MAX_FTDEV_MINORS];
37 unsigned int minor_cnt;
38 ftdev_alloc_t alloc;
39 ftdev_free_t free;
40 ftdev_can_open_t can_open;
41};
42
43struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size);
44void free_ft_buffer(struct ft_buffer* buf);
45
46void ftdev_init(struct ftdev* ftdev, struct module* owner);
47int register_ftdev(struct ftdev* ftdev, const char* name, int major);
48
49#endif
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
new file mode 100644
index 000000000000..9bd361ef3943
--- /dev/null
+++ b/include/litmus/jobs.h
@@ -0,0 +1,9 @@
1#ifndef __LITMUS_JOBS_H__
2#define __LITMUS_JOBS_H__
3
4void prepare_for_next_period(struct task_struct *t);
5void release_at(struct task_struct *t, lt_t start);
6long complete_job(void);
7
8#endif
9
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
new file mode 100644
index 000000000000..d515d1af1096
--- /dev/null
+++ b/include/litmus/litmus.h
@@ -0,0 +1,254 @@
1/*
2 * Constant definitions related to
3 * scheduling policy.
4 */
5
6#ifndef _LINUX_LITMUS_H_
7#define _LINUX_LITMUS_H_
8
9#include <linux/jiffies.h>
10#include <litmus/sched_trace.h>
11
12extern atomic_t release_master_cpu;
13
14extern atomic_t __log_seq_no;
15
16#define TRACE(fmt, args...) \
17 sched_trace_log_message("%d P%d: " fmt, atomic_add_return(1, &__log_seq_no), \
18 raw_smp_processor_id(), ## args)
19
20#define TRACE_TASK(t, fmt, args...) \
21 TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args)
22
23#define TRACE_CUR(fmt, args...) \
24 TRACE_TASK(current, fmt, ## args)
25
26#define TRACE_BUG_ON(cond) \
27 do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \
28 "called from %p current=%s/%d state=%d " \
29 "flags=%x partition=%d cpu=%d rtflags=%d"\
30 " job=%u timeslice=%u\n", \
31 #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \
32 current->pid, current->state, current->flags, \
33 get_partition(current), smp_processor_id(), get_rt_flags(current), \
34 current->rt_param.job_params.job_no, \
35 current->rt.time_slice\
36 ); } while(0);
37
38
39/* in_list - is a given list_head queued on some list?
40 */
41static inline int in_list(struct list_head* list)
42{
43 return !( /* case 1: deleted */
44 (list->next == LIST_POISON1 &&
45 list->prev == LIST_POISON2)
46 ||
47 /* case 2: initialized */
48 (list->next == list &&
49 list->prev == list)
50 );
51}
52
53#define NO_CPU 0xffffffff
54
55void litmus_fork(struct task_struct *tsk);
56void litmus_exec(void);
57/* clean up real-time state of a task */
58void exit_litmus(struct task_struct *dead_tsk);
59
60long litmus_admit_task(struct task_struct *tsk);
61void litmus_exit_task(struct task_struct *tsk);
62
63#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
64#define rt_transition_pending(t) \
65 ((t)->rt_param.transition_pending)
66
67#define tsk_rt(t) (&(t)->rt_param)
68
69/* Realtime utility macros */
70#define get_rt_flags(t) (tsk_rt(t)->flags)
71#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f))
72#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost)
73#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
74#define get_rt_period(t) (tsk_rt(t)->task_params.period)
75#define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
76#define get_partition(t) (tsk_rt(t)->task_params.cpu)
77#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
78#define get_release(t) (tsk_rt(t)->job_params.release)
79#define get_class(t) (tsk_rt(t)->task_params.cls)
80
81inline static int budget_exhausted(struct task_struct* t)
82{
83 return get_exec_time(t) >= get_exec_cost(t);
84}
85
86#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
87
88
89#define is_hrt(t) \
90 (tsk_rt(t)->task_params.class == RT_CLASS_HARD)
91#define is_srt(t) \
92 (tsk_rt(t)->task_params.class == RT_CLASS_SOFT)
93#define is_be(t) \
94 (tsk_rt(t)->task_params.class == RT_CLASS_BEST_EFFORT)
95
96/* Our notion of time within LITMUS: kernel monotonic time. */
97static inline lt_t litmus_clock(void)
98{
99 return ktime_to_ns(ktime_get());
100}
101
102/* A macro to convert from nanoseconds to ktime_t. */
103#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
104
105#define get_domain(t) (tsk_rt(t)->domain)
106
107/* Honor the flag in the preempt_count variable that is set
108 * when scheduling is in progress.
109 */
110#define is_running(t) \
111 ((t)->state == TASK_RUNNING || \
112 task_thread_info(t)->preempt_count & PREEMPT_ACTIVE)
113
114#define is_blocked(t) \
115 (!is_running(t))
116#define is_released(t, now) \
117 (lt_before_eq(get_release(t), now))
118#define is_tardy(t, now) \
119 (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
120
121/* real-time comparison macros */
122#define earlier_deadline(a, b) (lt_before(\
123 (a)->rt_param.job_params.deadline,\
124 (b)->rt_param.job_params.deadline))
125#define earlier_release(a, b) (lt_before(\
126 (a)->rt_param.job_params.release,\
127 (b)->rt_param.job_params.release))
128
129void preempt_if_preemptable(struct task_struct* t, int on_cpu);
130
131#ifdef CONFIG_SRP
132void srp_ceiling_block(void);
133#else
134#define srp_ceiling_block() /* nothing */
135#endif
136
137#define bheap2task(hn) ((struct task_struct*) hn->value)
138
139#ifdef CONFIG_NP_SECTION
140
141static inline int is_kernel_np(struct task_struct *t)
142{
143 return tsk_rt(t)->kernel_np;
144}
145
146static inline int is_user_np(struct task_struct *t)
147{
148 return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->np_flag : 0;
149}
150
151static inline void request_exit_np(struct task_struct *t)
152{
153 if (is_user_np(t)) {
154 /* Set the flag that tells user space to call
155 * into the kernel at the end of a critical section. */
156 if (likely(tsk_rt(t)->ctrl_page)) {
157 TRACE_TASK(t, "setting delayed_preemption flag\n");
158 tsk_rt(t)->ctrl_page->delayed_preemption = 1;
159 }
160 }
161}
162
163static inline void clear_exit_np(struct task_struct *t)
164{
165 if (likely(tsk_rt(t)->ctrl_page))
166 tsk_rt(t)->ctrl_page->delayed_preemption = 0;
167}
168
169static inline void make_np(struct task_struct *t)
170{
171 tsk_rt(t)->kernel_np++;
172}
173
174/* Caller should check if preemption is necessary when
175 * the function return 0.
176 */
177static inline int take_np(struct task_struct *t)
178{
179 return --tsk_rt(t)->kernel_np;
180}
181
182#else
183
184static inline int is_kernel_np(struct task_struct* t)
185{
186 return 0;
187}
188
189static inline int is_user_np(struct task_struct* t)
190{
191 return 0;
192}
193
194static inline void request_exit_np(struct task_struct *t)
195{
196 /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
197 BUG();
198}
199
200static inline void clear_exit_np(struct task_struct* t)
201{
202}
203
204#endif
205
206static inline int is_np(struct task_struct *t)
207{
208#ifdef CONFIG_SCHED_DEBUG_TRACE
209 int kernel, user;
210 kernel = is_kernel_np(t);
211 user = is_user_np(t);
212 if (kernel || user)
213 TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
214
215 kernel, user);
216 return kernel || user;
217#else
218 return unlikely(is_kernel_np(t) || is_user_np(t));
219#endif
220}
221
222static inline int is_present(struct task_struct* t)
223{
224 return t && tsk_rt(t)->present;
225}
226
227
228/* make the unit explicit */
229typedef unsigned long quanta_t;
230
231enum round {
232 FLOOR,
233 CEIL
234};
235
236
237/* Tick period is used to convert ns-specified execution
238 * costs and periods into tick-based equivalents.
239 */
240extern ktime_t tick_period;
241
242static inline quanta_t time2quanta(lt_t time, enum round round)
243{
244 s64 quantum_length = ktime_to_ns(tick_period);
245
246 if (do_div(time, quantum_length) && round == CEIL)
247 time++;
248 return (quanta_t) time;
249}
250
251/* By how much is cpu staggered behind CPU 0? */
252u64 cpu_stagger_offset(int cpu);
253
254#endif
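To make the rounding in time2quanta() above concrete: assuming a 1 ms scheduling quantum (i.e., ktime_to_ns(tick_period) == 1000000, an assumption for this illustration), a 2.5 ms value maps to 2 quanta with FLOOR and 3 with CEIL. A minimal kernel-context sketch, not part of the patch:

	lt_t cost = 2500000;                    /* 2.5 ms, in ns            */
	quanta_t lo = time2quanta(cost, FLOOR); /* remainder dropped   -> 2 */
	quanta_t hi = time2quanta(cost, CEIL);  /* remainder rounds up -> 3 */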
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
new file mode 100644
index 000000000000..b452be1d2256
--- /dev/null
+++ b/include/litmus/rt_domain.h
@@ -0,0 +1,162 @@
1/* CLEANUP: Add comments and make it less messy.
2 *
3 */
4
5#ifndef __UNC_RT_DOMAIN_H__
6#define __UNC_RT_DOMAIN_H__
7
8#include <litmus/bheap.h>
9
10#define RELEASE_QUEUE_SLOTS 127 /* prime */
11
12struct _rt_domain;
13
14typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
15typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
16
17struct release_queue {
18 /* each slot maintains a list of release heaps sorted
19 * by release time */
20 struct list_head slot[RELEASE_QUEUE_SLOTS];
21};
22
23typedef struct _rt_domain {
24 /* runnable rt tasks are in here */
25 spinlock_t ready_lock;
26 struct bheap ready_queue;
27
28 /* real-time tasks waiting for release are in here */
29 spinlock_t release_lock;
30 struct release_queue release_queue;
31 int release_master;
32
33 /* for moving tasks to the release queue */
34 spinlock_t tobe_lock;
35 struct list_head tobe_released;
36
37 /* how do we check if we need to kick another CPU? */
38 check_resched_needed_t check_resched;
39
40 /* how do we release jobs? */
41 release_jobs_t release_jobs;
42
43 /* how are tasks ordered in the ready queue? */
44 bheap_prio_t order;
45} rt_domain_t;
46
47struct release_heap {
48 /* list_head for per-time-slot list */
49 struct list_head list;
50 lt_t release_time;
51 /* all tasks to be released at release_time */
52 struct bheap heap;
53 /* used to trigger the release */
54 struct hrtimer timer;
55 /* used to delegate releases */
56 struct hrtimer_start_on_info info;
57 /* required for the timer callback */
58 rt_domain_t* dom;
59};
60
61
62static inline struct task_struct* __next_ready(rt_domain_t* rt)
63{
64 struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
65 if (hn)
66 return bheap2task(hn);
67 else
68 return NULL;
69}
70
71void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
72 check_resched_needed_t check,
73			release_jobs_t release);
74
75void __add_ready(rt_domain_t* rt, struct task_struct *new);
76void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
77void __add_release(rt_domain_t* rt, struct task_struct *task);
78
79static inline struct task_struct* __take_ready(rt_domain_t* rt)
80{
81 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
82 if (hn)
83 return bheap2task(hn);
84 else
85 return NULL;
86}
87
88static inline struct task_struct* __peek_ready(rt_domain_t* rt)
89{
90 struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
91 if (hn)
92 return bheap2task(hn);
93 else
94 return NULL;
95}
96
97static inline int is_queued(struct task_struct *t)
98{
99 BUG_ON(!tsk_rt(t)->heap_node);
100 return bheap_node_in_heap(tsk_rt(t)->heap_node);
101}
102
103static inline void remove(rt_domain_t* rt, struct task_struct *t)
104{
105 bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
106}
107
108static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
109{
110 unsigned long flags;
111 /* first we need the write lock for rt_ready_queue */
112 spin_lock_irqsave(&rt->ready_lock, flags);
113 __add_ready(rt, new);
114 spin_unlock_irqrestore(&rt->ready_lock, flags);
115}
116
117static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
118{
119 unsigned long flags;
120 spin_lock_irqsave(&rt->ready_lock, flags);
121 __merge_ready(rt, tasks);
122 spin_unlock_irqrestore(&rt->ready_lock, flags);
123}
124
125static inline struct task_struct* take_ready(rt_domain_t* rt)
126{
127 unsigned long flags;
128 struct task_struct* ret;
129 /* first we need the write lock for rt_ready_queue */
130 spin_lock_irqsave(&rt->ready_lock, flags);
131 ret = __take_ready(rt);
132 spin_unlock_irqrestore(&rt->ready_lock, flags);
133 return ret;
134}
135
136
137static inline void add_release(rt_domain_t* rt, struct task_struct *task)
138{
139 unsigned long flags;
140 /* first we need the write lock for rt_ready_queue */
141 spin_lock_irqsave(&rt->tobe_lock, flags);
142 __add_release(rt, task);
143 spin_unlock_irqrestore(&rt->tobe_lock, flags);
144}
145
146static inline int __jobs_pending(rt_domain_t* rt)
147{
148 return !bheap_empty(&rt->ready_queue);
149}
150
151static inline int jobs_pending(rt_domain_t* rt)
152{
153 unsigned long flags;
154 int ret;
155 /* first we need the write lock for rt_ready_queue */
156 spin_lock_irqsave(&rt->ready_lock, flags);
157 ret = !bheap_empty(&rt->ready_queue);
158 spin_unlock_irqrestore(&rt->ready_lock, flags);
159 return ret;
160}
161
162#endif
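A rough sketch, not part of the patch, of how a plugin typically drives an rt_domain_t through the helpers above. my_prio_order, my_check_resched, and my_release_jobs are hypothetical callbacks matching bheap_prio_t, check_resched_needed_t, and release_jobs_t; litmus_clock() is assumed to be the ns-resolution clock helper from litmus.h.

static rt_domain_t my_domain;

static void my_domain_setup(void)
{
	rt_domain_init(&my_domain, my_prio_order,
		       my_check_resched, my_release_jobs);
}

static void my_job_arrival(struct task_struct *t)
{
	if (is_released(t, litmus_clock()))
		add_ready(&my_domain, t);    /* takes ready_lock internally */
	else
		add_release(&my_domain, t);  /* takes tobe_lock internally  */
}

static struct task_struct* my_pick_next(void)
{
	/* use __take_ready() instead if ready_lock is already held */
	return take_ready(&my_domain);
}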
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
new file mode 100644
index 000000000000..a7a183f34a80
--- /dev/null
+++ b/include/litmus/rt_param.h
@@ -0,0 +1,196 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_RT_PARAM_H_
6#define _LINUX_RT_PARAM_H_
7
8/* Litmus time type. */
9typedef unsigned long long lt_t;
10
11static inline int lt_after(lt_t a, lt_t b)
12{
13 return ((long long) b) - ((long long) a) < 0;
14}
15#define lt_before(a, b) lt_after(b, a)
16
17static inline int lt_after_eq(lt_t a, lt_t b)
18{
19 return ((long long) a) - ((long long) b) >= 0;
20}
21#define lt_before_eq(a, b) lt_after_eq(b, a)
22
23/* different types of clients */
24typedef enum {
25 RT_CLASS_HARD,
26 RT_CLASS_SOFT,
27 RT_CLASS_BEST_EFFORT
28} task_class_t;
29
30typedef enum {
31 NO_ENFORCEMENT, /* job may overrun unhindered */
32 QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
33 PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
34} budget_policy_t;
35
36struct rt_task {
37 lt_t exec_cost;
38 lt_t period;
39 lt_t phase;
40 unsigned int cpu;
41 task_class_t cls;
42 budget_policy_t budget_policy; /* ignored by pfair */
43};
44
45/* The definition of the data that is shared between the kernel and real-time
46 * tasks via a shared page (see litmus/ctrldev.c).
47 *
48 * WARNING: User space can write to this, so don't trust
49 * the correctness of the fields!
50 *
51 * This serves two purposes: to enable efficient signaling
52 * of non-preemptive sections (user->kernel) and
53 * delayed preemptions (kernel->user), and to export
54 * some real-time relevant statistics such as preemption and
55 * migration data to user space. We can't use a device to export
56 * statistics because we want to avoid system call overhead when
57 * determining preemption/migration overheads.
58 */
59struct control_page {
60 /* Is the task currently in a non-preemptive section? */
61 int np_flag;
62 /* Should the task call into the kernel when it leaves
63 * its non-preemptive section? */
64 int delayed_preemption;
65
66 /* to be extended */
67};
68
69/* don't export internal data structures to user space (liblitmus) */
70#ifdef __KERNEL__
71
72struct _rt_domain;
73struct bheap_node;
74struct release_heap;
75
76struct rt_job {
77 /* Time instant the job was or will be released. */
78 lt_t release;
79 /* What is the current deadline? */
80 lt_t deadline;
81
82 /* How much service has this job received so far? */
83 lt_t exec_time;
84
85 /* Which job is this. This is used to let user space
86 * specify which job to wait for, which is important if jobs
87 * overrun. If we just call sys_sleep_next_period() then we
88 * will unintentionally miss jobs after an overrun.
89 *
90 * Increase this sequence number when a job is released.
91 */
92 unsigned int job_no;
93};
94
95struct pfair_param;
96
97/* RT task parameters for scheduling extensions
98 * These parameters are inherited during clone and therefore must
99 * be explicitly set up before the task set is launched.
100 */
101struct rt_param {
102 /* is the task sleeping? */
103 unsigned int flags:8;
104
105 /* do we need to check for srp blocking? */
106 unsigned int srp_non_recurse:1;
107
108 /* is the task present? (true if it can be scheduled) */
109 unsigned int present:1;
110
111 /* user controlled parameters */
112 struct rt_task task_params;
113
114 /* timing parameters */
115 struct rt_job job_params;
116
117 /* task representing the current "inherited" task
118 * priority, assigned by inherit_priority and
119 * return_priority in the scheduler plugins.
120 * could point to self if PI does not result in
121 * an increased task priority.
122 */
123 struct task_struct* inh_task;
124
125#ifdef CONFIG_NP_SECTION
126 /* For the FMLP under PSN-EDF, it is required to make the task
127 * non-preemptive from kernel space. In order not to interfere with
128 * user space, this counter indicates the kernel space np setting.
129 * kernel_np > 0 => task is non-preemptive
130 */
131 unsigned int kernel_np;
132#endif
133
134 /* This field can be used by plugins to store where the task
135 * is currently scheduled. It is the responsibility of the
136 * plugin to avoid race conditions.
137 *
138 * This is used by GSN-EDF and PFAIR.
139 */
140 volatile int scheduled_on;
141
142 /* Is the stack of the task currently in use? This is updated by
143 * the LITMUS core.
144 *
145 * Be careful to avoid deadlocks!
146 */
147 volatile int stack_in_use;
148
149 /* This field can be used by plugins to store where the task
150 * is currently linked. It is the responsibility of the plugin
151 * to avoid race conditions.
152 *
153 * Used by GSN-EDF.
154 */
155 volatile int linked_on;
156
157 /* PFAIR/PD^2 state. Allocated on demand. */
158 struct pfair_param* pfair;
159
160 /* Fields saved before BE->RT transition.
161 */
162 int old_policy;
163 int old_prio;
164
165 /* ready queue for this task */
166 struct _rt_domain* domain;
167
168 /* heap element for this task
169 *
170 * Warning: Don't statically allocate this node. The heap
171 * implementation swaps these between tasks, thus after
172 * dequeuing from a heap you may end up with a different node
173 * than the one you had when enqueuing the task. For the same
174 * reason, don't obtain and store references to this node
175 * other than this pointer (which is updated by the heap
176 * implementation).
177 */
178 struct bheap_node* heap_node;
179 struct release_heap* rel_heap;
180
181 /* Used by rt_domain to queue task in release list.
182 */
183 struct list_head list;
184
185 /* Pointer to the page shared between userspace and kernel. */
186 struct control_page * ctrl_page;
187};
188
189/* Possible RT flags */
190#define RT_F_RUNNING 0x00000000
191#define RT_F_SLEEP 0x00000001
192#define RT_F_EXIT_SEM 0x00000008
193
194#endif
195
196#endif
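A small user-space sketch, not part of the patch, showing the user-controlled half of the parameters above for a 2 ms / 10 ms soft real-time task; how the struct reaches the kernel (sys_set_rt_task_param, normally wrapped by liblitmus) is outside this header:

	struct rt_task params = {
		.exec_cost     = 2000000,        /* 2 ms worst-case cost, in ns          */
		.period        = 10000000,       /* 10 ms period, in ns                  */
		.phase         = 0,              /* no release offset                    */
		.cpu           = 0,              /* partition (partitioned plugins only) */
		.cls           = RT_CLASS_SOFT,
		.budget_policy = NO_ENFORCEMENT,
	};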
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
new file mode 100644
index 000000000000..9c1c9f28ba79
--- /dev/null
+++ b/include/litmus/sched_plugin.h
@@ -0,0 +1,162 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_SCHED_PLUGIN_H_
6#define _LINUX_SCHED_PLUGIN_H_
7
8#include <linux/sched.h>
9
10/* struct for semaphore with priority inheritance */
11struct pi_semaphore {
12 atomic_t count;
13 int sleepers;
14 wait_queue_head_t wait;
15 struct {
16 /* highest-prio holder/waiter */
17 struct task_struct *task;
18 struct task_struct* cpu_task[NR_CPUS];
19 } hp;
20 /* current lock holder */
21 struct task_struct *holder;
22};
23
24/************************ setup/tear down ********************/
25
26typedef long (*activate_plugin_t) (void);
27typedef long (*deactivate_plugin_t) (void);
28
29
30
31/********************* scheduler invocation ******************/
32
33/* Plugin-specific realtime tick handler */
34typedef void (*scheduler_tick_t) (struct task_struct *cur);
35/* Plugin-specific scheduling decision function */
36typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
37/* Clean up after the task switch has occurred.
38 * This function is called after every (even non-rt) task switch.
39 */
40typedef void (*finish_switch_t)(struct task_struct *prev);
41
42
43/********************* task state changes ********************/
44
45/* Called to setup a new real-time task.
46 * Release the first job, enqueue, etc.
47 * Task may already be running.
48 */
49typedef void (*task_new_t) (struct task_struct *task,
50 int on_rq,
51 int running);
52
53/* Called to re-introduce a task after blocking.
54 * Can potentially be called multiple times.
55 */
56typedef void (*task_wake_up_t) (struct task_struct *task);
57/* Called to notify the plugin of a blocking real-time task.
58 * It is only called for real-time tasks and before schedule() is invoked. */
59typedef void (*task_block_t) (struct task_struct *task);
60/* Called when a real-time task exits or changes to a different scheduling
61 * class.
62 * Free any allocated resources
63 */
64typedef void (*task_exit_t) (struct task_struct *);
65
66/* Called when the new_owner is released from the wait queue.
67 * It should now inherit the priority from sem _before_ it gets re-added
68 * to any queue.
69 */
70typedef long (*inherit_priority_t) (struct pi_semaphore *sem,
71 struct task_struct *new_owner);
72
73/* Called when the current task releases a semaphore from which it might
74 * have inherited a priority.
75 */
76typedef long (*return_priority_t) (struct pi_semaphore *sem);
77
78/* Called when a task tries to acquire a semaphore and fails. Check if its
79 * priority is higher than that of the current holder.
80 */
81typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t);
82
83
84
85
86/********************* sys call backends ********************/
87/* This function causes the caller to sleep until the next release */
88typedef long (*complete_job_t) (void);
89
90typedef long (*admit_task_t)(struct task_struct* tsk);
91
92typedef void (*release_at_t)(struct task_struct *t, lt_t start);
93
94struct sched_plugin {
95 struct list_head list;
96 /* basic info */
97 char *plugin_name;
98
99 /* setup */
100 activate_plugin_t activate_plugin;
101 deactivate_plugin_t deactivate_plugin;
102
103#ifdef CONFIG_SRP
104 unsigned int srp_active;
105#endif
106
107 /* scheduler invocation */
108 scheduler_tick_t tick;
109 schedule_t schedule;
110 finish_switch_t finish_switch;
111
112 /* syscall backend */
113 complete_job_t complete_job;
114 release_at_t release_at;
115
116 /* task state changes */
117 admit_task_t admit_task;
118
119 task_new_t task_new;
120 task_wake_up_t task_wake_up;
121 task_block_t task_block;
122 task_exit_t task_exit;
123
124#ifdef CONFIG_FMLP
125 /* priority inheritance */
126 unsigned int fmlp_active;
127 inherit_priority_t inherit_priority;
128 return_priority_t return_priority;
129 pi_block_t pi_block;
130#endif
131} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
132
133
134extern struct sched_plugin *litmus;
135
136/* cluster size: cache_index = 2 L2, cache_index = 3 L3 */
137extern int cluster_cache_index;
138
139int register_sched_plugin(struct sched_plugin* plugin);
140struct sched_plugin* find_sched_plugin(const char* name);
141int print_sched_plugins(char* buf, int max);
142
143static inline int srp_active(void)
144{
145#ifdef CONFIG_SRP
146 return litmus->srp_active;
147#else
148 return 0;
149#endif
150}
151static inline int fmlp_active(void)
152{
153#ifdef CONFIG_FMLP
154 return litmus->fmlp_active;
155#else
156 return 0;
157#endif
158}
159
160extern struct sched_plugin linux_sched_plugin;
161
162#endif
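A skeleton sketch, not part of the patch, of how a plugin built against this interface registers itself; every demo_* symbol is hypothetical and stands in for a callback of the corresponding typedef above.

static struct sched_plugin demo_plugin = {
	.plugin_name     = "DEMO",
	.activate_plugin = demo_activate,
	.schedule        = demo_schedule,
	.tick            = demo_tick,
	.admit_task      = demo_admit_task,
	.task_new        = demo_task_new,
	.task_wake_up    = demo_task_wake_up,
	.task_block      = demo_task_block,
	.task_exit       = demo_task_exit,
	.complete_job    = demo_complete_job,
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);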
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
new file mode 100644
index 000000000000..e1b0c9712b5f
--- /dev/null
+++ b/include/litmus/sched_trace.h
@@ -0,0 +1,192 @@
1/*
2 * sched_trace.h -- record scheduler events to a byte stream for offline analysis.
3 */
4#ifndef _LINUX_SCHED_TRACE_H_
5#define _LINUX_SCHED_TRACE_H_
6
7/* all times in nanoseconds */
8
9struct st_trace_header {
10 u8 type; /* Of what type is this record? */
11 u8 cpu; /* On which CPU was it recorded? */
12 u16 pid; /* PID of the task. */
13 u32 job; /* The job sequence number. */
14};
15
16#define ST_NAME_LEN 16
17struct st_name_data {
18 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
19};
20
21struct st_param_data { /* regular params */
22 u32 wcet;
23 u32 period;
24 u32 phase;
25 u8 partition;
26 u8 __unused[3];
27};
28
29struct st_release_data { /* A job was/is going to be released. */
30 u64 release; /* What's the release time? */
31 u64 deadline; /* By when must it finish? */
32};
33
34struct st_assigned_data { /* A job was assigned to a CPU. */
35 u64 when;
36 u8 target; /* Where should it execute? */
37 u8 __unused[3];
38};
39
40struct st_switch_to_data { /* A process was switched to on a given CPU. */
41 u64 when; /* When did this occur? */
42 u32 exec_time; /* Time the current job has executed. */
43
44};
45
46struct st_switch_away_data { /* A process was switched away from on a given CPU. */
47 u64 when;
48 u64 exec_time;
49};
50
51struct st_completion_data { /* A job completed. */
52 u64 when;
53 u8 forced:1; /* Set to 1 if job overran and kernel advanced to the
54 * next task automatically; set to 0 otherwise.
55 */
56 u8 __uflags:7;
57 u8 __unused[3];
58};
59
60struct st_block_data { /* A task blocks. */
61 u64 when;
62 u64 __unused;
63};
64
65struct st_resume_data { /* A task resumes. */
66 u64 when;
67 u64 __unused;
68};
69
70struct st_sys_release_data {
71 u64 when;
72 u64 release;
73};
74
75#define DATA(x) struct st_ ## x ## _data x;
76
77typedef enum {
78 ST_NAME = 1, /* Start at one, so that we can spot
79 * uninitialized records. */
80 ST_PARAM,
81 ST_RELEASE,
82 ST_ASSIGNED,
83 ST_SWITCH_TO,
84 ST_SWITCH_AWAY,
85 ST_COMPLETION,
86 ST_BLOCK,
87 ST_RESUME,
88 ST_SYS_RELEASE,
89} st_event_record_type_t;
90
91struct st_event_record {
92 struct st_trace_header hdr;
93 union {
94 u64 raw[2];
95
96 DATA(name);
97 DATA(param);
98 DATA(release);
99 DATA(assigned);
100 DATA(switch_to);
101 DATA(switch_away);
102 DATA(completion);
103 DATA(block);
104 DATA(resume);
105 DATA(sys_release);
106
107 } data;
108};
109
110#undef DATA
111
112#ifdef __KERNEL__
113
114#include <linux/sched.h>
115#include <litmus/feather_trace.h>
116
117#ifdef CONFIG_SCHED_TASK_TRACE
118
119#define SCHED_TRACE(id, callback, task) \
120 ft_event1(id, callback, task)
121#define SCHED_TRACE2(id, callback, task, xtra) \
122 ft_event2(id, callback, task, xtra)
123
124/* provide prototypes; needed on sparc64 */
125#ifndef NO_TASK_TRACE_DECLS
126feather_callback void do_sched_trace_task_name(unsigned long id,
127 struct task_struct* task);
128feather_callback void do_sched_trace_task_param(unsigned long id,
129 struct task_struct* task);
130feather_callback void do_sched_trace_task_release(unsigned long id,
131 struct task_struct* task);
132feather_callback void do_sched_trace_task_switch_to(unsigned long id,
133 struct task_struct* task);
134feather_callback void do_sched_trace_task_switch_away(unsigned long id,
135 struct task_struct* task);
136feather_callback void do_sched_trace_task_completion(unsigned long id,
137 struct task_struct* task,
138 unsigned long forced);
139feather_callback void do_sched_trace_task_block(unsigned long id,
140 struct task_struct* task);
141feather_callback void do_sched_trace_task_resume(unsigned long id,
142 struct task_struct* task);
143feather_callback void do_sched_trace_sys_release(unsigned long id,
144 lt_t* start);
145#endif
146
147#else
148
149#define SCHED_TRACE(id, callback, task) /* no tracing */
150#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
151
152#endif
153
154
155#define SCHED_TRACE_BASE_ID 500
156
157
158#define sched_trace_task_name(t) \
159 SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t)
160#define sched_trace_task_param(t) \
161 SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t)
162#define sched_trace_task_release(t) \
163 SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t)
164#define sched_trace_task_switch_to(t) \
165 SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t)
166#define sched_trace_task_switch_away(t) \
167 SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t)
168#define sched_trace_task_completion(t, forced) \
169 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \
170 (unsigned long) forced)
171#define sched_trace_task_block(t) \
172 SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, do_sched_trace_task_block, t)
173#define sched_trace_task_resume(t) \
174 SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t)
175/* when is a pointer, it does not need an explicit cast to unsigned long */
176#define sched_trace_sys_release(when) \
177 SCHED_TRACE(SCHED_TRACE_BASE_ID + 9, do_sched_trace_sys_release, when)
178
179#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
180
181#ifdef CONFIG_SCHED_DEBUG_TRACE
182void sched_trace_log_message(const char* fmt, ...);
183void dump_trace_buffer(int max);
184#else
185
186#define sched_trace_log_message(fmt, ...)
187
188#endif
189
190#endif /* __KERNEL__ */
191
192#endif
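Because every entry in the stream is a fixed-size struct st_event_record, offline analysis reduces to reading records and switching on hdr.type. A user-space sketch, not part of the patch; the dump file name is made up, and the u8/u16/u32/u64 typedefs are supplied locally because the header only gets them in kernel builds.

#include <stdio.h>
#include <stdint.h>

typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
#include <litmus/sched_trace.h>

int main(void)
{
	struct st_event_record rec;
	FILE *f = fopen("st-trace.bin", "rb");	/* hypothetical dump file */
	if (!f)
		return 1;
	while (fread(&rec, sizeof(rec), 1, f) == 1)
		if (rec.hdr.type == ST_COMPLETION)
			printf("pid %u job %u completed at %llu\n",
			       (unsigned) rec.hdr.pid, (unsigned) rec.hdr.job,
			       (unsigned long long) rec.data.completion.when);
	fclose(f);
	return 0;
}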
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
new file mode 100644
index 000000000000..b32c71180774
--- /dev/null
+++ b/include/litmus/trace.h
@@ -0,0 +1,113 @@
1#ifndef _SYS_TRACE_H_
2#define _SYS_TRACE_H_
3
4#ifdef CONFIG_SCHED_OVERHEAD_TRACE
5
6#include <litmus/feather_trace.h>
7#include <litmus/feather_buffer.h>
8
9
10/*********************** TIMESTAMPS ************************/
11
12enum task_type_marker {
13 TSK_BE,
14 TSK_RT,
15 TSK_UNKNOWN
16};
17
18struct timestamp {
19 uint64_t timestamp;
20 uint32_t seq_no;
21 uint8_t cpu;
22 uint8_t event;
23 uint8_t task_type;
24};
25
26/* tracing callbacks */
27feather_callback void save_timestamp(unsigned long event);
28feather_callback void save_timestamp_def(unsigned long event, unsigned long type);
29feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr);
30feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu);
31
32
33#define TIMESTAMP(id) ft_event0(id, save_timestamp)
34
35#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def)
36
37#define TTIMESTAMP(id, task) \
38 ft_event1(id, save_timestamp_task, (unsigned long) task)
39
40#define CTIMESTAMP(id, cpu) \
41 ft_event1(id, save_timestamp_cpu, (unsigned long) cpu)
42
43#else /* !CONFIG_SCHED_OVERHEAD_TRACE */
44
45#define TIMESTAMP(id) /* no tracing */
46
47#define DTIMESTAMP(id, def) /* no tracing */
48
49#define TTIMESTAMP(id, task) /* no tracing */
50
51#define CTIMESTAMP(id, cpu) /* no tracing */
52
53#endif
54
55
56/* Convention for timestamps
57 * =========================
58 *
59 * In order to process the trace files with a common tool, we use the following
60 * convention to measure execution times: The end time id of a code segment is
61 * always the next number after the start time event id.
62 */
63
64#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only
65 * care
66 * about
67 * next */
68#define TS_SCHED_END(t) TTIMESTAMP(101, t)
69#define TS_SCHED2_START(t) TTIMESTAMP(102, t)
70#define TS_SCHED2_END(t) TTIMESTAMP(103, t)
71
72#define TS_CXS_START(t) TTIMESTAMP(104, t)
73#define TS_CXS_END(t) TTIMESTAMP(105, t)
74
75#define TS_RELEASE_START DTIMESTAMP(106, TSK_RT)
76#define TS_RELEASE_END DTIMESTAMP(107, TSK_RT)
77
78#define TS_TICK_START(t) TTIMESTAMP(110, t)
79#define TS_TICK_END(t) TTIMESTAMP(111, t)
80
81
82#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
83#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
84
85#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */
86#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */
87
88#define TS_ENTER_NP_START TIMESTAMP(140)
89#define TS_ENTER_NP_END TIMESTAMP(141)
90
91#define TS_EXIT_NP_START TIMESTAMP(150)
92#define TS_EXIT_NP_END TIMESTAMP(151)
93
94#define TS_SRP_UP_START TIMESTAMP(160)
95#define TS_SRP_UP_END TIMESTAMP(161)
96#define TS_SRP_DOWN_START TIMESTAMP(162)
97#define TS_SRP_DOWN_END TIMESTAMP(163)
98
99#define TS_PI_UP_START TIMESTAMP(170)
100#define TS_PI_UP_END TIMESTAMP(171)
101#define TS_PI_DOWN_START TIMESTAMP(172)
102#define TS_PI_DOWN_END TIMESTAMP(173)
103
104#define TS_FIFO_UP_START TIMESTAMP(180)
105#define TS_FIFO_UP_END TIMESTAMP(181)
106#define TS_FIFO_DOWN_START TIMESTAMP(182)
107#define TS_FIFO_DOWN_END TIMESTAMP(183)
108
109#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c)
110#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
111
112
113#endif /* !_SYS_TRACE_H_ */
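Following the convention above (end id = start id + 1), instrumenting a new code section only requires a fresh pair of consecutive event ids. A sketch, not part of the patch; ids 200/201 are unused above and the TS_MY_SECTION_* names are hypothetical:

#define TS_MY_SECTION_START	TIMESTAMP(200)
#define TS_MY_SECTION_END	TIMESTAMP(201)

static void my_measured_path(void)
{
	TS_MY_SECTION_START;
	/* ... code whose overhead is being measured ... */
	TS_MY_SECTION_END;
}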
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
new file mode 100644
index 000000000000..dbddc6523f8e
--- /dev/null
+++ b/include/litmus/unistd_32.h
@@ -0,0 +1,23 @@
1/*
2 * included from arch/x86/include/asm/unistd_32.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
6#define __LSC(x) (__NR_LITMUS + x)
7
8#define __NR_set_rt_task_param __LSC(0)
9#define __NR_get_rt_task_param __LSC(1)
10#define __NR_complete_job __LSC(2)
11#define __NR_od_open __LSC(3)
12#define __NR_od_close __LSC(4)
13#define __NR_fmlp_down __LSC(5)
14#define __NR_fmlp_up __LSC(6)
15#define __NR_srp_down __LSC(7)
16#define __NR_srp_up __LSC(8)
17#define __NR_query_job_no __LSC(9)
18#define __NR_wait_for_job_release __LSC(10)
19#define __NR_wait_for_ts_release __LSC(11)
20#define __NR_release_ts __LSC(12)
21#define __NR_null_call __LSC(13)
22
23#define NR_litmus_syscalls 14
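Because the numbers above are assigned relative to __NR_LITMUS, user space can reach the new system calls with a plain syscall(2) once the patched headers are visible; liblitmus normally hides this. A sketch, not part of the patch:

#include <unistd.h>
#include <sys/syscall.h>

/* __NR_complete_job == __NR_LITMUS + 2, per the table above */
static long finish_current_job(void)
{
	return syscall(__NR_complete_job);	/* sleep until the next release */
}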
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
new file mode 100644
index 000000000000..f0618e75348d
--- /dev/null
+++ b/include/litmus/unistd_64.h
@@ -0,0 +1,37 @@
1/*
2 * included from arch/x86/include/asm/unistd_64.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
6#define __LSC(x) (__NR_LITMUS + x)
7
8#define __NR_set_rt_task_param __LSC(0)
9__SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param)
10#define __NR_get_rt_task_param __LSC(1)
11__SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param)
12#define __NR_complete_job __LSC(2)
13__SYSCALL(__NR_complete_job, sys_complete_job)
14#define __NR_od_open __LSC(3)
15__SYSCALL(__NR_od_open, sys_od_open)
16#define __NR_od_close __LSC(4)
17__SYSCALL(__NR_od_close, sys_od_close)
18#define __NR_fmlp_down __LSC(5)
19__SYSCALL(__NR_fmlp_down, sys_fmlp_down)
20#define __NR_fmlp_up __LSC(6)
21__SYSCALL(__NR_fmlp_up, sys_fmlp_up)
22#define __NR_srp_down __LSC(7)
23__SYSCALL(__NR_srp_down, sys_srp_down)
24#define __NR_srp_up __LSC(8)
25__SYSCALL(__NR_srp_up, sys_srp_up)
26#define __NR_query_job_no __LSC(9)
27__SYSCALL(__NR_query_job_no, sys_query_job_no)
28#define __NR_wait_for_job_release __LSC(10)
29__SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release)
30#define __NR_wait_for_ts_release __LSC(11)
31__SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
32#define __NR_release_ts __LSC(12)
33__SYSCALL(__NR_release_ts, sys_release_ts)
34#define __NR_null_call __LSC(13)
35__SYSCALL(__NR_null_call, sys_null_call)
36
37#define NR_litmus_syscalls 14
diff --git a/kernel/exit.c b/kernel/exit.c
index 7f2683a10ac4..256ce8c2ebc8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -57,6 +57,8 @@
57#include <asm/mmu_context.h> 57#include <asm/mmu_context.h>
58#include "cred-internals.h" 58#include "cred-internals.h"
59 59
60extern void exit_od_table(struct task_struct *t);
61
60static void exit_mm(struct task_struct * tsk); 62static void exit_mm(struct task_struct * tsk);
61 63
62static void __unhash_process(struct task_struct *p) 64static void __unhash_process(struct task_struct *p)
@@ -968,6 +970,8 @@ NORET_TYPE void do_exit(long code)
968 if (unlikely(tsk->audit_context)) 970 if (unlikely(tsk->audit_context))
969 audit_free(tsk); 971 audit_free(tsk);
970 972
973 exit_od_table(tsk);
974
971 tsk->exit_code = code; 975 tsk->exit_code = code;
972 taskstats_exit(tsk, group_dead); 976 taskstats_exit(tsk, group_dead);
973 977
diff --git a/kernel/fork.c b/kernel/fork.c
index 4c14942a0ee3..166eb780dd7d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -75,6 +75,9 @@
75 75
76#include <trace/events/sched.h> 76#include <trace/events/sched.h>
77 77
78#include <litmus/litmus.h>
79#include <litmus/sched_plugin.h>
80
78/* 81/*
79 * Protected counters by write_lock_irq(&tasklist_lock) 82 * Protected counters by write_lock_irq(&tasklist_lock)
80 */ 83 */
@@ -171,6 +174,7 @@ void __put_task_struct(struct task_struct *tsk)
171 WARN_ON(atomic_read(&tsk->usage)); 174 WARN_ON(atomic_read(&tsk->usage));
172 WARN_ON(tsk == current); 175 WARN_ON(tsk == current);
173 176
177 exit_litmus(tsk);
174 exit_creds(tsk); 178 exit_creds(tsk);
175 delayacct_tsk_free(tsk); 179 delayacct_tsk_free(tsk);
176 180
@@ -253,6 +257,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
253 257
254 tsk->stack = ti; 258 tsk->stack = ti;
255 259
260 /* Don't let the new task be a real-time task. */
261 litmus_fork(tsk);
262
256 err = prop_local_init_single(&tsk->dirties); 263 err = prop_local_init_single(&tsk->dirties);
257 if (err) 264 if (err)
258 goto out; 265 goto out;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0086628b6e97..c0b440b1f6ee 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -46,6 +46,8 @@
46#include <linux/sched.h> 46#include <linux/sched.h>
47#include <linux/timer.h> 47#include <linux/timer.h>
48 48
49#include <litmus/litmus.h>
50
49#include <asm/uaccess.h> 51#include <asm/uaccess.h>
50 52
51#include <trace/events/timer.h> 53#include <trace/events/timer.h>
@@ -1041,6 +1043,85 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1041} 1043}
1042EXPORT_SYMBOL_GPL(hrtimer_start); 1044EXPORT_SYMBOL_GPL(hrtimer_start);
1043 1045
1046/**
1047 * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu
1048 */
1049void hrtimer_pull(void)
1050{
1051 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
1052 struct hrtimer_start_on_info *info;
1053 struct list_head *pos, *safe, list;
1054
1055 spin_lock(&base->lock);
1056 list_replace_init(&base->to_pull, &list);
1057 spin_unlock(&base->lock);
1058
1059 list_for_each_safe(pos, safe, &list) {
1060 info = list_entry(pos, struct hrtimer_start_on_info, list);
1061 TRACE("pulled timer 0x%x\n", info->timer);
1062 list_del(pos);
1063 hrtimer_start(info->timer, info->time, info->mode);
1064 }
1065}
1066
1067/**
1068 * hrtimer_start_on - trigger timer arming on remote cpu
1069 * @cpu: remote cpu
1070 * @info: save timer information for enqueuing on remote cpu
1071 * @timer: timer to be pulled
1072 * @time: expire time
1073 * @mode: timer mode
1074 */
1075int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
1076 struct hrtimer *timer, ktime_t time,
1077 const enum hrtimer_mode mode)
1078{
1079 unsigned long flags;
1080 struct hrtimer_cpu_base* base;
1081 int in_use = 0, was_empty;
1082
1083 /* serialize access to info through the timer base */
1084 lock_hrtimer_base(timer, &flags);
1085
1086 in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE);
1087 if (!in_use) {
1088 INIT_LIST_HEAD(&info->list);
1089 info->timer = timer;
1090 info->time = time;
1091 info->mode = mode;
1092 /* mark as in use */
1093 atomic_set(&info->state, HRTIMER_START_ON_QUEUED);
1094 }
1095
1096 unlock_hrtimer_base(timer, &flags);
1097
1098 if (!in_use) {
1099 /* initiate pull */
1100 preempt_disable();
1101 if (cpu == smp_processor_id()) {
1102 /* start timer locally; we may get called
1103 * with rq->lock held, do not wake up anything
1104 */
1105 TRACE("hrtimer_start_on: starting on local CPU\n");
1106 __hrtimer_start_range_ns(info->timer, info->time,
1107 0, info->mode, 0);
1108 } else {
1109 TRACE("hrtimer_start_on: pulling to remote CPU\n");
1110 base = &per_cpu(hrtimer_bases, cpu);
1111 spin_lock_irqsave(&base->lock, flags);
1112 was_empty = list_empty(&base->to_pull);
1113 list_add(&info->list, &base->to_pull);
1114 spin_unlock_irqrestore(&base->lock, flags);
1115 if (was_empty)
1116 /* only send IPI if no one else
1117 * has done so already
1118 */
1119 smp_send_pull_timers(cpu);
1120 }
1121 preempt_enable();
1122 }
1123 return in_use;
1124}
1044 1125
1045/** 1126/**
1046 * hrtimer_try_to_cancel - try to deactivate a timer 1127 * hrtimer_try_to_cancel - try to deactivate a timer
@@ -1631,6 +1712,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
1631 cpu_base->clock_base[i].cpu_base = cpu_base; 1712 cpu_base->clock_base[i].cpu_base = cpu_base;
1632 1713
1633 hrtimer_init_hres(cpu_base); 1714 hrtimer_init_hres(cpu_base);
1715 INIT_LIST_HEAD(&cpu_base->to_pull);
1634} 1716}
1635 1717
1636#ifdef CONFIG_HOTPLUG_CPU 1718#ifdef CONFIG_HOTPLUG_CPU
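A kernel-context sketch, not part of the patch, of the caller side of hrtimer_start_on(); rt_domain.c uses the hrtimer_start_on_info embedded in struct release_heap in exactly this way. my_timer_fired is a hypothetical expiry callback.

static struct hrtimer my_timer;
static struct hrtimer_start_on_info my_info;

static void arm_release_timer_on(int cpu, lt_t release_time)
{
	hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	my_timer.function = my_timer_fired;
	/* mark the info block as free before handing it over */
	atomic_set(&my_info.state, HRTIMER_START_ON_INACTIVE);
	hrtimer_start_on(cpu, &my_info, &my_timer,
			 ns_to_ktime(release_time), HRTIMER_MODE_ABS);
}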
diff --git a/kernel/printk.c b/kernel/printk.c
index 75077ad0b537..ee54355cfdf1 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -71,6 +71,13 @@ int console_printk[4] = {
71}; 71};
72 72
73/* 73/*
74 * divert printk() messages when there is a LITMUS^RT debug listener
75 */
76#include <litmus/litmus.h>
77int trace_override = 0;
78int trace_recurse = 0;
79
80/*
74 * Low level drivers may need that to know if they can schedule in 81 * Low level drivers may need that to know if they can schedule in
75 * their unblank() callback or not. So let's export it. 82 * their unblank() callback or not. So let's export it.
76 */ 83 */
@@ -708,6 +715,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
708 /* Emit the output into the temporary buffer */ 715 /* Emit the output into the temporary buffer */
709 printed_len += vscnprintf(printk_buf + printed_len, 716 printed_len += vscnprintf(printk_buf + printed_len,
710 sizeof(printk_buf) - printed_len, fmt, args); 717 sizeof(printk_buf) - printed_len, fmt, args);
718 /* if LITMUS^RT tracer is active divert printk() msgs */
719 if (trace_override && !trace_recurse)
720 TRACE("%s", printk_buf);
711 721
712 722
713 p = printk_buf; 723 p = printk_buf;
@@ -777,7 +787,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
777 * Try to acquire and then immediately release the 787 * Try to acquire and then immediately release the
778 * console semaphore. The release will do all the 788 * console semaphore. The release will do all the
779 * actual magic (print out buffers, wake up klogd, 789 * actual magic (print out buffers, wake up klogd,
780 * etc). 790 * etc).
781 * 791 *
782 * The acquire_console_semaphore_for_printk() function 792 * The acquire_console_semaphore_for_printk() function
783 * will release 'logbuf_lock' regardless of whether it 793 * will release 'logbuf_lock' regardless of whether it
@@ -1014,7 +1024,7 @@ int printk_needs_cpu(int cpu)
1014 1024
1015void wake_up_klogd(void) 1025void wake_up_klogd(void)
1016{ 1026{
1017 if (waitqueue_active(&log_wait)) 1027 if (!trace_override && waitqueue_active(&log_wait))
1018 __raw_get_cpu_var(printk_pending) = 1; 1028 __raw_get_cpu_var(printk_pending) = 1;
1019} 1029}
1020 1030
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c2a54f70ffe..5e3c509e0efe 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -78,6 +78,9 @@
78 78
79#include "sched_cpupri.h" 79#include "sched_cpupri.h"
80 80
81#include <litmus/sched_trace.h>
82#include <litmus/trace.h>
83
81#define CREATE_TRACE_POINTS 84#define CREATE_TRACE_POINTS
82#include <trace/events/sched.h> 85#include <trace/events/sched.h>
83 86
@@ -450,6 +453,12 @@ struct rt_rq {
450#endif 453#endif
451}; 454};
452 455
456/* Litmus related fields in a runqueue */
457struct litmus_rq {
458 unsigned long nr_running;
459 struct task_struct *prev;
460};
461
453#ifdef CONFIG_SMP 462#ifdef CONFIG_SMP
454 463
455/* 464/*
@@ -512,6 +521,7 @@ struct rq {
512 521
513 struct cfs_rq cfs; 522 struct cfs_rq cfs;
514 struct rt_rq rt; 523 struct rt_rq rt;
524 struct litmus_rq litmus;
515 525
516#ifdef CONFIG_FAIR_GROUP_SCHED 526#ifdef CONFIG_FAIR_GROUP_SCHED
517 /* list of leaf cfs_rq on this cpu: */ 527 /* list of leaf cfs_rq on this cpu: */
@@ -1833,7 +1843,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1833 1843
1834static const struct sched_class rt_sched_class; 1844static const struct sched_class rt_sched_class;
1835 1845
1836#define sched_class_highest (&rt_sched_class) 1846#define sched_class_highest (&litmus_sched_class)
1837#define for_each_class(class) \ 1847#define for_each_class(class) \
1838 for (class = sched_class_highest; class; class = class->next) 1848 for (class = sched_class_highest; class; class = class->next)
1839 1849
@@ -1932,6 +1942,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
1932#include "sched_idletask.c" 1942#include "sched_idletask.c"
1933#include "sched_fair.c" 1943#include "sched_fair.c"
1934#include "sched_rt.c" 1944#include "sched_rt.c"
1945#include "../litmus/sched_litmus.c"
1935#ifdef CONFIG_SCHED_DEBUG 1946#ifdef CONFIG_SCHED_DEBUG
1936# include "sched_debug.c" 1947# include "sched_debug.c"
1937#endif 1948#endif
@@ -2372,6 +2383,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2372 unsigned long flags; 2383 unsigned long flags;
2373 struct rq *rq; 2384 struct rq *rq;
2374 2385
2386 if (is_realtime(p))
2387 TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state);
2388
2375 if (!sched_feat(SYNC_WAKEUPS)) 2389 if (!sched_feat(SYNC_WAKEUPS))
2376 wake_flags &= ~WF_SYNC; 2390 wake_flags &= ~WF_SYNC;
2377 2391
@@ -2390,7 +2404,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2390 orig_cpu = cpu; 2404 orig_cpu = cpu;
2391 2405
2392#ifdef CONFIG_SMP 2406#ifdef CONFIG_SMP
2393 if (unlikely(task_running(rq, p))) 2407 if (unlikely(task_running(rq, p)) || is_realtime(p))
2394 goto out_activate; 2408 goto out_activate;
2395 2409
2396 /* 2410 /*
@@ -2497,6 +2511,8 @@ out_running:
2497 } 2511 }
2498#endif 2512#endif
2499out: 2513out:
2514 if (is_realtime(p))
2515 TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state);
2500 task_rq_unlock(rq, &flags); 2516 task_rq_unlock(rq, &flags);
2501 put_cpu(); 2517 put_cpu();
2502 2518
@@ -2814,6 +2830,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
2814 */ 2830 */
2815 prev_state = prev->state; 2831 prev_state = prev->state;
2816 finish_arch_switch(prev); 2832 finish_arch_switch(prev);
2833 litmus->finish_switch(prev);
2834 prev->rt_param.stack_in_use = NO_CPU;
2817#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 2835#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2818 local_irq_disable(); 2836 local_irq_disable();
2819#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ 2837#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -2843,6 +2861,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
2843{ 2861{
2844 if (prev->sched_class->pre_schedule) 2862 if (prev->sched_class->pre_schedule)
2845 prev->sched_class->pre_schedule(rq, prev); 2863 prev->sched_class->pre_schedule(rq, prev);
2864
2865 /* LITMUS^RT not very clean hack: we need to save the prev task
2866 * as our scheduling decisions rely on it (as we drop the rq lock
2867 * something in prev can change...); there is no way to escape
2868 * this hack apart from modifying pick_next_task(rq, _prev_) or
2869 * falling back on the previous solution of decoupling
2870 * scheduling decisions
2871 */
2872 rq->litmus.prev = prev;
2846} 2873}
2847 2874
2848/* rq->lock is NOT held, but preemption is disabled */ 2875/* rq->lock is NOT held, but preemption is disabled */
@@ -3520,18 +3547,26 @@ void scheduler_tick(void)
3520 3547
3521 sched_clock_tick(); 3548 sched_clock_tick();
3522 3549
3550 TS_TICK_START(current);
3551
3523 raw_spin_lock(&rq->lock); 3552 raw_spin_lock(&rq->lock);
3524 update_rq_clock(rq); 3553 update_rq_clock(rq);
3525 update_cpu_load(rq); 3554 update_cpu_load(rq);
3526 curr->sched_class->task_tick(rq, curr, 0); 3555 curr->sched_class->task_tick(rq, curr, 0);
3556
3557 /* litmus_tick may force current to resched */
3558 litmus_tick(rq, curr);
3559
3527 raw_spin_unlock(&rq->lock); 3560 raw_spin_unlock(&rq->lock);
3528 3561
3529 perf_event_task_tick(curr); 3562 perf_event_task_tick(curr);
3530 3563
3531#ifdef CONFIG_SMP 3564#ifdef CONFIG_SMP
3532 rq->idle_at_tick = idle_cpu(cpu); 3565 rq->idle_at_tick = idle_cpu(cpu);
3533 trigger_load_balance(rq, cpu); 3566 if (!is_realtime(current))
3567 trigger_load_balance(rq, cpu);
3534#endif 3568#endif
3569 TS_TICK_END(current);
3535} 3570}
3536 3571
3537notrace unsigned long get_parent_ip(unsigned long addr) 3572notrace unsigned long get_parent_ip(unsigned long addr)
@@ -3672,12 +3707,20 @@ pick_next_task(struct rq *rq)
3672 /* 3707 /*
3673 * Optimization: we know that if all tasks are in 3708 * Optimization: we know that if all tasks are in
3674 * the fair class we can call that function directly: 3709 * the fair class we can call that function directly:
3675 */ 3710
3676 if (likely(rq->nr_running == rq->cfs.nr_running)) { 3711 * NOT IN LITMUS^RT!
3712
3713 * This breaks many assumptions in the plugins.
3714 * Do not uncomment without thinking long and hard
3715 * about how this affects global plugins such as GSN-EDF.
3716
3717 if (rq->nr_running == rq->cfs.nr_running) {
3718 TRACE("taking shortcut in pick_next_task()\n");
3677 p = fair_sched_class.pick_next_task(rq); 3719 p = fair_sched_class.pick_next_task(rq);
3678 if (likely(p)) 3720 if (likely(p))
3679 return p; 3721 return p;
3680 } 3722 }
3723 */
3681 3724
3682 class = sched_class_highest; 3725 class = sched_class_highest;
3683 for ( ; ; ) { 3726 for ( ; ; ) {
@@ -3712,6 +3755,8 @@ need_resched:
3712 3755
3713 release_kernel_lock(prev); 3756 release_kernel_lock(prev);
3714need_resched_nonpreemptible: 3757need_resched_nonpreemptible:
3758 TS_SCHED_START;
3759 sched_trace_task_switch_away(prev);
3715 3760
3716 schedule_debug(prev); 3761 schedule_debug(prev);
3717 3762
@@ -3746,15 +3791,22 @@ need_resched_nonpreemptible:
3746 rq->curr = next; 3791 rq->curr = next;
3747 ++*switch_count; 3792 ++*switch_count;
3748 3793
3794 TS_SCHED_END(next);
3795 TS_CXS_START(next);
3749 context_switch(rq, prev, next); /* unlocks the rq */ 3796 context_switch(rq, prev, next); /* unlocks the rq */
3797 TS_CXS_END(current);
3750 /* 3798 /*
3751 * the context switch might have flipped the stack from under 3799 * the context switch might have flipped the stack from under
3752 * us, hence refresh the local variables. 3800 * us, hence refresh the local variables.
3753 */ 3801 */
3754 cpu = smp_processor_id(); 3802 cpu = smp_processor_id();
3755 rq = cpu_rq(cpu); 3803 rq = cpu_rq(cpu);
3756 } else 3804 } else {
3805 TS_SCHED_END(prev);
3757 raw_spin_unlock_irq(&rq->lock); 3806 raw_spin_unlock_irq(&rq->lock);
3807 }
3808
3809 sched_trace_task_switch_to(current);
3758 3810
3759 post_schedule(rq); 3811 post_schedule(rq);
3760 3812
@@ -3767,6 +3819,9 @@ need_resched_nonpreemptible:
3767 preempt_enable_no_resched(); 3819 preempt_enable_no_resched();
3768 if (need_resched()) 3820 if (need_resched())
3769 goto need_resched; 3821 goto need_resched;
3822
3823 if (srp_active())
3824 srp_ceiling_block();
3770} 3825}
3771EXPORT_SYMBOL(schedule); 3826EXPORT_SYMBOL(schedule);
3772 3827
@@ -4043,6 +4098,17 @@ void complete_all(struct completion *x)
4043} 4098}
4044EXPORT_SYMBOL(complete_all); 4099EXPORT_SYMBOL(complete_all);
4045 4100
4101void complete_n(struct completion *x, int n)
4102{
4103 unsigned long flags;
4104
4105 spin_lock_irqsave(&x->wait.lock, flags);
4106 x->done += n;
4107 __wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL);
4108 spin_unlock_irqrestore(&x->wait.lock, flags);
4109}
4110EXPORT_SYMBOL(complete_n);
4111
4046static inline long __sched 4112static inline long __sched
4047do_wait_for_common(struct completion *x, long timeout, int state) 4113do_wait_for_common(struct completion *x, long timeout, int state)
4048{ 4114{
@@ -4471,7 +4537,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
4471 p->normal_prio = normal_prio(p); 4537 p->normal_prio = normal_prio(p);
4472 /* we are holding p->pi_lock already */ 4538 /* we are holding p->pi_lock already */
4473 p->prio = rt_mutex_getprio(p); 4539 p->prio = rt_mutex_getprio(p);
4474 if (rt_prio(p->prio)) 4540 if (p->policy == SCHED_LITMUS)
4541 p->sched_class = &litmus_sched_class;
4542 else if (rt_prio(p->prio))
4475 p->sched_class = &rt_sched_class; 4543 p->sched_class = &rt_sched_class;
4476 else 4544 else
4477 p->sched_class = &fair_sched_class; 4545 p->sched_class = &fair_sched_class;
@@ -4516,7 +4584,7 @@ recheck:
4516 4584
4517 if (policy != SCHED_FIFO && policy != SCHED_RR && 4585 if (policy != SCHED_FIFO && policy != SCHED_RR &&
4518 policy != SCHED_NORMAL && policy != SCHED_BATCH && 4586 policy != SCHED_NORMAL && policy != SCHED_BATCH &&
4519 policy != SCHED_IDLE) 4587 policy != SCHED_IDLE && policy != SCHED_LITMUS)
4520 return -EINVAL; 4588 return -EINVAL;
4521 } 4589 }
4522 4590
@@ -4531,6 +4599,8 @@ recheck:
4531 return -EINVAL; 4599 return -EINVAL;
4532 if (rt_policy(policy) != (param->sched_priority != 0)) 4600 if (rt_policy(policy) != (param->sched_priority != 0))
4533 return -EINVAL; 4601 return -EINVAL;
4602 if (policy == SCHED_LITMUS && policy == p->policy)
4603 return -EINVAL;
4534 4604
4535 /* 4605 /*
4536 * Allow unprivileged RT tasks to decrease priority: 4606 * Allow unprivileged RT tasks to decrease priority:
@@ -4585,6 +4655,12 @@ recheck:
4585 return retval; 4655 return retval;
4586 } 4656 }
4587 4657
4658 if (policy == SCHED_LITMUS) {
4659 retval = litmus_admit_task(p);
4660 if (retval)
4661 return retval;
4662 }
4663
4588 /* 4664 /*
4589 * make sure no PI-waiters arrive (or leave) while we are 4665 * make sure no PI-waiters arrive (or leave) while we are
4590 * changing the priority of the task: 4666 * changing the priority of the task:
@@ -4612,10 +4688,19 @@ recheck:
4612 4688
4613 p->sched_reset_on_fork = reset_on_fork; 4689 p->sched_reset_on_fork = reset_on_fork;
4614 4690
4691 if (p->policy == SCHED_LITMUS)
4692 litmus_exit_task(p);
4693
4615 oldprio = p->prio; 4694 oldprio = p->prio;
4616 prev_class = p->sched_class; 4695 prev_class = p->sched_class;
4617 __setscheduler(rq, p, policy, param->sched_priority); 4696 __setscheduler(rq, p, policy, param->sched_priority);
4618 4697
4698 if (policy == SCHED_LITMUS) {
4699 p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
4700 p->rt_param.present = running;
4701 litmus->task_new(p, on_rq, running);
4702 }
4703
4619 if (running) 4704 if (running)
4620 p->sched_class->set_curr_task(rq); 4705 p->sched_class->set_curr_task(rq);
4621 if (on_rq) { 4706 if (on_rq) {
@@ -4785,10 +4870,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
4785 rcu_read_lock(); 4870 rcu_read_lock();
4786 4871
4787 p = find_process_by_pid(pid); 4872 p = find_process_by_pid(pid);
4788 if (!p) { 4873 /* Don't set affinity if task not found and for LITMUS tasks */
4874 if (!p || is_realtime(p)) {
4789 rcu_read_unlock(); 4875 rcu_read_unlock();
4790 put_online_cpus(); 4876 put_online_cpus();
4791 return -ESRCH; 4877 return p ? -EPERM : -ESRCH;
4792 } 4878 }
4793 4879
4794 /* Prevent p going away */ 4880 /* Prevent p going away */
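The user-space counterpart of the __setscheduler()/litmus_admit_task() path patched above is a plain sched_setscheduler() call with the new policy, issued after the rt_task parameters have been installed; liblitmus wraps this. A sketch, not part of the patch; SCHED_LITMUS comes from the patched <linux/sched.h>, and the priority value is an assumption that must satisfy the policy check in sched_setscheduler().

#include <sched.h>

static int become_litmus_task(void)
{
	struct sched_param param;

	param.sched_priority = 0;	/* assumed: SCHED_LITMUS is not treated as rt_policy() */
	return sched_setscheduler(0 /* self */, SCHED_LITMUS, &param);
}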
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5a5ea2cd924f..a4a741dfebfe 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1708,7 +1708,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1708 int sync = wake_flags & WF_SYNC; 1708 int sync = wake_flags & WF_SYNC;
1709 int scale = cfs_rq->nr_running >= sched_nr_latency; 1709 int scale = cfs_rq->nr_running >= sched_nr_latency;
1710 1710
1711 if (unlikely(rt_prio(p->prio))) 1711 if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS) {
1712 goto preempt; 1712 goto preempt;
1713 1713
1714 if (unlikely(p->sched_class != &fair_sched_class)) 1714 if (unlikely(p->sched_class != &fair_sched_class))
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b5b920ae2ea7..c2fbb02c1b54 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1014,7 +1014,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1014 */ 1014 */
1015static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) 1015static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1016{ 1016{
1017 if (p->prio < rq->curr->prio) { 1017 if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) {
1018 resched_task(rq->curr); 1018 resched_task(rq->curr);
1019 return; 1019 return;
1020 } 1020 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f992762d7f51..0adc54bd7c7c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -721,6 +721,46 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
721} 721}
722 722
723/** 723/**
724 * tick_set_quanta_type - get the quanta type as a boot option
725 * Default is standard setup with ticks staggered over first
726 * half of tick period.
727 */
728int quanta_type = LINUX_DEFAULT_TICKS;
729static int __init tick_set_quanta_type(char *str)
730{
731 if (strcmp("aligned", str) == 0) {
732 quanta_type = LITMUS_ALIGNED_TICKS;
733 printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n");
734 }
735 else if (strcmp("staggered", str) == 0) {
736 quanta_type = LITMUS_STAGGERED_TICKS;
737 printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n");
738 }
739 return 1;
740}
741__setup("quanta=", tick_set_quanta_type);
742
743u64 cpu_stagger_offset(int cpu)
744{
745 u64 offset = 0;
746 switch (quanta_type) {
747 case LITMUS_ALIGNED_TICKS:
748 offset = 0;
749 break;
750 case LITMUS_STAGGERED_TICKS:
751 offset = ktime_to_ns(tick_period);
752 do_div(offset, num_possible_cpus());
753 offset *= cpu;
754 break;
755 default:
756 offset = ktime_to_ns(tick_period) >> 1;
757 do_div(offset, num_possible_cpus());
758 offset *= cpu;
759 }
760 return offset;
761}
762
763/**
724 * tick_setup_sched_timer - setup the tick emulation timer 764 * tick_setup_sched_timer - setup the tick emulation timer
725 */ 765 */
726void tick_setup_sched_timer(void) 766void tick_setup_sched_timer(void)
@@ -737,9 +777,11 @@ void tick_setup_sched_timer(void)
737 777
738 /* Get the next period (per cpu) */ 778 /* Get the next period (per cpu) */
739 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); 779 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
740 offset = ktime_to_ns(tick_period) >> 1; 780
741 do_div(offset, num_possible_cpus()); 781 /* Offset must be set correctly to achieve desired quanta type. */
742 offset *= smp_processor_id(); 782 offset = cpu_stagger_offset(smp_processor_id());
783
784 /* Add the correct offset to expiration time */
743 hrtimer_add_expires_ns(&ts->sched_timer, offset); 785 hrtimer_add_expires_ns(&ts->sched_timer, offset);
744 786
745 for (;;) { 787 for (;;) {
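A worked example, not part of the patch, of the per-CPU offsets cpu_stagger_offset() yields, assuming HZ=1000 (1 ms tick_period) and num_possible_cpus() == 4:

/*
 *   quanta=aligned   : 0,   0,   0,   0 us  (all CPUs tick in lockstep)
 *   quanta=staggered : 0, 250, 500, 750 us  (spread across the full tick)
 *   default (Linux)  : 0, 125, 250, 375 us  (spread across half the tick)
 *
 * tick_setup_sched_timer() adds the CPU's offset to its first expiry;
 * the fixed period then keeps every CPU at that relative phase.
 */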
diff --git a/litmus/Kconfig b/litmus/Kconfig
new file mode 100644
index 000000000000..874794f64af1
--- /dev/null
+++ b/litmus/Kconfig
@@ -0,0 +1,85 @@
1menu "LITMUS^RT"
2
3menu "Real-Time Synchronization"
4
5config NP_SECTION
6 bool "Non-preemptive section support"
7 default n
8 help
9 Allow tasks to become non-preemptable.
10 Note that plugins still need to explicitly support non-preemptivity.
11 Currently, only GSN-EDF and PSN-EDF have such support.
12
13 This is required to support the FMLP.
14 If disabled, all tasks will be considered preemptable at all times.
15
16config SRP
17 bool "Stack Resource Policy (SRP)"
18 default n
19 help
20 Include support for Baker's Stack Resource Policy.
21
22 Say Yes if you want FMLP local long critical section
23 synchronization support.
24
25config FMLP
26 bool "FMLP support"
27 depends on NP_SECTION
28 default n
29 help
30	  Include support for deterministic multiprocessor real-time
31	  synchronization.
32
33 Say Yes if you want FMLP long critical section
34 synchronization support.
35
36endmenu
37
38menu "Tracing"
39
40config FEATHER_TRACE
41 bool "Feather-Trace Infrastructure"
42 default y
43 help
44 Feather-Trace basic tracing infrastructure. Includes device file
45 driver and instrumentation point support.
46
47
48config SCHED_TASK_TRACE
49 bool "Trace real-time tasks"
50 depends on FEATHER_TRACE
51 default y
52 help
53 Include support for the sched_trace_XXX() tracing functions. This
54 allows the collection of real-time task events such as job
55 completions, job releases, early completions, etc. This results in a
56 small overhead in the scheduling code. Disable if the overhead is not
57 acceptable (e.g., benchmarking).
58
59 Say Yes for debugging.
60 Say No for overhead tracing.
61
62config SCHED_OVERHEAD_TRACE
63 bool "Record timestamps for overhead measurements"
64 depends on FEATHER_TRACE
65 default n
66 help
67 Export event stream for overhead tracing.
68 Say Yes for overhead tracing.
69
70config SCHED_DEBUG_TRACE
71 bool "TRACE() debugging"
72 default y
73 help
74	  Include support for sched_trace_log_message(), which is used to
75 implement TRACE(). If disabled, no TRACE() messages will be included
76 in the kernel, and no overheads due to debugging statements will be
77 incurred by the scheduler. Disable if the overhead is not acceptable
78 (e.g. benchmarking).
79
80 Say Yes for debugging.
81 Say No for overhead tracing.
82
83endmenu
84
85endmenu
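For reference, not part of the patch: a .config fragment selecting the options above for a debugging-oriented build, with overhead tracing left at its default (disabled):

CONFIG_NP_SECTION=y
CONFIG_SRP=y
CONFIG_FMLP=y
CONFIG_FEATHER_TRACE=y
CONFIG_SCHED_TASK_TRACE=y
# CONFIG_SCHED_OVERHEAD_TRACE is not set
CONFIG_SCHED_DEBUG_TRACE=y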
diff --git a/litmus/Makefile b/litmus/Makefile
new file mode 100644
index 000000000000..0cc33e8bee51
--- /dev/null
+++ b/litmus/Makefile
@@ -0,0 +1,23 @@
1#
2# Makefile for LITMUS^RT
3#
4
5obj-y = sched_plugin.o litmus.o \
6 jobs.o \
7 sync.o \
8 rt_domain.o \
9 edf_common.o \
10 fdso.o \
11 srp.o \
12 fmlp.o \
13 bheap.o \
14 ctrldev.o \
15 sched_gsn_edf.o \
16 sched_psn_edf.o \
17 sched_cedf.o \
18 sched_pfair.o
19
20obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
21obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
22obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
23obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
diff --git a/litmus/bheap.c b/litmus/bheap.c
new file mode 100644
index 000000000000..528af97f18a6
--- /dev/null
+++ b/litmus/bheap.c
@@ -0,0 +1,314 @@
1#include "linux/kernel.h"
2#include "litmus/bheap.h"
3
4void bheap_init(struct bheap* heap)
5{
6 heap->head = NULL;
7 heap->min = NULL;
8}
9
10void bheap_node_init(struct bheap_node** _h, void* value)
11{
12 struct bheap_node* h = *_h;
13 h->parent = NULL;
14 h->next = NULL;
15 h->child = NULL;
16 h->degree = NOT_IN_HEAP;
17 h->value = value;
18 h->ref = _h;
19}
20
21
22/* make child a subtree of root */
23static void __bheap_link(struct bheap_node* root,
24 struct bheap_node* child)
25{
26 child->parent = root;
27 child->next = root->child;
28 root->child = child;
29 root->degree++;
30}
31
32/* merge root lists */
33static struct bheap_node* __bheap_merge(struct bheap_node* a,
34 struct bheap_node* b)
35{
36 struct bheap_node* head = NULL;
37 struct bheap_node** pos = &head;
38
39 while (a && b) {
40 if (a->degree < b->degree) {
41 *pos = a;
42 a = a->next;
43 } else {
44 *pos = b;
45 b = b->next;
46 }
47 pos = &(*pos)->next;
48 }
49 if (a)
50 *pos = a;
51 else
52 *pos = b;
53 return head;
54}
55
56/* reverse a linked list of nodes. also clears parent pointer */
57static struct bheap_node* __bheap_reverse(struct bheap_node* h)
58{
59 struct bheap_node* tail = NULL;
60 struct bheap_node* next;
61
62 if (!h)
63 return h;
64
65 h->parent = NULL;
66 while (h->next) {
67 next = h->next;
68 h->next = tail;
69 tail = h;
70 h = next;
71 h->parent = NULL;
72 }
73 h->next = tail;
74 return h;
75}
76
77static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
78 struct bheap_node** prev, struct bheap_node** node)
79{
80 struct bheap_node *_prev, *cur;
81 *prev = NULL;
82
83 if (!heap->head) {
84 *node = NULL;
85 return;
86 }
87
88 *node = heap->head;
89 _prev = heap->head;
90 cur = heap->head->next;
91 while (cur) {
92 if (higher_prio(cur, *node)) {
93 *node = cur;
94 *prev = _prev;
95 }
96 _prev = cur;
97 cur = cur->next;
98 }
99}
100
101static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
102 struct bheap_node* h2)
103{
104 struct bheap_node* h1;
105 struct bheap_node *prev, *x, *next;
106 if (!h2)
107 return;
108 h1 = heap->head;
109 if (!h1) {
110 heap->head = h2;
111 return;
112 }
113 h1 = __bheap_merge(h1, h2);
114 prev = NULL;
115 x = h1;
116 next = x->next;
117 while (next) {
118 if (x->degree != next->degree ||
119 (next->next && next->next->degree == x->degree)) {
120 /* nothing to do, advance */
121 prev = x;
122 x = next;
123 } else if (higher_prio(x, next)) {
124 /* x becomes the root of next */
125 x->next = next->next;
126 __bheap_link(x, next);
127 } else {
128 /* next becomes the root of x */
129 if (prev)
130 prev->next = next;
131 else
132 h1 = next;
133 __bheap_link(next, x);
134 x = next;
135 }
136 next = x->next;
137 }
138 heap->head = h1;
139}
140
141static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
142 struct bheap* heap)
143{
144 struct bheap_node *prev, *node;
145 __bheap_min(higher_prio, heap, &prev, &node);
146 if (!node)
147 return NULL;
148 if (prev)
149 prev->next = node->next;
150 else
151 heap->head = node->next;
152 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
153 return node;
154}
155
156/* insert (and reinitialize) a node into the heap */
157void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
158 struct bheap_node* node)
159{
160 struct bheap_node *min;
161 node->child = NULL;
162 node->parent = NULL;
163 node->next = NULL;
164 node->degree = 0;
165 if (heap->min && higher_prio(node, heap->min)) {
166 /* swap min cache */
167 min = heap->min;
168 min->child = NULL;
169 min->parent = NULL;
170 min->next = NULL;
171 min->degree = 0;
172 __bheap_union(higher_prio, heap, min);
173 heap->min = node;
174 } else
175 __bheap_union(higher_prio, heap, node);
176}
177
178void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
179{
180 struct bheap_node* min;
181 if (heap->min) {
182 min = heap->min;
183 heap->min = NULL;
184 bheap_insert(higher_prio, heap, min);
185 }
186}
187
188/* merge addition into target */
189void bheap_union(bheap_prio_t higher_prio,
190 struct bheap* target, struct bheap* addition)
191{
192 /* first insert any cached minima, if necessary */
193 bheap_uncache_min(higher_prio, target);
194 bheap_uncache_min(higher_prio, addition);
195 __bheap_union(higher_prio, target, addition->head);
196 /* this is a destructive merge */
197 addition->head = NULL;
198}
199
200struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
201 struct bheap* heap)
202{
203 if (!heap->min)
204 heap->min = __bheap_extract_min(higher_prio, heap);
205 return heap->min;
206}
207
208struct bheap_node* bheap_take(bheap_prio_t higher_prio,
209 struct bheap* heap)
210{
211 struct bheap_node *node;
212 if (!heap->min)
213 heap->min = __bheap_extract_min(higher_prio, heap);
214 node = heap->min;
215 heap->min = NULL;
216 if (node)
217 node->degree = NOT_IN_HEAP;
218 return node;
219}
220
221int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
222{
223 struct bheap_node *parent;
224 struct bheap_node** tmp_ref;
225 void* tmp;
226
227 /* bubble up */
228 parent = node->parent;
229 while (parent && higher_prio(node, parent)) {
230 /* swap parent and node */
231 tmp = parent->value;
232 parent->value = node->value;
233 node->value = tmp;
234 /* swap references */
235 *(parent->ref) = node;
236 *(node->ref) = parent;
237 tmp_ref = parent->ref;
238 parent->ref = node->ref;
239 node->ref = tmp_ref;
240 /* step up */
241 node = parent;
242 parent = node->parent;
243 }
244
245 return parent != NULL;
246}
247
248void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
249 struct bheap_node* node)
250{
251 struct bheap_node *parent, *prev, *pos;
252 struct bheap_node** tmp_ref;
253 void* tmp;
254
255 if (heap->min != node) {
256 /* bubble up */
257 parent = node->parent;
258 while (parent) {
259 /* swap parent and node */
260 tmp = parent->value;
261 parent->value = node->value;
262 node->value = tmp;
263 /* swap references */
264 *(parent->ref) = node;
265 *(node->ref) = parent;
266 tmp_ref = parent->ref;
267 parent->ref = node->ref;
268 node->ref = tmp_ref;
269 /* step up */
270 node = parent;
271 parent = node->parent;
272 }
273 /* now delete:
274 * first find prev */
275 prev = NULL;
276 pos = heap->head;
277 while (pos != node) {
278 prev = pos;
279 pos = pos->next;
280 }
281 /* we have prev, now remove node */
282 if (prev)
283 prev->next = node->next;
284 else
285 heap->head = node->next;
286 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
287 } else
288 heap->min = NULL;
289 node->degree = NOT_IN_HEAP;
290}
291
292/* allocate a heap node for value and insert into the heap */
293int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
294 void* value, int gfp_flags)
295{
296 struct bheap_node* hn = bheap_node_alloc(gfp_flags);
297 if (likely(hn)) {
298 bheap_node_init(&hn, value);
299 bheap_insert(higher_prio, heap, hn);
300 }
301 return hn != NULL;
302}
303
304void* bheap_take_del(bheap_prio_t higher_prio,
305 struct bheap* heap)
306{
307 struct bheap_node* hn = bheap_take(higher_prio, heap);
308 void* ret = NULL;
309 if (hn) {
310 ret = hn->value;
311 bheap_node_free(hn);
312 }
313 return ret;
314}
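
As a quick orientation for the API above: bheap is a binomial heap whose ordering is supplied by the caller through a bheap_prio_t comparator (return non-zero when the first node should come out first), with a one-element "min cache" maintained by bheap_insert()/bheap_take(). The following sketch is illustration only and not part of the patch; it assumes the declarations from include/litmus/bheap.h and allocates nodes directly with kmalloc() instead of the bheap_node cache set up in litmus/litmus.c.

#include <linux/slab.h>
#include <litmus/bheap.h>

/* smaller stored value = higher priority, same convention as
 * edf_ready_order() in litmus/edf_common.c */
static int int_min_first(struct bheap_node *a, struct bheap_node *b)
{
	return (long) a->value < (long) b->value;
}

static void bheap_demo(void)
{
	struct bheap heap;
	struct bheap_node *n, *taken;
	long keys[] = { 42, 7, 19 };
	int i;

	bheap_init(&heap);

	for (i = 0; i < 3; i++) {
		n = kmalloc(sizeof(*n), GFP_KERNEL);
		if (!n)
			break;
		bheap_node_init(&n, (void *) keys[i]);
		bheap_insert(int_min_first, &heap, n);
	}

	/* nodes come back in priority order: 7, 19, 42 */
	while ((taken = bheap_take(int_min_first, &heap)))
		kfree(taken);
}
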
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
new file mode 100644
index 000000000000..6677a67cc945
--- /dev/null
+++ b/litmus/ctrldev.c
@@ -0,0 +1,150 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/module.h>
6
7#include <litmus/litmus.h>
8
9/* only one page for now, but we might want to add a RO version at some point */
10
11#define CTRL_NAME "litmus/ctrl"
12
13/* allocate t->rt_param.ctrl_page*/
14static int alloc_ctrl_page(struct task_struct *t)
15{
16 int err = 0;
17
18 /* only allocate if the task doesn't have one yet */
19 if (!tsk_rt(t)->ctrl_page) {
20 tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
21 if (!tsk_rt(t)->ctrl_page)
22 err = -ENOMEM;
23 /* will get de-allocated in task teardown */
24 TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
25 tsk_rt(t)->ctrl_page);
26 }
27 return err;
28}
29
30static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
31{
32 int err;
33 unsigned long pfn;
34
35 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
36
37 /* Increase ref count. Is decreased when vma is destroyed. */
38 get_page(ctrl);
39
40 /* compute page frame number */
41 pfn = page_to_pfn(ctrl);
42
43 TRACE_CUR(CTRL_NAME
44 ": mapping %p (pfn:%lx, %lx) to 0x%lx (prot:%lx)\n",
45 tsk_rt(t)->ctrl_page, pfn, page_to_pfn(ctrl), vma->vm_start,
46 vma->vm_page_prot);
47
48 /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise
49 * userspace actually gets a copy-on-write page. */
50 err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED);
51
52 if (err)
53 TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err);
54
55 return err;
56}
57
58static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
59{
60 TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
61 vma->vm_flags, vma->vm_page_prot);
62
63 TRACE_CUR(CTRL_NAME
64 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
65 (void*) vma->vm_start, (void*) vma->vm_end, vma,
66 vma->vm_private_data, current->comm,
67 current->pid);
68}
69
70static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
71 struct vm_fault* vmf)
72{
73 /* This function should never be called, since
74 * all pages should have been mapped by mmap()
75 * already. */
76 TRACE_CUR("%s flags=0x%x\n", __FUNCTION__, vma->vm_flags);
77
78 /* nope, you only get one page */
79 return VM_FAULT_SIGBUS;
80}
81
82static struct vm_operations_struct litmus_ctrl_vm_ops = {
83 .close = litmus_ctrl_vm_close,
84 .fault = litmus_ctrl_vm_fault,
85};
86
87static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
88{
89 int err = 0;
90
91 /* first make sure mapper knows what he's doing */
92
93 /* you can only get one page */
94 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
95 return -EINVAL;
96
97 /* you can only map the "first" page */
98 if (vma->vm_pgoff != 0)
99 return -EINVAL;
100
101 /* you can't share it with anyone */
102 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
103 return -EINVAL;
104
105 vma->vm_ops = &litmus_ctrl_vm_ops;
106 /* this mapping should not be kept across forks,
107 * and cannot be expanded */
108 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
109
110 err = alloc_ctrl_page(current);
111 if (!err)
112 err = map_ctrl_page(current, vma);
113
114 TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
115 __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
116
117 return err;
118}
119
120static struct file_operations litmus_ctrl_fops = {
121 .owner = THIS_MODULE,
122 .mmap = litmus_ctrl_mmap,
123};
124
125static struct miscdevice litmus_ctrl_dev = {
126 .name = CTRL_NAME,
127 .minor = MISC_DYNAMIC_MINOR,
128 .fops = &litmus_ctrl_fops,
129};
130
131static int __init init_litmus_ctrl_dev(void)
132{
133 int err;
134
135 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
136
137 printk("Initializing LITMUS^RT control device.\n");
138 err = misc_register(&litmus_ctrl_dev);
139 if (err)
140 printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
141 return err;
142}
143
144static void __exit exit_litmus_ctrl_dev(void)
145{
146 misc_deregister(&litmus_ctrl_dev);
147}
148
149module_init(init_litmus_ctrl_dev);
150module_exit(exit_litmus_ctrl_dev);
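
For context, the misc device above exports exactly one zero-filled control page per task. The user-space sketch below shows how a task would map it; it is illustration only, and the device node path (/dev/litmus/ctrl, assuming udev creates the node under the misc device's name) is an assumption.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/litmus/ctrl", O_RDWR);   /* assumed device node */
	void *ctrl;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* exactly one page, offset 0, and a private mapping --
	 * anything else is rejected by litmus_ctrl_mmap() */
	ctrl = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ | PROT_WRITE,
		    MAP_PRIVATE, fd, 0);
	if (ctrl == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	printf("control page mapped at %p\n", ctrl);
	munmap(ctrl, sysconf(_SC_PAGESIZE));
	close(fd);
	return 0;
}
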
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 000000000000..06daec66c984
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,102 @@
1/*
2 * litmus/edf_common.c
3 *
4 * Common functions for EDF based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/edf_common.h>
16
17/* edf_higher_prio - returns true if first has a higher EDF priority
18 * than second. Deadline ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int edf_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (first && first == second) {
30 TRACE_TASK(first,
31 "WARNING: pointless edf priority comparison.\n");
32 return 0;
33 }
34
35
36 /* Check for inherited priorities. Change task
37 * used for comparison in such a case.
38 */
39 if (first && first->rt_param.inh_task)
40 first_task = first->rt_param.inh_task;
41 if (second && second->rt_param.inh_task)
42 second_task = second->rt_param.inh_task;
43
44 return
45 /* it has to exist in order to have higher priority */
46 first_task && (
47 /* does the second task exist and is it a real-time task? If
48 * not, the first task (which is a RT task) has higher
49 * priority.
50 */
51 !second_task || !is_realtime(second_task) ||
52
53 /* is the deadline of the first task earlier?
54 * Then it has higher priority.
55 */
56 earlier_deadline(first_task, second_task) ||
57
58 /* Do we have a deadline tie?
59 * Then break by PID.
60 */
61 (get_deadline(first_task) == get_deadline(second_task) &&
62 (first_task->pid < second_task->pid ||
63
64 /* If the PIDs are the same then the task with the inherited
65 * priority wins.
66 */
67 (first_task->pid == second_task->pid &&
68 !second->rt_param.inh_task))));
69}
70
71int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
72{
73 return edf_higher_prio(bheap2task(a), bheap2task(b));
74}
75
76void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
77 release_jobs_t release)
78{
79 rt_domain_init(rt, edf_ready_order, resched, release);
80}
81
82/* edf_preemption_needed - check whether the task t needs to be preempted
83 * call only with irqs disabled and with ready_lock acquired
84 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
85 */
86int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
87{
88 /* we need the read lock for edf_ready_queue */
89 /* no need to preempt if there is nothing pending */
90 if (!__jobs_pending(rt))
91 return 0;
92 /* we need to reschedule if t doesn't exist */
93 if (!t)
94 return 1;
95
96 /* NOTE: We cannot check for non-preemptibility since we
97 * don't know what address space we're currently in.
98 */
99
100 /* make sure to get non-rt stuff out of the way */
101 return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
102}
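
To make the tie-break chain in edf_higher_prio() easy to test in isolation, here is a standalone restatement of the same rule on a plain struct. This is illustration only: 'struct job' and its fields are stand-ins for the rt_param accessors used above, and priority inheritance is left out.

#include <stdio.h>

struct job {
	unsigned long long deadline;
	int pid;
};

static int edf_higher(const struct job *a, const struct job *b)
{
	if (!a)
		return 0;                          /* nothing beats "no task" */
	if (!b)
		return 1;                          /* anything beats "no task" */
	if (a->deadline != b->deadline)
		return a->deadline < b->deadline;  /* earlier deadline wins */
	return a->pid < b->pid;                    /* deadline tie: lower PID wins */
}

int main(void)
{
	struct job t1 = { .deadline = 100, .pid = 17 };
	struct job t2 = { .deadline = 100, .pid = 42 };

	/* same deadline, so the lower PID (t1) has higher priority */
	printf("t1 > t2: %d\n", edf_higher(&t1, &t2));
	printf("t2 > t1: %d\n", edf_higher(&t2, &t1));
	return 0;
}
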
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 000000000000..85be716941d8
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,281 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - object descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops fmlp_sem_ops;
22extern struct fdso_ops srp_sem_ops;
23
24static const struct fdso_ops* fdso_ops[] = {
25 &fmlp_sem_ops,
26 &srp_sem_ops,
27};
28
29static void* fdso_create(obj_type_t type)
30{
31 if (fdso_ops[type]->create)
32 return fdso_ops[type]->create();
33 else
34 return NULL;
35}
36
37static void fdso_destroy(obj_type_t type, void* obj)
38{
39 fdso_ops[type]->destroy(obj);
40}
41
42static int fdso_open(struct od_table_entry* entry, void* __user config)
43{
44 if (fdso_ops[entry->obj->type]->open)
45 return fdso_ops[entry->obj->type]->open(entry, config);
46 else
47 return 0;
48}
49
50static int fdso_close(struct od_table_entry* entry)
51{
52 if (fdso_ops[entry->obj->type]->close)
53 return fdso_ops[entry->obj->type]->close(entry);
54 else
55 return 0;
56}
57
58/* inode must be locked already */
59static struct inode_obj_id* alloc_inode_obj(struct inode* inode,
60 obj_type_t type,
61 unsigned int id)
62{
63 struct inode_obj_id* obj;
64 void* raw_obj;
65
66 raw_obj = fdso_create(type);
67 if (!raw_obj)
68 return NULL;
69
70 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
71 if (!obj)
72 return NULL;
73 INIT_LIST_HEAD(&obj->list);
74 atomic_set(&obj->count, 1);
75 obj->type = type;
76 obj->id = id;
77 obj->obj = raw_obj;
78 obj->inode = inode;
79
80 list_add(&obj->list, &inode->i_obj_list);
81 atomic_inc(&inode->i_count);
82
83 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
84 return obj;
85}
86
87/* inode must be locked already */
88static struct inode_obj_id* get_inode_obj(struct inode* inode,
89 obj_type_t type,
90 unsigned int id)
91{
92 struct list_head* pos;
93 struct inode_obj_id* obj = NULL;
94
95 list_for_each(pos, &inode->i_obj_list) {
96 obj = list_entry(pos, struct inode_obj_id, list);
97 if (obj->id == id && obj->type == type) {
98 atomic_inc(&obj->count);
99 return obj;
100 }
101 }
102 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
103 return NULL;
104}
105
106
107static void put_inode_obj(struct inode_obj_id* obj)
108{
109 struct inode* inode;
110 int let_go = 0;
111
112 inode = obj->inode;
113 if (atomic_dec_and_test(&obj->count)) {
114
115 mutex_lock(&inode->i_obj_mutex);
116 /* no new references can be obtained */
117 if (!atomic_read(&obj->count)) {
118 list_del(&obj->list);
119 fdso_destroy(obj->type, obj->obj);
120 kfree(obj);
121 let_go = 1;
122 }
123 mutex_unlock(&inode->i_obj_mutex);
124 if (let_go)
125 iput(inode);
126 }
127}
128
129static struct od_table_entry* get_od_entry(struct task_struct* t)
130{
131 struct od_table_entry* table;
132 int i;
133
134
135 table = t->od_table;
136 if (!table) {
137 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
138 GFP_KERNEL);
139 t->od_table = table;
140 }
141
142 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
143 if (!table[i].used) {
144 table[i].used = 1;
145 return table + i;
146 }
147 return NULL;
148}
149
150static int put_od_entry(struct od_table_entry* od)
151{
152 put_inode_obj(od->obj);
153 od->used = 0;
154 return 0;
155}
156
157void exit_od_table(struct task_struct* t)
158{
159 int i;
160
161 if (t->od_table) {
162 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
163 if (t->od_table[i].used)
164 put_od_entry(t->od_table + i);
165 kfree(t->od_table);
166 t->od_table = NULL;
167 }
168}
169
170static int do_sys_od_open(struct file* file, obj_type_t type, int id,
171 void* __user config)
172{
173 int idx = 0, err;
174 struct inode* inode;
175 struct inode_obj_id* obj = NULL;
176 struct od_table_entry* entry;
177
178 inode = file->f_dentry->d_inode;
179
180 entry = get_od_entry(current);
181 if (!entry)
182 return -ENOMEM;
183
184 mutex_lock(&inode->i_obj_mutex);
185 obj = get_inode_obj(inode, type, id);
186 if (!obj)
187 obj = alloc_inode_obj(inode, type, id);
188 if (!obj) {
189 idx = -ENOMEM;
190 entry->used = 0;
191 } else {
192 entry->obj = obj;
193 entry->extra = NULL;
194 idx = entry - current->od_table;
195 }
196
197 mutex_unlock(&inode->i_obj_mutex);
198
199 err = fdso_open(entry, config);
200 if (err < 0) {
201 /* The class rejected the open call.
202 * We need to clean up and tell user space.
203 */
204 put_od_entry(entry);
205 idx = err;
206 }
207
208 return idx;
209}
210
211
212struct od_table_entry* __od_lookup(int od)
213{
214 struct task_struct *t = current;
215
216 if (!t->od_table)
217 return NULL;
218 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
219 return NULL;
220 if (!t->od_table[od].used)
221 return NULL;
222 return t->od_table + od;
223}
224
225
226asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
227{
228 int ret = 0;
229 struct file* file;
230
231 /*
232 1) get file from fd, get inode from file
233 2) lock inode
234 3) try to lookup object
235 4) if not present create and enqueue object, inc inode refcnt
236 5) increment refcnt of object
237 6) alloc od_table_entry, setup ptrs
238 7) unlock inode
239 8) return offset in od_table as OD
240 */
241
242 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
243 ret = -EINVAL;
244 goto out;
245 }
246
247 file = fget(fd);
248 if (!file) {
249 ret = -EBADF;
250 goto out;
251 }
252
253 ret = do_sys_od_open(file, type, obj_id, config);
254
255 fput(file);
256
257out:
258 return ret;
259}
260
261
262asmlinkage long sys_od_close(int od)
263{
264 int ret = -EINVAL;
265 struct task_struct *t = current;
266
267 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
268 return ret;
269
270 if (!t->od_table || !t->od_table[od].used)
271 return ret;
272
273
274 /* give the class a chance to reject the close
275 */
276 ret = fdso_close(t->od_table + od);
277 if (ret == 0)
278 ret = put_od_entry(t->od_table + od);
279
280 return ret;
281}
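
The intended user-space usage of the OD table is: open any agreed-upon file to name a shared-object namespace, ask the kernel for an object descriptor with od_open, and later release it with od_close. The sketch below is illustration only; the syscall numbers are placeholders (the real ones live in the patched include/litmus/unistd_*.h), and the object type value 0 assumes the fdso_ops[] ordering above (FMLP first).

#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>

/* placeholders -- check include/litmus/unistd_*.h from this patch */
#define __NR_od_open   350
#define __NR_od_close  351

#define FMLP_SEM_TYPE  0   /* assumed: index of fmlp_sem_ops in fdso_ops[] */

static int litmus_od_open(int fd, int type, int obj_id, void *config)
{
	return syscall(__NR_od_open, fd, type, obj_id, config);
}

static int litmus_od_close(int od)
{
	return syscall(__NR_od_close, od);
}

int fdso_demo(void)
{
	/* every task that opens (this inode, type, id 0) shares one object */
	int fd = open("/tmp/fmlp_namespace", O_RDWR | O_CREAT, 0666);
	int od, ret = -1;

	if (fd < 0)
		return -1;

	od = litmus_od_open(fd, FMLP_SEM_TYPE, 0, NULL);
	if (od >= 0) {
		/* ... pass od to sys_fmlp_down()/sys_fmlp_up() ... */
		litmus_od_close(od);
		ret = 0;
	}
	close(fd);   /* the object keeps its own inode reference */
	return ret;
}
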
diff --git a/litmus/fmlp.c b/litmus/fmlp.c
new file mode 100644
index 000000000000..03fa7358d5eb
--- /dev/null
+++ b/litmus/fmlp.c
@@ -0,0 +1,268 @@
1/*
2 * FMLP implementation.
3 * Much of the code here is borrowed from include/asm-i386/semaphore.h
4 */
5
6#include <asm/atomic.h>
7
8#include <linux/semaphore.h>
9#include <linux/sched.h>
10#include <linux/wait.h>
11#include <linux/spinlock.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/edf_common.h>
16
17#include <litmus/fdso.h>
18
19#include <litmus/trace.h>
20
21#ifdef CONFIG_FMLP
22
23static void* create_fmlp_semaphore(void)
24{
25 struct pi_semaphore* sem;
26 int i;
27
28 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
29 if (!sem)
30 return NULL;
31 atomic_set(&sem->count, 1);
32 sem->sleepers = 0;
33 init_waitqueue_head(&sem->wait);
34 sem->hp.task = NULL;
35 sem->holder = NULL;
36 for (i = 0; i < NR_CPUS; i++)
37 sem->hp.cpu_task[i] = NULL;
38 return sem;
39}
40
41static int open_fmlp_semaphore(struct od_table_entry* entry, void* __user arg)
42{
43 if (!fmlp_active())
44 return -EBUSY;
45 return 0;
46}
47
48static void destroy_fmlp_semaphore(void* sem)
49{
50 /* XXX assert invariants */
51 kfree(sem);
52}
53
54struct fdso_ops fmlp_sem_ops = {
55 .create = create_fmlp_semaphore,
56 .open = open_fmlp_semaphore,
57 .destroy = destroy_fmlp_semaphore
58};
59
60struct wq_pair {
61 struct task_struct* tsk;
62 struct pi_semaphore* sem;
63};
64
65static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync,
66 void *key)
67{
68 struct wq_pair* wqp = (struct wq_pair*) wait->private;
69 set_rt_flags(wqp->tsk, RT_F_EXIT_SEM);
70 litmus->inherit_priority(wqp->sem, wqp->tsk);
71 TRACE_TASK(wqp->tsk,
72 "woken up by rt_pi_wake_up() (RT_F_SEM_EXIT, PI)\n");
73 /* point to task for default_wake_function() */
74 wait->private = wqp->tsk;
75 default_wake_function(wait, mode, sync, key);
76
77 /* Always return true since we know that if we encountered a task
78 * that was already running the wake_up raced with the schedule in
79 * rt_pi_down(). In that case the task in rt_pi_down() will be scheduled
80 * immediately and own the lock. We must not wake up another task in
81 * any case.
82 */
83 return 1;
84}
85
86/* caller is responsible for locking */
87int edf_set_hp_task(struct pi_semaphore *sem)
88{
89 struct list_head *tmp, *next;
90 struct task_struct *queued;
91 int ret = 0;
92
93 sem->hp.task = NULL;
94 list_for_each_safe(tmp, next, &sem->wait.task_list) {
95 queued = ((struct wq_pair*)
96 list_entry(tmp, wait_queue_t,
97 task_list)->private)->tsk;
98
99 /* Compare task prios, find high prio task. */
100 if (edf_higher_prio(queued, sem->hp.task)) {
101 sem->hp.task = queued;
102 ret = 1;
103 }
104 }
105 return ret;
106}
107
108/* caller is responsible for locking */
109int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu)
110{
111 struct list_head *tmp, *next;
112 struct task_struct *queued;
113 int ret = 0;
114
115 sem->hp.cpu_task[cpu] = NULL;
116 list_for_each_safe(tmp, next, &sem->wait.task_list) {
117 queued = ((struct wq_pair*)
118 list_entry(tmp, wait_queue_t,
119 task_list)->private)->tsk;
120
121 /* Compare task prios, find high prio task. */
122 if (get_partition(queued) == cpu &&
123 edf_higher_prio(queued, sem->hp.cpu_task[cpu])) {
124 sem->hp.cpu_task[cpu] = queued;
125 ret = 1;
126 }
127 }
128 return ret;
129}
130
131static int do_fmlp_down(struct pi_semaphore* sem)
132{
133 unsigned long flags;
134 struct task_struct *tsk = current;
135 struct wq_pair pair;
136 int suspended = 1;
137 wait_queue_t wait = {
138 .private = &pair,
139 .func = rt_pi_wake_up,
140 .task_list = {NULL, NULL}
141 };
142
143 pair.tsk = tsk;
144 pair.sem = sem;
145 spin_lock_irqsave(&sem->wait.lock, flags);
146
147 if (atomic_dec_return(&sem->count) < 0 ||
148 waitqueue_active(&sem->wait)) {
149 /* we need to suspend */
150 tsk->state = TASK_UNINTERRUPTIBLE;
151 add_wait_queue_exclusive_locked(&sem->wait, &wait);
152
153 TRACE_CUR("suspends on PI lock %p\n", sem);
154 litmus->pi_block(sem, tsk);
155
156 /* release lock before sleeping */
157 spin_unlock_irqrestore(&sem->wait.lock, flags);
158
159 TS_PI_DOWN_END;
160 preempt_enable_no_resched();
161
162
163 /* We depend on the FIFO order.
164 * Thus, we don't need to recheck when we wake up; we
165 * are guaranteed to have the lock since there is only one
166 * wake-up per release.
167 */
168 schedule();
169
170 TRACE_CUR("woke up, now owns PI lock %p\n", sem);
171
172 /* try_to_wake_up() set our state to TASK_RUNNING,
173 * all we need to do is to remove our wait queue entry
174 */
175 remove_wait_queue(&sem->wait, &wait);
176 } else {
177 /* no priority inheritance necessary, since there are no queued
178 * tasks.
179 */
180 suspended = 0;
181 TRACE_CUR("acquired PI lock %p, no contention\n", sem);
182 sem->holder = tsk;
183
184 /* don't know if we're global or partitioned. */
185 sem->hp.task = tsk;
186 sem->hp.cpu_task[get_partition(tsk)] = tsk;
187
188 litmus->inherit_priority(sem, tsk);
189 spin_unlock_irqrestore(&sem->wait.lock, flags);
190 }
191 return suspended;
192}
193
194static void do_fmlp_up(struct pi_semaphore* sem)
195{
196 unsigned long flags;
197
198 spin_lock_irqsave(&sem->wait.lock, flags);
199
200 TRACE_CUR("releases PI lock %p\n", sem);
201 litmus->return_priority(sem);
202 sem->holder = NULL;
203 if (atomic_inc_return(&sem->count) < 1)
204 /* there is a task queued */
205 wake_up_locked(&sem->wait);
206
207 spin_unlock_irqrestore(&sem->wait.lock, flags);
208}
209
210asmlinkage long sys_fmlp_down(int sem_od)
211{
212 long ret = 0;
213 struct pi_semaphore * sem;
214 int suspended = 0;
215
216 preempt_disable();
217 TS_PI_DOWN_START;
218
219 sem = lookup_fmlp_sem(sem_od);
220 if (sem)
221 suspended = do_fmlp_down(sem);
222 else
223 ret = -EINVAL;
224
225 if (!suspended) {
226 TS_PI_DOWN_END;
227 preempt_enable();
228 }
229
230 return ret;
231}
232
233asmlinkage long sys_fmlp_up(int sem_od)
234{
235 long ret = 0;
236 struct pi_semaphore * sem;
237
238 preempt_disable();
239 TS_PI_UP_START;
240
241 sem = lookup_fmlp_sem(sem_od);
242 if (sem)
243 do_fmlp_up(sem);
244 else
245 ret = -EINVAL;
246
247
248 TS_PI_UP_END;
249 preempt_enable();
250
251 return ret;
252}
253
254#else
255
256struct fdso_ops fmlp_sem_ops = {};
257
258asmlinkage long sys_fmlp_down(int sem_od)
259{
260 return -ENOSYS;
261}
262
263asmlinkage long sys_fmlp_up(int sem_od)
264{
265 return -ENOSYS;
266}
267
268#endif
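
From a task's perspective the protocol above boils down to a down/up pair around each critical section, with suspension (in FIFO order, with priority inheritance) handled entirely in the kernel. A minimal user-space sketch follows; it is illustration only, the syscall numbers are placeholders, and sem_od is an object descriptor obtained as in the fdso.c sketch above.

#include <unistd.h>
#include <sys/syscall.h>

/* placeholders -- check include/litmus/unistd_*.h from this patch */
#define __NR_fmlp_down 358
#define __NR_fmlp_up   359

static long fmlp_down(int sem_od) { return syscall(__NR_fmlp_down, sem_od); }
static long fmlp_up(int sem_od)   { return syscall(__NR_fmlp_up, sem_od); }

static void critical_section(void)
{
	/* access the shared resource here */
}

void job_body(int sem_od)
{
	/* blocks until the semaphore is ours; returns 0 on success */
	if (fmlp_down(sem_od) == 0) {
		critical_section();
		/* releases the lock and returns any inherited priority */
		fmlp_up(sem_od);
	}
}
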
diff --git a/litmus/ft_event.c b/litmus/ft_event.c
new file mode 100644
index 000000000000..6084b6d6b364
--- /dev/null
+++ b/litmus/ft_event.c
@@ -0,0 +1,43 @@
1#include <linux/types.h>
2
3#include <litmus/feather_trace.h>
4
5#ifndef __ARCH_HAS_FEATHER_TRACE
6/* provide dummy implementation */
7
8int ft_events[MAX_EVENTS];
9
10int ft_enable_event(unsigned long id)
11{
12 if (id < MAX_EVENTS) {
13 ft_events[id]++;
14 return 1;
15 } else
16 return 0;
17}
18
19int ft_disable_event(unsigned long id)
20{
21 if (id < MAX_EVENTS && ft_events[id]) {
22 ft_events[id]--;
23 return 1;
24 } else
25 return 0;
26}
27
28int ft_disable_all_events(void)
29{
30 int i;
31
32 for (i = 0; i < MAX_EVENTS; i++)
33 ft_events[i] = 0;
34
35 return MAX_EVENTS;
36}
37
38int ft_is_event_enabled(unsigned long id)
39{
40 return id < MAX_EVENTS && ft_events[id];
41}
42
43#endif
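
The fallback above treats ft_events[id] as a nesting counter: an event stays enabled as long as at least one enable is outstanding, so enables and disables must be balanced. A tiny self-check, illustration only, assuming the architecture indeed lacks __ARCH_HAS_FEATHER_TRACE:

#include <linux/kernel.h>
#include <litmus/feather_trace.h>

static void ft_event_selftest(void)
{
	ft_enable_event(5);
	ft_enable_event(5);          /* ft_events[5] == 2 */
	ft_disable_event(5);         /* ft_events[5] == 1, still enabled */
	WARN_ON(!ft_is_event_enabled(5));
	ft_disable_event(5);         /* ft_events[5] == 0, now disabled */
	WARN_ON(ft_is_event_enabled(5));
}
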
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
new file mode 100644
index 000000000000..8b2d74d816a2
--- /dev/null
+++ b/litmus/ftdev.c
@@ -0,0 +1,359 @@
1#include <linux/sched.h>
2#include <linux/fs.h>
3#include <linux/cdev.h>
4#include <asm/uaccess.h>
5#include <linux/module.h>
6
7#include <litmus/litmus.h>
8#include <litmus/feather_trace.h>
9#include <litmus/ftdev.h>
10
11struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
12{
13 struct ft_buffer* buf;
14 size_t total = (size + 1) * count;
15 char* mem;
16 int order = 0, pages = 1;
17
18 buf = kmalloc(sizeof(*buf), GFP_KERNEL);
19 if (!buf)
20 return NULL;
21
22 total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
23 while (pages < total) {
24 order++;
25 pages *= 2;
26 }
27
28 mem = (char*) __get_free_pages(GFP_KERNEL, order);
29 if (!mem) {
30 kfree(buf);
31 return NULL;
32 }
33
34 if (!init_ft_buffer(buf, count, size,
35 mem + (count * size), /* markers at the end */
36 mem)) { /* buffer objects */
37 free_pages((unsigned long) mem, order);
38 kfree(buf);
39 return NULL;
40 }
41 return buf;
42}
43
44void free_ft_buffer(struct ft_buffer* buf)
45{
46 int order = 0, pages = 1;
47 size_t total;
48
49 if (buf) {
50 total = (buf->slot_size + 1) * buf->slot_count;
51 total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
52 while (pages < total) {
53 order++;
54 pages *= 2;
55 }
56 free_pages((unsigned long) buf->buffer_mem, order);
57 kfree(buf);
58 }
59}
60
61struct ftdev_event {
62 int id;
63 struct ftdev_event* next;
64};
65
66static int activate(struct ftdev_event** chain, int id)
67{
68 struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL);
69 if (ev) {
70 printk(KERN_INFO
71 "Enabling feather-trace event %d.\n", (int) id);
72 ft_enable_event(id);
73 ev->id = id;
74 ev->next = *chain;
75 *chain = ev;
76 }
77 return ev ? 0 : -ENOMEM;
78}
79
80static void deactivate(struct ftdev_event** chain, int id)
81{
82 struct ftdev_event **cur = chain;
83 struct ftdev_event *nxt;
84 while (*cur) {
85 if ((*cur)->id == id) {
86 nxt = (*cur)->next;
87 kfree(*cur);
88 *cur = nxt;
89 printk(KERN_INFO
90 "Disabling feather-trace event %d.\n", (int) id);
91 ft_disable_event(id);
92 break;
93 }
94 cur = &(*cur)->next;
95 }
96}
97
98static int ftdev_open(struct inode *in, struct file *filp)
99{
100 struct ftdev* ftdev;
101 struct ftdev_minor* ftdm;
102 unsigned int buf_idx = iminor(in);
103 int err = 0;
104
105 ftdev = container_of(in->i_cdev, struct ftdev, cdev);
106
107 if (buf_idx >= ftdev->minor_cnt) {
108 err = -ENODEV;
109 goto out;
110 }
111 if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx)))
112 goto out;
113
114 ftdm = ftdev->minor + buf_idx;
115 filp->private_data = ftdm;
116
117 if (mutex_lock_interruptible(&ftdm->lock)) {
118 err = -ERESTARTSYS;
119 goto out;
120 }
121
122 if (!ftdm->readers && ftdev->alloc)
123 err = ftdev->alloc(ftdev, buf_idx);
124 if (0 == err)
125 ftdm->readers++;
126
127 mutex_unlock(&ftdm->lock);
128out:
129 return err;
130}
131
132static int ftdev_release(struct inode *in, struct file *filp)
133{
134 struct ftdev* ftdev;
135 struct ftdev_minor* ftdm;
136 unsigned int buf_idx = iminor(in);
137 int err = 0;
138
139 ftdev = container_of(in->i_cdev, struct ftdev, cdev);
140
141 if (buf_idx >= ftdev->minor_cnt) {
142 err = -ENODEV;
143 goto out;
144 }
145 ftdm = ftdev->minor + buf_idx;
146
147 if (mutex_lock_interruptible(&ftdm->lock)) {
148 err = -ERESTARTSYS;
149 goto out;
150 }
151
152 if (ftdm->readers == 1) {
153 while (ftdm->events)
154 deactivate(&ftdm->events, ftdm->events->id);
155
156 /* wait for any pending events to complete */
157 set_current_state(TASK_UNINTERRUPTIBLE);
158 schedule_timeout(HZ);
159
160 printk(KERN_ALERT "Failed trace writes: %u\n",
161 ftdm->buf->failed_writes);
162
163 if (ftdev->free)
164 ftdev->free(ftdev, buf_idx);
165 }
166
167 ftdm->readers--;
168 mutex_unlock(&ftdm->lock);
169out:
170 return err;
171}
172
173/* based on ft_buffer_read
174 * @returns < 0 : page fault
175 * = 0 : no data available
176 * = 1 : one slot copied
177 */
178static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest)
179{
180 unsigned int idx;
181 int err = 0;
182 if (buf->free_count != buf->slot_count) {
183 /* data available */
184 idx = buf->read_idx % buf->slot_count;
185 if (buf->slots[idx] == SLOT_READY) {
186 err = copy_to_user(dest, ((char*) buf->buffer_mem) +
187 idx * buf->slot_size,
188 buf->slot_size);
189 if (err == 0) {
190 /* copy ok */
191 buf->slots[idx] = SLOT_FREE;
192 buf->read_idx++;
193 fetch_and_inc(&buf->free_count);
194 err = 1;
195 }
196 }
197 }
198 return err;
199}
200
201static ssize_t ftdev_read(struct file *filp,
202 char __user *to, size_t len, loff_t *f_pos)
203{
204 /* we ignore f_pos, this is strictly sequential */
205
206 ssize_t err = 0;
207 size_t chunk;
208 int copied;
209 struct ftdev_minor* ftdm = filp->private_data;
210
211 if (mutex_lock_interruptible(&ftdm->lock)) {
212 err = -ERESTARTSYS;
213 goto out;
214 }
215
216
217 chunk = ftdm->buf->slot_size;
218 while (len >= chunk) {
219 copied = ft_buffer_copy_to_user(ftdm->buf, to);
220 if (copied == 1) {
221 len -= chunk;
222 to += chunk;
223 err += chunk;
224 } else if (err == 0 && copied == 0 && ftdm->events) {
225 /* Only wait if there are any events enabled and only
226 * if we haven't copied some data yet. We cannot wait
227 * here with copied data because that data would get
228 * lost if the task is interrupted (e.g., killed).
229 */
230 set_current_state(TASK_INTERRUPTIBLE);
231 schedule_timeout(50);
232 if (signal_pending(current)) {
233 if (err == 0)
234 /* nothing read yet, signal problem */
235 err = -ERESTARTSYS;
236 break;
237 }
238 } else if (copied < 0) {
239 /* page fault */
240 err = copied;
241 break;
242 } else
243 /* nothing left to get, return to user space */
244 break;
245 }
246 mutex_unlock(&ftdm->lock);
247out:
248 return err;
249}
250
251typedef uint32_t cmd_t;
252
253static ssize_t ftdev_write(struct file *filp, const char __user *from,
254 size_t len, loff_t *f_pos)
255{
256 struct ftdev_minor* ftdm = filp->private_data;
257 ssize_t err = -EINVAL;
258 cmd_t cmd;
259 cmd_t id;
260
261 if (len % sizeof(cmd) || len < 2 * sizeof(cmd))
262 goto out;
263
264 if (copy_from_user(&cmd, from, sizeof(cmd))) {
265 err = -EFAULT;
266 goto out;
267 }
268 len -= sizeof(cmd);
269 from += sizeof(cmd);
270
271 if (cmd != FTDEV_ENABLE_CMD && cmd != FTDEV_DISABLE_CMD)
272 goto out;
273
274 if (mutex_lock_interruptible(&ftdm->lock)) {
275 err = -ERESTARTSYS;
276 goto out;
277 }
278
279 err = sizeof(cmd);
280 while (len) {
281 if (copy_from_user(&id, from, sizeof(cmd))) {
282 err = -EFAULT;
283 goto out_unlock;
284 }
285 /* FIXME: check id against list of acceptable events */
286 len -= sizeof(cmd);
287 from += sizeof(cmd);
288 if (cmd == FTDEV_DISABLE_CMD)
289 deactivate(&ftdm->events, id);
290 else if (activate(&ftdm->events, id) != 0) {
291 err = -ENOMEM;
292 goto out_unlock;
293 }
294 err += sizeof(cmd);
295 }
296
297out_unlock:
298 mutex_unlock(&ftdm->lock);
299out:
300 return err;
301}
302
303struct file_operations ftdev_fops = {
304 .owner = THIS_MODULE,
305 .open = ftdev_open,
306 .release = ftdev_release,
307 .write = ftdev_write,
308 .read = ftdev_read,
309};
310
311
312void ftdev_init(struct ftdev* ftdev, struct module* owner)
313{
314 int i;
315 cdev_init(&ftdev->cdev, &ftdev_fops);
316 ftdev->cdev.owner = owner;
317 ftdev->cdev.ops = &ftdev_fops;
318 ftdev->minor_cnt = 0;
319 for (i = 0; i < MAX_FTDEV_MINORS; i++) {
320 mutex_init(&ftdev->minor[i].lock);
321 ftdev->minor[i].readers = 0;
322 ftdev->minor[i].buf = NULL;
323 ftdev->minor[i].events = NULL;
324 }
325 ftdev->alloc = NULL;
326 ftdev->free = NULL;
327 ftdev->can_open = NULL;
328}
329
330int register_ftdev(struct ftdev* ftdev, const char* name, int major)
331{
332 dev_t trace_dev;
333 int error = 0;
334
335 if(major) {
336 trace_dev = MKDEV(major, 0);
337 error = register_chrdev_region(trace_dev, ftdev->minor_cnt,
338 name);
339 } else {
340 error = alloc_chrdev_region(&trace_dev, 0, ftdev->minor_cnt,
341 name);
342 major = MAJOR(trace_dev);
343 }
344 if (error)
345 {
346 printk(KERN_WARNING "ftdev(%s): "
347 "Could not register major/minor number %d/%u\n",
348 name, major, ftdev->minor_cnt);
349 return error;
350 }
351 error = cdev_add(&ftdev->cdev, trace_dev, ftdev->minor_cnt);
352 if (error) {
353 printk(KERN_WARNING "ftdev(%s): "
354 "Could not add cdev for major/minor = %d/%u.\n",
355 name, major, ftdev->minor_cnt);
356 return error;
357 }
358 return error;
359}
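
The write path above expects a stream of 32-bit words: one FTDEV_ENABLE_CMD or FTDEV_DISABLE_CMD followed by the event IDs it applies to, and reads hand back whole slot_size records. A user-space sketch is shown below; it is illustration only, and the device path, the command value and the 64-byte slot size are placeholders (the real values come from the patched include/litmus/ftdev.h and from however the char device is exposed).

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define FTDEV_ENABLE_CMD  0   /* placeholder -- see include/litmus/ftdev.h */

int main(void)
{
	uint32_t cmd[3] = { FTDEV_ENABLE_CMD, 100, 101 };  /* enable events 100, 101 */
	char slot[64];   /* must match the buffer's slot_size */
	int fd = open("/dev/litmus/ft_trace0", O_RDWR);    /* placeholder node */
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (write(fd, cmd, sizeof(cmd)) != sizeof(cmd))
		perror("write");

	/* ftdev_read() hands back whole slots; len must be >= slot_size */
	while ((n = read(fd, slot, sizeof(slot))) > 0)
		fwrite(slot, 1, n, stdout);

	close(fd);
	return 0;
}
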
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 000000000000..36e314625d86
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,43 @@
1/* litmus/jobs.c - common job control code
2 */
3
4#include <linux/sched.h>
5
6#include <litmus/litmus.h>
7#include <litmus/jobs.h>
8
9void prepare_for_next_period(struct task_struct *t)
10{
11 BUG_ON(!t);
12 /* prepare next release */
13 t->rt_param.job_params.release = t->rt_param.job_params.deadline;
14 t->rt_param.job_params.deadline += get_rt_period(t);
15 t->rt_param.job_params.exec_time = 0;
16 /* update job sequence number */
17 t->rt_param.job_params.job_no++;
18
19 /* don't confuse Linux */
20 t->rt.time_slice = 1;
21}
22
23void release_at(struct task_struct *t, lt_t start)
24{
25 t->rt_param.job_params.deadline = start;
26 prepare_for_next_period(t);
27 set_rt_flags(t, RT_F_RUNNING);
28}
29
30
31/*
32 * Deactivate current task until the beginning of the next period.
33 */
34long complete_job(void)
35{
36 /* Mark that we do not execute anymore */
37 set_rt_flags(current, RT_F_SLEEP);
38 /* call schedule; this will return when a new job arrives.
39 * It also takes care of preparing for the next release.
40 */
41 schedule();
42 return 0;
43}
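
prepare_for_next_period() and complete_job() implement the kernel half of the usual periodic-task loop: do one job's worth of work, then sleep until the next release while the kernel advances release, deadline and job_no. A user-space sketch of that loop follows (illustration only; the wrapper name and syscall number are placeholders).

#include <unistd.h>
#include <sys/syscall.h>

#define __NR_complete_job 357   /* placeholder -- see include/litmus/unistd_*.h */

static long complete_job(void) { return syscall(__NR_complete_job); }

static void do_one_job(void)
{
	/* the per-period workload goes here */
}

int periodic_main(void)
{
	for (;;) {
		do_one_job();
		/* sleep until the next release; the kernel advances
		 * release, deadline and job_no as in prepare_for_next_period() */
		if (complete_job() != 0)
			break;
	}
	return 0;
}
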
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..5bf848386e1c
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,786 @@
1/*
2 * litmus.c -- Implementation of the LITMUS syscalls,
3 * the LITMUS initialization code,
4 * and the procfs interface.
5 */
6#include <asm/uaccess.h>
7#include <linux/uaccess.h>
8#include <linux/sysrq.h>
9
10#include <linux/module.h>
11#include <linux/proc_fs.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <linux/sched.h>
16#include <litmus/sched_plugin.h>
17
18#include <litmus/bheap.h>
19
20#include <litmus/trace.h>
21
22#include <litmus/rt_domain.h>
23
24/* Number of RT tasks that exist in the system */
25atomic_t rt_task_count = ATOMIC_INIT(0);
26static DEFINE_SPINLOCK(task_transition_lock);
27/* synchronize plugin switching */
28atomic_t cannot_use_plugin = ATOMIC_INIT(0);
29
30/* Give log messages sequential IDs. */
31atomic_t __log_seq_no = ATOMIC_INIT(0);
32
33/* current master CPU for handling timer IRQs */
34atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
35
36static struct kmem_cache * bheap_node_cache;
37extern struct kmem_cache * release_heap_cache;
38
39struct bheap_node* bheap_node_alloc(int gfp_flags)
40{
41 return kmem_cache_alloc(bheap_node_cache, gfp_flags);
42}
43
44void bheap_node_free(struct bheap_node* hn)
45{
46 kmem_cache_free(bheap_node_cache, hn);
47}
48
49struct release_heap* release_heap_alloc(int gfp_flags);
50void release_heap_free(struct release_heap* rh);
51
52/*
53 * sys_set_task_rt_param
54 * @pid: Pid of the task whose scheduling parameters must be changed
55 * @param: New real-time extension parameters such as the execution cost and
56 * period
57 * Syscall for manipulating a task's RT extension params
58 * Returns EFAULT if param is NULL.
59 * ESRCH if pid does not correspond
60 * to a valid task.
61 * EINVAL if either period or execution cost is <=0
62 * EBUSY if pid is already a real-time task
63 * 0 if success
64 *
65 * Only non-real-time tasks may be configured with this system call
66 * to avoid races with the scheduler. In practice, this means that a
67 * task's parameters must be set _before_ calling sys_prepare_rt_task()
68 *
69 * find_task_by_vpid() assumes that we are in the same namespace of the
70 * target.
71 */
72asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
73{
74 struct rt_task tp;
75 struct task_struct *target;
76 int retval = -EINVAL;
77
78 printk("Setting up rt task parameters for process %d.\n", pid);
79
80 if (pid < 0 || param == 0) {
81 goto out;
82 }
83 if (copy_from_user(&tp, param, sizeof(tp))) {
84 retval = -EFAULT;
85 goto out;
86 }
87
88 /* Task search and manipulation must be protected */
89 read_lock_irq(&tasklist_lock);
90 if (!(target = find_task_by_vpid(pid))) {
91 retval = -ESRCH;
92 goto out_unlock;
93 }
94
95 if (is_realtime(target)) {
96 /* The task is already a real-time task.
97 * We cannot allow parameter changes at this point.
98 */
99 retval = -EBUSY;
100 goto out_unlock;
101 }
102
103 if (tp.exec_cost <= 0)
104 goto out_unlock;
105 if (tp.period <= 0)
106 goto out_unlock;
107 if (!cpu_online(tp.cpu))
108 goto out_unlock;
109 if (tp.period < tp.exec_cost)
110 {
111 printk(KERN_INFO "litmus: real-time task %d rejected "
112 "because wcet > period\n", pid);
113 goto out_unlock;
114 }
115 if (tp.budget_policy != NO_ENFORCEMENT &&
116 tp.budget_policy != QUANTUM_ENFORCEMENT)
117 {
118 printk(KERN_INFO "litmus: real-time task %d rejected "
119 "because unsupported budget enforcement policy specified\n", pid);
120 goto out_unlock;
121 }
122
123 target->rt_param.task_params = tp;
124
125 retval = 0;
126 out_unlock:
127 read_unlock_irq(&tasklist_lock);
128 out:
129 return retval;
130}
131
132/*
133 * Getter of task's RT params
134 * returns EINVAL if param is NULL or pid is invalid
135 * returns ESRCH if pid does not correspond to a valid task
136 * returns EFAULT if copying of parameters has failed.
137 *
138 * find_task_by_vpid() assumes that we are in the same namespace of the
139 * target.
140 */
141asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
142{
143 int retval = -EINVAL;
144 struct task_struct *source;
145 struct rt_task lp;
146 if (param == 0 || pid < 0)
147 goto out;
148 read_lock(&tasklist_lock);
149 if (!(source = find_task_by_vpid(pid))) {
150 retval = -ESRCH;
151 goto out_unlock;
152 }
153 lp = source->rt_param.task_params;
154 read_unlock(&tasklist_lock);
155 /* Do copying outside the lock */
156 retval =
157 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
158 return retval;
159 out_unlock:
160 read_unlock(&tasklist_lock);
161 out:
162 return retval;
163
164}
165
166/*
167 * This is the crucial function for the periodic task implementation.
168 * It checks whether the task is periodic and whether this kind of sleep
169 * is permitted, and then calls the plugin-specific sleep, which puts the
170 * task into a wait array.
171 * returns 0 on successful wakeup
172 * returns EPERM if current conditions do not permit such sleep
173 * returns EINVAL if current task is not able to go to sleep
174 */
175asmlinkage long sys_complete_job(void)
176{
177 int retval = -EPERM;
178 if (!is_realtime(current)) {
179 retval = -EINVAL;
180 goto out;
181 }
182 /* Task with negative or zero period cannot sleep */
183 if (get_rt_period(current) <= 0) {
184 retval = -EINVAL;
185 goto out;
186 }
187 /* The plugin has to put the task into an
188 * appropriate queue and call schedule
189 */
190 retval = litmus->complete_job();
191 out:
192 return retval;
193}
194
195/* This is an "improved" version of sys_complete_job that
196 * addresses the problem of unintentionally missing a job after
197 * an overrun.
198 *
199 * returns 0 on successful wakeup
200 * returns EPERM if current conditions do not permit such sleep
201 * returns EINVAL if current task is not able to go to sleep
202 */
203asmlinkage long sys_wait_for_job_release(unsigned int job)
204{
205 int retval = -EPERM;
206 if (!is_realtime(current)) {
207 retval = -EINVAL;
208 goto out;
209 }
210
211 /* Task with negative or zero period cannot sleep */
212 if (get_rt_period(current) <= 0) {
213 retval = -EINVAL;
214 goto out;
215 }
216
217 retval = 0;
218
219 /* first wait until we have "reached" the desired job
220 *
221 * This implementation has at least two problems:
222 *
223 * 1) It doesn't gracefully handle the wrap around of
224 * job_no. Since LITMUS is a prototype, this is not much
225 * of a problem right now.
226 *
227 * 2) It is theoretically racy if a job release occurs
228 * between checking job_no and calling sleep_next_period().
229 * A proper solution would require adding another callback
230 * in the plugin structure and testing the condition with
231 * interrupts disabled.
232 *
233 * FIXME: At least problem 2 should be taken care of eventually.
234 */
235 while (!retval && job > current->rt_param.job_params.job_no)
236 /* If the last job overran then job <= job_no and we
237 * don't send the task to sleep.
238 */
239 retval = litmus->complete_job();
240 out:
241 return retval;
242}
243
244/* This is a helper syscall to query the current job sequence number.
245 *
246 * returns 0 on successful query
247 * returns EPERM if task is not a real-time task.
248 * returns EFAULT if &job is not a valid pointer.
249 */
250asmlinkage long sys_query_job_no(unsigned int __user *job)
251{
252 int retval = -EPERM;
253 if (is_realtime(current))
254 retval = put_user(current->rt_param.job_params.job_no, job);
255
256 return retval;
257}
258
259/* sys_null_call() is only used for determining raw system call
260 * overheads (kernel entry, kernel exit). It has no useful side effects.
261 * If ts is non-NULL, then the current Feather-Trace time is recorded.
262 */
263asmlinkage long sys_null_call(cycles_t __user *ts)
264{
265 long ret = 0;
266 cycles_t now;
267
268 if (ts) {
269 now = get_cycles();
270 ret = put_user(now, ts);
271 }
272
273 return ret;
274}
275
276/* p is a real-time task. Re-init its state as a best-effort task. */
277static void reinit_litmus_state(struct task_struct* p, int restore)
278{
279 struct rt_task user_config = {};
280 void* ctrl_page = NULL;
281
282 if (restore) {
283 /* Save the user-space provided configuration data
284 * and the allocated page. */
285 user_config = p->rt_param.task_params;
286 ctrl_page = p->rt_param.ctrl_page;
287 }
288
289 /* We probably should not be inheriting any task's priority
290 * at this point in time.
291 */
292 WARN_ON(p->rt_param.inh_task);
293
294 /* We need to restore the priority of the task. */
295// __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio); XXX why is this commented?
296
297 /* Cleanup everything else. */
298 memset(&p->rt_param, 0, sizeof(p->rt_param));
299
300 /* Restore preserved fields. */
301 if (restore) {
302 p->rt_param.task_params = user_config;
303 p->rt_param.ctrl_page = ctrl_page;
304 }
305}
306
307long litmus_admit_task(struct task_struct* tsk)
308{
309 long retval = 0;
310 unsigned long flags;
311
312 BUG_ON(is_realtime(tsk));
313
314 if (get_rt_period(tsk) == 0 ||
315 get_exec_cost(tsk) > get_rt_period(tsk)) {
316 TRACE_TASK(tsk, "litmus admit: invalid task parameters "
317 "(%lu, %lu)\n",
318 get_exec_cost(tsk), get_rt_period(tsk));
319 retval = -EINVAL;
320 goto out;
321 }
322
323 if (!cpu_online(get_partition(tsk))) {
324 TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
325 get_partition(tsk));
326 retval = -EINVAL;
327 goto out;
328 }
329
330 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
331
332 /* avoid scheduler plugin changing underneath us */
333 spin_lock_irqsave(&task_transition_lock, flags);
334
335 /* allocate heap node for this task */
336 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
337 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
338
339 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
340 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
341
342 bheap_node_free(tsk_rt(tsk)->heap_node);
343 release_heap_free(tsk_rt(tsk)->rel_heap);
344
345 retval = -ENOMEM;
346 goto out_unlock;
347 } else {
348 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
349 }
350
351 retval = litmus->admit_task(tsk);
352
353 if (!retval) {
354 sched_trace_task_name(tsk);
355 sched_trace_task_param(tsk);
356 atomic_inc(&rt_task_count);
357 }
358
359out_unlock:
360 spin_unlock_irqrestore(&task_transition_lock, flags);
361out:
362 return retval;
363}
364
365void litmus_exit_task(struct task_struct* tsk)
366{
367 if (is_realtime(tsk)) {
368 sched_trace_task_completion(tsk, 1);
369
370 litmus->task_exit(tsk);
371
372 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
373 bheap_node_free(tsk_rt(tsk)->heap_node);
374 release_heap_free(tsk_rt(tsk)->rel_heap);
375
376 atomic_dec(&rt_task_count);
377 reinit_litmus_state(tsk, 1);
378 }
379}
380
381/* IPI callback to synchronize plugin switching */
382static void synch_on_plugin_switch(void* info)
383{
384 while (atomic_read(&cannot_use_plugin))
385 cpu_relax();
386}
387
388/* Switching a plugin in use is tricky.
389 * We must watch out that no real-time tasks exist
390 * (and that none is created in parallel) and that the plugin is not
391 * currently in use on any processor (in theory).
392 */
393int switch_sched_plugin(struct sched_plugin* plugin)
394{
395 unsigned long flags;
396 int ret = 0;
397
398 BUG_ON(!plugin);
399
400 /* forbid other cpus to use the plugin */
401 atomic_set(&cannot_use_plugin, 1);
402 /* send IPI to force other CPUs to synch with us */
403 smp_call_function(synch_on_plugin_switch, NULL, 0);
404
405 /* stop task transitions */
406 spin_lock_irqsave(&task_transition_lock, flags);
407
408 /* don't switch if there are active real-time tasks */
409 if (atomic_read(&rt_task_count) == 0) {
410 ret = litmus->deactivate_plugin();
411 if (0 != ret)
412 goto out;
413 ret = plugin->activate_plugin();
414 if (0 != ret) {
415 printk(KERN_INFO "Can't activate %s (%d).\n",
416 plugin->plugin_name, ret);
417 plugin = &linux_sched_plugin;
418 }
419 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
420 litmus = plugin;
421 } else
422 ret = -EBUSY;
423out:
424 spin_unlock_irqrestore(&task_transition_lock, flags);
425 atomic_set(&cannot_use_plugin, 0);
426 return ret;
427}
428
429/* Called upon fork.
430 * p is the newly forked task.
431 */
432void litmus_fork(struct task_struct* p)
433{
434 if (is_realtime(p))
435 /* clean out any litmus related state, don't preserve anything */
436 reinit_litmus_state(p, 0);
437 else
438 /* non-rt tasks might have ctrl_page set */
439 tsk_rt(p)->ctrl_page = NULL;
440
441 /* od tables are never inherited across a fork */
442 p->od_table = NULL;
443}
444
445/* Called upon execve().
446 * current is doing the exec.
447 * Don't let address space specific stuff leak.
448 */
449void litmus_exec(void)
450{
451 struct task_struct* p = current;
452
453 if (is_realtime(p)) {
454 WARN_ON(p->rt_param.inh_task);
455 if (tsk_rt(p)->ctrl_page) {
456 free_page((unsigned long) tsk_rt(p)->ctrl_page);
457 tsk_rt(p)->ctrl_page = NULL;
458 }
459 }
460}
461
462void exit_litmus(struct task_struct *dead_tsk)
463{
464 /* We also allow non-RT tasks to
465 * allocate control pages to allow
466 * measurements with non-RT tasks.
467 * So check if we need to free the page
468 * in any case.
469 */
470 if (tsk_rt(dead_tsk)->ctrl_page) {
471 TRACE_TASK(dead_tsk,
472 "freeing ctrl_page %p\n",
473 tsk_rt(dead_tsk)->ctrl_page);
474 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
475 }
476
477 /* main cleanup only for RT tasks */
478 if (is_realtime(dead_tsk))
479 litmus_exit_task(dead_tsk);
480}
481
482
483#ifdef CONFIG_MAGIC_SYSRQ
484int sys_kill(int pid, int sig);
485
486static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty)
487{
488 struct task_struct *t;
489 read_lock(&tasklist_lock);
490 for_each_process(t) {
491 if (is_realtime(t)) {
492 sys_kill(t->pid, SIGKILL);
493 }
494 }
495 read_unlock(&tasklist_lock);
496}
497
498static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
499 .handler = sysrq_handle_kill_rt_tasks,
500 .help_msg = "quit-rt-tasks(X)",
501 .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
502};
503#endif
504
505/* in litmus/sync.c */
506int count_tasks_waiting_for_release(void);
507
508static int proc_read_stats(char *page, char **start,
509 off_t off, int count,
510 int *eof, void *data)
511{
512 int len;
513
514 len = snprintf(page, PAGE_SIZE,
515 "real-time tasks = %d\n"
516 "ready for release = %d\n",
517 atomic_read(&rt_task_count),
518 count_tasks_waiting_for_release());
519 return len;
520}
521
522static int proc_read_plugins(char *page, char **start,
523 off_t off, int count,
524 int *eof, void *data)
525{
526 int len;
527
528 len = print_sched_plugins(page, PAGE_SIZE);
529 return len;
530}
531
532static int proc_read_curr(char *page, char **start,
533 off_t off, int count,
534 int *eof, void *data)
535{
536 int len;
537
538 len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name);
539 return len;
540}
541
542static int proc_write_curr(struct file *file,
543 const char *buffer,
544 unsigned long count,
545 void *data)
546{
547 int len, ret;
548 char name[65];
549 struct sched_plugin* found;
550
551 if(count > 64)
552 len = 64;
553 else
554 len = count;
555
556 if(copy_from_user(name, buffer, len))
557 return -EFAULT;
558
559 name[len] = '\0';
560 /* chomp name */
561 if (len > 1 && name[len - 1] == '\n')
562 name[len - 1] = '\0';
563
564 found = find_sched_plugin(name);
565
566 if (found) {
567 ret = switch_sched_plugin(found);
568 if (ret != 0)
569 printk(KERN_INFO "Could not switch plugin: %d\n", ret);
570 } else
571 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
572
573 return len;
574}
575
576static int proc_read_cluster_size(char *page, char **start,
577 off_t off, int count,
578 int *eof, void *data)
579{
580 int len;
581 if (cluster_cache_index == 2)
582 len = snprintf(page, PAGE_SIZE, "L2\n");
583 else if (cluster_cache_index == 3)
584 len = snprintf(page, PAGE_SIZE, "L3\n");
585 else if (cluster_cache_index == 1)
586 len = snprintf(page, PAGE_SIZE, "L1\n");
587 else
588 len = snprintf(page, PAGE_SIZE, "ALL\n");
589
590 return len;
591}
592
593static int proc_write_cluster_size(struct file *file,
594 const char *buffer,
595 unsigned long count,
596 void *data)
597{
598 int len;
599 /* L2, L3 */
600 char cache_name[33];
601
602 if(count > 32)
603 len = 32;
604 else
605 len = count;
606
607 if(copy_from_user(cache_name, buffer, len))
608 return -EFAULT;
609
610 cache_name[len] = '\0';
611 /* chomp name */
612 if (len > 1 && cache_name[len - 1] == '\n')
613 cache_name[len - 1] = '\0';
614
615 /* do a quick and dirty comparison to find the cluster size */
616 if (!strcmp(cache_name, "L2"))
617 cluster_cache_index = 2;
618 else if (!strcmp(cache_name, "L3"))
619 cluster_cache_index = 3;
620 else if (!strcmp(cache_name, "L1"))
621 cluster_cache_index = 1;
622 else if (!strcmp(cache_name, "ALL"))
623 cluster_cache_index = num_online_cpus();
624 else
625 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
626
627 return len;
628}
629
630static int proc_read_release_master(char *page, char **start,
631 off_t off, int count,
632 int *eof, void *data)
633{
634 int len, master;
635 master = atomic_read(&release_master_cpu);
636 if (master == NO_CPU)
637 len = snprintf(page, PAGE_SIZE, "NO_CPU\n");
638 else
639 len = snprintf(page, PAGE_SIZE, "%d\n", master);
640 return len;
641}
642
643static int proc_write_release_master(struct file *file,
644 const char *buffer,
645 unsigned long count,
646 void *data)
647{
648 int cpu, err, online = 0;
649 char msg[64];
650
651 if (count > 63)
652 return -EINVAL;
653
654 if (copy_from_user(msg, buffer, count))
655 return -EFAULT;
656
657 /* terminate */
658 msg[count] = '\0';
659 /* chomp */
660 if (count > 1 && msg[count - 1] == '\n')
661 msg[count - 1] = '\0';
662
663 if (strcmp(msg, "NO_CPU") == 0) {
664 atomic_set(&release_master_cpu, NO_CPU);
665 return count;
666 } else {
667 err = sscanf(msg, "%d", &cpu);
668 if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
669 atomic_set(&release_master_cpu, cpu);
670 return count;
671 } else {
672 TRACE("invalid release master: '%s' "
673 "(err:%d cpu:%d online:%d)\n",
674 msg, err, cpu, online);
675 return -EINVAL;
676 }
677 }
678}
679
680static struct proc_dir_entry *litmus_dir = NULL,
681 *curr_file = NULL,
682 *stat_file = NULL,
683 *plugs_file = NULL,
684 *clus_cache_idx_file = NULL,
685 *release_master_file = NULL;
686
687static int __init init_litmus_proc(void)
688{
689 litmus_dir = proc_mkdir("litmus", NULL);
690 if (!litmus_dir) {
691 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
692 return -ENOMEM;
693 }
694
695 curr_file = create_proc_entry("active_plugin",
696 0644, litmus_dir);
697 if (!curr_file) {
698 printk(KERN_ERR "Could not allocate active_plugin "
699 "procfs entry.\n");
700 return -ENOMEM;
701 }
702 curr_file->read_proc = proc_read_curr;
703 curr_file->write_proc = proc_write_curr;
704
705 release_master_file = create_proc_entry("release_master",
706 0644, litmus_dir);
707 if (!release_master_file) {
708 printk(KERN_ERR "Could not allocate release_master "
709 "procfs entry.\n");
710 return -ENOMEM;
711 }
712 release_master_file->read_proc = proc_read_release_master;
713 release_master_file->write_proc = proc_write_release_master;
714
715 clus_cache_idx_file = create_proc_entry("cluster_cache",
716 0644, litmus_dir);
717 if (!clus_cache_idx_file) {
718 printk(KERN_ERR "Could not allocate cluster_cache "
719 "procfs entry.\n");
720 return -ENOMEM;
721 }
722 clus_cache_idx_file->read_proc = proc_read_cluster_size;
723 clus_cache_idx_file->write_proc = proc_write_cluster_size;
724
725 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
726 proc_read_stats, NULL);
727
728 plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir,
729 proc_read_plugins, NULL);
730
731 return 0;
732}
733
734static void exit_litmus_proc(void)
735{
736 if (plugs_file)
737 remove_proc_entry("plugins", litmus_dir);
738 if (stat_file)
739 remove_proc_entry("stats", litmus_dir);
740 if (curr_file)
741 remove_proc_entry("active_plugin", litmus_dir);
742 if (clus_cache_idx_file)
743 remove_proc_entry("cluster_cache", litmus_dir);
744 if (release_master_file)
745 remove_proc_entry("release_master", litmus_dir);
746 if (litmus_dir)
747 remove_proc_entry("litmus", NULL);
748}
749
750extern struct sched_plugin linux_sched_plugin;
751
752static int __init _init_litmus(void)
753{
754 /* Common initializers.
755 * The mode-change lock is used to enforce a single mode-change
756 * operation at a time.
757 */
758 printk("Starting LITMUS^RT kernel\n");
759
760 register_sched_plugin(&linux_sched_plugin);
761
762 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
763 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
764
765#ifdef CONFIG_MAGIC_SYSRQ
766 /* offer some debugging help */
767 if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
768 printk("Registered kill rt tasks magic sysrq.\n");
769 else
770 printk("Could not register kill rt tasks magic sysrq.\n");
771#endif
772
773 init_litmus_proc();
774
775 return 0;
776}
777
778static void _exit_litmus(void)
779{
780 exit_litmus_proc();
781 kmem_cache_destroy(bheap_node_cache);
782 kmem_cache_destroy(release_heap_cache);
783}
784
785module_init(_init_litmus);
786module_exit(_exit_litmus);
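
Tying the admission checks in sys_set_rt_task_param() together: a task must publish its parameters while it is still a best-effort task, with exec_cost <= period and a supported budget policy, before it can be admitted as a real-time task. The sketch below is illustration only; the syscall number is a placeholder, struct rt_task comes from the patched litmus/rt_param.h, and the time values assume lt_t is expressed in nanoseconds.

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <litmus/rt_param.h>   /* struct rt_task, NO_ENFORCEMENT */

#define __NR_set_rt_task_param 350   /* placeholder -- see include/litmus/unistd_*.h */

static long set_rt_task_param(pid_t pid, struct rt_task *param)
{
	return syscall(__NR_set_rt_task_param, pid, param);
}

int main(void)
{
	struct rt_task tp = {
		.exec_cost     = 10 * 1000 * 1000,   /* 10 ms, assuming nanoseconds */
		.period        = 100 * 1000 * 1000,  /* 100 ms */
		.cpu           = 0,                   /* partition, where applicable */
		.budget_policy = NO_ENFORCEMENT,
	};

	/* must be called while the task is still best-effort, otherwise
	 * the kernel returns -EBUSY (see the checks above) */
	if (set_rt_task_param(getpid(), &tp) != 0) {
		perror("set_rt_task_param");
		return 1;
	}
	return 0;
}
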
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 000000000000..609ff0f82abb
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,310 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h>
17
18#include <litmus/rt_domain.h>
19
20#include <litmus/trace.h>
21
22#include <litmus/bheap.h>
23
24static int dummy_resched(rt_domain_t *rt)
25{
26 return 0;
27}
28
29static int dummy_order(struct bheap_node* a, struct bheap_node* b)
30{
31 return 0;
32}
33
34/* default implementation: use default lock */
35static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
36{
37 merge_ready(rt, tasks);
38}
39
40static unsigned int time2slot(lt_t time)
41{
42 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
43}
44
45static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
46{
47 unsigned long flags;
48 struct release_heap* rh;
49
50 TRACE("on_release_timer(0x%p) starts.\n", timer);
51
52 TS_RELEASE_START;
53
54 rh = container_of(timer, struct release_heap, timer);
55
56 spin_lock_irqsave(&rh->dom->release_lock, flags);
57 TRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
58 /* remove from release queue */
59 list_del(&rh->list);
60 spin_unlock_irqrestore(&rh->dom->release_lock, flags);
61 TRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
62
63 /* call release callback */
64 rh->dom->release_jobs(rh->dom, &rh->heap);
65 /* WARNING: rh can be referenced from other CPUs from now on. */
66
67 TS_RELEASE_END;
68
69 TRACE("on_release_timer(0x%p) ends.\n", timer);
70
71 return HRTIMER_NORESTART;
72}
73
74/* allocated in litmus.c */
75struct kmem_cache * release_heap_cache;
76
77struct release_heap* release_heap_alloc(int gfp_flags)
78{
79 struct release_heap* rh;
80 rh= kmem_cache_alloc(release_heap_cache, gfp_flags);
81 if (rh) {
82 /* initialize timer */
83 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
84 rh->timer.function = on_release_timer;
85 }
86 return rh;
87}
88
89void release_heap_free(struct release_heap* rh)
90{
91 /* make sure timer is no longer in use */
92 hrtimer_cancel(&rh->timer);
93 kmem_cache_free(release_heap_cache, rh);
94}
95
96/* Caller must hold release lock.
97 * Will return heap for given time. If no such heap exists prior to
98 * the invocation it will be created.
99 */
100static struct release_heap* get_release_heap(rt_domain_t *rt,
101 struct task_struct* t,
102 int use_task_heap)
103{
104 struct list_head* pos;
105 struct release_heap* heap = NULL;
106 struct release_heap* rh;
107 lt_t release_time = get_release(t);
108 unsigned int slot = time2slot(release_time);
109
110 /* initialize pos for the case that the list is empty */
111 pos = rt->release_queue.slot[slot].next;
112 list_for_each(pos, &rt->release_queue.slot[slot]) {
113 rh = list_entry(pos, struct release_heap, list);
114 if (release_time == rh->release_time) {
115 /* perfect match -- this happens on hyperperiod
116 * boundaries
117 */
118 heap = rh;
119 break;
120 } else if (lt_before(release_time, rh->release_time)) {
121 /* we need to insert a new node since rh is
122 * already in the future
123 */
124 break;
125 }
126 }
127 if (!heap && use_task_heap) {
128 /* use pre-allocated release heap */
129 rh = tsk_rt(t)->rel_heap;
130
131 rh->dom = rt;
132 rh->release_time = release_time;
133
134 /* add to release queue */
135 list_add(&rh->list, pos->prev);
136 heap = rh;
137 }
138 return heap;
139}
140
141static void reinit_release_heap(struct task_struct* t)
142{
143 struct release_heap* rh;
144
145 /* use pre-allocated release heap */
146 rh = tsk_rt(t)->rel_heap;
147
148 /* Make sure it is safe to use. The timer callback could still
149 * be executing on another CPU; hrtimer_cancel() will wait
150 * until the timer callback has completed. However, under no
151 * circumstances should the timer be active (= yet to be
152 * triggered).
153 *
154 * WARNING: If the CPU still holds the release_lock at this point,
155 * deadlock may occur!
156 */
157 BUG_ON(hrtimer_cancel(&rh->timer));
158
159 /* initialize */
160 bheap_init(&rh->heap);
161 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
162}
163/* arm_release_timer() - start local release timer or trigger
164 * remote timer (pull timer)
165 *
166 * Called by add_release() with:
167 * - tobe_lock taken
168 * - IRQ disabled
169 */
170static void arm_release_timer(rt_domain_t *_rt)
171{
172 rt_domain_t *rt = _rt;
173 struct list_head list;
174 struct list_head *pos, *safe;
175 struct task_struct* t;
176 struct release_heap* rh;
177
178 TRACE("arm_release_timer() at %llu\n", litmus_clock());
179 list_replace_init(&rt->tobe_released, &list);
180
181 list_for_each_safe(pos, safe, &list) {
182		/* pick task off the work list */
183 t = list_entry(pos, struct task_struct, rt_param.list);
184 sched_trace_task_release(t);
185 list_del(pos);
186
187 /* put into release heap while holding release_lock */
188 spin_lock(&rt->release_lock);
189 TRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
190
191 rh = get_release_heap(rt, t, 0);
192 if (!rh) {
193 /* need to use our own, but drop lock first */
194 spin_unlock(&rt->release_lock);
195 TRACE_TASK(t, "Dropped release_lock 0x%p\n",
196 &rt->release_lock);
197
198 reinit_release_heap(t);
199 TRACE_TASK(t, "release_heap ready\n");
200
201 spin_lock(&rt->release_lock);
202 TRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
203 &rt->release_lock);
204
205 rh = get_release_heap(rt, t, 1);
206 }
207 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
208 TRACE_TASK(t, "arm_release_timer(): added to release heap\n");
209
210 spin_unlock(&rt->release_lock);
211 TRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
212
213 /* To avoid arming the timer multiple times, we only let the
214 * owner do the arming (which is the "first" task to reference
215 * this release_heap anyway).
216 */
217 if (rh == tsk_rt(t)->rel_heap) {
218 TRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
219 /* we cannot arm the timer using hrtimer_start()
220 * as it may deadlock on rq->lock
221 *
222 * PINNED mode is ok on both local and remote CPU
223 */
224 if (rt->release_master == NO_CPU)
225 __hrtimer_start_range_ns(&rh->timer,
226 ns_to_ktime(rh->release_time),
227 0, HRTIMER_MODE_ABS_PINNED, 0);
228 else
229 hrtimer_start_on(rt->release_master,
230 &rh->info, &rh->timer,
231 ns_to_ktime(rh->release_time),
232 HRTIMER_MODE_ABS_PINNED);
233 } else
234 TRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
235 }
236}
237
238void rt_domain_init(rt_domain_t *rt,
239 bheap_prio_t order,
240 check_resched_needed_t check,
241 release_jobs_t release
242 )
243{
244 int i;
245
246 BUG_ON(!rt);
247 if (!check)
248 check = dummy_resched;
249 if (!release)
250 release = default_release_jobs;
251 if (!order)
252 order = dummy_order;
253
254 rt->release_master = NO_CPU;
255
256 bheap_init(&rt->ready_queue);
257 INIT_LIST_HEAD(&rt->tobe_released);
258 for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
259 INIT_LIST_HEAD(&rt->release_queue.slot[i]);
260
261 spin_lock_init(&rt->ready_lock);
262 spin_lock_init(&rt->release_lock);
263 spin_lock_init(&rt->tobe_lock);
264
265 rt->check_resched = check;
266 rt->release_jobs = release;
267 rt->order = order;
268}
269
270/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
271 * @new: the newly released task
272 */
273void __add_ready(rt_domain_t* rt, struct task_struct *new)
274{
275 TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
276 new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
277 get_release(new), litmus_clock());
278
279 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
280
281 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
282 rt->check_resched(rt);
283}
284
285/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
286 * @tasks - the newly released tasks
287 */
288void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
289{
290 bheap_union(rt->order, &rt->ready_queue, tasks);
291 rt->check_resched(rt);
292}
293
294/* add_release - add a real-time task to the rt release queue.
295 * @task: the sleeping task
296 */
297void __add_release(rt_domain_t* rt, struct task_struct *task)
298{
299 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
300 list_add(&tsk_rt(task)->list, &rt->tobe_released);
301 task->rt_param.domain = rt;
302
303 /* start release timer */
304 TS_SCHED2_START(task);
305
306 arm_release_timer(rt);
307
308 TS_SCHED2_END(task);
309}
310
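rt_domain_init() above accepts three optional callbacks; any left NULL fall back to dummy_resched(), dummy_order(), and default_release_jobs(). A hedged sketch of how a plugin typically wires up a domain, assuming edf_ready_order from litmus/edf_common.h (used elsewhere in this patch) as the priority order:

#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>

static rt_domain_t demo_domain;

/* invoked by __add_ready()/__merge_ready() after the ready queue changed;
 * a real plugin would decide here whether a preemption is needed */
static int demo_check_resched(rt_domain_t *rt)
{
	return 0;
}

/* invoked from the release timer with a heap of freshly released jobs;
 * same behavior as default_release_jobs() above */
static void demo_release_jobs(rt_domain_t *rt, struct bheap *tasks)
{
	merge_ready(rt, tasks);
}

static void demo_domain_setup(void)
{
	rt_domain_init(&demo_domain, edf_ready_order,
		       demo_check_resched, demo_release_jobs);
}

The EDF plugins below do not call rt_domain_init() directly but go through the edf_domain_init() convenience wrapper, which supplies the EDF ordering for them.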
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
new file mode 100644
index 000000000000..e57a11afda16
--- /dev/null
+++ b/litmus/sched_cedf.c
@@ -0,0 +1,772 @@
1/*
2 * litmus/sched_cedf.c
3 *
4 * Implementation of the C-EDF scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
8 * - Cluster topology is automatically detected (this is arch-dependent
9 * and currently works only on x86 --- and only with modern CPUs
10 * that export cpuid4 information).
11 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
12 * the programmer needs to be aware of the topology to place tasks
13 * in the desired cluster.
14 * - Default clustering is around the L2 cache (cache index = 2);
15 * supported clusters are: L1 (private cache: P-EDF), L2, L3, and ALL
16 * (all online CPUs are placed in a single cluster).
17 *
18 * For details on functions, take a look at sched_gsn_edf.c
19 *
20 * Currently, we do not support changes in the number of online cpus.
21 * If num_online_cpus() changes dynamically, the plugin breaks.
22 *
23 * This version uses the simple approach and serializes all scheduling
24 * decisions by the use of a queue lock. This is probably not the
25 * best way to do it, but it should suffice for now.
26 */
27
28#include <linux/spinlock.h>
29#include <linux/percpu.h>
30#include <linux/sched.h>
31
32#include <litmus/litmus.h>
33#include <litmus/jobs.h>
34#include <litmus/sched_plugin.h>
35#include <litmus/edf_common.h>
36#include <litmus/sched_trace.h>
37
38#include <litmus/bheap.h>
39
40#include <linux/module.h>
41
42/* forward declaration... a funny thing with C ;) */
43struct clusterdomain;
44
45/* cpu_entry_t - maintain the linked and scheduled state
46 *
47 * A cpu also contains a pointer to the cedf_domain_t cluster
48 * that owns it (struct clusterdomain*)
49 */
50typedef struct {
51 int cpu;
52 struct clusterdomain* cluster; /* owning cluster */
53 struct task_struct* linked; /* only RT tasks */
54 struct task_struct* scheduled; /* only RT tasks */
55 atomic_t will_schedule; /* prevent unneeded IPIs */
56 struct bheap_node* hn;
57} cpu_entry_t;
58
59/* one cpu_entry_t per CPU */
60DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
61
62#define set_will_schedule() \
63 (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
64#define clear_will_schedule() \
65 (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
66#define test_will_schedule(cpu) \
67 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
68
69/*
70 * In C-EDF there is a cedf domain _per_ cluster
71 * The number of clusters is dynamically determined according to the
72 * total number of CPUs and the cluster size.
73 */
74typedef struct clusterdomain {
75 /* rt_domain for this cluster */
76 rt_domain_t domain;
77 /* cpus in this cluster */
78 cpu_entry_t* *cpus;
79 /* map of this cluster cpus */
80 cpumask_var_t cpu_map;
81 /* the cpus queue themselves according to priority in here */
82 struct bheap_node *heap_node;
83 struct bheap cpu_heap;
84 /* lock for this cluster */
85#define lock domain.ready_lock
86} cedf_domain_t;
87
88/* a cedf_domain per cluster; allocation is done at init/activation time */
89cedf_domain_t *cedf;
90
91#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
92#define task_cpu_cluster(task) remote_cluster(get_partition(task))
93
94/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
95 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
96 * information during the initialization of the plugin (e.g., topology)
97#define WANT_ALL_SCHED_EVENTS
98 */
99#define VERBOSE_INIT
100
101static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
102{
103 cpu_entry_t *a, *b;
104 a = _a->value;
105 b = _b->value;
106 /* Note that a and b are inverted: we want the lowest-priority CPU at
107 * the top of the heap.
108 */
109 return edf_higher_prio(b->linked, a->linked);
110}
111
112/* update_cpu_position - Move the cpu entry to the correct place to maintain
113 * order in the cpu queue. Caller must hold cedf lock.
114 */
115static void update_cpu_position(cpu_entry_t *entry)
116{
117 cedf_domain_t *cluster = entry->cluster;
118
119 if (likely(bheap_node_in_heap(entry->hn)))
120 bheap_delete(cpu_lower_prio,
121 &cluster->cpu_heap,
122 entry->hn);
123
124 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
125}
126
127/* caller must hold cedf lock */
128static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
129{
130 struct bheap_node* hn;
131 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
132 return hn->value;
133}
134
135
136/* link_task_to_cpu - Update the link of a CPU.
137 * Handles the case where the to-be-linked task is already
138 * scheduled on a different CPU.
139 */
140static noinline void link_task_to_cpu(struct task_struct* linked,
141 cpu_entry_t *entry)
142{
143 cpu_entry_t *sched;
144 struct task_struct* tmp;
145 int on_cpu;
146
147 BUG_ON(linked && !is_realtime(linked));
148
149 /* Currently linked task is set to be unlinked. */
150 if (entry->linked) {
151 entry->linked->rt_param.linked_on = NO_CPU;
152 }
153
154 /* Link new task to CPU. */
155 if (linked) {
156 set_rt_flags(linked, RT_F_RUNNING);
157		/* handle the case that the task is already scheduled somewhere! */
158 on_cpu = linked->rt_param.scheduled_on;
159 if (on_cpu != NO_CPU) {
160 sched = &per_cpu(cedf_cpu_entries, on_cpu);
161 /* this should only happen if not linked already */
162 BUG_ON(sched->linked == linked);
163
164 /* If we are already scheduled on the CPU to which we
165 * wanted to link, we don't need to do the swap --
166 * we just link ourselves to the CPU and depend on
167 * the caller to get things right.
168 */
169 if (entry != sched) {
170 TRACE_TASK(linked,
171 "already scheduled on %d, updating link.\n",
172 sched->cpu);
173 tmp = sched->linked;
174 linked->rt_param.linked_on = sched->cpu;
175 sched->linked = linked;
176 update_cpu_position(sched);
177 linked = tmp;
178 }
179 }
180 if (linked) /* might be NULL due to swap */
181 linked->rt_param.linked_on = entry->cpu;
182 }
183 entry->linked = linked;
184#ifdef WANT_ALL_SCHED_EVENTS
185 if (linked)
186 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
187 else
188 TRACE("NULL linked to %d.\n", entry->cpu);
189#endif
190 update_cpu_position(entry);
191}
192
193/* unlink - Make sure a task is not linked any longer to an entry
194 * where it was linked before. Must hold cedf_lock.
195 */
196static noinline void unlink(struct task_struct* t)
197{
198 cpu_entry_t *entry;
199
200 if (unlikely(!t)) {
201 TRACE_BUG_ON(!t);
202 return;
203 }
204
205
206 if (t->rt_param.linked_on != NO_CPU) {
207 /* unlink */
208 entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
209 t->rt_param.linked_on = NO_CPU;
210 link_task_to_cpu(NULL, entry);
211 } else if (is_queued(t)) {
212 /* This is an interesting situation: t is scheduled,
213 * but was just recently unlinked. It cannot be
214 * linked anywhere else (because then it would have
215 * been relinked to this CPU), thus it must be in some
216 * queue. We must remove it from the list in this
217 * case.
218 *
219		 * In the C-EDF case it should be somewhere in the queue of
220		 * its domain; therefore we can get the domain using
221		 * task_cpu_cluster().
222 */
223 remove(&(task_cpu_cluster(t))->domain, t);
224 }
225}
226
227
228/* preempt - force a CPU to reschedule
229 */
230static void preempt(cpu_entry_t *entry)
231{
232 preempt_if_preemptable(entry->scheduled, entry->cpu);
233}
234
235/* requeue - Put an unlinked task into the C-EDF domain.
236 * Caller must hold cedf_lock.
237 */
238static noinline void requeue(struct task_struct* task)
239{
240 cedf_domain_t *cluster = task_cpu_cluster(task);
241 BUG_ON(!task);
242 /* sanity check before insertion */
243 BUG_ON(is_queued(task));
244
245 if (is_released(task, litmus_clock()))
246 __add_ready(&cluster->domain, task);
247 else {
248 /* it has got to wait */
249 add_release(&cluster->domain, task);
250 }
251}
252
253/* check for any necessary preemptions */
254static void check_for_preemptions(cedf_domain_t *cluster)
255{
256 struct task_struct *task;
257 cpu_entry_t* last;
258
259 for(last = lowest_prio_cpu(cluster);
260 edf_preemption_needed(&cluster->domain, last->linked);
261 last = lowest_prio_cpu(cluster)) {
262 /* preemption necessary */
263 task = __take_ready(&cluster->domain);
264 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
265 task->pid, last->cpu);
266 if (last->linked)
267 requeue(last->linked);
268 link_task_to_cpu(task, last);
269 preempt(last);
270 }
271}
272
273/* cedf_job_arrival: task is either resumed or released */
274static noinline void cedf_job_arrival(struct task_struct* task)
275{
276 cedf_domain_t *cluster = task_cpu_cluster(task);
277 BUG_ON(!task);
278
279 requeue(task);
280 check_for_preemptions(cluster);
281}
282
283static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
284{
285 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
286 unsigned long flags;
287
288 spin_lock_irqsave(&cluster->lock, flags);
289
290 __merge_ready(&cluster->domain, tasks);
291 check_for_preemptions(cluster);
292
293 spin_unlock_irqrestore(&cluster->lock, flags);
294}
295
296/* caller holds cedf_lock */
297static noinline void job_completion(struct task_struct *t, int forced)
298{
299 BUG_ON(!t);
300
301 sched_trace_task_completion(t, forced);
302
303 TRACE_TASK(t, "job_completion().\n");
304
305 /* set flags */
306 set_rt_flags(t, RT_F_SLEEP);
307 /* prepare for next period */
308 prepare_for_next_period(t);
309 if (is_released(t, litmus_clock()))
310 sched_trace_task_release(t);
311 /* unlink */
312 unlink(t);
313 /* requeue
314 * But don't requeue a blocking task. */
315 if (is_running(t))
316 cedf_job_arrival(t);
317}
318
319/* cedf_tick - this function is called for every local timer
320 * interrupt.
321 *
322 * checks whether the current task has expired and checks
323 * whether we need to preempt it if it has not expired
324 */
325static void cedf_tick(struct task_struct* t)
326{
327 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
328 if (!is_np(t)) {
329 /* np tasks will be preempted when they become
330 * preemptable again
331 */
332 set_tsk_need_resched(t);
333 set_will_schedule();
334 TRACE("cedf_scheduler_tick: "
335 "%d is preemptable "
336 " => FORCE_RESCHED\n", t->pid);
337 } else if (is_user_np(t)) {
338 TRACE("cedf_scheduler_tick: "
339 "%d is non-preemptable, "
340 "preemption delayed.\n", t->pid);
341 request_exit_np(t);
342 }
343 }
344}
345
346/* Getting schedule() right is a bit tricky. schedule() may not make any
347 * assumptions on the state of the current task since it may be called for a
348 * number of reasons. The reasons include that scheduler_tick() determined it
349 * was necessary, that sys_exit_np() was called, that some Linux
350 * subsystem determined so, or even (in the worst case) that there is a bug
351 * hidden somewhere. Thus, we must take extreme care to determine what the
352 * current state is.
353 *
354 * The CPU could currently be scheduling a task (or not), be linked (or not).
355 *
356 * The following assertions for the scheduled task could hold:
357 *
358 * - !is_running(scheduled) // the job blocks
359 * - scheduled->timeslice == 0 // the job completed (forcefully)
360 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
361 * - linked != scheduled // we need to reschedule (for any reason)
362 * - is_np(scheduled) // rescheduling must be delayed,
363 * sys_exit_np must be requested
364 *
365 * Any of these can occur together.
366 */
367static struct task_struct* cedf_schedule(struct task_struct * prev)
368{
369 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
370 cedf_domain_t *cluster = entry->cluster;
371 int out_of_time, sleep, preempt, np, exists, blocks;
372 struct task_struct* next = NULL;
373
374 spin_lock(&cluster->lock);
375 clear_will_schedule();
376
377 /* sanity checking */
378 BUG_ON(entry->scheduled && entry->scheduled != prev);
379 BUG_ON(entry->scheduled && !is_realtime(prev));
380 BUG_ON(is_realtime(prev) && !entry->scheduled);
381
382 /* (0) Determine state */
383 exists = entry->scheduled != NULL;
384 blocks = exists && !is_running(entry->scheduled);
385 out_of_time = exists &&
386 budget_enforced(entry->scheduled) &&
387 budget_exhausted(entry->scheduled);
388 np = exists && is_np(entry->scheduled);
389 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
390 preempt = entry->scheduled != entry->linked;
391
392#ifdef WANT_ALL_SCHED_EVENTS
393 TRACE_TASK(prev, "invoked cedf_schedule.\n");
394#endif
395
396 if (exists)
397 TRACE_TASK(prev,
398 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
399 "state:%d sig:%d\n",
400 blocks, out_of_time, np, sleep, preempt,
401 prev->state, signal_pending(prev));
402 if (entry->linked && preempt)
403 TRACE_TASK(prev, "will be preempted by %s/%d\n",
404 entry->linked->comm, entry->linked->pid);
405
406
407 /* If a task blocks we have no choice but to reschedule.
408 */
409 if (blocks)
410 unlink(entry->scheduled);
411
412 /* Request a sys_exit_np() call if we would like to preempt but cannot.
413 * We need to make sure to update the link structure anyway in case
414 * that we are still linked. Multiple calls to request_exit_np() don't
415 * hurt.
416 */
417 if (np && (out_of_time || preempt || sleep)) {
418 unlink(entry->scheduled);
419 request_exit_np(entry->scheduled);
420 }
421
422 /* Any task that is preemptable and either exhausts its execution
423 * budget or wants to sleep completes. We may have to reschedule after
424 * this. Don't do a job completion if we block (can't have timers running
426	 * for blocked jobs). Preemptions go first for the same reason.
426 */
427 if (!np && (out_of_time || sleep) && !blocks && !preempt)
428 job_completion(entry->scheduled, !sleep);
429
430 /* Link pending task if we became unlinked.
431 */
432 if (!entry->linked)
433 link_task_to_cpu(__take_ready(&cluster->domain), entry);
434
435 /* The final scheduling decision. Do we need to switch for some reason?
436 * If linked is different from scheduled, then select linked as next.
437 */
438 if ((!np || blocks) &&
439 entry->linked != entry->scheduled) {
440 /* Schedule a linked job? */
441 if (entry->linked) {
442 entry->linked->rt_param.scheduled_on = entry->cpu;
443 next = entry->linked;
444 }
445 if (entry->scheduled) {
446 /* not gonna be scheduled soon */
447 entry->scheduled->rt_param.scheduled_on = NO_CPU;
448 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
449 }
450 } else
451 /* Only override Linux scheduler if we have a real-time task
452 * scheduled that needs to continue.
453 */
454 if (exists)
455 next = prev;
456
457 spin_unlock(&cluster->lock);
458
459#ifdef WANT_ALL_SCHED_EVENTS
460 TRACE("cedf_lock released, next=0x%p\n", next);
461
462 if (next)
463 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
464 else if (exists && !next)
465 TRACE("becomes idle at %llu.\n", litmus_clock());
466#endif
467
468
469 return next;
470}
471
472
473/* _finish_switch - we just finished the switch away from prev
474 */
475static void cedf_finish_switch(struct task_struct *prev)
476{
477 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
478
479 entry->scheduled = is_realtime(current) ? current : NULL;
480#ifdef WANT_ALL_SCHED_EVENTS
481 TRACE_TASK(prev, "switched away from\n");
482#endif
483}
484
485
486/* Prepare a task for running in RT mode
487 */
488static void cedf_task_new(struct task_struct * t, int on_rq, int running)
489{
490 unsigned long flags;
491 cpu_entry_t* entry;
492 cedf_domain_t* cluster;
493
494	TRACE("cedf: task new %d\n", t->pid);
495
496 /* the cluster doesn't change even if t is running */
497 cluster = task_cpu_cluster(t);
498
499 spin_lock_irqsave(&cluster->domain.ready_lock, flags);
500
501 /* setup job params */
502 release_at(t, litmus_clock());
503
504 if (running) {
505 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
506 BUG_ON(entry->scheduled);
507
508 entry->scheduled = t;
509 tsk_rt(t)->scheduled_on = task_cpu(t);
510 } else {
511 t->rt_param.scheduled_on = NO_CPU;
512 }
513 t->rt_param.linked_on = NO_CPU;
514
515 cedf_job_arrival(t);
516 spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags);
517}
518
519static void cedf_task_wake_up(struct task_struct *task)
520{
521 unsigned long flags;
522 lt_t now;
523 cedf_domain_t *cluster;
524
525 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
526
527 cluster = task_cpu_cluster(task);
528
529 spin_lock_irqsave(&cluster->lock, flags);
530 /* We need to take suspensions because of semaphores into
531 * account! If a job resumes after being suspended due to acquiring
532 * a semaphore, it should never be treated as a new job release.
533 */
534 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
535 set_rt_flags(task, RT_F_RUNNING);
536 } else {
537 now = litmus_clock();
538 if (is_tardy(task, now)) {
539 /* new sporadic release */
540 release_at(task, now);
541 sched_trace_task_release(task);
542 }
543 else {
544 if (task->rt.time_slice) {
545 /* came back in time before deadline
546 */
547 set_rt_flags(task, RT_F_RUNNING);
548 }
549 }
550 }
551 cedf_job_arrival(task);
552 spin_unlock_irqrestore(&cluster->lock, flags);
553}
554
555static void cedf_task_block(struct task_struct *t)
556{
557 unsigned long flags;
558 cedf_domain_t *cluster;
559
560 TRACE_TASK(t, "block at %llu\n", litmus_clock());
561
562 cluster = task_cpu_cluster(t);
563
564 /* unlink if necessary */
565 spin_lock_irqsave(&cluster->lock, flags);
566 unlink(t);
567 spin_unlock_irqrestore(&cluster->lock, flags);
568
569 BUG_ON(!is_realtime(t));
570}
571
572
573static void cedf_task_exit(struct task_struct * t)
574{
575 unsigned long flags;
576 cedf_domain_t *cluster = task_cpu_cluster(t);
577
578 /* unlink if necessary */
579 spin_lock_irqsave(&cluster->lock, flags);
580 unlink(t);
581 if (tsk_rt(t)->scheduled_on != NO_CPU) {
582 cluster->cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
583 tsk_rt(t)->scheduled_on = NO_CPU;
584 }
585 spin_unlock_irqrestore(&cluster->lock, flags);
586
587 BUG_ON(!is_realtime(t));
588 TRACE_TASK(t, "RIP\n");
589}
590
591static long cedf_admit_task(struct task_struct* tsk)
592{
593 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
594}
595
596/* total number of clusters */
597static int num_clusters;
598/* we do not support clusters of different sizes */
599static unsigned int cluster_size;
600
601#ifdef VERBOSE_INIT
602static void print_cluster_topology(cpumask_var_t mask, int cpu)
603{
604 int chk;
605 char buf[255];
606
607 chk = cpulist_scnprintf(buf, 254, mask);
608 buf[chk] = '\0';
609 printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
610
611}
612#endif
613
614static int clusters_allocated = 0;
615
616static void cleanup_cedf(void)
617{
618 int i;
619
620 if (clusters_allocated) {
621 for (i = 0; i < num_clusters; i++) {
622 kfree(cedf[i].cpus);
623 kfree(cedf[i].heap_node);
624 free_cpumask_var(cedf[i].cpu_map);
625 }
626
627 kfree(cedf);
628 }
629}
630
631static long cedf_activate_plugin(void)
632{
633 int i, j, cpu, ccpu, cpu_count;
634 cpu_entry_t *entry;
635
636 cpumask_var_t mask;
637 int chk = 0;
638
639 /* de-allocate old clusters, if any */
640 cleanup_cedf();
641
642 printk(KERN_INFO "C-EDF: Activate Plugin, cache index = %d\n",
643 cluster_cache_index);
644
645 /* need to get cluster_size first */
646 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
647 return -ENOMEM;
648
649 if (unlikely(cluster_cache_index == num_online_cpus())) {
650
651 cluster_size = num_online_cpus();
652 } else {
653
654 chk = get_shared_cpu_map(mask, 0, cluster_cache_index);
655 if (chk) {
656 /* if chk != 0 then it is the max allowed index */
657 printk(KERN_INFO "C-EDF: Cannot support cache index = %d\n",
658 cluster_cache_index);
659 printk(KERN_INFO "C-EDF: Using cache index = %d\n",
660 chk);
661 cluster_cache_index = chk;
662 }
663
664 cluster_size = cpumask_weight(mask);
665 }
666
667 if ((num_online_cpus() % cluster_size) != 0) {
668 /* this can't be right, some cpus are left out */
669 printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
670 num_online_cpus(), cluster_size);
671 return -1;
672 }
673
674 num_clusters = num_online_cpus() / cluster_size;
675 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
676 num_clusters, cluster_size);
677
678 /* initialize clusters */
679 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
680 for (i = 0; i < num_clusters; i++) {
681
682 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
683 GFP_ATOMIC);
684 cedf[i].heap_node = kmalloc(
685 cluster_size * sizeof(struct bheap_node),
686 GFP_ATOMIC);
687 bheap_init(&(cedf[i].cpu_heap));
688 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
689
690 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
691 return -ENOMEM;
692 }
693
694 /* cycle through cluster and add cpus to them */
695 for (i = 0; i < num_clusters; i++) {
696
697 for_each_online_cpu(cpu) {
698 /* check if the cpu is already in a cluster */
699 for (j = 0; j < num_clusters; j++)
700 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
701 break;
702 /* if it is in a cluster go to next cpu */
703 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
704 continue;
705
706 /* this cpu isn't in any cluster */
707 /* get the shared cpus */
708 if (unlikely(cluster_cache_index == num_online_cpus()))
709 cpumask_copy(mask, cpu_online_mask);
710 else
711 get_shared_cpu_map(mask, cpu, cluster_cache_index);
712
713 cpumask_copy(cedf[i].cpu_map, mask);
714#ifdef VERBOSE_INIT
715 print_cluster_topology(mask, cpu);
716#endif
717 /* add cpus to current cluster and init cpu_entry_t */
718 cpu_count = 0;
719 for_each_cpu(ccpu, cedf[i].cpu_map) {
720
721 entry = &per_cpu(cedf_cpu_entries, ccpu);
722 cedf[i].cpus[cpu_count] = entry;
723 atomic_set(&entry->will_schedule, 0);
724 entry->cpu = ccpu;
725 entry->cluster = &cedf[i];
726 entry->hn = &(cedf[i].heap_node[cpu_count]);
727 bheap_node_init(&entry->hn, entry);
728
729 cpu_count++;
730
731 entry->linked = NULL;
732 entry->scheduled = NULL;
733 update_cpu_position(entry);
734 }
735 /* done with this cluster */
736 break;
737 }
738 }
739
740 free_cpumask_var(mask);
741 clusters_allocated = 1;
742 return 0;
743}
744
745/* Plugin object */
746static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
747 .plugin_name = "C-EDF",
748 .finish_switch = cedf_finish_switch,
749 .tick = cedf_tick,
750 .task_new = cedf_task_new,
751 .complete_job = complete_job,
752 .task_exit = cedf_task_exit,
753 .schedule = cedf_schedule,
754 .task_wake_up = cedf_task_wake_up,
755 .task_block = cedf_task_block,
756 .admit_task = cedf_admit_task,
757 .activate_plugin = cedf_activate_plugin,
758};
759
760
761static int __init init_cedf(void)
762{
763 return register_sched_plugin(&cedf_plugin);
764}
765
766static void clean_cedf(void)
767{
768 cleanup_cedf();
769}
770
771module_init(init_cedf);
772module_exit(clean_cedf);
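To make the arithmetic in cedf_activate_plugin() concrete: on a hypothetical 8-CPU machine where each L2 cache is shared by two cores, cluster_cache_index = 2 makes get_shared_cpu_map() return a two-CPU mask, so cluster_size = 2 and num_clusters = 8 / 2 = 4. If the division left a remainder (say, 6 online CPUs with cluster_size = 4), the (num_online_cpus() % cluster_size) check above rejects the configuration with the "Trying to group" error.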
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
new file mode 100644
index 000000000000..6137c74729cb
--- /dev/null
+++ b/litmus/sched_gsn_edf.c
@@ -0,0 +1,830 @@
1/*
2 * litmus/sched_gsn_edf.c
3 *
4 * Implementation of the GSN-EDF scheduling algorithm.
5 *
6 * This version uses the simple approach and serializes all scheduling
7 * decisions by the use of a queue lock. This is probably not the
8 * best way to do it, but it should suffice for now.
9 */
10
11#include <linux/spinlock.h>
12#include <linux/percpu.h>
13#include <linux/sched.h>
14
15#include <litmus/litmus.h>
16#include <litmus/jobs.h>
17#include <litmus/sched_plugin.h>
18#include <litmus/edf_common.h>
19#include <litmus/sched_trace.h>
20
21#include <litmus/bheap.h>
22
23#include <linux/module.h>
24
25/* Overview of GSN-EDF operations.
26 *
27 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
28 * description only covers how the individual operations are implemented in
29 * LITMUS.
30 *
31 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
32 * structure (NOT the actually scheduled
33 * task). If there is another linked task To
34 * already it will set To->linked_on = NO_CPU
35 * (thereby removing its association with this
36 * CPU). However, it will not requeue the
37 * previously linked task (if any). It will set
38 * T's state to RT_F_RUNNING and check whether
39 * it is already running somewhere else. If T
40 * is scheduled somewhere else it will link
41 * it to that CPU instead (and pull the linked
42 * task to cpu). T may be NULL.
43 *
44 * unlink(T) - Unlink removes T from all scheduler data
45 * structures. If it is linked to some CPU it
46 * will link NULL to that CPU. If it is
47 * currently queued in the gsnedf queue it will
48 * be removed from the rt_domain. It is safe to
49 * call unlink(T) if T is not linked. T may not
50 * be NULL.
51 *
52 * requeue(T) - Requeue will insert T into the appropriate
53 * queue. If the system is in real-time mode and
54 *                              T is released already, it will go into the
55 *                              ready queue. If the system is not in
56 *                              real-time mode, then T will go into the
57 *                              release queue. If T's release time is in the
58 *                              future, it will go into the release
59 *                              queue. That means that T's release time/job
60 *                              no/etc. has to be updated before requeue(T) is
61 * called. It is not safe to call requeue(T)
62 * when T is already queued. T may not be NULL.
63 *
64 * gsnedf_job_arrival(T) - This is the catch all function when T enters
65 * the system after either a suspension or at a
66 * job release. It will queue T (which means it
67 * is not safe to call gsnedf_job_arrival(T) if
68 * T is already queued) and then check whether a
69 * preemption is necessary. If a preemption is
70 * necessary it will update the linkage
71 * accordingly and cause scheduled to be called
72 * (either with an IPI or need_resched). It is
73 * safe to call gsnedf_job_arrival(T) if T's
74 * next job has not been actually released yet
75 *                              (release time in the future). T will be put
76 * on the release queue in that case.
77 *
78 * job_completion(T) - Take care of everything that needs to be done
79 * to prepare T for its next release and place
80 * it in the right queue with
81 * gsnedf_job_arrival().
82 *
83 *
84 * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
85 * equivalent to unlink(T). Note that if you unlink a task from a CPU, none of
86 * the functions will automatically propagate a pending task from the ready queue
87 * to a linked task. This is the job of the calling function (by means of
88 * __take_ready).
89 */
90
91
92/* cpu_entry_t - maintain the linked and scheduled state
93 */
94typedef struct {
95 int cpu;
96 struct task_struct* linked; /* only RT tasks */
97 struct task_struct* scheduled; /* only RT tasks */
98 atomic_t will_schedule; /* prevent unneeded IPIs */
99 struct bheap_node* hn;
100} cpu_entry_t;
101DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
102
103cpu_entry_t* gsnedf_cpus[NR_CPUS];
104
105#define set_will_schedule() \
106 (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1))
107#define clear_will_schedule() \
108 (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0))
109#define test_will_schedule(cpu) \
110 (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule))
111
112
113/* the cpus queue themselves according to priority in here */
114static struct bheap_node gsnedf_heap_node[NR_CPUS];
115static struct bheap gsnedf_cpu_heap;
116
117static rt_domain_t gsnedf;
118#define gsnedf_lock (gsnedf.ready_lock)
119
120
121/* Uncomment this if you want to see all scheduling decisions in the
122 * TRACE() log.
123#define WANT_ALL_SCHED_EVENTS
124 */
125
126static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
127{
128 cpu_entry_t *a, *b;
129 a = _a->value;
130 b = _b->value;
131 /* Note that a and b are inverted: we want the lowest-priority CPU at
132 * the top of the heap.
133 */
134 return edf_higher_prio(b->linked, a->linked);
135}
136
137/* update_cpu_position - Move the cpu entry to the correct place to maintain
138 * order in the cpu queue. Caller must hold gsnedf lock.
139 */
140static void update_cpu_position(cpu_entry_t *entry)
141{
142 if (likely(bheap_node_in_heap(entry->hn)))
143 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
144 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
145}
146
147/* caller must hold gsnedf lock */
148static cpu_entry_t* lowest_prio_cpu(void)
149{
150 struct bheap_node* hn;
151 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
152 return hn->value;
153}
154
155
156/* link_task_to_cpu - Update the link of a CPU.
157 * Handles the case where the to-be-linked task is already
158 * scheduled on a different CPU.
159 */
160static noinline void link_task_to_cpu(struct task_struct* linked,
161 cpu_entry_t *entry)
162{
163 cpu_entry_t *sched;
164 struct task_struct* tmp;
165 int on_cpu;
166
167 BUG_ON(linked && !is_realtime(linked));
168
169 /* Currently linked task is set to be unlinked. */
170 if (entry->linked) {
171 entry->linked->rt_param.linked_on = NO_CPU;
172 }
173
174 /* Link new task to CPU. */
175 if (linked) {
176 set_rt_flags(linked, RT_F_RUNNING);
177		/* handle the case that the task is already scheduled somewhere! */
178 on_cpu = linked->rt_param.scheduled_on;
179 if (on_cpu != NO_CPU) {
180 sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
181 /* this should only happen if not linked already */
182 BUG_ON(sched->linked == linked);
183
184 /* If we are already scheduled on the CPU to which we
185 * wanted to link, we don't need to do the swap --
186 * we just link ourselves to the CPU and depend on
187 * the caller to get things right.
188 */
189 if (entry != sched) {
190 TRACE_TASK(linked,
191 "already scheduled on %d, updating link.\n",
192 sched->cpu);
193 tmp = sched->linked;
194 linked->rt_param.linked_on = sched->cpu;
195 sched->linked = linked;
196 update_cpu_position(sched);
197 linked = tmp;
198 }
199 }
200 if (linked) /* might be NULL due to swap */
201 linked->rt_param.linked_on = entry->cpu;
202 }
203 entry->linked = linked;
204#ifdef WANT_ALL_SCHED_EVENTS
205 if (linked)
206 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
207 else
208 TRACE("NULL linked to %d.\n", entry->cpu);
209#endif
210 update_cpu_position(entry);
211}
212
213/* unlink - Make sure a task is not linked any longer to an entry
214 * where it was linked before. Must hold gsnedf_lock.
215 */
216static noinline void unlink(struct task_struct* t)
217{
218 cpu_entry_t *entry;
219
220 if (unlikely(!t)) {
221 TRACE_BUG_ON(!t);
222 return;
223 }
224
225 if (t->rt_param.linked_on != NO_CPU) {
226 /* unlink */
227 entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
228 t->rt_param.linked_on = NO_CPU;
229 link_task_to_cpu(NULL, entry);
230 } else if (is_queued(t)) {
231 /* This is an interesting situation: t is scheduled,
232 * but was just recently unlinked. It cannot be
233 * linked anywhere else (because then it would have
234 * been relinked to this CPU), thus it must be in some
235 * queue. We must remove it from the list in this
236 * case.
237 */
238 remove(&gsnedf, t);
239 }
240}
241
242
243/* preempt - force a CPU to reschedule
244 */
245static void preempt(cpu_entry_t *entry)
246{
247 preempt_if_preemptable(entry->scheduled, entry->cpu);
248}
249
250/* requeue - Put an unlinked task into gsn-edf domain.
251 * Caller must hold gsnedf_lock.
252 */
253static noinline void requeue(struct task_struct* task)
254{
255 BUG_ON(!task);
256 /* sanity check before insertion */
257 BUG_ON(is_queued(task));
258
259 if (is_released(task, litmus_clock()))
260 __add_ready(&gsnedf, task);
261 else {
262 /* it has got to wait */
263 add_release(&gsnedf, task);
264 }
265}
266
267/* check for any necessary preemptions */
268static void check_for_preemptions(void)
269{
270 struct task_struct *task;
271 cpu_entry_t* last;
272
273 for(last = lowest_prio_cpu();
274 edf_preemption_needed(&gsnedf, last->linked);
275 last = lowest_prio_cpu()) {
276 /* preemption necessary */
277 task = __take_ready(&gsnedf);
278 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
279 task->pid, last->cpu);
280 if (last->linked)
281 requeue(last->linked);
282 link_task_to_cpu(task, last);
283 preempt(last);
284 }
285}
286
287/* gsnedf_job_arrival: task is either resumed or released */
288static noinline void gsnedf_job_arrival(struct task_struct* task)
289{
290 BUG_ON(!task);
291
292 requeue(task);
293 check_for_preemptions();
294}
295
296static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
297{
298 unsigned long flags;
299
300 spin_lock_irqsave(&gsnedf_lock, flags);
301
302 __merge_ready(rt, tasks);
303 check_for_preemptions();
304
305 spin_unlock_irqrestore(&gsnedf_lock, flags);
306}
307
308/* caller holds gsnedf_lock */
309static noinline void job_completion(struct task_struct *t, int forced)
310{
311 BUG_ON(!t);
312
313 sched_trace_task_completion(t, forced);
314
315 TRACE_TASK(t, "job_completion().\n");
316
317 /* set flags */
318 set_rt_flags(t, RT_F_SLEEP);
319 /* prepare for next period */
320 prepare_for_next_period(t);
321 if (is_released(t, litmus_clock()))
322 sched_trace_task_release(t);
323 /* unlink */
324 unlink(t);
325 /* requeue
326 * But don't requeue a blocking task. */
327 if (is_running(t))
328 gsnedf_job_arrival(t);
329}
330
331/* gsnedf_tick - this function is called for every local timer
332 * interrupt.
333 *
334 * checks whether the current task has expired and checks
335 * whether we need to preempt it if it has not expired
336 */
337static void gsnedf_tick(struct task_struct* t)
338{
339 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
340 if (!is_np(t)) {
341 /* np tasks will be preempted when they become
342 * preemptable again
343 */
344 set_tsk_need_resched(t);
345 set_will_schedule();
346 TRACE("gsnedf_scheduler_tick: "
347 "%d is preemptable "
348 " => FORCE_RESCHED\n", t->pid);
349 } else if (is_user_np(t)) {
350 TRACE("gsnedf_scheduler_tick: "
351 "%d is non-preemptable, "
352 "preemption delayed.\n", t->pid);
353 request_exit_np(t);
354 }
355 }
356}
357
358/* Getting schedule() right is a bit tricky. schedule() may not make any
359 * assumptions on the state of the current task since it may be called for a
360 * number of reasons. The reasons include that scheduler_tick() determined it
361 * was necessary, that sys_exit_np() was called, that some Linux
362 * subsystem determined so, or even (in the worst case) that there is a bug
363 * hidden somewhere. Thus, we must take extreme care to determine what the
364 * current state is.
365 *
366 * The CPU could currently be scheduling a task (or not), be linked (or not).
367 *
368 * The following assertions for the scheduled task could hold:
369 *
370 * - !is_running(scheduled) // the job blocks
371 * - scheduled->timeslice == 0 // the job completed (forcefully)
372 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
373 * - linked != scheduled // we need to reschedule (for any reason)
374 * - is_np(scheduled) // rescheduling must be delayed,
375 * sys_exit_np must be requested
376 *
377 * Any of these can occur together.
378 */
379static struct task_struct* gsnedf_schedule(struct task_struct * prev)
380{
381 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
382 int out_of_time, sleep, preempt, np, exists, blocks;
383 struct task_struct* next = NULL;
384
385 /* Bail out early if we are the release master.
386 * The release master never schedules any real-time tasks.
387 */
388 if (gsnedf.release_master == entry->cpu)
389 return NULL;
390
391 spin_lock(&gsnedf_lock);
392 clear_will_schedule();
393
394 /* sanity checking */
395 BUG_ON(entry->scheduled && entry->scheduled != prev);
396 BUG_ON(entry->scheduled && !is_realtime(prev));
397 BUG_ON(is_realtime(prev) && !entry->scheduled);
398
399 /* (0) Determine state */
400 exists = entry->scheduled != NULL;
401 blocks = exists && !is_running(entry->scheduled);
402 out_of_time = exists &&
403 budget_enforced(entry->scheduled) &&
404 budget_exhausted(entry->scheduled);
405 np = exists && is_np(entry->scheduled);
406 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
407 preempt = entry->scheduled != entry->linked;
408
409#ifdef WANT_ALL_SCHED_EVENTS
410 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
411#endif
412
413 if (exists)
414 TRACE_TASK(prev,
415 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
416 "state:%d sig:%d\n",
417 blocks, out_of_time, np, sleep, preempt,
418 prev->state, signal_pending(prev));
419 if (entry->linked && preempt)
420 TRACE_TASK(prev, "will be preempted by %s/%d\n",
421 entry->linked->comm, entry->linked->pid);
422
423
424 /* If a task blocks we have no choice but to reschedule.
425 */
426 if (blocks)
427 unlink(entry->scheduled);
428
429 /* Request a sys_exit_np() call if we would like to preempt but cannot.
430 * We need to make sure to update the link structure anyway in case
431 * that we are still linked. Multiple calls to request_exit_np() don't
432 * hurt.
433 */
434 if (np && (out_of_time || preempt || sleep)) {
435 unlink(entry->scheduled);
436 request_exit_np(entry->scheduled);
437 }
438
439 /* Any task that is preemptable and either exhausts its execution
440 * budget or wants to sleep completes. We may have to reschedule after
441 * this. Don't do a job completion if we block (can't have timers running
442	 * for blocked jobs). Preemptions go first for the same reason.
443 */
444 if (!np && (out_of_time || sleep) && !blocks && !preempt)
445 job_completion(entry->scheduled, !sleep);
446
447 /* Link pending task if we became unlinked.
448 */
449 if (!entry->linked)
450 link_task_to_cpu(__take_ready(&gsnedf), entry);
451
452 /* The final scheduling decision. Do we need to switch for some reason?
453 * If linked is different from scheduled, then select linked as next.
454 */
455 if ((!np || blocks) &&
456 entry->linked != entry->scheduled) {
457 /* Schedule a linked job? */
458 if (entry->linked) {
459 entry->linked->rt_param.scheduled_on = entry->cpu;
460 next = entry->linked;
461 }
462 if (entry->scheduled) {
463 /* not gonna be scheduled soon */
464 entry->scheduled->rt_param.scheduled_on = NO_CPU;
465 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
466 }
467 } else
468 /* Only override Linux scheduler if we have a real-time task
469 * scheduled that needs to continue.
470 */
471 if (exists)
472 next = prev;
473
474 spin_unlock(&gsnedf_lock);
475
476#ifdef WANT_ALL_SCHED_EVENTS
477 TRACE("gsnedf_lock released, next=0x%p\n", next);
478
479 if (next)
480 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
481 else if (exists && !next)
482 TRACE("becomes idle at %llu.\n", litmus_clock());
483#endif
484
485
486 return next;
487}
488
489
490/* _finish_switch - we just finished the switch away from prev
491 */
492static void gsnedf_finish_switch(struct task_struct *prev)
493{
494 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
495
496 entry->scheduled = is_realtime(current) ? current : NULL;
497#ifdef WANT_ALL_SCHED_EVENTS
498 TRACE_TASK(prev, "switched away from\n");
499#endif
500}
501
502
503/* Prepare a task for running in RT mode
504 */
505static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
506{
507 unsigned long flags;
508 cpu_entry_t* entry;
509
510 TRACE("gsn edf: task new %d\n", t->pid);
511
512 spin_lock_irqsave(&gsnedf_lock, flags);
513
514 /* setup job params */
515 release_at(t, litmus_clock());
516
517 if (running) {
518 entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
519 BUG_ON(entry->scheduled);
520
521 if (entry->cpu != gsnedf.release_master) {
522 entry->scheduled = t;
523 tsk_rt(t)->scheduled_on = task_cpu(t);
524 } else {
525 /* do not schedule on release master */
526 preempt(entry); /* force resched */
527 tsk_rt(t)->scheduled_on = NO_CPU;
528 }
529 } else {
530 t->rt_param.scheduled_on = NO_CPU;
531 }
532 t->rt_param.linked_on = NO_CPU;
533
534 gsnedf_job_arrival(t);
535 spin_unlock_irqrestore(&gsnedf_lock, flags);
536}
537
538static void gsnedf_task_wake_up(struct task_struct *task)
539{
540 unsigned long flags;
541 lt_t now;
542
543 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
544
545 spin_lock_irqsave(&gsnedf_lock, flags);
546 /* We need to take suspensions because of semaphores into
547 * account! If a job resumes after being suspended due to acquiring
548 * a semaphore, it should never be treated as a new job release.
549 */
550 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
551 set_rt_flags(task, RT_F_RUNNING);
552 } else {
553 now = litmus_clock();
554 if (is_tardy(task, now)) {
555 /* new sporadic release */
556 release_at(task, now);
557 sched_trace_task_release(task);
558 }
559 else {
560 if (task->rt.time_slice) {
561 /* came back in time before deadline
562 */
563 set_rt_flags(task, RT_F_RUNNING);
564 }
565 }
566 }
567 gsnedf_job_arrival(task);
568 spin_unlock_irqrestore(&gsnedf_lock, flags);
569}
570
571static void gsnedf_task_block(struct task_struct *t)
572{
573 unsigned long flags;
574
575 TRACE_TASK(t, "block at %llu\n", litmus_clock());
576
577 /* unlink if necessary */
578 spin_lock_irqsave(&gsnedf_lock, flags);
579 unlink(t);
580 spin_unlock_irqrestore(&gsnedf_lock, flags);
581
582 BUG_ON(!is_realtime(t));
583}
584
585
586static void gsnedf_task_exit(struct task_struct * t)
587{
588 unsigned long flags;
589
590 /* unlink if necessary */
591 spin_lock_irqsave(&gsnedf_lock, flags);
592 unlink(t);
593 if (tsk_rt(t)->scheduled_on != NO_CPU) {
594 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
595 tsk_rt(t)->scheduled_on = NO_CPU;
596 }
597 spin_unlock_irqrestore(&gsnedf_lock, flags);
598
599 BUG_ON(!is_realtime(t));
600 TRACE_TASK(t, "RIP\n");
601}
602
603#ifdef CONFIG_FMLP
604
605/* Update the queue position of a task that got its priority boosted via
606 * priority inheritance. */
607static void update_queue_position(struct task_struct *holder)
608{
609	/* We don't know whether holder is in the ready queue. It should be, but
610 * on a budget overrun it may already be in a release queue. Hence,
611 * calling unlink() is not possible since it assumes that the task is
612 * not in a release queue. However, we can safely check whether
613 * sem->holder is currently in a queue or scheduled after locking both
614 * the release and the ready queue lock. */
615
616 /* Assumption: caller holds gsnedf_lock */
617
618 int check_preempt = 0;
619
620 if (tsk_rt(holder)->linked_on != NO_CPU) {
621 TRACE_TASK(holder, "%s: linked on %d\n",
622 __FUNCTION__, tsk_rt(holder)->linked_on);
623 /* Holder is scheduled; need to re-order CPUs.
624 * We can't use heap_decrease() here since
625 * the cpu_heap is ordered in reverse direction, so
626 * it is actually an increase. */
627 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
628 gsnedf_cpus[tsk_rt(holder)->linked_on]->hn);
629 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
630 gsnedf_cpus[tsk_rt(holder)->linked_on]->hn);
631 } else {
632 /* holder may be queued: first stop queue changes */
633 spin_lock(&gsnedf.release_lock);
634 if (is_queued(holder)) {
635 TRACE_TASK(holder, "%s: is queued\n",
636 __FUNCTION__);
637 /* We need to update the position
638 * of holder in some heap. Note that this
639 * may be a release heap. */
640 check_preempt =
641 !bheap_decrease(edf_ready_order,
642 tsk_rt(holder)->heap_node);
643 } else {
644 /* Nothing to do: if it is not queued and not linked
645 * then it is currently being moved by other code
646 * (e.g., a timer interrupt handler) that will use the
647 * correct priority when enqueuing the task. */
648 TRACE_TASK(holder, "%s: is NOT queued => Done.\n",
649 __FUNCTION__);
650 }
651 spin_unlock(&gsnedf.release_lock);
652
653 /* If holder was enqueued in a release heap, then the following
654 * preemption check is pointless, but we can't easily detect
655 * that case. If you want to fix this, then consider that
656 * simply adding a state flag requires O(n) time to update when
657 * releasing n tasks, which conflicts with the goal to have
658 * O(log n) merges. */
659 if (check_preempt) {
660 /* heap_decrease() hit the top level of the heap: make
661 * sure preemption checks get the right task, not the
662 * potentially stale cache. */
663 bheap_uncache_min(edf_ready_order,
664 &gsnedf.ready_queue);
665 check_for_preemptions();
666 }
667 }
668}
669
670static long gsnedf_pi_block(struct pi_semaphore *sem,
671 struct task_struct *new_waiter)
672{
673 /* This callback has to handle the situation where a new waiter is
674 * added to the wait queue of the semaphore.
675 *
676	 * We must check if it has a higher priority than the currently
677 * highest-priority task, and then potentially reschedule.
678 */
679
680 BUG_ON(!new_waiter);
681
682 if (edf_higher_prio(new_waiter, sem->hp.task)) {
683 TRACE_TASK(new_waiter, " boosts priority via %p\n", sem);
684 /* called with IRQs disabled */
685 spin_lock(&gsnedf_lock);
686 /* store new highest-priority task */
687 sem->hp.task = new_waiter;
688 if (sem->holder) {
689 TRACE_TASK(sem->holder,
690 " holds %p and will inherit from %s/%d\n",
691 sem,
692 new_waiter->comm, new_waiter->pid);
693 /* let holder inherit */
694 sem->holder->rt_param.inh_task = new_waiter;
695 update_queue_position(sem->holder);
696 }
697 spin_unlock(&gsnedf_lock);
698 }
699
700 return 0;
701}
702
703static long gsnedf_inherit_priority(struct pi_semaphore *sem,
704 struct task_struct *new_owner)
705{
706 /* We don't need to acquire the gsnedf_lock since at the time of this
707 * call new_owner isn't actually scheduled yet (it's still sleeping)
708 * and since the calling function already holds sem->wait.lock, which
709 * prevents concurrent sem->hp.task changes.
710 */
711
712 if (sem->hp.task && sem->hp.task != new_owner) {
713 new_owner->rt_param.inh_task = sem->hp.task;
714 TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
715 sem->hp.task->comm, sem->hp.task->pid);
716 } else
717 TRACE_TASK(new_owner,
718 "cannot inherit priority, "
719 "no higher priority job waits.\n");
720 return 0;
721}
722
723/* This function is called on a semaphore release, and assumes that
724 * the current task is also the semaphore holder.
725 */
726static long gsnedf_return_priority(struct pi_semaphore *sem)
727{
728 struct task_struct* t = current;
729 int ret = 0;
730
731 /* Find new highest-priority semaphore task
732 * if holder task is the current hp.task.
733 *
734 * Calling function holds sem->wait.lock.
735 */
736 if (t == sem->hp.task)
737 edf_set_hp_task(sem);
738
739 TRACE_CUR("gsnedf_return_priority for lock %p\n", sem);
740
741 if (t->rt_param.inh_task) {
742 /* interrupts already disabled by PI code */
743 spin_lock(&gsnedf_lock);
744
745 /* Reset inh_task to NULL. */
746 t->rt_param.inh_task = NULL;
747
748 /* Check if rescheduling is necessary */
749 unlink(t);
750 gsnedf_job_arrival(t);
751 spin_unlock(&gsnedf_lock);
752 }
753
754 return ret;
755}
756
757#endif
758
759static long gsnedf_admit_task(struct task_struct* tsk)
760{
761 return 0;
762}
763
764static long gsnedf_activate_plugin(void)
765{
766 int cpu;
767 cpu_entry_t *entry;
768
769 bheap_init(&gsnedf_cpu_heap);
770 gsnedf.release_master = atomic_read(&release_master_cpu);
771
772 for_each_online_cpu(cpu) {
773 entry = &per_cpu(gsnedf_cpu_entries, cpu);
774 bheap_node_init(&entry->hn, entry);
775 atomic_set(&entry->will_schedule, 0);
776 entry->linked = NULL;
777 entry->scheduled = NULL;
778 if (cpu != gsnedf.release_master) {
779 TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
780 update_cpu_position(entry);
781 } else {
782 TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
783 }
784 }
785 return 0;
786}
787
788/* Plugin object */
789static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
790 .plugin_name = "GSN-EDF",
791 .finish_switch = gsnedf_finish_switch,
792 .tick = gsnedf_tick,
793 .task_new = gsnedf_task_new,
794 .complete_job = complete_job,
795 .task_exit = gsnedf_task_exit,
796 .schedule = gsnedf_schedule,
797 .task_wake_up = gsnedf_task_wake_up,
798 .task_block = gsnedf_task_block,
799#ifdef CONFIG_FMLP
800 .fmlp_active = 1,
801 .pi_block = gsnedf_pi_block,
802 .inherit_priority = gsnedf_inherit_priority,
803 .return_priority = gsnedf_return_priority,
804#endif
805 .admit_task = gsnedf_admit_task,
806 .activate_plugin = gsnedf_activate_plugin,
807};
808
809
810static int __init init_gsn_edf(void)
811{
812 int cpu;
813 cpu_entry_t *entry;
814
815 bheap_init(&gsnedf_cpu_heap);
816 /* initialize CPU state */
817 for (cpu = 0; cpu < NR_CPUS; cpu++) {
818 entry = &per_cpu(gsnedf_cpu_entries, cpu);
819 gsnedf_cpus[cpu] = entry;
820 atomic_set(&entry->will_schedule, 0);
821 entry->cpu = cpu;
822 entry->hn = &gsnedf_heap_node[cpu];
823 bheap_node_init(&entry->hn, entry);
824 }
825 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
826 return register_sched_plugin(&gsn_edf_plugin);
827}
828
829
830module_init(init_gsn_edf);
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
new file mode 100644
index 000000000000..c1fc7748e590
--- /dev/null
+++ b/litmus/sched_litmus.c
@@ -0,0 +1,318 @@
1/* This file is included from kernel/sched.c */
2
3#include <litmus/litmus.h>
4#include <litmus/sched_plugin.h>
5
6static void update_time_litmus(struct rq *rq, struct task_struct *p)
7{
8 u64 delta = rq->clock - p->se.exec_start;
9 if (unlikely((s64)delta < 0))
10 delta = 0;
11 /* per job counter */
12 p->rt_param.job_params.exec_time += delta;
13 /* task counter */
14 p->se.sum_exec_runtime += delta;
15 /* sched_clock() */
16 p->se.exec_start = rq->clock;
17 cpuacct_charge(p, delta);
18}
19
20static void double_rq_lock(struct rq *rq1, struct rq *rq2);
21static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
22
23/*
24 * litmus_tick gets called by scheduler_tick() with HZ freq
25 * Interrupts are disabled
26 */
27static void litmus_tick(struct rq *rq, struct task_struct *p)
28{
29 TS_PLUGIN_TICK_START;
30
31 if (is_realtime(p))
32 update_time_litmus(rq, p);
33
34 /* plugin tick */
35 litmus->tick(p);
36
37 return;
38}
39
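/* litmus_schedule() asks the active plugin for the next task. If that task
 * was last scheduled on a different runqueue, it performs the migration:
 * drop this runqueue's lock, spin until the task's stack is no longer in
 * use elsewhere (with a 10 ms watchdog against deadlock), re-acquire both
 * runqueues, and re-validate that the task is still a runnable real-time
 * task before handing it back to the caller.
 */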
40static struct task_struct *
41litmus_schedule(struct rq *rq, struct task_struct *prev)
42{
43 struct rq* other_rq;
44 struct task_struct *next;
45
46 long was_running;
47 lt_t _maybe_deadlock = 0;
48
49 /* let the plugin schedule */
50 next = litmus->schedule(prev);
51
52 /* check if a global plugin pulled a task from a different RQ */
53 if (next && task_rq(next) != rq) {
54 /* we need to migrate the task */
55 other_rq = task_rq(next);
56 TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);
57
58 /* while we drop the lock, the prev task could change its
59 * state
60 */
61 was_running = is_running(prev);
62 mb();
63 spin_unlock(&rq->lock);
64
65 /* Don't race with a concurrent switch. This could deadlock in
66 * the case of cross or circular migrations. It's the job of
67 * the plugin to make sure that doesn't happen.
68 */
69 TRACE_TASK(next, "stack_in_use=%d\n",
70 next->rt_param.stack_in_use);
71 if (next->rt_param.stack_in_use != NO_CPU) {
72 TRACE_TASK(next, "waiting to deschedule\n");
73 _maybe_deadlock = litmus_clock();
74 }
75 while (next->rt_param.stack_in_use != NO_CPU) {
76 cpu_relax();
77 mb();
78 if (next->rt_param.stack_in_use == NO_CPU)
79 TRACE_TASK(next,"descheduled. Proceeding.\n");
80
81 if (lt_before(_maybe_deadlock + 10000000,
82 litmus_clock())) {
83 /* We've been spinning for 10ms.
 84 * Something must be wrong!
85 * Let's abandon the task and bail out; at least
86 * we will have debug info instead of a hard
87 * deadlock.
88 */
89 TRACE_TASK(next,"stack too long in use. "
90 "Deadlock?\n");
91 next = NULL;
92
93 /* bail out */
94 spin_lock(&rq->lock);
95 return next;
96 }
97 }
98#ifdef __ARCH_WANT_UNLOCKED_CTXSW
99 if (next->oncpu)
100 TRACE_TASK(next, "waiting for !oncpu");
101 while (next->oncpu) {
102 cpu_relax();
103 mb();
104 }
105#endif
106 double_rq_lock(rq, other_rq);
107 mb();
108 if (is_realtime(prev) && is_running(prev) != was_running) {
109 TRACE_TASK(prev,
110 "state changed while we dropped"
111 " the lock: is_running=%d, was_running=%d\n",
112 is_running(prev), was_running);
113 if (is_running(prev) && !was_running) {
114 /* prev task became unblocked
115 * we need to simulate normal sequence of events
116 * to scheduler plugins.
117 */
118 litmus->task_block(prev);
119 litmus->task_wake_up(prev);
120 }
121 }
122
123 set_task_cpu(next, smp_processor_id());
124
125 /* DEBUG: now that we have the lock we need to make sure a
126 * couple of things still hold:
127 * - it is still a real-time task
128 * - it is still runnable (could have been stopped)
129 * If either is violated, then the active plugin is
130 * doing something wrong.
131 */
132 if (!is_realtime(next) || !is_running(next)) {
133 /* BAD BAD BAD */
134 TRACE_TASK(next,"BAD: migration invariant FAILED: "
135 "rt=%d running=%d\n",
136 is_realtime(next),
137 is_running(next));
138 /* drop the task */
139 next = NULL;
140 }
141 /* release the other CPU's runqueue, but keep ours */
142 spin_unlock(&other_rq->lock);
143 }
144 if (next) {
145 next->rt_param.stack_in_use = rq->cpu;
146 next->se.exec_start = rq->clock;
147 }
148
149 return next;
150}
151
152static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
153 int wakeup)
154{
155 if (wakeup) {
156 sched_trace_task_resume(p);
157 tsk_rt(p)->present = 1;
158 litmus->task_wake_up(p);
159
160 rq->litmus.nr_running++;
161 } else
162 TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
163}
164
165static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, int sleep)
166{
167 if (sleep) {
168 litmus->task_block(p);
169 tsk_rt(p)->present = 0;
170 sched_trace_task_block(p);
171
172 rq->litmus.nr_running--;
173 } else
174 TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
175}
176
177static void yield_task_litmus(struct rq *rq)
178{
179 BUG_ON(rq->curr != current);
180 /* sched_yield() is called to trigger delayed preemptions.
181 * Thus, mark the current task as needing to be rescheduled.
182 * This will cause the scheduler plugin to be invoked, which can
183 * then determine if a preemption is still required.
184 */
185 clear_exit_np(current);
186 set_tsk_need_resched(current);
187}
188
189/* Plugins are responsible for this.
190 */
191static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
192{
193}
194
195static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
196{
197}
198
199static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev)
200{
201 update_time_litmus(rq, prev);
202 if (!is_running(prev))
203 tsk_rt(prev)->present = 0;
204}
205
206/* pick_next_task_litmus() - litmus_schedule() function
207 *
208 * return the next task to be scheduled
209 */
210static struct task_struct *pick_next_task_litmus(struct rq *rq)
211{
212 /* get the to-be-switched-out task (prev) */
213 struct task_struct *prev = rq->litmus.prev;
214 struct task_struct *next;
215
216 /* if not called from schedule() but from somewhere
217 * else (e.g., migration), return now!
218 */
 219 if (!rq->litmus.prev)
220 return NULL;
221
222 rq->litmus.prev = NULL;
223
224 TS_PLUGIN_SCHED_START;
225 next = litmus_schedule(rq, prev);
226 TS_PLUGIN_SCHED_END;
227
228 return next;
229}
230
231static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
232{
233 /* nothing to do; tick related tasks are done by litmus_tick() */
234 return;
235}
236
237static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running)
238{
239}
240
241static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
242 int oldprio, int running)
243{
244}
245
246unsigned int get_rr_interval_litmus(struct task_struct *p)
247{
248 /* return infinity */
249 return 0;
250}
251
 252/* This is called when a task becomes a real-time task, either due to a SCHED_*
 253 * class transition or due to PI mutex inheritance. We don't handle Linux PI
 254 * mutex inheritance yet (and probably never will). Use the LITMUS-provided
255 * synchronization primitives instead.
256 */
257static void set_curr_task_litmus(struct rq *rq)
258{
259 rq->curr->se.exec_start = rq->clock;
260}
261
262
263#ifdef CONFIG_SMP
 264/* execve tries to rebalance the task in this scheduling domain */
265static int select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags)
266{
267 /* preemption is already disabled.
268 * We don't want to change cpu here
269 */
270 return smp_processor_id();
271}
272
273/* we don't repartition at runtime */
274
275static unsigned long
276load_balance_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest,
277 unsigned long max_load_move,
278 struct sched_domain *sd, enum cpu_idle_type idle,
279 int *all_pinned, int *this_best_prio)
280{
281 return 0;
282}
283
284static int
285move_one_task_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest,
286 struct sched_domain *sd, enum cpu_idle_type idle)
287{
288 return 0;
289}
290#endif
291
292const struct sched_class litmus_sched_class = {
293 .next = &rt_sched_class,
294 .enqueue_task = enqueue_task_litmus,
295 .dequeue_task = dequeue_task_litmus,
296 .yield_task = yield_task_litmus,
297
298 .check_preempt_curr = check_preempt_curr_litmus,
299
300 .pick_next_task = pick_next_task_litmus,
301 .put_prev_task = put_prev_task_litmus,
302
303#ifdef CONFIG_SMP
304 .select_task_rq = select_task_rq_litmus,
305
306 .load_balance = load_balance_litmus,
307 .move_one_task = move_one_task_litmus,
308 .pre_schedule = pre_schedule_litmus,
309#endif
310
311 .set_curr_task = set_curr_task_litmus,
312 .task_tick = task_tick_litmus,
313
314 .get_rr_interval = get_rr_interval_litmus,
315
316 .prio_changed = prio_changed_litmus,
317 .switched_to = switched_to_litmus,
318};
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
new file mode 100644
index 000000000000..2ea39223e7f0
--- /dev/null
+++ b/litmus/sched_pfair.c
@@ -0,0 +1,896 @@
1/*
2 * kernel/sched_pfair.c
3 *
4 * Implementation of the (global) Pfair scheduling algorithm.
5 *
6 */
7
8#include <asm/div64.h>
9#include <linux/delay.h>
10#include <linux/module.h>
11#include <linux/spinlock.h>
12#include <linux/percpu.h>
13#include <linux/sched.h>
14#include <linux/list.h>
15
16#include <litmus/litmus.h>
17#include <litmus/jobs.h>
18#include <litmus/rt_domain.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/sched_trace.h>
21
22#include <litmus/bheap.h>
23
24struct subtask {
25 /* measured in quanta relative to job release */
26 quanta_t release;
27 quanta_t deadline;
28 quanta_t overlap; /* called "b bit" by PD^2 */
29 quanta_t group_deadline;
30};
31
32struct pfair_param {
33 quanta_t quanta; /* number of subtasks */
34 quanta_t cur; /* index of current subtask */
35
36 quanta_t release; /* in quanta */
37 quanta_t period; /* in quanta */
38
39 quanta_t last_quantum; /* when scheduled last */
40 int last_cpu; /* where scheduled last */
41
42 unsigned int sporadic_release; /* On wakeup, new sporadic release? */
43
44 struct subtask subtasks[0]; /* allocate together with pfair_param */
45};
46
47#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)
48
49struct pfair_state {
50 int cpu;
51 volatile quanta_t cur_tick; /* updated by the CPU that is advancing
52 * the time */
53 volatile quanta_t local_tick; /* What tick is the local CPU currently
54 * executing? Updated only by the local
55 * CPU. In QEMU, this may lag behind the
56 * current tick. In a real system, with
57 * proper timers and aligned quanta,
58 * that should only be the
59 * case for a very short time after the
60 * time advanced. With staggered quanta,
61 * it will lag for the duration of the
62 * offset.
63 */
64
65 struct task_struct* linked; /* the task that should be executing */
66 struct task_struct* local; /* the local copy of linked */
67 struct task_struct* scheduled; /* what is actually scheduled */
68
69 unsigned long missed_quanta;
70 lt_t offset; /* stagger offset */
71};
72
73/* Currently, we limit the maximum period of any task to 2000 quanta.
74 * The reason is that it makes the implementation easier since we do not
75 * need to reallocate the release wheel on task arrivals.
 76 * In the future, this restriction could be lifted by resizing the wheel dynamically.
77 */
78#define PFAIR_MAX_PERIOD 2000
79
80/* This is the release queue wheel. It is indexed by pfair_time %
81 * PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR priority, so that it can be
82 * merged with the ready queue.
83 */
84static struct bheap release_queue[PFAIR_MAX_PERIOD];
85
86DEFINE_PER_CPU(struct pfair_state, pfair_state);
87struct pfair_state* *pstate; /* short cut */
88
89static quanta_t pfair_time = 0; /* the "official" PFAIR clock */
90static quanta_t merge_time = 0; /* Updated after the release queue has been
91 * merged. Used by drop_all_references().
92 */
93
94static rt_domain_t pfair;
95
96/* The pfair_lock is used to serialize all scheduling events.
97 */
98#define pfair_lock pfair.ready_lock
99
100/* Enable for lots of trace info.
101 * #define PFAIR_DEBUG
102 */
103
104#ifdef PFAIR_DEBUG
105#define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args)
106#define PTRACE(f, args...) TRACE(f, ## args)
107#else
108#define PTRACE_TASK(t, f, args...)
109#define PTRACE(f, args...)
110#endif
111
112/* gcc will inline all of these accessor functions... */
113static struct subtask* cur_subtask(struct task_struct* t)
114{
115 return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
116}
117
118static quanta_t cur_deadline(struct task_struct* t)
119{
120 return cur_subtask(t)->deadline + tsk_pfair(t)->release;
121}
122
123
124static quanta_t cur_sub_release(struct task_struct* t)
125{
126 return cur_subtask(t)->release + tsk_pfair(t)->release;
127}
128
129static quanta_t cur_release(struct task_struct* t)
130{
131#ifdef EARLY_RELEASE
132 /* only the release of the first subtask counts when we early
133 * release */
134 return tsk_pfair(t)->release;
135#else
136 return cur_sub_release(t);
137#endif
138}
139
140static quanta_t cur_overlap(struct task_struct* t)
141{
142 return cur_subtask(t)->overlap;
143}
144
145static quanta_t cur_group_deadline(struct task_struct* t)
146{
147 quanta_t gdl = cur_subtask(t)->group_deadline;
148 if (gdl)
149 return gdl + tsk_pfair(t)->release;
150 else
151 return gdl;
152}
153
154
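/* PD^2 priority order: (1) the earlier subtask deadline wins; on a deadline
 * tie, (2) the larger b-bit (overlap) wins; on a b-bit tie, (3) the later
 * group deadline wins; and (4) the lower PID breaks any remaining tie, since
 * PIDs are unique. A missing or non-real-time second task always loses.
 */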
155static int pfair_higher_prio(struct task_struct* first,
156 struct task_struct* second)
157{
158 return /* first task must exist */
159 first && (
160 /* Does the second task exist and is it a real-time task? If
161 * not, the first task (which is a RT task) has higher
162 * priority.
163 */
164 !second || !is_realtime(second) ||
165
166 /* Is the (subtask) deadline of the first task earlier?
167 * Then it has higher priority.
168 */
169 time_before(cur_deadline(first), cur_deadline(second)) ||
170
171 /* Do we have a deadline tie?
172 * Then break by B-bit.
173 */
174 (cur_deadline(first) == cur_deadline(second) &&
175 (cur_overlap(first) > cur_overlap(second) ||
176
177 /* Do we have a B-bit tie?
178 * Then break by group deadline.
179 */
180 (cur_overlap(first) == cur_overlap(second) &&
181 (time_after(cur_group_deadline(first),
182 cur_group_deadline(second)) ||
183
184 /* Do we have a group deadline tie?
185 * Then break by PID, which are unique.
186 */
187 (cur_group_deadline(first) ==
188 cur_group_deadline(second) &&
189 first->pid < second->pid))))));
190}
191
192int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
193{
194 return pfair_higher_prio(bheap2task(a), bheap2task(b));
195}
196
197/* return the proper release queue for time t */
198static struct bheap* relq(quanta_t t)
199{
200 struct bheap* rq = &release_queue[t % PFAIR_MAX_PERIOD];
201 return rq;
202}
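/* For example, with PFAIR_MAX_PERIOD = 2000, a release at quantum 4711 is
 * stored in release_queue[4711 % 2000] = release_queue[711].
 */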
203
204static void prepare_release(struct task_struct* t, quanta_t at)
205{
206 tsk_pfair(t)->release = at;
207 tsk_pfair(t)->cur = 0;
208}
209
210static void __pfair_add_release(struct task_struct* t, struct bheap* queue)
211{
212 bheap_insert(pfair_ready_order, queue,
213 tsk_rt(t)->heap_node);
214}
215
216static void pfair_add_release(struct task_struct* t)
217{
218 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
219 __pfair_add_release(t, relq(cur_release(t)));
220}
221
222/* pull released tasks from the release queue */
223static void poll_releases(quanta_t time)
224{
225 __merge_ready(&pfair, relq(time));
226 merge_time = time;
227}
228
229static void check_preempt(struct task_struct* t)
230{
231 int cpu = NO_CPU;
232 if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
233 tsk_rt(t)->present) {
234 /* the task can be scheduled and
235 * is not scheduled where it ought to be scheduled
236 */
237 cpu = tsk_rt(t)->linked_on != NO_CPU ?
238 tsk_rt(t)->linked_on :
239 tsk_rt(t)->scheduled_on;
240 PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
241 tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
242 /* preempt */
243 if (cpu == smp_processor_id())
244 set_tsk_need_resched(current);
245 else {
246 smp_send_reschedule(cpu);
247 }
248 }
249}
250
251/* caller must hold pfair_lock */
252static void drop_all_references(struct task_struct *t)
253{
254 int cpu;
255 struct pfair_state* s;
256 struct bheap* q;
257 if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
258 /* figure out what queue the node is in */
259 if (time_before_eq(cur_release(t), merge_time))
260 q = &pfair.ready_queue;
261 else
262 q = relq(cur_release(t));
263 bheap_delete(pfair_ready_order, q,
264 tsk_rt(t)->heap_node);
265 }
266 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
267 s = &per_cpu(pfair_state, cpu);
268 if (s->linked == t)
269 s->linked = NULL;
270 if (s->local == t)
271 s->local = NULL;
272 if (s->scheduled == t)
273 s->scheduled = NULL;
274 }
275}
276
 277/* returns 1 if the task needs to go to the release queue */
278static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
279{
280 struct pfair_param* p = tsk_pfair(t);
281 int to_relq;
282 p->cur = (p->cur + 1) % p->quanta;
283 if (!p->cur) {
284 sched_trace_task_completion(t, 1);
285 if (tsk_rt(t)->present) {
286 /* we start a new job */
287 prepare_for_next_period(t);
288 sched_trace_task_release(t);
289 get_rt_flags(t) = RT_F_RUNNING;
290 p->release += p->period;
291 } else {
292 /* remove task from system until it wakes */
293 drop_all_references(t);
294 tsk_pfair(t)->sporadic_release = 1;
295 TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
296 cpu, p->cur);
297 return 0;
298 }
299 }
300 to_relq = time_after(cur_release(t), time);
301 TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n",
302 cpu, p->cur, to_relq);
303 return to_relq;
304}
305
306static void advance_subtasks(quanta_t time)
307{
308 int cpu, missed;
309 struct task_struct* l;
310 struct pfair_param* p;
311
312 for_each_online_cpu(cpu) {
313 l = pstate[cpu]->linked;
314 missed = pstate[cpu]->linked != pstate[cpu]->local;
315 if (l) {
316 p = tsk_pfair(l);
317 p->last_quantum = time;
318 p->last_cpu = cpu;
319 if (advance_subtask(time, l, cpu)) {
320 pstate[cpu]->linked = NULL;
321 pfair_add_release(l);
322 }
323 }
324 }
325}
326
327static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
328{
329 int cpu;
330 if (tsk_rt(t)->scheduled_on != NO_CPU) {
331 /* always observe scheduled_on linkage */
332 default_cpu = tsk_rt(t)->scheduled_on;
333 } else if (tsk_pfair(t)->last_quantum == time - 1) {
334 /* back2back quanta */
335 /* Only observe last_quantum if no scheduled_on is in the way.
336 * This should only kick in if a CPU missed quanta, and that
337 * *should* only happen in QEMU.
338 */
339 cpu = tsk_pfair(t)->last_cpu;
340 if (!pstate[cpu]->linked ||
341 tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
342 default_cpu = cpu;
343 }
344 }
345 return default_cpu;
346}
347
348/* returns one if linking was redirected */
349static int pfair_link(quanta_t time, int cpu,
350 struct task_struct* t)
351{
352 int target = target_cpu(time, t, cpu);
353 struct task_struct* prev = pstate[cpu]->linked;
354 struct task_struct* other;
355
356 if (target != cpu) {
357 other = pstate[target]->linked;
358 pstate[target]->linked = t;
359 tsk_rt(t)->linked_on = target;
360 if (!other)
361 /* linked ok, but reschedule this CPU */
362 return 1;
363 if (target < cpu) {
364 /* link other to cpu instead */
365 tsk_rt(other)->linked_on = cpu;
366 pstate[cpu]->linked = other;
367 if (prev) {
368 /* prev got pushed back into the ready queue */
369 tsk_rt(prev)->linked_on = NO_CPU;
370 __add_ready(&pfair, prev);
371 }
372 /* we are done with this cpu */
373 return 0;
374 } else {
 375 /* re-add other, its original CPU was not considered yet */
376 tsk_rt(other)->linked_on = NO_CPU;
377 __add_ready(&pfair, other);
378 /* reschedule this CPU */
379 return 1;
380 }
381 } else {
382 pstate[cpu]->linked = t;
383 tsk_rt(t)->linked_on = cpu;
384 if (prev) {
385 /* prev got pushed back into the ready queue */
386 tsk_rt(prev)->linked_on = NO_CPU;
387 __add_ready(&pfair, prev);
388 }
389 /* we are done with this CPU */
390 return 0;
391 }
392}
393
394static void schedule_subtasks(quanta_t time)
395{
396 int cpu, retry;
397
398 for_each_online_cpu(cpu) {
399 retry = 1;
400 while (retry) {
401 if (pfair_higher_prio(__peek_ready(&pfair),
402 pstate[cpu]->linked))
403 retry = pfair_link(time, cpu,
404 __take_ready(&pfair));
405 else
406 retry = 0;
407 }
408 }
409}
410
411static void schedule_next_quantum(quanta_t time)
412{
413 int cpu;
414
415 /* called with interrupts disabled */
416 PTRACE("--- Q %lu at %llu PRE-SPIN\n",
417 time, litmus_clock());
418 spin_lock(&pfair_lock);
419 PTRACE("<<< Q %lu at %llu\n",
420 time, litmus_clock());
421
422 sched_trace_quantum_boundary();
423
424 advance_subtasks(time);
425 poll_releases(time);
426 schedule_subtasks(time);
427
428 for (cpu = 0; cpu < num_online_cpus(); cpu++)
429 if (pstate[cpu]->linked)
430 PTRACE_TASK(pstate[cpu]->linked,
431 " linked on %d.\n", cpu);
432 else
433 PTRACE("(null) linked on %d.\n", cpu);
434
435 /* We are done. Advance time. */
436 mb();
437 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
438 if (pstate[cpu]->local_tick != pstate[cpu]->cur_tick) {
439 TRACE("BAD Quantum not acked on %d "
440 "(l:%lu c:%lu p:%lu)\n",
441 cpu,
442 pstate[cpu]->local_tick,
443 pstate[cpu]->cur_tick,
444 pfair_time);
445 pstate[cpu]->missed_quanta++;
446 }
447 pstate[cpu]->cur_tick = time;
448 }
449 PTRACE(">>> Q %lu at %llu\n",
450 time, litmus_clock());
451 spin_unlock(&pfair_lock);
452}
453
454static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state)
455{
456 quanta_t loc;
457
458 goto first; /* skip mb() on first iteration */
459 do {
460 cpu_relax();
461 mb();
462 first: loc = state->cur_tick;
463 /* FIXME: what if loc > cur? */
464 } while (time_before(loc, q));
465 PTRACE("observed cur_tick:%lu >= q:%lu\n",
466 loc, q);
467}
468
469static quanta_t current_quantum(struct pfair_state* state)
470{
471 lt_t t = litmus_clock() - state->offset;
472 return time2quanta(t, FLOOR);
473}
474
475static void catchup_quanta(quanta_t from, quanta_t target,
476 struct pfair_state* state)
477{
478 quanta_t cur = from, time;
479 TRACE("+++< BAD catching up quanta from %lu to %lu\n",
480 from, target);
481 while (time_before(cur, target)) {
482 wait_for_quantum(cur, state);
483 cur++;
484 time = cmpxchg(&pfair_time,
485 cur - 1, /* expected */
486 cur /* next */
487 );
488 if (time == cur - 1)
489 schedule_next_quantum(cur);
490 }
491 TRACE("+++> catching up done\n");
492}
493
494/* pfair_tick - this function is called for every local timer
495 * interrupt.
496 */
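/* Each CPU derives the current quantum from its (staggered) local clock and
 * races on cmpxchg(&pfair_time, cur - 1, cur). The winner waits for the
 * previous quantum to be acknowledged by all CPUs and then runs
 * schedule_next_quantum(); the losers simply spin in wait_for_quantum()
 * until cur_tick advances and then adopt the new assignment.
 */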
497static void pfair_tick(struct task_struct* t)
498{
499 struct pfair_state* state = &__get_cpu_var(pfair_state);
500 quanta_t time, cur;
501 int retry = 10;
502
503 do {
504 cur = current_quantum(state);
505 PTRACE("q %lu at %llu\n", cur, litmus_clock());
506
507 /* Attempt to advance time. First CPU to get here
508 * will prepare the next quantum.
509 */
510 time = cmpxchg(&pfair_time,
511 cur - 1, /* expected */
512 cur /* next */
513 );
514 if (time == cur - 1) {
515 /* exchange succeeded */
516 wait_for_quantum(cur - 1, state);
517 schedule_next_quantum(cur);
518 retry = 0;
519 } else if (time_before(time, cur - 1)) {
520 /* the whole system missed a tick !? */
521 catchup_quanta(time, cur, state);
522 retry--;
523 } else if (time_after(time, cur)) {
524 /* our timer lagging behind!? */
525 TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
526 retry--;
527 } else {
528 /* Some other CPU already started scheduling
529 * this quantum. Let it do its job and then update.
530 */
531 retry = 0;
532 }
533 } while (retry);
534
535 /* Spin locally until time advances. */
536 wait_for_quantum(cur, state);
537
538 /* copy assignment */
539 /* FIXME: what if we race with a future update? Corrupted state? */
540 state->local = state->linked;
541 /* signal that we are done */
542 mb();
543 state->local_tick = state->cur_tick;
544
545 if (state->local != current
546 && (is_realtime(current) || is_present(state->local)))
547 set_tsk_need_resched(current);
548}
549
550static int safe_to_schedule(struct task_struct* t, int cpu)
551{
552 int where = tsk_rt(t)->scheduled_on;
553 if (where != NO_CPU && where != cpu) {
554 TRACE_TASK(t, "BAD: can't be scheduled on %d, "
555 "scheduled already on %d.\n", cpu, where);
556 return 0;
557 } else
558 return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING;
559}
560
561static struct task_struct* pfair_schedule(struct task_struct * prev)
562{
563 struct pfair_state* state = &__get_cpu_var(pfair_state);
564 int blocks;
565 struct task_struct* next = NULL;
566
567 spin_lock(&pfair_lock);
568
569 blocks = is_realtime(prev) && !is_running(prev);
570
571 if (state->local && safe_to_schedule(state->local, state->cpu))
572 next = state->local;
573
574 if (prev != next) {
575 tsk_rt(prev)->scheduled_on = NO_CPU;
576 if (next)
577 tsk_rt(next)->scheduled_on = state->cpu;
578 }
579
580 spin_unlock(&pfair_lock);
581
582 if (next)
583 TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
584 tsk_pfair(next)->release, pfair_time, litmus_clock());
585 else if (is_realtime(prev))
586 TRACE("Becomes idle at %lu (%llu)\n", pfair_time, litmus_clock());
587
588 return next;
589}
590
591static void pfair_task_new(struct task_struct * t, int on_rq, int running)
592{
593 unsigned long flags;
594
595 TRACE("pfair: task new %d state:%d\n", t->pid, t->state);
596
597 spin_lock_irqsave(&pfair_lock, flags);
598 if (running)
599 t->rt_param.scheduled_on = task_cpu(t);
600 else
601 t->rt_param.scheduled_on = NO_CPU;
602
603 prepare_release(t, pfair_time + 1);
604 tsk_pfair(t)->sporadic_release = 0;
605 pfair_add_release(t);
606 check_preempt(t);
607
608 spin_unlock_irqrestore(&pfair_lock, flags);
609}
610
611static void pfair_task_wake_up(struct task_struct *t)
612{
613 unsigned long flags;
614 lt_t now;
615
616 TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
617 litmus_clock(), cur_release(t), pfair_time);
618
619 spin_lock_irqsave(&pfair_lock, flags);
620
621 /* It is a little unclear how to deal with Pfair
622 * tasks that block for a while and then wake. For now,
623 * if a task blocks and wakes before its next job release,
624 * then it may resume if it is currently linked somewhere
625 * (as if it never blocked at all). Otherwise, we have a
626 * new sporadic job release.
627 */
628 if (tsk_pfair(t)->sporadic_release) {
629 now = litmus_clock();
630 release_at(t, now);
631 prepare_release(t, time2quanta(now, CEIL));
632 sched_trace_task_release(t);
633 /* FIXME: race with pfair_time advancing */
634 pfair_add_release(t);
635 tsk_pfair(t)->sporadic_release = 0;
636 }
637
638 check_preempt(t);
639
640 spin_unlock_irqrestore(&pfair_lock, flags);
641 TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
642}
643
644static void pfair_task_block(struct task_struct *t)
645{
646 BUG_ON(!is_realtime(t));
647 TRACE_TASK(t, "blocks at %llu, state:%d\n",
648 litmus_clock(), t->state);
649}
650
651static void pfair_task_exit(struct task_struct * t)
652{
653 unsigned long flags;
654
655 BUG_ON(!is_realtime(t));
656
 657 /* Remove the task from the release or ready queue, and ensure
 658 * that it is not the scheduled task for ANY CPU. We
 659 * do this blanket check because occasionally when
660 * tasks exit while blocked, the task_cpu of the task
661 * might not be the same as the CPU that the PFAIR scheduler
662 * has chosen for it.
663 */
664 spin_lock_irqsave(&pfair_lock, flags);
665
666 TRACE_TASK(t, "RIP, state:%d\n", t->state);
667 drop_all_references(t);
668
669 spin_unlock_irqrestore(&pfair_lock, flags);
670
671 kfree(t->rt_param.pfair);
672 t->rt_param.pfair = NULL;
673}
674
675
676static void pfair_release_at(struct task_struct* task, lt_t start)
677{
678 unsigned long flags;
679 quanta_t release;
680
681 BUG_ON(!is_realtime(task));
682
683 spin_lock_irqsave(&pfair_lock, flags);
684 release_at(task, start);
685 release = time2quanta(start, CEIL);
686
687 if (release - pfair_time >= PFAIR_MAX_PERIOD)
688 release = pfair_time + PFAIR_MAX_PERIOD;
689
690 TRACE_TASK(task, "sys release at %lu\n", release);
691
692 drop_all_references(task);
693 prepare_release(task, release);
694 pfair_add_release(task);
695
696 /* Clear sporadic release flag, since this release subsumes any
697 * sporadic release on wake.
698 */
699 tsk_pfair(task)->sporadic_release = 0;
700
701 spin_unlock_irqrestore(&pfair_lock, flags);
702}
703
704static void init_subtask(struct subtask* sub, unsigned long i,
705 lt_t quanta, lt_t period)
706{
707 /* since i is zero-based, the formulas are shifted by one */
708 lt_t tmp;
709
710 /* release */
711 tmp = period * i;
712 do_div(tmp, quanta); /* floor */
713 sub->release = (quanta_t) tmp;
714
715 /* deadline */
716 tmp = period * (i + 1);
717 if (do_div(tmp, quanta)) /* ceil */
718 tmp++;
719 sub->deadline = (quanta_t) tmp;
720
721 /* next release */
722 tmp = period * (i + 1);
723 do_div(tmp, quanta); /* floor */
724 sub->overlap = sub->deadline - (quanta_t) tmp;
725
726 /* Group deadline.
727 * Based on the formula given in Uma's thesis.
728 */
729 if (2 * quanta >= period) {
730 /* heavy */
731 tmp = (sub->deadline - (i + 1)) * period;
732 if (period > quanta &&
733 do_div(tmp, (period - quanta))) /* ceil */
734 tmp++;
735 sub->group_deadline = (quanta_t) tmp;
736 } else
737 sub->group_deadline = 0;
738}
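/* Worked example: a task with weight 3/5 (quanta = 3, period = 5) gets the
 * following subtask parameters (relative to the job release, in quanta):
 *	subtask 0: release 0, deadline 2, b-bit 1, group deadline 3
 *	subtask 1: release 1, deadline 4, b-bit 1, group deadline 5
 *	subtask 2: release 3, deadline 5, b-bit 0, group deadline 5
 */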
739
740static void dump_subtasks(struct task_struct* t)
741{
742 unsigned long i;
743 for (i = 0; i < t->rt_param.pfair->quanta; i++)
744 TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n",
745 i + 1,
746 t->rt_param.pfair->subtasks[i].release,
747 t->rt_param.pfair->subtasks[i].deadline,
748 t->rt_param.pfair->subtasks[i].overlap,
749 t->rt_param.pfair->subtasks[i].group_deadline);
750}
751
752static long pfair_admit_task(struct task_struct* t)
753{
754 lt_t quanta;
755 lt_t period;
756 s64 quantum_length = ktime_to_ns(tick_period);
757 struct pfair_param* param;
758 unsigned long i;
759
760 /* Pfair is a tick-based method, so the time
761 * of interest is jiffies. Calculate tick-based
762 * times for everything.
763 * (Ceiling of exec cost, floor of period.)
764 */
765
766 quanta = get_exec_cost(t);
767 period = get_rt_period(t);
768
769 quanta = time2quanta(get_exec_cost(t), CEIL);
770
771 if (do_div(period, quantum_length))
772 printk(KERN_WARNING
773 "The period of %s/%d is not a multiple of %llu.\n",
774 t->comm, t->pid, (unsigned long long) quantum_length);
775
776 if (period >= PFAIR_MAX_PERIOD) {
777 printk(KERN_WARNING
778 "PFAIR: Rejecting task %s/%d; its period is too long.\n",
779 t->comm, t->pid);
780 return -EINVAL;
781 }
782
783 if (quanta == period) {
784 /* special case: task has weight 1.0 */
785 printk(KERN_INFO
786 "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n",
787 t->comm, t->pid, quanta, period);
788 quanta = 1;
789 period = 1;
790 }
791
792 param = kmalloc(sizeof(*param) +
793 quanta * sizeof(struct subtask), GFP_ATOMIC);
794
795 if (!param)
796 return -ENOMEM;
797
798 param->quanta = quanta;
799 param->cur = 0;
800 param->release = 0;
801 param->period = period;
802
803 for (i = 0; i < quanta; i++)
804 init_subtask(param->subtasks + i, i, quanta, period);
805
806 if (t->rt_param.pfair)
807 /* get rid of stale allocation */
808 kfree(t->rt_param.pfair);
809
810 t->rt_param.pfair = param;
811
812 /* spew out some debug info */
813 dump_subtasks(t);
814
815 return 0;
816}
817
818static long pfair_activate_plugin(void)
819{
820 int cpu;
821 struct pfair_state* state;
822
823 state = &__get_cpu_var(pfair_state);
824 pfair_time = current_quantum(state);
825
826 TRACE("Activating PFAIR at q=%lu\n", pfair_time);
827
828 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
829 state = &per_cpu(pfair_state, cpu);
830 state->cur_tick = pfair_time;
831 state->local_tick = pfair_time;
832 state->missed_quanta = 0;
833 state->offset = cpu_stagger_offset(cpu);
834 }
835
836 return 0;
837}
838
839/* Plugin object */
840static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
841 .plugin_name = "PFAIR",
842 .tick = pfair_tick,
843 .task_new = pfair_task_new,
844 .task_exit = pfair_task_exit,
845 .schedule = pfair_schedule,
846 .task_wake_up = pfair_task_wake_up,
847 .task_block = pfair_task_block,
848 .admit_task = pfair_admit_task,
849 .release_at = pfair_release_at,
850 .complete_job = complete_job,
851 .activate_plugin = pfair_activate_plugin,
852};
853
854static int __init init_pfair(void)
855{
856 int cpu, i;
857 struct pfair_state *state;
858
859
860 /*
861 * initialize short_cut for per-cpu pfair state;
862 * there may be a problem here if someone removes a cpu
863 * while we are doing this initialization... and if cpus
864 * are added / removed later... is it a _real_ problem?
865 */
866 pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL);
867
868 /* initialize release queue */
869 for (i = 0; i < PFAIR_MAX_PERIOD; i++)
870 bheap_init(&release_queue[i]);
871
872 /* initialize CPU state */
873 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
874 state = &per_cpu(pfair_state, cpu);
875 state->cpu = cpu;
876 state->cur_tick = 0;
877 state->local_tick = 0;
878 state->linked = NULL;
879 state->local = NULL;
880 state->scheduled = NULL;
881 state->missed_quanta = 0;
882 state->offset = cpu_stagger_offset(cpu);
883 pstate[cpu] = state;
884 }
885
886 rt_domain_init(&pfair, pfair_ready_order, NULL, NULL);
887 return register_sched_plugin(&pfair_plugin);
888}
889
890static void __exit clean_pfair(void)
891{
892 kfree(pstate);
893}
894
895module_init(init_pfair);
896module_exit(clean_pfair);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 000000000000..3767b30e610a
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,265 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin, some dummy functions, and some helper functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9
10#include <litmus/litmus.h>
11#include <litmus/sched_plugin.h>
12
13#include <litmus/jobs.h>
14
15/*
16 * Generic function to trigger preemption on either local or remote cpu
17 * from scheduler plugins. The key feature is that this function is
18 * non-preemptive section aware and does not invoke the scheduler / send
19 * IPIs if the to-be-preempted task is actually non-preemptive.
20 */
21void preempt_if_preemptable(struct task_struct* t, int on_cpu)
22{
 23 /* t is the real-time task executing on CPU on_cpu. If t is NULL, then
24 * on_cpu is currently scheduling background work.
25 */
26
27 int send_ipi;
28
29 if (smp_processor_id() == on_cpu) {
30 /* local CPU case */
31 if (t) {
32 /* check if we need to poke userspace */
33 if (is_user_np(t))
34 /* yes, poke it */
35 request_exit_np(t);
36 else
37 /* no, see if we are allowed to preempt the
38 * currently-executing task */
39 if (!is_kernel_np(t))
40 set_tsk_need_resched(t);
41 } else
42 /* move non-real-time task out of the way */
43 set_tsk_need_resched(current);
44 } else {
45 /* remote CPU case */
46 if (!t)
47 /* currently schedules non-real-time work */
48 send_ipi = 1;
49 else {
50 /* currently schedules real-time work */
51 if (is_user_np(t)) {
52 /* need to notify user space of delayed
53 * preemption */
54
55 /* to avoid a race, set the flag, then test
56 * again */
57 request_exit_np(t);
58 /* make sure it got written */
59 mb();
60 }
 61 /* Only send an IPI if the remote task might have raced with
 62 * our request, i.e., send an IPI to force a reschedule in
 63 * case it has already exited its critical section.
64 */
65 send_ipi = !is_np(t) && !is_kernel_np(t);
66 }
67 if (likely(send_ipi))
68 smp_send_reschedule(on_cpu);
69 }
70}
71
72
73/*************************************************************
74 * Dummy plugin functions *
75 *************************************************************/
76
77static void litmus_dummy_finish_switch(struct task_struct * prev)
78{
79}
80
81static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
82{
83 return NULL;
84}
85
86static void litmus_dummy_tick(struct task_struct* tsk)
87{
88}
89
90static long litmus_dummy_admit_task(struct task_struct* tsk)
91{
92 printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
93 tsk->comm, tsk->pid);
94 return -EINVAL;
95}
96
97static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
98{
99}
100
101static void litmus_dummy_task_wake_up(struct task_struct *task)
102{
103}
104
105static void litmus_dummy_task_block(struct task_struct *task)
106{
107}
108
109static void litmus_dummy_task_exit(struct task_struct *task)
110{
111}
112
113static long litmus_dummy_complete_job(void)
114{
115 return -ENOSYS;
116}
117
118static long litmus_dummy_activate_plugin(void)
119{
120 return 0;
121}
122
123static long litmus_dummy_deactivate_plugin(void)
124{
125 return 0;
126}
127
128#ifdef CONFIG_FMLP
129
130static long litmus_dummy_inherit_priority(struct pi_semaphore *sem,
131 struct task_struct *new_owner)
132{
133 return -ENOSYS;
134}
135
136static long litmus_dummy_return_priority(struct pi_semaphore *sem)
137{
138 return -ENOSYS;
139}
140
141static long litmus_dummy_pi_block(struct pi_semaphore *sem,
142 struct task_struct *new_waiter)
143{
144 return -ENOSYS;
145}
146
147#endif
148
149
150/* The default scheduler plugin. It doesn't do anything and lets Linux do its
151 * job.
152 */
153struct sched_plugin linux_sched_plugin = {
154 .plugin_name = "Linux",
155 .tick = litmus_dummy_tick,
156 .task_new = litmus_dummy_task_new,
157 .task_exit = litmus_dummy_task_exit,
158 .task_wake_up = litmus_dummy_task_wake_up,
159 .task_block = litmus_dummy_task_block,
160 .complete_job = litmus_dummy_complete_job,
161 .schedule = litmus_dummy_schedule,
162 .finish_switch = litmus_dummy_finish_switch,
163 .activate_plugin = litmus_dummy_activate_plugin,
164 .deactivate_plugin = litmus_dummy_deactivate_plugin,
165#ifdef CONFIG_FMLP
166 .inherit_priority = litmus_dummy_inherit_priority,
167 .return_priority = litmus_dummy_return_priority,
168 .pi_block = litmus_dummy_pi_block,
169#endif
170 .admit_task = litmus_dummy_admit_task
171};
172
173/*
174 * The cluster size is needed in C-EDF: it makes sense only to cluster
 175 * around L2 or L3 caches, so if cluster_cache_index = 2 (the default) we
 176 * cluster all the CPUs that share an L2 cache, while with cluster_cache_index = 3
 177 * we cluster all the CPUs that share an L3 cache.
178 */
179int cluster_cache_index = 2;
180
181/*
182 * The reference to current plugin that is used to schedule tasks within
183 * the system. It stores references to actual function implementations
184 * Should be initialized by calling "init_***_plugin()"
185 */
186struct sched_plugin *litmus = &linux_sched_plugin;
187
188/* the list of registered scheduling plugins */
189static LIST_HEAD(sched_plugins);
190static DEFINE_SPINLOCK(sched_plugins_lock);
191
192#define CHECK(func) {\
193 if (!plugin->func) \
194 plugin->func = litmus_dummy_ ## func;}
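/* For example, CHECK(schedule) expands to:
 *	{ if (!plugin->schedule) plugin->schedule = litmus_dummy_schedule; }
 */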
195
196/* FIXME: get reference to module */
197int register_sched_plugin(struct sched_plugin* plugin)
198{
199 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
200 plugin->plugin_name);
201
202 /* make sure we don't trip over null pointers later */
203 CHECK(finish_switch);
204 CHECK(schedule);
205 CHECK(tick);
206 CHECK(task_wake_up);
207 CHECK(task_exit);
208 CHECK(task_block);
209 CHECK(task_new);
210 CHECK(complete_job);
211 CHECK(activate_plugin);
212 CHECK(deactivate_plugin);
213#ifdef CONFIG_FMLP
214 CHECK(inherit_priority);
215 CHECK(return_priority);
216 CHECK(pi_block);
217#endif
218 CHECK(admit_task);
219
220 if (!plugin->release_at)
221 plugin->release_at = release_at;
222
223 spin_lock(&sched_plugins_lock);
224 list_add(&plugin->list, &sched_plugins);
225 spin_unlock(&sched_plugins_lock);
226
227 return 0;
228}
229
230
231/* FIXME: reference counting, etc. */
232struct sched_plugin* find_sched_plugin(const char* name)
233{
234 struct list_head *pos;
235 struct sched_plugin *plugin;
236
237 spin_lock(&sched_plugins_lock);
238 list_for_each(pos, &sched_plugins) {
239 plugin = list_entry(pos, struct sched_plugin, list);
240 if (!strcmp(plugin->plugin_name, name))
241 goto out_unlock;
242 }
243 plugin = NULL;
244
245out_unlock:
246 spin_unlock(&sched_plugins_lock);
247 return plugin;
248}
249
250int print_sched_plugins(char* buf, int max)
251{
252 int count = 0;
253 struct list_head *pos;
254 struct sched_plugin *plugin;
255
256 spin_lock(&sched_plugins_lock);
257 list_for_each(pos, &sched_plugins) {
258 plugin = list_entry(pos, struct sched_plugin, list);
259 count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name);
260 if (max - count <= 0)
261 break;
262 }
263 spin_unlock(&sched_plugins_lock);
264 return count;
265}
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 000000000000..af0b30cb8b89
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,480 @@
1/*
2 * kernel/sched_psn_edf.c
3 *
4 * Implementation of the PSN-EDF scheduler plugin.
5 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
6 *
7 * Suspensions and non-preemptable sections are supported.
8 * Priority inheritance is not supported.
9 */
10
11#include <linux/percpu.h>
12#include <linux/sched.h>
13#include <linux/list.h>
14#include <linux/spinlock.h>
15
16#include <linux/module.h>
17
18#include <litmus/litmus.h>
19#include <litmus/jobs.h>
20#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h>
22
23
24typedef struct {
25 rt_domain_t domain;
26 int cpu;
27 struct task_struct* scheduled; /* only RT tasks */
28/*
29 * scheduling lock slock
30 * protects the domain and serializes scheduling decisions
31 */
32#define slock domain.ready_lock
33
34} psnedf_domain_t;
35
36DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
37
38#define local_edf (&__get_cpu_var(psnedf_domains).domain)
39#define local_pedf (&__get_cpu_var(psnedf_domains))
40#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
41#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
42#define task_edf(task) remote_edf(get_partition(task))
43#define task_pedf(task) remote_pedf(get_partition(task))
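/* These accessors resolve to per-CPU scheduling state: local_* refer to the
 * CPU executing the code, remote_*(cpu) to an explicitly named CPU, and
 * task_*(task) to the partition (CPU) that the task was assigned to.
 */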
44
45
46static void psnedf_domain_init(psnedf_domain_t* pedf,
47 check_resched_needed_t check,
48 release_jobs_t release,
49 int cpu)
50{
51 edf_domain_init(&pedf->domain, check, release);
52 pedf->cpu = cpu;
53 pedf->scheduled = NULL;
54}
55
56static void requeue(struct task_struct* t, rt_domain_t *edf)
57{
58 if (t->state != TASK_RUNNING)
59 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
60
61 set_rt_flags(t, RT_F_RUNNING);
62 if (is_released(t, litmus_clock()))
63 __add_ready(edf, t);
64 else
65 add_release(edf, t); /* it has got to wait */
66}
67
68/* we assume the lock is being held */
69static void preempt(psnedf_domain_t *pedf)
70{
71 preempt_if_preemptable(pedf->scheduled, pedf->cpu);
72}
73
 74/* This check is trivial in partitioned systems as we only have to consider
75 * the CPU of the partition.
76 */
77static int psnedf_check_resched(rt_domain_t *edf)
78{
79 psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
80
81 /* because this is a callback from rt_domain_t we already hold
82 * the necessary lock for the ready queue
83 */
84 if (edf_preemption_needed(edf, pedf->scheduled)) {
85 preempt(pedf);
86 return 1;
87 } else
88 return 0;
89}
90
91static void job_completion(struct task_struct* t, int forced)
92{
93 sched_trace_task_completion(t,forced);
94 TRACE_TASK(t, "job_completion().\n");
95
96 set_rt_flags(t, RT_F_SLEEP);
97 prepare_for_next_period(t);
98}
99
100static void psnedf_tick(struct task_struct *t)
101{
102 psnedf_domain_t *pedf = local_pedf;
103
104 /* Check for inconsistency. We don't need the lock for this since
105 * ->scheduled is only changed in schedule, which obviously is not
106 * executing in parallel on this CPU
107 */
108 BUG_ON(is_realtime(t) && t != pedf->scheduled);
109
110 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
111 if (!is_np(t)) {
112 set_tsk_need_resched(t);
113 TRACE("psnedf_scheduler_tick: "
114 "%d is preemptable "
115 " => FORCE_RESCHED\n", t->pid);
116 } else if (is_user_np(t)) {
117 TRACE("psnedf_scheduler_tick: "
118 "%d is non-preemptable, "
119 "preemption delayed.\n", t->pid);
120 request_exit_np(t);
121 }
122 }
123}
124
125static struct task_struct* psnedf_schedule(struct task_struct * prev)
126{
127 psnedf_domain_t* pedf = local_pedf;
128 rt_domain_t* edf = &pedf->domain;
129 struct task_struct* next;
130
131 int out_of_time, sleep, preempt,
132 np, exists, blocks, resched;
133
134 spin_lock(&pedf->slock);
135
136 /* sanity checking
 137 * Unlike in GSN-EDF, when a task exits (dead),
 138 * pedf->scheduled may be NULL while prev _is_ a real-time task.
139 */
140 BUG_ON(pedf->scheduled && pedf->scheduled != prev);
141 BUG_ON(pedf->scheduled && !is_realtime(prev));
142
143 /* (0) Determine state */
144 exists = pedf->scheduled != NULL;
145 blocks = exists && !is_running(pedf->scheduled);
146 out_of_time = exists &&
147 budget_enforced(pedf->scheduled) &&
148 budget_exhausted(pedf->scheduled);
149 np = exists && is_np(pedf->scheduled);
150 sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
151 preempt = edf_preemption_needed(edf, prev);
152
153 /* If we need to preempt do so.
154 * The following checks set resched to 1 in case of special
155 * circumstances.
156 */
157 resched = preempt;
158
159 /* If a task blocks we have no choice but to reschedule.
160 */
161 if (blocks)
162 resched = 1;
163
164 /* Request a sys_exit_np() call if we would like to preempt but cannot.
165 * Multiple calls to request_exit_np() don't hurt.
166 */
167 if (np && (out_of_time || preempt || sleep))
168 request_exit_np(pedf->scheduled);
169
170 /* Any task that is preemptable and either exhausts its execution
171 * budget or wants to sleep completes. We may have to reschedule after
172 * this.
173 */
174 if (!np && (out_of_time || sleep) && !blocks) {
175 job_completion(pedf->scheduled, !sleep);
176 resched = 1;
177 }
178
179 /* The final scheduling decision. Do we need to switch for some reason?
180 * Switch if we are in RT mode and have no task or if we need to
181 * resched.
182 */
183 next = NULL;
184 if ((!np || blocks) && (resched || !exists)) {
185 /* Take care of a previously scheduled
186 * job by taking it out of the Linux runqueue.
187 */
188 if (pedf->scheduled && !blocks)
189 requeue(pedf->scheduled, edf);
190 next = __take_ready(edf);
191 } else
192 /* Only override Linux scheduler if we have a real-time task
193 * scheduled that needs to continue.
194 */
195 if (exists)
196 next = prev;
197
198 if (next) {
199 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
200 set_rt_flags(next, RT_F_RUNNING);
201 } else {
202 TRACE("becoming idle at %llu\n", litmus_clock());
203 }
204
205 pedf->scheduled = next;
206 spin_unlock(&pedf->slock);
207
208 return next;
209}
210
211
212/* Prepare a task for running in RT mode
213 */
214static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
215{
216 rt_domain_t* edf = task_edf(t);
217 psnedf_domain_t* pedf = task_pedf(t);
218 unsigned long flags;
219
220 TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
221 t->rt_param.task_params.cpu);
222
223 /* setup job parameters */
224 release_at(t, litmus_clock());
225
226 /* The task should be running in the queue, otherwise signal
227 * code will try to wake it up with fatal consequences.
228 */
229 spin_lock_irqsave(&pedf->slock, flags);
230 if (running) {
231 /* there shouldn't be anything else running at the time */
232 BUG_ON(pedf->scheduled);
233 pedf->scheduled = t;
234 } else {
235 requeue(t, edf);
236 /* maybe we have to reschedule */
237 preempt(pedf);
238 }
239 spin_unlock_irqrestore(&pedf->slock, flags);
240}
241
242static void psnedf_task_wake_up(struct task_struct *task)
243{
244 unsigned long flags;
245 psnedf_domain_t* pedf = task_pedf(task);
246 rt_domain_t* edf = task_edf(task);
247 lt_t now;
248
249 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
250 spin_lock_irqsave(&pedf->slock, flags);
251 BUG_ON(is_queued(task));
252 /* We need to take suspensions because of semaphores into
253 * account! If a job resumes after being suspended due to acquiring
254 * a semaphore, it should never be treated as a new job release.
255 *
256 * FIXME: This should be done in some more predictable and userspace-controlled way.
257 */
258 now = litmus_clock();
259 if (is_tardy(task, now) &&
260 get_rt_flags(task) != RT_F_EXIT_SEM) {
261 /* new sporadic release */
262 release_at(task, now);
263 sched_trace_task_release(task);
264 }
265
266 /* Only add to ready queue if it is not the currently-scheduled
267 * task. This could be the case if a task was woken up concurrently
268 * on a remote CPU before the executing CPU got around to actually
269 * de-scheduling the task, i.e., wake_up() raced with schedule()
270 * and won.
271 */
272 if (pedf->scheduled != task)
273 requeue(task, edf);
274
275 spin_unlock_irqrestore(&pedf->slock, flags);
276 TRACE_TASK(task, "wake up done\n");
277}
278
279static void psnedf_task_block(struct task_struct *t)
280{
281 /* only running tasks can block, thus t is in no queue */
282 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
283
284 BUG_ON(!is_realtime(t));
285 BUG_ON(is_queued(t));
286}
287
288static void psnedf_task_exit(struct task_struct * t)
289{
290 unsigned long flags;
291 psnedf_domain_t* pedf = task_pedf(t);
292 rt_domain_t* edf;
293
294 spin_lock_irqsave(&pedf->slock, flags);
295 if (is_queued(t)) {
296 /* dequeue */
297 edf = task_edf(t);
298 remove(edf, t);
299 }
300 if (pedf->scheduled == t)
301 pedf->scheduled = NULL;
302
303 TRACE_TASK(t, "RIP, now reschedule\n");
304
305 preempt(pedf);
306 spin_unlock_irqrestore(&pedf->slock, flags);
307}
308
309#ifdef CONFIG_FMLP
310static long psnedf_pi_block(struct pi_semaphore *sem,
311 struct task_struct *new_waiter)
312{
313 psnedf_domain_t* pedf;
314 rt_domain_t* edf;
315 struct task_struct* t;
316 int cpu = get_partition(new_waiter);
317
318 BUG_ON(!new_waiter);
319
320 if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
321 TRACE_TASK(new_waiter, " boosts priority\n");
322 pedf = task_pedf(new_waiter);
323 edf = task_edf(new_waiter);
324
325 /* interrupts already disabled */
326 spin_lock(&pedf->slock);
327
328 /* store new highest-priority task */
329 sem->hp.cpu_task[cpu] = new_waiter;
330 if (sem->holder &&
331 get_partition(sem->holder) == get_partition(new_waiter)) {
332 /* let holder inherit */
333 sem->holder->rt_param.inh_task = new_waiter;
334 t = sem->holder;
335 if (is_queued(t)) {
336 /* queued in domain*/
337 remove(edf, t);
338 /* readd to make priority change take place */
339 /* FIXME: this looks outdated */
340 if (is_released(t, litmus_clock()))
341 __add_ready(edf, t);
342 else
343 add_release(edf, t);
344 }
345 }
346
347 /* check if we need to reschedule */
348 if (edf_preemption_needed(edf, current))
349 preempt(pedf);
350
351 spin_unlock(&pedf->slock);
352 }
353
354 return 0;
355}
356
357static long psnedf_inherit_priority(struct pi_semaphore *sem,
358 struct task_struct *new_owner)
359{
360 int cpu = get_partition(new_owner);
361
362 new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
363 if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
364 TRACE_TASK(new_owner,
365 "inherited priority from %s/%d\n",
366 sem->hp.cpu_task[cpu]->comm,
367 sem->hp.cpu_task[cpu]->pid);
368 } else
369 TRACE_TASK(new_owner,
370 "cannot inherit priority: "
371 "no higher priority job waits on this CPU!\n");
372 /* make new owner non-preemptable as required by FMLP under
373 * PSN-EDF.
374 */
375 make_np(new_owner);
376 return 0;
377}
378
379
380/* This function is called on a semaphore release, and assumes that
381 * the current task is also the semaphore holder.
382 */
383static long psnedf_return_priority(struct pi_semaphore *sem)
384{
385 struct task_struct* t = current;
386 psnedf_domain_t* pedf = task_pedf(t);
387 rt_domain_t* edf = task_edf(t);
388 int ret = 0;
389 int cpu = get_partition(current);
390 int still_np;
391
392
393 /* Find new highest-priority semaphore task
394 * if holder task is the current hp.cpu_task[cpu].
395 *
396 * Calling function holds sem->wait.lock.
397 */
398 if (t == sem->hp.cpu_task[cpu])
399 edf_set_hp_cpu_task(sem, cpu);
400
401 still_np = take_np(current);
402
403 /* Since we don't nest resources, this
404 * should always be zero */
405 BUG_ON(still_np);
406
407 if (current->rt_param.inh_task) {
408 TRACE_CUR("return priority of %s/%d\n",
409 current->rt_param.inh_task->comm,
410 current->rt_param.inh_task->pid);
411 } else
412 TRACE_CUR(" no priority to return %p\n", sem);
413
414
415 /* Always check for delayed preemptions that might have become
416 * necessary due to non-preemptive execution.
417 */
418 spin_lock(&pedf->slock);
419
420 /* Reset inh_task to NULL. */
421 current->rt_param.inh_task = NULL;
422
423 /* check if we need to reschedule */
424 if (edf_preemption_needed(edf, current))
425 preempt(pedf);
426
427 spin_unlock(&pedf->slock);
428
429
430 return ret;
431}
432
433#endif
434
435static long psnedf_admit_task(struct task_struct* tsk)
436{
437 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
438}
439
440/* Plugin object */
441static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
442 .plugin_name = "PSN-EDF",
443#ifdef CONFIG_SRP
444 .srp_active = 1,
445#endif
446 .tick = psnedf_tick,
447 .task_new = psnedf_task_new,
448 .complete_job = complete_job,
449 .task_exit = psnedf_task_exit,
450 .schedule = psnedf_schedule,
451 .task_wake_up = psnedf_task_wake_up,
452 .task_block = psnedf_task_block,
453#ifdef CONFIG_FMLP
454 .fmlp_active = 1,
455 .pi_block = psnedf_pi_block,
456 .inherit_priority = psnedf_inherit_priority,
457 .return_priority = psnedf_return_priority,
458#endif
459 .admit_task = psnedf_admit_task
460};
461
462
463static int __init init_psn_edf(void)
464{
465 int i;
466
467 /* We do not really want to support cpu hotplug, do we? ;)
 468 * However, if we were crazy enough to do so,
 469 * we could not use num_online_cpus()
470 */
471 for (i = 0; i < num_online_cpus(); i++) {
472 psnedf_domain_init(remote_pedf(i),
473 psnedf_check_resched,
474 NULL, i);
475 }
476 return register_sched_plugin(&psn_edf_plugin);
477}
478
479module_init(init_psn_edf);
480
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
new file mode 100644
index 000000000000..39a543e22d41
--- /dev/null
+++ b/litmus/sched_task_trace.c
@@ -0,0 +1,204 @@
1/*
2 * sched_task_trace.c -- record scheduling events to a byte stream
3 */
4
5#define NO_TASK_TRACE_DECLS
6
7#include <linux/module.h>
8#include <linux/sched.h>
9#include <linux/percpu.h>
10
11#include <litmus/ftdev.h>
12#include <litmus/litmus.h>
13
14#include <litmus/sched_trace.h>
15#include <litmus/feather_trace.h>
16#include <litmus/ftdev.h>
17
18
19/* set MAJOR to 0 to have it dynamically assigned */
20#define FT_TASK_TRACE_MAJOR 253
21#define NO_EVENTS 4096 /* this is a buffer of 12 4k pages per CPU */
22
23#define now() litmus_clock()
24
25struct local_buffer {
26 struct st_event_record record[NO_EVENTS];
27 char flag[NO_EVENTS];
28 struct ft_buffer ftbuf;
29};
30
31DEFINE_PER_CPU(struct local_buffer, st_event_buffer);
32
33static struct ftdev st_dev;
34
35static int st_dev_can_open(struct ftdev *dev, unsigned int cpu)
36{
37 return cpu_online(cpu) ? 0 : -ENODEV;
38}
39
40static int __init init_sched_task_trace(void)
41{
42 struct local_buffer* buf;
43 int i, ok = 0;
44 ftdev_init(&st_dev, THIS_MODULE);
45 for (i = 0; i < NR_CPUS; i++) {
46 buf = &per_cpu(st_event_buffer, i);
47 ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
48 sizeof(struct st_event_record),
49 buf->flag,
50 buf->record);
51 st_dev.minor[i].buf = &buf->ftbuf;
52 }
53 if (ok == NR_CPUS) {
54 st_dev.minor_cnt = NR_CPUS;
55 st_dev.can_open = st_dev_can_open;
56 return register_ftdev(&st_dev, "sched_trace", FT_TASK_TRACE_MAJOR);
57 } else {
58 return -EINVAL;
59 }
60}
61
62module_init(init_sched_task_trace);
63
64
65static inline struct st_event_record* get_record(u8 type, struct task_struct* t)
66{
67 struct st_event_record* rec = NULL;
68 struct local_buffer* buf;
69
70 buf = &get_cpu_var(st_event_buffer);
71 if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) {
72 rec->hdr.type = type;
73 rec->hdr.cpu = smp_processor_id();
74 rec->hdr.pid = t ? t->pid : 0;
75 rec->hdr.job = t ? t->rt_param.job_params.job_no : 0;
76 } else {
77 put_cpu_var(st_event_buffer);
78 }
79 /* rec will be NULL if it failed */
80 return rec;
81}
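/* Note: a successful get_record() leaves the per-CPU buffer "held"
 * (get_cpu_var() disables preemption); only put_record() issues the matching
 * put_cpu_var(), so every successful get_record() must be paired with a
 * put_record().
 */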
82
83static inline void put_record(struct st_event_record* rec)
84{
85 struct local_buffer* buf;
86 buf = &__get_cpu_var(st_event_buffer);
87 ft_buffer_finish_write(&buf->ftbuf, rec);
88 put_cpu_var(st_event_buffer);
89}
90
91feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task)
92{
93 struct task_struct *t = (struct task_struct*) _task;
94 struct st_event_record* rec = get_record(ST_NAME, t);
95 int i;
96 if (rec) {
97 for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++)
98 rec->data.name.cmd[i] = t->comm[i];
99 put_record(rec);
100 }
101}
102
103feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task)
104{
105 struct task_struct *t = (struct task_struct*) _task;
106 struct st_event_record* rec = get_record(ST_PARAM, t);
107 if (rec) {
108 rec->data.param.wcet = get_exec_cost(t);
109 rec->data.param.period = get_rt_period(t);
110 rec->data.param.phase = get_rt_phase(t);
111 rec->data.param.partition = get_partition(t);
112 put_record(rec);
113 }
114}
115
116feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task)
117{
118 struct task_struct *t = (struct task_struct*) _task;
119 struct st_event_record* rec = get_record(ST_RELEASE, t);
120 if (rec) {
121 rec->data.release.release = get_release(t);
122 rec->data.release.deadline = get_deadline(t);
123 put_record(rec);
124 }
125}
126
127/* skipped: st_assigned_data, we don't use it at the moment */
128
129feather_callback void do_sched_trace_task_switch_to(unsigned long id,
130 unsigned long _task)
131{
132 struct task_struct *t = (struct task_struct*) _task;
133 struct st_event_record* rec;
134 if (is_realtime(t)) {
135 rec = get_record(ST_SWITCH_TO, t);
136 if (rec) {
137 rec->data.switch_to.when = now();
138 rec->data.switch_to.exec_time = get_exec_time(t);
139 put_record(rec);
140 }
141 }
142}
143
144feather_callback void do_sched_trace_task_switch_away(unsigned long id,
145 unsigned long _task)
146{
147 struct task_struct *t = (struct task_struct*) _task;
148 struct st_event_record* rec;
149 if (is_realtime(t)) {
150 rec = get_record(ST_SWITCH_AWAY, t);
151 if (rec) {
152 rec->data.switch_away.when = now();
153 rec->data.switch_away.exec_time = get_exec_time(t);
154 put_record(rec);
155 }
156 }
157}
158
159feather_callback void do_sched_trace_task_completion(unsigned long id,
160 unsigned long _task,
161 unsigned long forced)
162{
163 struct task_struct *t = (struct task_struct*) _task;
164 struct st_event_record* rec = get_record(ST_COMPLETION, t);
165 if (rec) {
166 rec->data.completion.when = now();
167 rec->data.completion.forced = forced;
168 put_record(rec);
169 }
170}
171
172feather_callback void do_sched_trace_task_block(unsigned long id,
173 unsigned long _task)
174{
175 struct task_struct *t = (struct task_struct*) _task;
176 struct st_event_record* rec = get_record(ST_BLOCK, t);
177 if (rec) {
178 rec->data.block.when = now();
179 put_record(rec);
180 }
181}
182
183feather_callback void do_sched_trace_task_resume(unsigned long id,
184 unsigned long _task)
185{
186 struct task_struct *t = (struct task_struct*) _task;
187 struct st_event_record* rec = get_record(ST_RESUME, t);
188 if (rec) {
189 rec->data.resume.when = now();
190 put_record(rec);
191 }
192}
193
194feather_callback void do_sched_trace_sys_release(unsigned long id,
195 unsigned long _start)
196{
197 lt_t *start = (lt_t*) _start;
198 struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL);
199 if (rec) {
200 rec->data.sys_release.when = now();
201 rec->data.sys_release.release = *start;
202 put_record(rec);
203 }
204}
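
The per-CPU buffers registered above are exported as ftdev minor devices, one per CPU. As a rough sketch of how a userspace tool might drain one of them, the following self-contained C program reads fixed-size records and prints the header fields filled in by get_record(). The device path and the exact record layout are assumptions made for illustration only; they are not defined by this patch.

/* Hedged sketch: read raw scheduling-event records from one per-CPU
 * sched_trace device. Path and struct layout below are assumptions. */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

struct st_header_guess {		/* mirrors hdr.type/cpu/pid/job above */
	uint8_t  type;
	uint8_t  cpu;
	uint16_t pid;
	uint32_t job;
};

struct st_record_guess {		/* header plus an assumed 16-byte payload */
	struct st_header_guess hdr;
	uint8_t payload[16];
};

int main(void)
{
	struct st_record_guess rec;
	int fd = open("/dev/litmus/sched_trace0", O_RDONLY);	/* assumed path */

	if (fd < 0)
		return 1;
	while (read(fd, &rec, sizeof(rec)) == (ssize_t) sizeof(rec))
		printf("type=%u cpu=%u pid=%u job=%u\n",
		       (unsigned) rec.hdr.type, (unsigned) rec.hdr.cpu,
		       (unsigned) rec.hdr.pid, (unsigned) rec.hdr.job);
	close(fd);
	return 0;
}
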
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
new file mode 100644
index 000000000000..ad0b138d4b01
--- /dev/null
+++ b/litmus/sched_trace.c
@@ -0,0 +1,378 @@
1/*
2 * sched_trace.c -- record scheduling events to a byte stream.
3 */
4#include <linux/spinlock.h>
5#include <linux/semaphore.h>
6
7#include <linux/fs.h>
8#include <linux/miscdevice.h>
9#include <asm/uaccess.h>
10#include <linux/module.h>
11#include <linux/sysrq.h>
12
13#include <linux/kfifo.h>
14
15#include <litmus/sched_trace.h>
16#include <litmus/litmus.h>
17
18#define SCHED_TRACE_NAME "litmus/log"
19
20/* Allocate a buffer of about 32k per CPU */
21#define LITMUS_TRACE_BUF_PAGES 8
22#define LITMUS_TRACE_BUF_SIZE (PAGE_SIZE * LITMUS_TRACE_BUF_PAGES * NR_CPUS)
23
24/* Max length of one read from the buffer */
25#define MAX_READ_LEN (64 * 1024)
26
 27/* Max length of one write from the kernel to the buffer */
28#define MSG_SIZE 255
29
30/* Inner ring buffer structure */
31typedef struct {
32 rwlock_t del_lock;
33
34 /* the buffer */
35 struct kfifo *kfifo;
36} ring_buffer_t;
37
38/* Main buffer structure */
39typedef struct {
40 ring_buffer_t buf;
41 atomic_t reader_cnt;
42 struct semaphore reader_mutex;
43} trace_buffer_t;
44
45
46/*
47 * Inner buffer management functions
48 */
49void rb_init(ring_buffer_t* buf)
50{
51 rwlock_init(&buf->del_lock);
52 buf->kfifo = NULL;
53}
54
55int rb_alloc_buf(ring_buffer_t* buf, unsigned int size)
56{
57 unsigned long flags;
58
59 write_lock_irqsave(&buf->del_lock, flags);
60
61 buf->kfifo = kfifo_alloc(size, GFP_ATOMIC, NULL);
62
63 write_unlock_irqrestore(&buf->del_lock, flags);
64
65 if(IS_ERR(buf->kfifo)) {
66 printk(KERN_ERR "kfifo_alloc failed\n");
67 return PTR_ERR(buf->kfifo);
68 }
69
70 return 0;
71}
72
73int rb_free_buf(ring_buffer_t* buf)
74{
75 unsigned long flags;
76
77 write_lock_irqsave(&buf->del_lock, flags);
78
79 BUG_ON(!buf->kfifo);
80 kfifo_free(buf->kfifo);
81
82 buf->kfifo = NULL;
83
84 write_unlock_irqrestore(&buf->del_lock, flags);
85
86 return 0;
87}
88
89/*
90 * Assumption: concurrent writes are serialized externally
91 *
92 * Will only succeed if there is enough space for all len bytes.
93 */
94int rb_put(ring_buffer_t* buf, char* mem, size_t len)
95{
96 unsigned long flags;
97 int error = 0;
98
99 read_lock_irqsave(&buf->del_lock, flags);
100
101 if (!buf->kfifo) {
102 error = -ENODEV;
103 goto out;
104 }
105
106 if((__kfifo_put(buf->kfifo, mem, len)) < len) {
107 error = -ENOMEM;
108 goto out;
109 }
110
111 out:
112 read_unlock_irqrestore(&buf->del_lock, flags);
113 return error;
114}
115
116/* Assumption: concurrent reads are serialized externally */
117int rb_get(ring_buffer_t* buf, char* mem, size_t len)
118{
119 unsigned long flags;
120 int error = 0;
121
122 read_lock_irqsave(&buf->del_lock, flags);
123 if (!buf->kfifo) {
124 error = -ENODEV;
125 goto out;
126 }
127
128 error = __kfifo_get(buf->kfifo, (unsigned char*)mem, len);
129
130 out:
131 read_unlock_irqrestore(&buf->del_lock, flags);
132 return error;
133}
134
135/*
136 * Device Driver management
137 */
138static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED;
139static trace_buffer_t log_buffer;
140
141static void init_log_buffer(void)
142{
143 rb_init(&log_buffer.buf);
144 atomic_set(&log_buffer.reader_cnt,0);
145 init_MUTEX(&log_buffer.reader_mutex);
146}
147
148static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
149
150/*
151 * sched_trace_log_message - Write to the trace buffer (log_buffer)
152 *
153 * This is the only function that writes to the log_buffer from inside
154 * the kernel.
155 * Concurrent calls to sched_trace_log_message() must be serialized
156 * using log_buffer_lock.
157 * The maximum length of a formatted message is bounded by MSG_SIZE (255).
158 */
159void sched_trace_log_message(const char* fmt, ...)
160{
161 unsigned long flags;
162 va_list args;
163 size_t len;
164 char* buf;
165
166 va_start(args, fmt);
167 local_irq_save(flags);
168
169 /* format message */
170 buf = __get_cpu_var(fmt_buffer);
171 len = vscnprintf(buf, MSG_SIZE, fmt, args);
172
173 spin_lock(&log_buffer_lock);
174	/* Don't copy the trailing null byte; we don't want null bytes
175 * in a text file.
176 */
177 rb_put(&log_buffer.buf, buf, len);
178 spin_unlock(&log_buffer_lock);
179
180 local_irq_restore(flags);
181 va_end(args);
182}
183
184/*
185 * log_read - Read the trace buffer
186 *
187 * This function is called as a file operation from userspace.
188 * Readers can sleep. Access is serialized through reader_mutex
189 */
190static ssize_t log_read(struct file *filp, char __user *to, size_t len,
191 loff_t *f_pos)
192{
193 /* we ignore f_pos, this is strictly sequential */
194
195 ssize_t error = -EINVAL;
196 char* mem;
197 trace_buffer_t *tbuf = filp->private_data;
198
199 if (down_interruptible(&tbuf->reader_mutex)) {
200 error = -ERESTARTSYS;
201 goto out;
202 }
203
204 if (len > MAX_READ_LEN)
205 len = MAX_READ_LEN;
206
207 mem = kmalloc(len, GFP_KERNEL);
208 if (!mem) {
209 error = -ENOMEM;
210 goto out_unlock;
211 }
212
213 error = rb_get(&tbuf->buf, mem, len);
214 while (!error) {
215 set_current_state(TASK_INTERRUPTIBLE);
216 schedule_timeout(110);
217 if (signal_pending(current))
218 error = -ERESTARTSYS;
219 else
220 error = rb_get(&tbuf->buf, mem, len);
221 }
222
223 if (error > 0 && copy_to_user(to, mem, error))
224 error = -EFAULT;
225
226 kfree(mem);
227 out_unlock:
228 up(&tbuf->reader_mutex);
229 out:
230 return error;
231}
232
233/*
234 * Enable redirection of printk() messages to the trace buffer.
235 * Defined in kernel/printk.c
236 */
237extern int trace_override;
238extern int trace_recurse;
239
240/*
241 * log_open - open the global log message ring buffer.
242 */
243static int log_open(struct inode *in, struct file *filp)
244{
245 int error = -EINVAL;
246 trace_buffer_t* tbuf;
247
248 tbuf = &log_buffer;
249
250 if (down_interruptible(&tbuf->reader_mutex)) {
251 error = -ERESTARTSYS;
252 goto out;
253 }
254
255 /* first open must allocate buffers */
256 if (atomic_inc_return(&tbuf->reader_cnt) == 1) {
257 if ((error = rb_alloc_buf(&tbuf->buf, LITMUS_TRACE_BUF_SIZE)))
258 {
259 atomic_dec(&tbuf->reader_cnt);
260 goto out_unlock;
261 }
262 }
263
264 error = 0;
265 filp->private_data = tbuf;
266
267 printk(KERN_DEBUG
268 "sched_trace kfifo at 0x%p with buffer starting at: 0x%p\n",
269 tbuf->buf.kfifo, &((tbuf->buf.kfifo)->buffer));
270
271 /* override printk() */
272 trace_override++;
273
274 out_unlock:
275 up(&tbuf->reader_mutex);
276 out:
277 return error;
278}
279
280static int log_release(struct inode *in, struct file *filp)
281{
282 int error = -EINVAL;
283 trace_buffer_t* tbuf = filp->private_data;
284
285 BUG_ON(!filp->private_data);
286
287 if (down_interruptible(&tbuf->reader_mutex)) {
288 error = -ERESTARTSYS;
289 goto out;
290 }
291
292 /* last release must deallocate buffers */
293 if (atomic_dec_return(&tbuf->reader_cnt) == 0) {
294 error = rb_free_buf(&tbuf->buf);
295 }
296
297 /* release printk() overriding */
298 trace_override--;
299
300 printk(KERN_DEBUG "sched_trace kfifo released\n");
301
302 up(&tbuf->reader_mutex);
303 out:
304 return error;
305}
306
307/*
308 * log_fops - The file operations for accessing the global LITMUS log message
309 * buffer.
310 *
311 * Except for opening the device file, it uses the same operations as trace_fops.
312 */
313static struct file_operations log_fops = {
314 .owner = THIS_MODULE,
315 .open = log_open,
316 .release = log_release,
317 .read = log_read,
318};
319
320static struct miscdevice litmus_log_dev = {
321 .name = SCHED_TRACE_NAME,
322 .minor = MISC_DYNAMIC_MINOR,
323 .fops = &log_fops,
324};
325
326#ifdef CONFIG_MAGIC_SYSRQ
327void dump_trace_buffer(int max)
328{
329 char line[80];
330 int len;
331 int count = 0;
332
333 /* potential, but very unlikely, race... */
334 trace_recurse = 1;
335 while ((max == 0 || count++ < max) &&
336 (len = rb_get(&log_buffer.buf, line, sizeof(line) - 1)) > 0) {
337 line[len] = '\0';
338 printk("%s", line);
339 }
340 trace_recurse = 0;
341}
342
343static void sysrq_dump_trace_buffer(int key, struct tty_struct *tty)
344{
345 dump_trace_buffer(100);
346}
347
348static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
349 .handler = sysrq_dump_trace_buffer,
350 .help_msg = "dump-trace-buffer(Y)",
351 .action_msg = "writing content of TRACE() buffer",
352};
353#endif
354
355static int __init init_sched_trace(void)
356{
357 printk("Initializing TRACE() device\n");
358 init_log_buffer();
359
360#ifdef CONFIG_MAGIC_SYSRQ
361 /* offer some debugging help */
362 if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op))
363 printk("Registered dump-trace-buffer(Y) magic sysrq.\n");
364 else
365 printk("Could not register dump-trace-buffer(Y) magic sysrq.\n");
366#endif
367
368
369 return misc_register(&litmus_log_dev);
370}
371
372static void __exit exit_sched_trace(void)
373{
374 misc_deregister(&litmus_log_dev);
375}
376
377module_init(init_sched_trace);
378module_exit(exit_sched_trace);
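
The misc device above exposes the TRACE() log as a plain byte stream under the name litmus/log. A reader can be as simple as the sketch below; the /dev/litmus/log path follows from the usual udev handling of misc devices and is an assumption here, not something established by this patch.

/* Hedged sketch: copy the LITMUS TRACE() log to stdout. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/litmus/log", O_RDONLY);	/* assumed device path */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* log_read() in the kernel blocks and polls until data is available,
	 * so a plain read loop is sufficient here. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t) n, stdout);
	close(fd);
	return 0;
}
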
diff --git a/litmus/srp.c b/litmus/srp.c
new file mode 100644
index 000000000000..71639b991630
--- /dev/null
+++ b/litmus/srp.c
@@ -0,0 +1,318 @@
1/* ************************************************************************** */
2/* STACK RESOURCE POLICY */
3/* ************************************************************************** */
4
5#include <asm/atomic.h>
6#include <linux/wait.h>
7#include <litmus/litmus.h>
8#include <litmus/sched_plugin.h>
9
10#include <litmus/fdso.h>
11
12#include <litmus/trace.h>
13
14
15#ifdef CONFIG_SRP
16
17struct srp_priority {
18 struct list_head list;
19 unsigned int period;
20 pid_t pid;
21};
22
23#define list2prio(l) list_entry(l, struct srp_priority, list)
24
 25/* SRP task priority comparison function. Smaller periods have higher
 26 * priority; ties are broken by PID. Special case: period == 0 <=> no priority.
27 */
28static int srp_higher_prio(struct srp_priority* first,
29 struct srp_priority* second)
30{
31 if (!first->period)
32 return 0;
33 else
34 return !second->period ||
35 first->period < second->period || (
36 first->period == second->period &&
37 first->pid < second->pid);
38}
39
40struct srp {
41 struct list_head ceiling;
42 wait_queue_head_t ceiling_blocked;
43};
44
45
46atomic_t srp_objects_in_use = ATOMIC_INIT(0);
47
48DEFINE_PER_CPU(struct srp, srp);
49
50
 51/* Initialize per-CPU SRP state (ceilings and wait queues) at boot time. */
52static int __init srp_init(void)
53{
54 int i;
55
56 printk("Initializing SRP per-CPU ceilings...");
57 for (i = 0; i < NR_CPUS; i++) {
58 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
59 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
60 }
61 printk(" done!\n");
62
63 return 0;
64}
65module_init(srp_init);
66
67
68#define system_ceiling(srp) list2prio(srp->ceiling.next)
69
70
71#define UNDEF_SEM -2
72
73
74/* struct for uniprocessor SRP "semaphore" */
75struct srp_semaphore {
76 struct srp_priority ceiling;
77 struct task_struct* owner;
78 int cpu; /* cpu associated with this "semaphore" and resource */
79};
80
81#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
82
83static int srp_exceeds_ceiling(struct task_struct* first,
84 struct srp* srp)
85{
86 return list_empty(&srp->ceiling) ||
87 get_rt_period(first) < system_ceiling(srp)->period ||
88 (get_rt_period(first) == system_ceiling(srp)->period &&
89 first->pid < system_ceiling(srp)->pid) ||
90 ceiling2sem(system_ceiling(srp))->owner == first;
91}
92
93static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
94{
95 struct list_head *pos;
96 if (in_list(&prio->list)) {
97 printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
98 "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
99 return;
100 }
101 list_for_each(pos, &srp->ceiling)
102 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
103 __list_add(&prio->list, pos->prev, pos);
104 return;
105 }
106
107 list_add_tail(&prio->list, &srp->ceiling);
108}
109
110
111static void* create_srp_semaphore(void)
112{
113 struct srp_semaphore* sem;
114
115 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
116 if (!sem)
117 return NULL;
118
119 INIT_LIST_HEAD(&sem->ceiling.list);
120 sem->ceiling.period = 0;
121 sem->cpu = UNDEF_SEM;
122 sem->owner = NULL;
123 atomic_inc(&srp_objects_in_use);
124 return sem;
125}
126
127static noinline int open_srp_semaphore(struct od_table_entry* entry, void* __user arg)
128{
129 struct srp_semaphore* sem = (struct srp_semaphore*) entry->obj->obj;
130 int ret = 0;
131 struct task_struct* t = current;
132 struct srp_priority t_prio;
133
134 TRACE("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
135 if (!srp_active())
136 return -EBUSY;
137
138 if (sem->cpu == UNDEF_SEM)
139 sem->cpu = get_partition(t);
140 else if (sem->cpu != get_partition(t))
141 ret = -EPERM;
142
143 if (ret == 0) {
144 t_prio.period = get_rt_period(t);
145 t_prio.pid = t->pid;
146 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
147 sem->ceiling.period = t_prio.period;
148 sem->ceiling.pid = t_prio.pid;
149 }
150 }
151
152 return ret;
153}
154
155static void destroy_srp_semaphore(void* sem)
156{
157 /* XXX invariants */
158 atomic_dec(&srp_objects_in_use);
159 kfree(sem);
160}
161
162struct fdso_ops srp_sem_ops = {
163 .create = create_srp_semaphore,
164 .open = open_srp_semaphore,
165 .destroy = destroy_srp_semaphore
166};
167
168
169static void do_srp_down(struct srp_semaphore* sem)
170{
171 /* Update ceiling. */
172 srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
173 WARN_ON(sem->owner != NULL);
174 sem->owner = current;
175 TRACE_CUR("acquired srp 0x%p\n", sem);
176}
177
178static void do_srp_up(struct srp_semaphore* sem)
179{
180 /* Determine new system priority ceiling for this CPU. */
181 WARN_ON(!in_list(&sem->ceiling.list));
182 if (in_list(&sem->ceiling.list))
183 list_del(&sem->ceiling.list);
184
185 sem->owner = NULL;
186
187 /* Wake tasks on this CPU, if they exceed current ceiling. */
188 TRACE_CUR("released srp 0x%p\n", sem);
189 wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
190}
191
192/* Adjust the system-wide priority ceiling if resource is claimed. */
193asmlinkage long sys_srp_down(int sem_od)
194{
195 int cpu;
196 int ret = -EINVAL;
197 struct srp_semaphore* sem;
198
199	/* Disabling preemption is sufficient protection since
200	 * SRP is strictly per-CPU and we do not interfere with any
201	 * interrupt handlers.
202 */
203 preempt_disable();
204 TS_SRP_DOWN_START;
205
206 cpu = smp_processor_id();
207 sem = lookup_srp_sem(sem_od);
208 if (sem && sem->cpu == cpu) {
209 do_srp_down(sem);
210 ret = 0;
211 }
212
213 TS_SRP_DOWN_END;
214 preempt_enable();
215 return ret;
216}
217
218/* Adjust the system-wide priority ceiling if resource is freed. */
219asmlinkage long sys_srp_up(int sem_od)
220{
221 int cpu;
222 int ret = -EINVAL;
223 struct srp_semaphore* sem;
224
225 preempt_disable();
226 TS_SRP_UP_START;
227
228 cpu = smp_processor_id();
229 sem = lookup_srp_sem(sem_od);
230
231 if (sem && sem->cpu == cpu) {
232 do_srp_up(sem);
233 ret = 0;
234 }
235
236 TS_SRP_UP_END;
237 preempt_enable();
238 return ret;
239}
240
241static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
242 void *key)
243{
244 int cpu = smp_processor_id();
245 struct task_struct *tsk = wait->private;
246 if (cpu != get_partition(tsk))
247		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
248 get_partition(tsk));
249 else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
250 return default_wake_function(wait, mode, sync, key);
251 return 0;
252}
253
254
255
256static void do_ceiling_block(struct task_struct *tsk)
257{
258 wait_queue_t wait = {
259 .private = tsk,
260 .func = srp_wake_up,
261 .task_list = {NULL, NULL}
262 };
263
264 tsk->state = TASK_UNINTERRUPTIBLE;
265 add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
266 tsk->rt_param.srp_non_recurse = 1;
267 preempt_enable_no_resched();
268 schedule();
269 preempt_disable();
270 tsk->rt_param.srp_non_recurse = 0;
271 remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
272}
273
274/* Wait for current task priority to exceed system-wide priority ceiling.
275 */
276void srp_ceiling_block(void)
277{
278 struct task_struct *tsk = current;
279
280	/* Only applies to real-time tasks; the unlikely() hint assumes RT callers are the common case. */
281 if (unlikely(!is_realtime(tsk)))
282 return;
283
284 /* Avoid recursive ceiling blocking. */
285 if (unlikely(tsk->rt_param.srp_non_recurse))
286 return;
287
288 /* Bail out early if there aren't any SRP resources around. */
289 if (likely(!atomic_read(&srp_objects_in_use)))
290 return;
291
292 preempt_disable();
293 if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
294 TRACE_CUR("is priority ceiling blocked.\n");
295 while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
296 do_ceiling_block(tsk);
297 TRACE_CUR("finally exceeds system ceiling.\n");
298 } else
299 TRACE_CUR("is not priority ceiling blocked\n");
300 preempt_enable();
301}
302
303
304#else
305
306asmlinkage long sys_srp_down(int sem_od)
307{
308 return -ENOSYS;
309}
310
311asmlinkage long sys_srp_up(int sem_od)
312{
313 return -ENOSYS;
314}
315
316struct fdso_ops srp_sem_ops = {};
317
318#endif
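
The two predicates above fully determine when a task may pass the ceiling: a shorter period wins, PID breaks ties, and a zero period means "no priority". The following standalone C model restates just that ordering so it can be compiled and sanity-checked on its own; the names are local to the example and are not part of the kernel code.

/* Standalone model of the ordering used by srp_higher_prio() above. */
#include <stdio.h>

struct prio { unsigned int period; int pid; };

/* Returns non-zero iff 'a' has higher SRP priority than 'b'. */
static int higher_prio(const struct prio *a, const struct prio *b)
{
	if (!a->period)			/* period == 0 <=> no priority */
		return 0;
	return !b->period ||
	       a->period < b->period ||
	       (a->period == b->period && a->pid < b->pid);
}

int main(void)
{
	struct prio t1 = { 10, 100 }, t2 = { 10, 200 }, bg = { 0, 50 };

	printf("%d\n", higher_prio(&t1, &t2)); /* 1: equal periods, lower PID wins */
	printf("%d\n", higher_prio(&bg, &t1)); /* 0: period 0 never has priority */
	printf("%d\n", higher_prio(&t1, &bg)); /* 1: any real period beats period 0 */
	return 0;
}
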
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 000000000000..bf75fde5450b
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,104 @@
1/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
2 *
3 *
4 */
5
6#include <asm/atomic.h>
7#include <asm/uaccess.h>
8#include <linux/spinlock.h>
9#include <linux/list.h>
10#include <linux/sched.h>
11#include <linux/completion.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/jobs.h>
16
17#include <litmus/sched_trace.h>
18
19static DECLARE_COMPLETION(ts_release);
20
21static long do_wait_for_ts_release(void)
22{
23 long ret = 0;
24
25 /* If the interruption races with a release, the completion object
26 * may have a non-zero counter. To avoid this problem, this should
27 * be replaced by wait_for_completion().
28 *
29 * For debugging purposes, this is interruptible for now.
30 */
31 ret = wait_for_completion_interruptible(&ts_release);
32
33 return ret;
34}
35
36int count_tasks_waiting_for_release(void)
37{
38 unsigned long flags;
39 int task_count = 0;
40 struct list_head *pos;
41
42 spin_lock_irqsave(&ts_release.wait.lock, flags);
43 list_for_each(pos, &ts_release.wait.task_list) {
44 task_count++;
45 }
46 spin_unlock_irqrestore(&ts_release.wait.lock, flags);
47
48 return task_count;
49}
50
51static long do_release_ts(lt_t start)
52{
53 int task_count = 0;
54 unsigned long flags;
55 struct list_head *pos;
56 struct task_struct *t;
57
58
59 spin_lock_irqsave(&ts_release.wait.lock, flags);
60 TRACE("<<<<<< synchronous task system release >>>>>>\n");
61
62 sched_trace_sys_release(&start);
63 list_for_each(pos, &ts_release.wait.task_list) {
64 t = (struct task_struct*) list_entry(pos,
65 struct __wait_queue,
66 task_list)->private;
67 task_count++;
68 litmus->release_at(t, start + t->rt_param.task_params.phase);
69 sched_trace_task_release(t);
70 }
71
72 spin_unlock_irqrestore(&ts_release.wait.lock, flags);
73
74 complete_n(&ts_release, task_count);
75
76 return task_count;
77}
78
79
80asmlinkage long sys_wait_for_ts_release(void)
81{
82 long ret = -EPERM;
83 struct task_struct *t = current;
84
85 if (is_realtime(t))
86 ret = do_wait_for_ts_release();
87
88 return ret;
89}
90
91
92asmlinkage long sys_release_ts(lt_t __user *__delay)
93{
94 long ret;
95 lt_t delay;
96
97 /* FIXME: check capabilities... */
98
99 ret = copy_from_user(&delay, __delay, sizeof(delay));
100 if (ret == 0)
101 ret = do_release_ts(litmus_clock() + delay);
102
103 return ret;
104}
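
do_release_ts() releases every waiter at start + phase, where start is litmus_clock() plus the delay passed to sys_release_ts(), so tasks with different phases are released in a staggered fashion. The toy calculation below (plain userspace C with made-up clock and phase values) just spells out that arithmetic; it is not kernel code.

/* Toy calculation of the staggered release times computed in do_release_ts(). */
#include <stdio.h>

int main(void)
{
	unsigned long long now = 5000000000ULL;		/* pretend litmus_clock(), ns */
	unsigned long long delay = 1000000000ULL;	/* delay given to sys_release_ts() */
	unsigned long long phase[] = { 0, 2000000, 5000000 }; /* per-task phases, ns */
	unsigned long long start = now + delay;
	int i;

	for (i = 0; i < 3; i++)
		printf("task %d released at %llu ns\n", i, start + phase[i]);
	return 0;
}
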
diff --git a/litmus/trace.c b/litmus/trace.c
new file mode 100644
index 000000000000..440376998dc9
--- /dev/null
+++ b/litmus/trace.c
@@ -0,0 +1,103 @@
1#include <linux/module.h>
2
3#include <litmus/ftdev.h>
4#include <litmus/litmus.h>
5#include <litmus/trace.h>
6
7/******************************************************************************/
8/* Allocation */
9/******************************************************************************/
10
11static struct ftdev overhead_dev;
12
13#define trace_ts_buf overhead_dev.minor[0].buf
14
15static unsigned int ts_seq_no = 0;
16
17static inline void __save_timestamp_cpu(unsigned long event,
18 uint8_t type, uint8_t cpu)
19{
20 unsigned int seq_no;
21 struct timestamp *ts;
22 seq_no = fetch_and_inc((int *) &ts_seq_no);
23 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
24 ts->event = event;
25 ts->timestamp = ft_timestamp();
26 ts->seq_no = seq_no;
27 ts->cpu = cpu;
28 ts->task_type = type;
29 ft_buffer_finish_write(trace_ts_buf, ts);
30 }
31}
32
33static inline void __save_timestamp(unsigned long event,
34 uint8_t type)
35{
36 __save_timestamp_cpu(event, type, raw_smp_processor_id());
37}
38
39feather_callback void save_timestamp(unsigned long event)
40{
41 __save_timestamp(event, TSK_UNKNOWN);
42}
43
44feather_callback void save_timestamp_def(unsigned long event,
45 unsigned long type)
46{
47 __save_timestamp(event, (uint8_t) type);
48}
49
50feather_callback void save_timestamp_task(unsigned long event,
51 unsigned long t_ptr)
52{
53 int rt = is_realtime((struct task_struct *) t_ptr);
54 __save_timestamp(event, rt ? TSK_RT : TSK_BE);
55}
56
57feather_callback void save_timestamp_cpu(unsigned long event,
58 unsigned long cpu)
59{
60 __save_timestamp_cpu(event, TSK_UNKNOWN, cpu);
61}
62
63/******************************************************************************/
64/* DEVICE FILE DRIVER */
65/******************************************************************************/
66
67/*
 68 * This should be 8M; that is the most we can request from the buddy
 69 * allocator (MAX_ORDER), and we might not even get that much.
70 */
71#define NO_TIMESTAMPS (2 << 11)
72
73/* set MAJOR to 0 to have it dynamically assigned */
74#define FT_TRACE_MAJOR 252
75
76static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
77{
78 unsigned int count = NO_TIMESTAMPS;
79 while (count && !trace_ts_buf) {
80 printk("time stamp buffer: trying to allocate %u time stamps.\n", count);
81 ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
82 count /= 2;
83 }
84 return ftdev->minor[idx].buf ? 0 : -ENOMEM;
85}
86
87static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
88{
89 free_ft_buffer(ftdev->minor[idx].buf);
90 ftdev->minor[idx].buf = NULL;
91}
92
93static int __init init_ft_overhead_trace(void)
94{
95 printk("Initializing Feather-Trace overhead tracing device.\n");
96 ftdev_init(&overhead_dev, THIS_MODULE);
97 overhead_dev.minor_cnt = 1; /* only one buffer */
98 overhead_dev.alloc = alloc_timestamp_buffer;
99 overhead_dev.free = free_timestamp_buffer;
100 return register_ftdev(&overhead_dev, "ft_trace", FT_TRACE_MAJOR);
101}
102
103module_init(init_ft_overhead_trace);
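
alloc_timestamp_buffer() copes with large contiguous allocations by repeatedly halving the requested number of timestamps until an allocation succeeds or the count reaches zero. The standalone sketch below mirrors that fallback pattern; malloc() stands in for alloc_ft_buffer() and the element size is an assumption, since struct timestamp is defined elsewhere.

/* Standalone illustration of the halving-allocation fallback used by
 * alloc_timestamp_buffer(); malloc() stands in for alloc_ft_buffer(). */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	size_t count = 2 << 11;		/* NO_TIMESTAMPS in the kernel code */
	size_t elem = 16;		/* assumed sizeof(struct timestamp) */
	void *buf = NULL;

	while (count && !buf) {
		printf("trying to allocate %zu timestamps\n", count);
		buf = malloc(count * elem);
		count /= 2;
	}
	if (!buf)
		return 1;		/* corresponds to the -ENOMEM case */
	free(buf);
	return 0;
}
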