author     Andrea Bastoni <bastoni@cs.unc.edu>    2010-05-21 09:25:41 -0400
committer  Andrea Bastoni <bastoni@cs.unc.edu>    2010-05-21 09:25:41 -0400
commit     6e0c5c609ad02f49d7e3e5edc3d65db1e233d857 (patch)
tree       62517b45689b743e9976721df89634267e165011
parent     e40152ee1e1c7a63f4777791863215e3faa37a86 (diff)
parent     960145eb829ae7a7b2d029e987f99a6be7a78d6b (diff)
Merge branch 'master' into wip-2.6.34
Merge LitmusRT master and 2.6.34. This commit is just the plain merge
with conflicts resolved. It won't compile.
Conflicts solved:
Makefile
arch/x86/include/asm/hw_irq.h
arch/x86/include/asm/unistd_32.h
arch/x86/kernel/syscall_table_32.S
include/linux/hrtimer.h
kernel/sched.c
kernel/sched_fair.c
72 files changed, 9633 insertions, 37 deletions
diff --git a/Makefile b/Makefile | |||
@@ -1,7 +1,7 @@ | |||
1 | VERSION = 2 | 1 | VERSION = 2 |
2 | PATCHLEVEL = 6 | 2 | PATCHLEVEL = 6 |
3 | SUBLEVEL = 34 | 3 | SUBLEVEL = 34 |
4 | EXTRAVERSION = | 4 | EXTRAVERSION =-litmus2010 |
5 | NAME = Sheep on Meth | 5 | NAME = Sheep on Meth |
6 | 6 | ||
7 | # *DOCUMENTATION* | 7 | # *DOCUMENTATION* |
@@ -650,7 +650,7 @@ export mod_strip_cmd | |||
650 | 650 | ||
651 | 651 | ||
652 | ifeq ($(KBUILD_EXTMOD),) | 652 | ifeq ($(KBUILD_EXTMOD),) |
653 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ | 653 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/ |
654 | 654 | ||
655 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ | 655 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ |
656 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ | 656 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9458685902bd..12fbd5b65f1f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -2125,3 +2125,5 @@ source "crypto/Kconfig" | |||
2125 | source "arch/x86/kvm/Kconfig" | 2125 | source "arch/x86/kvm/Kconfig" |
2126 | 2126 | ||
2127 | source "lib/Kconfig" | 2127 | source "lib/Kconfig" |
2128 | |||
2129 | source "litmus/Kconfig" | ||
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 8e8ec663a98f..5d07dea2ebb8 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -13,6 +13,7 @@ | |||
13 | BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) | 13 | BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) |
14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) | 14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) |
15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) | 15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) |
16 | BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR) | ||
16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) | 17 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) |
17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | 18 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) |
18 | 19 | ||
diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h new file mode 100644 index 000000000000..4fd31633405d --- /dev/null +++ b/arch/x86/include/asm/feather_trace.h | |||
@@ -0,0 +1,17 @@ | |||
1 | #ifndef _ARCH_FEATHER_TRACE_H | ||
2 | #define _ARCH_FEATHER_TRACE_H | ||
3 | |||
4 | #include <asm/msr.h> | ||
5 | |||
6 | static inline unsigned long long ft_timestamp(void) | ||
7 | { | ||
8 | return __native_read_tsc(); | ||
9 | } | ||
10 | |||
11 | #ifdef CONFIG_X86_32 | ||
12 | #include "feather_trace_32.h" | ||
13 | #else | ||
14 | #include "feather_trace_64.h" | ||
15 | #endif | ||
16 | |||
17 | #endif | ||
diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h new file mode 100644 index 000000000000..192cd09b7850 --- /dev/null +++ b/arch/x86/include/asm/feather_trace_32.h | |||
@@ -0,0 +1,80 @@ | |||
1 | /* Do not directly include this file. Include feather_trace.h instead */ | ||
2 | |||
3 | #define feather_callback __attribute__((regparm(0))) | ||
4 | |||
5 | /* | ||
6 | * make the compiler reload any register that is not saved in | ||
7 | * a cdecl function call | ||
8 | */ | ||
9 | #define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx" | ||
10 | |||
11 | #define ft_event(id, callback) \ | ||
12 | __asm__ __volatile__( \ | ||
13 | "1: jmp 2f \n\t" \ | ||
14 | " call " #callback " \n\t" \ | ||
15 | ".section __event_table, \"aw\" \n\t" \ | ||
16 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
17 | ".previous \n\t" \ | ||
18 | "2: \n\t" \ | ||
19 | : : : CLOBBER_LIST) | ||
20 | |||
21 | #define ft_event0(id, callback) \ | ||
22 | __asm__ __volatile__( \ | ||
23 | "1: jmp 2f \n\t" \ | ||
24 | " subl $4, %%esp \n\t" \ | ||
25 | " movl $" #id ", (%%esp) \n\t" \ | ||
26 | " call " #callback " \n\t" \ | ||
27 | " addl $4, %%esp \n\t" \ | ||
28 | ".section __event_table, \"aw\" \n\t" \ | ||
29 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
30 | ".previous \n\t" \ | ||
31 | "2: \n\t" \ | ||
32 | : : : CLOBBER_LIST) | ||
33 | |||
34 | #define ft_event1(id, callback, param) \ | ||
35 | __asm__ __volatile__( \ | ||
36 | "1: jmp 2f \n\t" \ | ||
37 | " subl $8, %%esp \n\t" \ | ||
38 | " movl %0, 4(%%esp) \n\t" \ | ||
39 | " movl $" #id ", (%%esp) \n\t" \ | ||
40 | " call " #callback " \n\t" \ | ||
41 | " addl $8, %%esp \n\t" \ | ||
42 | ".section __event_table, \"aw\" \n\t" \ | ||
43 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
44 | ".previous \n\t" \ | ||
45 | "2: \n\t" \ | ||
46 | : : "r" (param) : CLOBBER_LIST) | ||
47 | |||
48 | #define ft_event2(id, callback, param, param2) \ | ||
49 | __asm__ __volatile__( \ | ||
50 | "1: jmp 2f \n\t" \ | ||
51 | " subl $12, %%esp \n\t" \ | ||
52 | " movl %1, 8(%%esp) \n\t" \ | ||
53 | " movl %0, 4(%%esp) \n\t" \ | ||
54 | " movl $" #id ", (%%esp) \n\t" \ | ||
55 | " call " #callback " \n\t" \ | ||
56 | " addl $12, %%esp \n\t" \ | ||
57 | ".section __event_table, \"aw\" \n\t" \ | ||
58 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
59 | ".previous \n\t" \ | ||
60 | "2: \n\t" \ | ||
61 | : : "r" (param), "r" (param2) : CLOBBER_LIST) | ||
62 | |||
63 | |||
64 | #define ft_event3(id, callback, p, p2, p3) \ | ||
65 | __asm__ __volatile__( \ | ||
66 | "1: jmp 2f \n\t" \ | ||
67 | " subl $16, %%esp \n\t" \ | ||
68 | " movl %2, 12(%%esp) \n\t" \ | ||
69 | " movl %1, 8(%%esp) \n\t" \ | ||
70 | " movl %0, 4(%%esp) \n\t" \ | ||
71 | " movl $" #id ", (%%esp) \n\t" \ | ||
72 | " call " #callback " \n\t" \ | ||
73 | " addl $16, %%esp \n\t" \ | ||
74 | ".section __event_table, \"aw\" \n\t" \ | ||
75 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
76 | ".previous \n\t" \ | ||
77 | "2: \n\t" \ | ||
78 | : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST) | ||
79 | |||
80 | #define __ARCH_HAS_FEATHER_TRACE | ||
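For context, a minimal sketch (not part of this patch) of how a call site and its callback fit together with the x86-32 macros above; save_timestamp_sketch and its body are hypothetical illustration code.

/* Hedged illustration, not LITMUS^RT code: a callback compatible with the
 * x86-32 ft_event1() macro above. feather_callback (regparm(0)) makes the
 * callback take its arguments from the stack, matching the pushes in the
 * macro: the event id arrives first, then the extra parameter. */
#include <litmus/feather_trace.h>

feather_callback void save_timestamp_sketch(unsigned long id,
					     unsigned long param)
{
	unsigned long long now = ft_timestamp(); /* TSC, see asm/feather_trace.h */
	/* hypothetical: store (id, param, now) into a trace buffer */
	(void) now;
	(void) param;
}

/* A call site would then be instrumented as:
 *	ft_event1(42, save_timestamp_sketch, (unsigned long) some_value);
 * The generated "jmp 2f" initially skips the call, so the event stays off
 * until ft_enable_event(42) patches the jump displacement to zero. */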
diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h new file mode 100644 index 000000000000..1cffa4eec5f4 --- /dev/null +++ b/arch/x86/include/asm/feather_trace_64.h | |||
@@ -0,0 +1,69 @@ | |||
1 | /* Do not directly include this file. Include feather_trace.h instead */ | ||
2 | |||
3 | /* regparm is the default on x86_64 */ | ||
4 | #define feather_callback | ||
5 | |||
6 | # define _EVENT_TABLE(id,from,to) \ | ||
7 | ".section __event_table, \"aw\"\n\t" \ | ||
8 | ".balign 8\n\t" \ | ||
9 | ".quad " #id ", 0, " #from ", " #to " \n\t" \ | ||
10 | ".previous \n\t" | ||
11 | |||
12 | /* | ||
13 | * x86_64 callee only owns rbp, rbx, r12 -> r15 | ||
14 | * the callee can freely modify the others | ||
15 | */ | ||
16 | #define CLOBBER_LIST "memory", "cc", "rdi", "rsi", "rdx", "rcx", \ | ||
17 | "r8", "r9", "r10", "r11", "rax" | ||
18 | |||
19 | #define ft_event(id, callback) \ | ||
20 | __asm__ __volatile__( \ | ||
21 | "1: jmp 2f \n\t" \ | ||
22 | " call " #callback " \n\t" \ | ||
23 | _EVENT_TABLE(id,1b,2f) \ | ||
24 | "2: \n\t" \ | ||
25 | : : : CLOBBER_LIST) | ||
26 | |||
27 | #define ft_event0(id, callback) \ | ||
28 | __asm__ __volatile__( \ | ||
29 | "1: jmp 2f \n\t" \ | ||
30 | " movq $" #id ", %%rdi \n\t" \ | ||
31 | " call " #callback " \n\t" \ | ||
32 | _EVENT_TABLE(id,1b,2f) \ | ||
33 | "2: \n\t" \ | ||
34 | : : : CLOBBER_LIST) | ||
35 | |||
36 | #define ft_event1(id, callback, param) \ | ||
37 | __asm__ __volatile__( \ | ||
38 | "1: jmp 2f \n\t" \ | ||
39 | " movq %0, %%rsi \n\t" \ | ||
40 | " movq $" #id ", %%rdi \n\t" \ | ||
41 | " call " #callback " \n\t" \ | ||
42 | _EVENT_TABLE(id,1b,2f) \ | ||
43 | "2: \n\t" \ | ||
44 | : : "r" (param) : CLOBBER_LIST) | ||
45 | |||
46 | #define ft_event2(id, callback, param, param2) \ | ||
47 | __asm__ __volatile__( \ | ||
48 | "1: jmp 2f \n\t" \ | ||
49 | " movq %1, %%rdx \n\t" \ | ||
50 | " movq %0, %%rsi \n\t" \ | ||
51 | " movq $" #id ", %%rdi \n\t" \ | ||
52 | " call " #callback " \n\t" \ | ||
53 | _EVENT_TABLE(id,1b,2f) \ | ||
54 | "2: \n\t" \ | ||
55 | : : "r" (param), "r" (param2) : CLOBBER_LIST) | ||
56 | |||
57 | #define ft_event3(id, callback, p, p2, p3) \ | ||
58 | __asm__ __volatile__( \ | ||
59 | "1: jmp 2f \n\t" \ | ||
60 | " movq %2, %%rcx \n\t" \ | ||
61 | " movq %1, %%rdx \n\t" \ | ||
62 | " movq %0, %%rsi \n\t" \ | ||
63 | " movq $" #id ", %%rdi \n\t" \ | ||
64 | " call " #callback " \n\t" \ | ||
65 | _EVENT_TABLE(id,1b,2f) \ | ||
66 | "2: \n\t" \ | ||
67 | : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST) | ||
68 | |||
69 | #define __ARCH_HAS_FEATHER_TRACE | ||
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 46c0fe05f230..c17411503f28 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -53,6 +53,8 @@ extern void threshold_interrupt(void); | |||
53 | extern void call_function_interrupt(void); | 53 | extern void call_function_interrupt(void); |
54 | extern void call_function_single_interrupt(void); | 54 | extern void call_function_single_interrupt(void); |
55 | 55 | ||
56 | extern void pull_timers_interrupt(void); | ||
57 | |||
56 | /* IOAPIC */ | 58 | /* IOAPIC */ |
57 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) | 59 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) |
58 | extern unsigned long io_apic_irqs; | 60 | extern unsigned long io_apic_irqs; |
@@ -122,6 +124,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void); | |||
122 | extern void smp_reschedule_interrupt(struct pt_regs *); | 124 | extern void smp_reschedule_interrupt(struct pt_regs *); |
123 | extern void smp_call_function_interrupt(struct pt_regs *); | 125 | extern void smp_call_function_interrupt(struct pt_regs *); |
124 | extern void smp_call_function_single_interrupt(struct pt_regs *); | 126 | extern void smp_call_function_single_interrupt(struct pt_regs *); |
127 | extern void smp_pull_timers_interrupt(struct pt_regs *); | ||
125 | #ifdef CONFIG_X86_32 | 128 | #ifdef CONFIG_X86_32 |
126 | extern void smp_invalidate_interrupt(struct pt_regs *); | 129 | extern void smp_invalidate_interrupt(struct pt_regs *); |
127 | #else | 130 | #else |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 8767d99c4f64..bb5318bbe0e4 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -109,6 +109,11 @@ | |||
109 | #define LOCAL_TIMER_VECTOR 0xef | 109 | #define LOCAL_TIMER_VECTOR 0xef |
110 | 110 | ||
111 | /* | 111 | /* |
112 | * LITMUS^RT pull timers IRQ vector | ||
113 | */ | ||
114 | #define PULL_TIMERS_VECTOR 0xee | ||
115 | |||
116 | /* | ||
112 | * Generic system vector for platform specific use | 117 | * Generic system vector for platform specific use |
113 | */ | 118 | */ |
114 | #define X86_PLATFORM_IPI_VECTOR 0xed | 119 | #define X86_PLATFORM_IPI_VECTOR 0xed |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b753ea59703a..91d323f47364 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -173,6 +173,8 @@ extern void print_cpu_info(struct cpuinfo_x86 *); | |||
173 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | 173 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); |
174 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | 174 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); |
175 | extern unsigned short num_cache_leaves; | 175 | extern unsigned short num_cache_leaves; |
176 | extern int get_shared_cpu_map(cpumask_var_t mask, | ||
177 | unsigned int cpu, int index); | ||
176 | 178 | ||
177 | extern void detect_extended_topology(struct cpuinfo_x86 *c); | 179 | extern void detect_extended_topology(struct cpuinfo_x86 *c); |
178 | extern void detect_ht(struct cpuinfo_x86 *c); | 180 | extern void detect_ht(struct cpuinfo_x86 *c); |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index beb9b5f8f8a4..4f61e8b0715a 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -344,9 +344,13 @@ | |||
344 | #define __NR_perf_event_open 336 | 344 | #define __NR_perf_event_open 336 |
345 | #define __NR_recvmmsg 337 | 345 | #define __NR_recvmmsg 337 |
346 | 346 | ||
347 | #define __NR_LITMUS 338 | ||
348 | |||
349 | #include "litmus/unistd_32.h" | ||
350 | |||
347 | #ifdef __KERNEL__ | 351 | #ifdef __KERNEL__ |
348 | 352 | ||
349 | #define NR_syscalls 338 | 353 | #define NR_syscalls 339 + NR_litmus_syscalls |
350 | 354 | ||
351 | #define __ARCH_WANT_IPC_PARSE_VERSION | 355 | #define __ARCH_WANT_IPC_PARSE_VERSION |
352 | #define __ARCH_WANT_OLD_READDIR | 356 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index ff4307b0e81e..b21c3b269aac 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -664,6 +664,10 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) | |||
664 | #define __NR_recvmmsg 299 | 664 | #define __NR_recvmmsg 299 |
665 | __SYSCALL(__NR_recvmmsg, sys_recvmmsg) | 665 | __SYSCALL(__NR_recvmmsg, sys_recvmmsg) |
666 | 666 | ||
667 | #define __NR_LITMUS 299 | ||
668 | |||
669 | #include "litmus/unistd_64.h" | ||
670 | |||
667 | #ifndef __NO_STUBS | 671 | #ifndef __NO_STUBS |
668 | #define __ARCH_WANT_OLD_READDIR | 672 | #define __ARCH_WANT_OLD_READDIR |
669 | #define __ARCH_WANT_OLD_STAT | 673 | #define __ARCH_WANT_OLD_STAT |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4c58352209e0..d09934e22ca5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -117,6 +117,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | |||
117 | 117 | ||
118 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 118 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
119 | 119 | ||
120 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o | ||
121 | |||
120 | ### | 122 | ### |
121 | # 64 bit specific files | 123 | # 64 bit specific files |
122 | ifeq ($(CONFIG_X86_64),y) | 124 | ifeq ($(CONFIG_X86_64),y) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 95962a93f99a..94d8e475744c 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -632,6 +632,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
632 | static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); | 632 | static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); |
633 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) | 633 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) |
634 | 634 | ||
635 | /* returns CPUs that share the index cache with cpu */ | ||
636 | int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) | ||
637 | { | ||
638 | int ret = 0; | ||
639 | struct _cpuid4_info *this_leaf; | ||
640 | |||
641 | if (index >= num_cache_leaves) { | ||
642 | index = num_cache_leaves - 1; | ||
643 | ret = index; | ||
644 | } | ||
645 | |||
646 | this_leaf = CPUID4_INFO_IDX(cpu,index); | ||
647 | cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map)); | ||
648 | |||
649 | return ret; | ||
650 | } | ||
651 | |||
635 | #ifdef CONFIG_SMP | 652 | #ifdef CONFIG_SMP |
636 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | 653 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) |
637 | { | 654 | { |
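A hedged kernel-context sketch (not part of this patch) of how the helper added above can be used; the cache index passed (2) is an assumption about the cache-leaf ordering on the target CPU.

/* Hypothetical use of get_shared_cpu_map(): count the CPUs that share a
 * given cache leaf with 'cpu'. Out-of-range indices are clamped by the
 * helper above, which then returns the clamped index instead of 0. */
#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/errno.h>
#include <asm/processor.h>

static int count_cache_peers(unsigned int cpu)
{
	cpumask_var_t mask;
	int n;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	get_shared_cpu_map(mask, cpu, 2);
	n = cpumask_weight(mask);
	free_cpumask_var(mask);
	return n;
}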
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0697ff139837..b9ec6cd7796f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1016,6 +1016,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \ | |||
1016 | call_function_interrupt smp_call_function_interrupt | 1016 | call_function_interrupt smp_call_function_interrupt |
1017 | apicinterrupt RESCHEDULE_VECTOR \ | 1017 | apicinterrupt RESCHEDULE_VECTOR \ |
1018 | reschedule_interrupt smp_reschedule_interrupt | 1018 | reschedule_interrupt smp_reschedule_interrupt |
1019 | apicinterrupt PULL_TIMERS_VECTOR \ | ||
1020 | pull_timers_interrupt smp_pull_timers_interrupt | ||
1019 | #endif | 1021 | #endif |
1020 | 1022 | ||
1021 | apicinterrupt ERROR_APIC_VECTOR \ | 1023 | apicinterrupt ERROR_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c new file mode 100644 index 000000000000..e07ee30dfff9 --- /dev/null +++ b/arch/x86/kernel/ft_event.c | |||
@@ -0,0 +1,112 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include <litmus/feather_trace.h> | ||
4 | |||
5 | #ifdef __ARCH_HAS_FEATHER_TRACE | ||
6 | /* the feather trace management functions assume | ||
7 | * exclusive access to the event table | ||
8 | */ | ||
9 | |||
10 | |||
11 | #define BYTE_JUMP 0xeb | ||
12 | #define BYTE_JUMP_LEN 0x02 | ||
13 | |||
14 | /* for each event, there is an entry in the event table */ | ||
15 | struct trace_event { | ||
16 | long id; | ||
17 | long count; | ||
18 | long start_addr; | ||
19 | long end_addr; | ||
20 | }; | ||
21 | |||
22 | extern struct trace_event __start___event_table[]; | ||
23 | extern struct trace_event __stop___event_table[]; | ||
24 | |||
25 | int ft_enable_event(unsigned long id) | ||
26 | { | ||
27 | struct trace_event* te = __start___event_table; | ||
28 | int count = 0; | ||
29 | char* delta; | ||
30 | unsigned char* instr; | ||
31 | |||
32 | while (te < __stop___event_table) { | ||
33 | if (te->id == id && ++te->count == 1) { | ||
34 | instr = (unsigned char*) te->start_addr; | ||
35 | /* make sure we don't patch the wrong instruction */ | ||
36 | if (*instr == BYTE_JUMP) { | ||
37 | delta = (((unsigned char*) te->start_addr) + 1); | ||
38 | *delta = 0; | ||
39 | } | ||
40 | } | ||
41 | if (te->id == id) | ||
42 | count++; | ||
43 | te++; | ||
44 | } | ||
45 | |||
46 | printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count); | ||
47 | return count; | ||
48 | } | ||
49 | |||
50 | int ft_disable_event(unsigned long id) | ||
51 | { | ||
52 | struct trace_event* te = __start___event_table; | ||
53 | int count = 0; | ||
54 | char* delta; | ||
55 | unsigned char* instr; | ||
56 | |||
57 | while (te < __stop___event_table) { | ||
58 | if (te->id == id && --te->count == 0) { | ||
59 | instr = (unsigned char*) te->start_addr; | ||
60 | if (*instr == BYTE_JUMP) { | ||
61 | delta = (((unsigned char*) te->start_addr) + 1); | ||
62 | *delta = te->end_addr - te->start_addr - | ||
63 | BYTE_JUMP_LEN; | ||
64 | } | ||
65 | } | ||
66 | if (te->id == id) | ||
67 | count++; | ||
68 | te++; | ||
69 | } | ||
70 | |||
71 | printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count); | ||
72 | return count; | ||
73 | } | ||
74 | |||
75 | int ft_disable_all_events(void) | ||
76 | { | ||
77 | struct trace_event* te = __start___event_table; | ||
78 | int count = 0; | ||
79 | char* delta; | ||
80 | unsigned char* instr; | ||
81 | |||
82 | while (te < __stop___event_table) { | ||
83 | if (te->count) { | ||
84 | instr = (unsigned char*) te->start_addr; | ||
85 | if (*instr == BYTE_JUMP) { | ||
86 | delta = (((unsigned char*) te->start_addr) | ||
87 | + 1); | ||
88 | *delta = te->end_addr - te->start_addr - | ||
89 | BYTE_JUMP_LEN; | ||
90 | te->count = 0; | ||
91 | count++; | ||
92 | } | ||
93 | } | ||
94 | te++; | ||
95 | } | ||
96 | return count; | ||
97 | } | ||
98 | |||
99 | int ft_is_event_enabled(unsigned long id) | ||
100 | { | ||
101 | struct trace_event* te = __start___event_table; | ||
102 | |||
103 | while (te < __stop___event_table) { | ||
104 | if (te->id == id) | ||
105 | return te->count; | ||
106 | te++; | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | #endif | ||
112 | |||
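A hedged user-space simulation (not LITMUS^RT code) of the bookkeeping above: every call site with a matching id has its count adjusted, and the instruction patch happens only on the 0-to-1 (enable) or 1-to-0 (disable) transition.

#include <stdio.h>

struct sim_event { long id; long count; };

static struct sim_event table[] = { {100, 0}, {100, 0}, {101, 0} };
#define N_EVENTS (sizeof(table) / sizeof(table[0]))

static int sim_enable(long id)
{
	unsigned int i;
	int touched = 0;

	for (i = 0; i < N_EVENTS; i++) {
		if (table[i].id != id)
			continue;
		if (++table[i].count == 1) {
			/* first enable: ft_enable_event() would patch the
			 * jmp displacement of this call site to 0 here */
		}
		touched++;
	}
	return touched;
}

int main(void)
{
	printf("enabled %d call sites for id 100\n", sim_enable(100)); /* 2 */
	return 0;
}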
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 0ed2d300cd46..a760ce1a2c0d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -189,6 +189,9 @@ static void __init smp_intr_init(void) | |||
189 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, | 189 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, |
190 | call_function_single_interrupt); | 190 | call_function_single_interrupt); |
191 | 191 | ||
192 | /* IPI for hrtimer pulling on remote cpus */ | ||
193 | alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt); | ||
194 | |||
192 | /* Low priority IPI to cleanup after moving an irq */ | 195 | /* Low priority IPI to cleanup after moving an irq */ |
193 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 196 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
194 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | 197 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index d801210945d6..97af589a5c0c 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -23,6 +23,9 @@ | |||
23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
24 | #include <linux/gfp.h> | 24 | #include <linux/gfp.h> |
25 | 25 | ||
26 | #include <litmus/litmus.h> | ||
27 | #include <litmus/trace.h> | ||
28 | |||
26 | #include <asm/mtrr.h> | 29 | #include <asm/mtrr.h> |
27 | #include <asm/tlbflush.h> | 30 | #include <asm/tlbflush.h> |
28 | #include <asm/mmu_context.h> | 31 | #include <asm/mmu_context.h> |
@@ -118,6 +121,7 @@ static void native_smp_send_reschedule(int cpu) | |||
118 | WARN_ON(1); | 121 | WARN_ON(1); |
119 | return; | 122 | return; |
120 | } | 123 | } |
124 | TS_SEND_RESCHED_START(cpu); | ||
121 | apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); | 125 | apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); |
122 | } | 126 | } |
123 | 127 | ||
@@ -147,6 +151,16 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
147 | free_cpumask_var(allbutself); | 151 | free_cpumask_var(allbutself); |
148 | } | 152 | } |
149 | 153 | ||
154 | /* trigger timers on remote cpu */ | ||
155 | void smp_send_pull_timers(int cpu) | ||
156 | { | ||
157 | if (unlikely(cpu_is_offline(cpu))) { | ||
158 | WARN_ON(1); | ||
159 | return; | ||
160 | } | ||
161 | apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR); | ||
162 | } | ||
163 | |||
150 | /* | 164 | /* |
151 | * this function calls the 'stop' function on all other CPUs in the system. | 165 | * this function calls the 'stop' function on all other CPUs in the system. |
152 | */ | 166 | */ |
@@ -198,7 +212,12 @@ static void native_smp_send_stop(void) | |||
198 | void smp_reschedule_interrupt(struct pt_regs *regs) | 212 | void smp_reschedule_interrupt(struct pt_regs *regs) |
199 | { | 213 | { |
200 | ack_APIC_irq(); | 214 | ack_APIC_irq(); |
215 | /* LITMUS^RT needs this interrupt to properly reschedule | ||
216 | * on this cpu | ||
217 | */ | ||
218 | set_tsk_need_resched(current); | ||
201 | inc_irq_stat(irq_resched_count); | 219 | inc_irq_stat(irq_resched_count); |
220 | TS_SEND_RESCHED_END; | ||
202 | /* | 221 | /* |
203 | * KVM uses this interrupt to force a cpu out of guest mode | 222 | * KVM uses this interrupt to force a cpu out of guest mode |
204 | */ | 223 | */ |
@@ -222,6 +241,15 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
222 | irq_exit(); | 241 | irq_exit(); |
223 | } | 242 | } |
224 | 243 | ||
244 | extern void hrtimer_pull(void); | ||
245 | |||
246 | void smp_pull_timers_interrupt(struct pt_regs *regs) | ||
247 | { | ||
248 | ack_APIC_irq(); | ||
249 | TRACE("pull timer interrupt\n"); | ||
250 | hrtimer_pull(); | ||
251 | } | ||
252 | |||
225 | struct smp_ops smp_ops = { | 253 | struct smp_ops smp_ops = { |
226 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 254 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
227 | .smp_prepare_cpus = native_smp_prepare_cpus, | 255 | .smp_prepare_cpus = native_smp_prepare_cpus, |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 8b3729341216..5da9a68546b7 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -337,3 +337,17 @@ ENTRY(sys_call_table) | |||
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | 337 | .long sys_rt_tgsigqueueinfo /* 335 */ |
338 | .long sys_perf_event_open | 338 | .long sys_perf_event_open |
339 | .long sys_recvmmsg | 339 | .long sys_recvmmsg |
340 | .long sys_set_rt_task_param /* LITMUS^RT 338 */ | ||
341 | .long sys_get_rt_task_param | ||
342 | .long sys_complete_job | ||
343 | .long sys_od_open | ||
344 | .long sys_od_close | ||
345 | .long sys_fmlp_down | ||
346 | .long sys_fmlp_up | ||
347 | .long sys_srp_down | ||
348 | .long sys_srp_up | ||
349 | .long sys_query_job_no | ||
350 | .long sys_wait_for_job_release | ||
351 | .long sys_wait_for_ts_release | ||
352 | .long sys_release_ts | ||
353 | .long sys_null_call | ||
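The numbers assigned above follow __NR_LITMUS (338 on x86-32, see unistd_32.h earlier in this patch), one per line in table order. A hedged user-space sketch follows; real programs would go through the liblitmus wrappers, and the NULL timestamp argument to null_call is an assumption.

/* Hedged user-space sketch (x86-32): invoking one of the syscalls added
 * above by its number. On a kernel without LITMUS^RT this simply fails
 * with ENOSYS. */
#include <unistd.h>
#include <sys/syscall.h>
#include <stdio.h>

#define NR_litmus_base   338			/* __NR_LITMUS on x86-32 */
#define NR_complete_job  (NR_litmus_base + 2)	/* third entry above */
#define NR_null_call     (NR_litmus_base + 13)	/* last entry above */

int main(void)
{
	long ret = syscall(NR_null_call, (void *) 0);
	printf("sys_null_call returned %ld\n", ret);
	return 0;
}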
diff --git a/fs/exec.c b/fs/exec.c | |||
@@ -19,7 +19,7 @@ | |||
19 | * current->executable is only used by the procfs. This allows a dispatch | 19 | * current->executable is only used by the procfs. This allows a dispatch |
20 | * table to check for several different types of binary formats. We keep | 20 | * table to check for several different types of binary formats. We keep |
21 | * trying until we recognize the file or we run out of supported binary | 21 | * trying until we recognize the file or we run out of supported binary |
22 | * formats. | 22 | * formats. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
@@ -56,6 +56,8 @@ | |||
56 | #include <linux/fs_struct.h> | 56 | #include <linux/fs_struct.h> |
57 | #include <linux/pipe_fs_i.h> | 57 | #include <linux/pipe_fs_i.h> |
58 | 58 | ||
59 | #include <litmus/litmus.h> | ||
60 | |||
59 | #include <asm/uaccess.h> | 61 | #include <asm/uaccess.h> |
60 | #include <asm/mmu_context.h> | 62 | #include <asm/mmu_context.h> |
61 | #include <asm/tlb.h> | 63 | #include <asm/tlb.h> |
@@ -79,7 +81,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert) | |||
79 | insert ? list_add(&fmt->lh, &formats) : | 81 | insert ? list_add(&fmt->lh, &formats) : |
80 | list_add_tail(&fmt->lh, &formats); | 82 | list_add_tail(&fmt->lh, &formats); |
81 | write_unlock(&binfmt_lock); | 83 | write_unlock(&binfmt_lock); |
82 | return 0; | 84 | return 0; |
83 | } | 85 | } |
84 | 86 | ||
85 | EXPORT_SYMBOL(__register_binfmt); | 87 | EXPORT_SYMBOL(__register_binfmt); |
@@ -1045,7 +1047,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
1045 | group */ | 1047 | group */ |
1046 | 1048 | ||
1047 | current->self_exec_id++; | 1049 | current->self_exec_id++; |
1048 | 1050 | ||
1049 | flush_signal_handlers(current, 0); | 1051 | flush_signal_handlers(current, 0); |
1050 | flush_old_files(current->files); | 1052 | flush_old_files(current->files); |
1051 | } | 1053 | } |
@@ -1135,8 +1137,8 @@ int check_unsafe_exec(struct linux_binprm *bprm) | |||
1135 | return res; | 1137 | return res; |
1136 | } | 1138 | } |
1137 | 1139 | ||
1138 | /* | 1140 | /* |
1139 | * Fill the binprm structure from the inode. | 1141 | * Fill the binprm structure from the inode. |
1140 | * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes | 1142 | * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes |
1141 | * | 1143 | * |
1142 | * This may be called multiple times for binary chains (scripts for example). | 1144 | * This may be called multiple times for binary chains (scripts for example). |
@@ -1348,6 +1350,7 @@ int do_execve(char * filename, | |||
1348 | goto out_unmark; | 1350 | goto out_unmark; |
1349 | 1351 | ||
1350 | sched_exec(); | 1352 | sched_exec(); |
1353 | litmus_exec(); | ||
1351 | 1354 | ||
1352 | bprm->file = file; | 1355 | bprm->file = file; |
1353 | bprm->filename = filename; | 1356 | bprm->filename = filename; |
diff --git a/fs/inode.c b/fs/inode.c index 407bf392e20a..aaaaf096aa8e 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -271,6 +271,8 @@ void inode_init_once(struct inode *inode) | |||
271 | #ifdef CONFIG_FSNOTIFY | 271 | #ifdef CONFIG_FSNOTIFY |
272 | INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); | 272 | INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); |
273 | #endif | 273 | #endif |
274 | INIT_LIST_HEAD(&inode->i_obj_list); | ||
275 | mutex_init(&inode->i_obj_mutex); | ||
274 | } | 276 | } |
275 | EXPORT_SYMBOL(inode_init_once); | 277 | EXPORT_SYMBOL(inode_init_once); |
276 | 278 | ||
diff --git a/include/linux/completion.h b/include/linux/completion.h index 4a6b604ef7e4..258bec13d424 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h | |||
@@ -88,6 +88,7 @@ extern bool completion_done(struct completion *x); | |||
88 | 88 | ||
89 | extern void complete(struct completion *); | 89 | extern void complete(struct completion *); |
90 | extern void complete_all(struct completion *); | 90 | extern void complete_all(struct completion *); |
91 | extern void complete_n(struct completion *, int n); | ||
91 | 92 | ||
92 | /** | 93 | /** |
93 | * INIT_COMPLETION: - reinitialize a completion structure | 94 | * INIT_COMPLETION: - reinitialize a completion structure |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 44f35aea2f1f..894918440bc8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -15,8 +15,8 @@ | |||
15 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute | 15 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute |
16 | * upper limit on files-per-process. | 16 | * upper limit on files-per-process. |
17 | * | 17 | * |
18 | * Some programs (notably those using select()) may have to be | 18 | * Some programs (notably those using select()) may have to be |
19 | * recompiled to take full advantage of the new limits.. | 19 | * recompiled to take full advantage of the new limits.. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | /* Fixed constants first: */ | 22 | /* Fixed constants first: */ |
@@ -173,7 +173,7 @@ struct inodes_stat_t { | |||
173 | #define SEL_EX 4 | 173 | #define SEL_EX 4 |
174 | 174 | ||
175 | /* public flags for file_system_type */ | 175 | /* public flags for file_system_type */ |
176 | #define FS_REQUIRES_DEV 1 | 176 | #define FS_REQUIRES_DEV 1 |
177 | #define FS_BINARY_MOUNTDATA 2 | 177 | #define FS_BINARY_MOUNTDATA 2 |
178 | #define FS_HAS_SUBTYPE 4 | 178 | #define FS_HAS_SUBTYPE 4 |
179 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | 179 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ |
@@ -471,7 +471,7 @@ struct iattr { | |||
471 | */ | 471 | */ |
472 | #include <linux/quota.h> | 472 | #include <linux/quota.h> |
473 | 473 | ||
474 | /** | 474 | /** |
475 | * enum positive_aop_returns - aop return codes with specific semantics | 475 | * enum positive_aop_returns - aop return codes with specific semantics |
476 | * | 476 | * |
477 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has | 477 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has |
@@ -481,7 +481,7 @@ struct iattr { | |||
481 | * be a candidate for writeback again in the near | 481 | * be a candidate for writeback again in the near |
482 | * future. Other callers must be careful to unlock | 482 | * future. Other callers must be careful to unlock |
483 | * the page if they get this return. Returned by | 483 | * the page if they get this return. Returned by |
484 | * writepage(); | 484 | * writepage(); |
485 | * | 485 | * |
486 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has | 486 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has |
487 | * unlocked it and the page might have been truncated. | 487 | * unlocked it and the page might have been truncated. |
@@ -720,6 +720,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping) | |||
720 | 720 | ||
721 | struct posix_acl; | 721 | struct posix_acl; |
722 | #define ACL_NOT_CACHED ((void *)(-1)) | 722 | #define ACL_NOT_CACHED ((void *)(-1)) |
723 | struct inode_obj_id_table; | ||
723 | 724 | ||
724 | struct inode { | 725 | struct inode { |
725 | struct hlist_node i_hash; | 726 | struct hlist_node i_hash; |
@@ -788,6 +789,8 @@ struct inode { | |||
788 | struct posix_acl *i_acl; | 789 | struct posix_acl *i_acl; |
789 | struct posix_acl *i_default_acl; | 790 | struct posix_acl *i_default_acl; |
790 | #endif | 791 | #endif |
792 | struct list_head i_obj_list; | ||
793 | struct mutex i_obj_mutex; | ||
791 | void *i_private; /* fs or device private pointer */ | 794 | void *i_private; /* fs or device private pointer */ |
792 | }; | 795 | }; |
793 | 796 | ||
@@ -1000,10 +1003,10 @@ static inline int file_check_writeable(struct file *filp) | |||
1000 | 1003 | ||
1001 | #define MAX_NON_LFS ((1UL<<31) - 1) | 1004 | #define MAX_NON_LFS ((1UL<<31) - 1) |
1002 | 1005 | ||
1003 | /* Page cache limit. The filesystems should put that into their s_maxbytes | 1006 | /* Page cache limit. The filesystems should put that into their s_maxbytes |
1004 | limits, otherwise bad things can happen in VM. */ | 1007 | limits, otherwise bad things can happen in VM. */ |
1005 | #if BITS_PER_LONG==32 | 1008 | #if BITS_PER_LONG==32 |
1006 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) | 1009 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) |
1007 | #elif BITS_PER_LONG==64 | 1010 | #elif BITS_PER_LONG==64 |
1008 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL | 1011 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL |
1009 | #endif | 1012 | #endif |
@@ -2129,7 +2132,7 @@ extern int may_open(struct path *, int, int); | |||
2129 | 2132 | ||
2130 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); | 2133 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); |
2131 | extern struct file * open_exec(const char *); | 2134 | extern struct file * open_exec(const char *); |
2132 | 2135 | ||
2133 | /* fs/dcache.c -- generic fs support functions */ | 2136 | /* fs/dcache.c -- generic fs support functions */ |
2134 | extern int is_subdir(struct dentry *, struct dentry *); | 2137 | extern int is_subdir(struct dentry *, struct dentry *); |
2135 | extern int path_is_under(struct path *, struct path *); | 2138 | extern int path_is_under(struct path *, struct path *); |
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5d86fb2309d2..b34823755ee4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h | |||
@@ -167,6 +167,7 @@ struct hrtimer_clock_base { | |||
167 | * @nr_retries: Total number of hrtimer interrupt retries | 167 | * @nr_retries: Total number of hrtimer interrupt retries |
168 | * @nr_hangs: Total number of hrtimer interrupt hangs | 168 | * @nr_hangs: Total number of hrtimer interrupt hangs |
169 | * @max_hang_time: Maximum time spent in hrtimer_interrupt | 169 | * @max_hang_time: Maximum time spent in hrtimer_interrupt |
170 | * @to_pull: LITMUS^RT list of timers to be pulled on this cpu | ||
170 | */ | 171 | */ |
171 | struct hrtimer_cpu_base { | 172 | struct hrtimer_cpu_base { |
172 | raw_spinlock_t lock; | 173 | raw_spinlock_t lock; |
@@ -180,6 +181,26 @@ struct hrtimer_cpu_base { | |||
180 | unsigned long nr_hangs; | 181 | unsigned long nr_hangs; |
181 | ktime_t max_hang_time; | 182 | ktime_t max_hang_time; |
182 | #endif | 183 | #endif |
184 | struct list_head to_pull; | ||
185 | }; | ||
186 | |||
187 | #define HRTIMER_START_ON_INACTIVE 0 | ||
188 | #define HRTIMER_START_ON_QUEUED 1 | ||
189 | |||
190 | /* | ||
191 | * struct hrtimer_start_on_info - save timer info on remote cpu | ||
192 | * @list: list of hrtimer_start_on_info on remote cpu (to_pull) | ||
193 | * @timer: timer to be triggered on remote cpu | ||
194 | * @time: expiration time of the timer | ||
195 | * @mode: timer mode | ||
196 | * @state: activity flag | ||
197 | */ | ||
198 | struct hrtimer_start_on_info { | ||
199 | struct list_head list; | ||
200 | struct hrtimer *timer; | ||
201 | ktime_t time; | ||
202 | enum hrtimer_mode mode; | ||
203 | atomic_t state; | ||
183 | }; | 204 | }; |
184 | 205 | ||
185 | static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) | 206 | static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) |
@@ -348,6 +369,10 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
348 | unsigned long delta_ns, | 369 | unsigned long delta_ns, |
349 | const enum hrtimer_mode mode, int wakeup); | 370 | const enum hrtimer_mode mode, int wakeup); |
350 | 371 | ||
372 | extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info, | ||
373 | struct hrtimer *timer, ktime_t time, | ||
374 | const enum hrtimer_mode mode); | ||
375 | |||
351 | extern int hrtimer_cancel(struct hrtimer *timer); | 376 | extern int hrtimer_cancel(struct hrtimer *timer); |
352 | extern int hrtimer_try_to_cancel(struct hrtimer *timer); | 377 | extern int hrtimer_try_to_cancel(struct hrtimer *timer); |
353 | 378 | ||
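A hedged kernel-context sketch (not part of this patch) of the hrtimer_start_on() interface declared above; the callback and naming are hypothetical.

/* Hypothetical use of hrtimer_start_on(): arm a timer on a possibly
 * remote CPU. For a remote CPU the request is queued on that CPU's
 * to_pull list and a PULL_TIMERS_VECTOR IPI (smp_send_pull_timers)
 * asks it to arm the timer locally. */
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer release_timer;
static struct hrtimer_start_on_info release_info;

static enum hrtimer_restart on_release(struct hrtimer *timer)
{
	/* hypothetical: move released jobs to the ready queue */
	return HRTIMER_NORESTART;
}

static void arm_release_on(int cpu, ktime_t when)
{
	hrtimer_init(&release_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	release_timer.function = on_release;
	atomic_set(&release_info.state, HRTIMER_START_ON_INACTIVE);
	hrtimer_start_on(cpu, &release_info, &release_timer, when,
			 HRTIMER_MODE_ABS);
}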
diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b7b81df78b3..225347d97d47 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -38,6 +38,7 @@ | |||
38 | #define SCHED_BATCH 3 | 38 | #define SCHED_BATCH 3 |
39 | /* SCHED_ISO: reserved but not implemented yet */ | 39 | /* SCHED_ISO: reserved but not implemented yet */ |
40 | #define SCHED_IDLE 5 | 40 | #define SCHED_IDLE 5 |
41 | #define SCHED_LITMUS 6 | ||
41 | /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ | 42 | /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ |
42 | #define SCHED_RESET_ON_FORK 0x40000000 | 43 | #define SCHED_RESET_ON_FORK 0x40000000 |
43 | 44 | ||
@@ -94,6 +95,8 @@ struct sched_param { | |||
94 | 95 | ||
95 | #include <asm/processor.h> | 96 | #include <asm/processor.h> |
96 | 97 | ||
98 | #include <litmus/rt_param.h> | ||
99 | |||
97 | struct exec_domain; | 100 | struct exec_domain; |
98 | struct futex_pi_state; | 101 | struct futex_pi_state; |
99 | struct robust_list_head; | 102 | struct robust_list_head; |
@@ -1166,6 +1169,7 @@ struct sched_rt_entity { | |||
1166 | }; | 1169 | }; |
1167 | 1170 | ||
1168 | struct rcu_node; | 1171 | struct rcu_node; |
1172 | struct od_table_entry; | ||
1169 | 1173 | ||
1170 | struct task_struct { | 1174 | struct task_struct { |
1171 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 1175 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
@@ -1250,9 +1254,9 @@ struct task_struct { | |||
1250 | unsigned long stack_canary; | 1254 | unsigned long stack_canary; |
1251 | #endif | 1255 | #endif |
1252 | 1256 | ||
1253 | /* | 1257 | /* |
1254 | * pointers to (original) parent process, youngest child, younger sibling, | 1258 | * pointers to (original) parent process, youngest child, younger sibling, |
1255 | * older sibling, respectively. (p->father can be replaced with | 1259 | * older sibling, respectively. (p->father can be replaced with |
1256 | * p->real_parent->pid) | 1260 | * p->real_parent->pid) |
1257 | */ | 1261 | */ |
1258 | struct task_struct *real_parent; /* real parent process */ | 1262 | struct task_struct *real_parent; /* real parent process */ |
@@ -1464,6 +1468,13 @@ struct task_struct { | |||
1464 | int make_it_fail; | 1468 | int make_it_fail; |
1465 | #endif | 1469 | #endif |
1466 | struct prop_local_single dirties; | 1470 | struct prop_local_single dirties; |
1471 | |||
1472 | /* LITMUS RT parameters and state */ | ||
1473 | struct rt_param rt_param; | ||
1474 | |||
1475 | /* references to PI semaphores, etc. */ | ||
1476 | struct od_table_entry *od_table; | ||
1477 | |||
1467 | #ifdef CONFIG_LATENCYTOP | 1478 | #ifdef CONFIG_LATENCYTOP |
1468 | int latency_record_count; | 1479 | int latency_record_count; |
1469 | struct latency_record latency_record[LT_SAVECOUNT]; | 1480 | struct latency_record latency_record[LT_SAVECOUNT]; |
@@ -2018,7 +2029,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s | |||
2018 | spin_unlock_irqrestore(&tsk->sighand->siglock, flags); | 2029 | spin_unlock_irqrestore(&tsk->sighand->siglock, flags); |
2019 | 2030 | ||
2020 | return ret; | 2031 | return ret; |
2021 | } | 2032 | } |
2022 | 2033 | ||
2023 | extern void block_all_signals(int (*notifier)(void *priv), void *priv, | 2034 | extern void block_all_signals(int (*notifier)(void *priv), void *priv, |
2024 | sigset_t *mask); | 2035 | sigset_t *mask); |
diff --git a/include/linux/smp.h b/include/linux/smp.h index cfa2d20e35f1..f86d40768e7f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
@@ -80,6 +80,11 @@ int smp_call_function_any(const struct cpumask *mask, | |||
80 | void (*func)(void *info), void *info, int wait); | 80 | void (*func)(void *info), void *info, int wait); |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * sends a 'pull timer' event to a remote CPU | ||
84 | */ | ||
85 | extern void smp_send_pull_timers(int cpu); | ||
86 | |||
87 | /* | ||
83 | * Generic and arch helpers | 88 | * Generic and arch helpers |
84 | */ | 89 | */ |
85 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS | 90 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS |
diff --git a/include/linux/tick.h b/include/linux/tick.h index d2ae79e21be3..25d0cf41d3fd 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h | |||
@@ -73,6 +73,11 @@ extern int tick_is_oneshot_available(void); | |||
73 | extern struct tick_device *tick_get_device(int cpu); | 73 | extern struct tick_device *tick_get_device(int cpu); |
74 | 74 | ||
75 | # ifdef CONFIG_HIGH_RES_TIMERS | 75 | # ifdef CONFIG_HIGH_RES_TIMERS |
76 | /* LITMUS^RT tick alignment */ | ||
77 | #define LINUX_DEFAULT_TICKS 0 | ||
78 | #define LITMUS_ALIGNED_TICKS 1 | ||
79 | #define LITMUS_STAGGERED_TICKS 2 | ||
80 | |||
76 | extern int tick_init_highres(void); | 81 | extern int tick_init_highres(void); |
77 | extern int tick_program_event(ktime_t expires, int force); | 82 | extern int tick_program_event(ktime_t expires, int force); |
78 | extern void tick_setup_sched_timer(void); | 83 | extern void tick_setup_sched_timer(void); |
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h new file mode 100644 index 000000000000..cf4864a498d8 --- /dev/null +++ b/include/litmus/bheap.h | |||
@@ -0,0 +1,77 @@ | |||
1 | /* bheap.h -- Binomial Heaps | ||
2 | * | ||
3 | * (c) 2008, 2009 Bjoern Brandenburg | ||
4 | */ | ||
5 | |||
6 | #ifndef BHEAP_H | ||
7 | #define BHEAP_H | ||
8 | |||
9 | #define NOT_IN_HEAP UINT_MAX | ||
10 | |||
11 | struct bheap_node { | ||
12 | struct bheap_node* parent; | ||
13 | struct bheap_node* next; | ||
14 | struct bheap_node* child; | ||
15 | |||
16 | unsigned int degree; | ||
17 | void* value; | ||
18 | struct bheap_node** ref; | ||
19 | }; | ||
20 | |||
21 | struct bheap { | ||
22 | struct bheap_node* head; | ||
23 | /* We cache the minimum of the heap. | ||
24 | * This speeds up repeated peek operations. | ||
25 | */ | ||
26 | struct bheap_node* min; | ||
27 | }; | ||
28 | |||
29 | typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b); | ||
30 | |||
31 | void bheap_init(struct bheap* heap); | ||
32 | void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value); | ||
33 | |||
34 | static inline int bheap_node_in_heap(struct bheap_node* h) | ||
35 | { | ||
36 | return h->degree != NOT_IN_HEAP; | ||
37 | } | ||
38 | |||
39 | static inline int bheap_empty(struct bheap* heap) | ||
40 | { | ||
41 | return heap->head == NULL && heap->min == NULL; | ||
42 | } | ||
43 | |||
44 | /* insert (and reinitialize) a node into the heap */ | ||
45 | void bheap_insert(bheap_prio_t higher_prio, | ||
46 | struct bheap* heap, | ||
47 | struct bheap_node* node); | ||
48 | |||
49 | /* merge addition into target */ | ||
50 | void bheap_union(bheap_prio_t higher_prio, | ||
51 | struct bheap* target, | ||
52 | struct bheap* addition); | ||
53 | |||
54 | struct bheap_node* bheap_peek(bheap_prio_t higher_prio, | ||
55 | struct bheap* heap); | ||
56 | |||
57 | struct bheap_node* bheap_take(bheap_prio_t higher_prio, | ||
58 | struct bheap* heap); | ||
59 | |||
60 | void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap); | ||
61 | int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node); | ||
62 | |||
63 | void bheap_delete(bheap_prio_t higher_prio, | ||
64 | struct bheap* heap, | ||
65 | struct bheap_node* node); | ||
66 | |||
67 | /* allocate from memcache */ | ||
68 | struct bheap_node* bheap_node_alloc(int gfp_flags); | ||
69 | void bheap_node_free(struct bheap_node* hn); | ||
70 | |||
71 | /* allocate a heap node for value and insert into the heap */ | ||
72 | int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, | ||
73 | void* value, int gfp_flags); | ||
74 | |||
75 | void* bheap_take_del(bheap_prio_t higher_prio, | ||
76 | struct bheap* heap); | ||
77 | #endif | ||
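A hedged kernel-context sketch (not part of this patch) of the bheap API above, assuming linkage against litmus/bheap.c; struct demo_job and its deadline field are illustrative only.

/* Hypothetical EDF-style ready queue built on the bheap API above. */
#include <litmus/bheap.h>
#include <linux/gfp.h>

struct demo_job {
	unsigned long long deadline;
	struct bheap_node *heap_node;	/* kept up to date via bheap_node->ref */
};

/* bheap_prio_t: non-zero if 'a' has higher priority than 'b' */
static int earlier_deadline(struct bheap_node *a, struct bheap_node *b)
{
	struct demo_job *ja = a->value, *jb = b->value;
	return ja->deadline < jb->deadline;
}

static void demo_enqueue_and_peek(struct bheap *ready, struct demo_job *job)
{
	struct bheap_node *hn;

	job->heap_node = bheap_node_alloc(GFP_ATOMIC);
	if (!job->heap_node)
		return;
	bheap_node_init(&job->heap_node, job);
	bheap_insert(earlier_deadline, ready, job->heap_node);

	/* hn->value is the earliest-deadline job; bheap_take() would remove it */
	hn = bheap_peek(earlier_deadline, ready);
	(void) hn;
}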
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h new file mode 100644 index 000000000000..80d4321cc87e --- /dev/null +++ b/include/litmus/edf_common.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * EDF common data structures and utility functions shared by all EDF | ||
3 | * based scheduler plugins | ||
4 | */ | ||
5 | |||
6 | /* CLEANUP: Add comments and make it less messy. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #ifndef __UNC_EDF_COMMON_H__ | ||
11 | #define __UNC_EDF_COMMON_H__ | ||
12 | |||
13 | #include <litmus/rt_domain.h> | ||
14 | |||
15 | void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
16 | release_jobs_t release); | ||
17 | |||
18 | int edf_higher_prio(struct task_struct* first, | ||
19 | struct task_struct* second); | ||
20 | |||
21 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b); | ||
22 | |||
23 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t); | ||
24 | |||
25 | int edf_set_hp_task(struct pi_semaphore *sem); | ||
26 | int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu); | ||
27 | #endif | ||
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h new file mode 100644 index 000000000000..286e10f86de0 --- /dev/null +++ b/include/litmus/fdso.h | |||
@@ -0,0 +1,69 @@ | |||
1 | /* fdso.h - file descriptor attached shared objects | ||
2 | * | ||
3 | * (c) 2007 B. Brandenburg, LITMUS^RT project | ||
4 | */ | ||
5 | |||
6 | #ifndef _LINUX_FDSO_H_ | ||
7 | #define _LINUX_FDSO_H_ | ||
8 | |||
9 | #include <linux/list.h> | ||
10 | #include <asm/atomic.h> | ||
11 | |||
12 | #include <linux/fs.h> | ||
13 | |||
14 | #define MAX_OBJECT_DESCRIPTORS 32 | ||
15 | |||
16 | typedef enum { | ||
17 | MIN_OBJ_TYPE = 0, | ||
18 | |||
19 | FMLP_SEM = 0, | ||
20 | SRP_SEM = 1, | ||
21 | |||
22 | MAX_OBJ_TYPE = 1 | ||
23 | } obj_type_t; | ||
24 | |||
25 | struct inode_obj_id { | ||
26 | struct list_head list; | ||
27 | atomic_t count; | ||
28 | struct inode* inode; | ||
29 | |||
30 | obj_type_t type; | ||
31 | void* obj; | ||
32 | unsigned int id; | ||
33 | }; | ||
34 | |||
35 | |||
36 | struct od_table_entry { | ||
37 | unsigned int used; | ||
38 | |||
39 | struct inode_obj_id* obj; | ||
40 | void* extra; | ||
41 | }; | ||
42 | |||
43 | struct fdso_ops { | ||
44 | void* (*create) (void); | ||
45 | void (*destroy)(void*); | ||
46 | int (*open) (struct od_table_entry*, void* __user); | ||
47 | int (*close) (struct od_table_entry*); | ||
48 | }; | ||
49 | |||
50 | /* translate a userspace supplied od into the raw table entry | ||
51 | * returns NULL if od is invalid | ||
52 | */ | ||
53 | struct od_table_entry* __od_lookup(int od); | ||
54 | |||
55 | /* translate a userspace supplied od into the associated object | ||
56 | * returns NULL if od is invalid | ||
57 | */ | ||
58 | static inline void* od_lookup(int od, obj_type_t type) | ||
59 | { | ||
60 | struct od_table_entry* e = __od_lookup(od); | ||
61 | return e && e->obj->type == type ? e->obj->obj : NULL; | ||
62 | } | ||
63 | |||
64 | #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) | ||
65 | #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) | ||
66 | #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) | ||
67 | |||
68 | |||
69 | #endif | ||
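A hedged kernel-context sketch (not part of this patch) of resolving an object descriptor with the helpers above, assuming litmus/fdso.c provides __od_lookup().

/* Hypothetical syscall body resolving an object descriptor to an FMLP
 * semaphore via the helpers above. */
#include <litmus/fdso.h>
#include <linux/errno.h>

static long demo_fmlp_op(int od)
{
	struct pi_semaphore *sem = lookup_fmlp_sem(od);

	if (!sem)
		return -EINVAL;	/* od invalid, or not an FMLP_SEM object */
	/* hypothetical: operate on 'sem' */
	return 0;
}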
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h new file mode 100644 index 000000000000..6c18277fdfc9 --- /dev/null +++ b/include/litmus/feather_buffer.h | |||
@@ -0,0 +1,94 @@ | |||
1 | #ifndef _FEATHER_BUFFER_H_ | ||
2 | #define _FEATHER_BUFFER_H_ | ||
3 | |||
4 | /* requires UINT_MAX and memcpy */ | ||
5 | |||
6 | #define SLOT_FREE 0 | ||
7 | #define SLOT_BUSY 1 | ||
8 | #define SLOT_READY 2 | ||
9 | |||
10 | struct ft_buffer { | ||
11 | unsigned int slot_count; | ||
12 | unsigned int slot_size; | ||
13 | |||
14 | int free_count; | ||
15 | unsigned int write_idx; | ||
16 | unsigned int read_idx; | ||
17 | |||
18 | char* slots; | ||
19 | void* buffer_mem; | ||
20 | unsigned int failed_writes; | ||
21 | }; | ||
22 | |||
23 | static inline int init_ft_buffer(struct ft_buffer* buf, | ||
24 | unsigned int slot_count, | ||
25 | unsigned int slot_size, | ||
26 | char* slots, | ||
27 | void* buffer_mem) | ||
28 | { | ||
29 | int i = 0; | ||
30 | if (!slot_count || UINT_MAX % slot_count != slot_count - 1) { | ||
31 | /* The slot count must divide UINT_MAX + 1 so that when the | ||
32 | * write index wraps around it correctly points back to slot 0. | ||
33 | */ | ||
34 | return 0; | ||
35 | } else { | ||
36 | buf->slot_count = slot_count; | ||
37 | buf->slot_size = slot_size; | ||
38 | buf->slots = slots; | ||
39 | buf->buffer_mem = buffer_mem; | ||
40 | buf->free_count = slot_count; | ||
41 | buf->write_idx = 0; | ||
42 | buf->read_idx = 0; | ||
43 | buf->failed_writes = 0; | ||
44 | for (i = 0; i < slot_count; i++) | ||
45 | buf->slots[i] = SLOT_FREE; | ||
46 | return 1; | ||
47 | } | ||
48 | } | ||
49 | |||
50 | static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr) | ||
51 | { | ||
52 | int free = fetch_and_dec(&buf->free_count); | ||
53 | unsigned int idx; | ||
54 | if (free <= 0) { | ||
55 | fetch_and_inc(&buf->free_count); | ||
56 | *ptr = 0; | ||
57 | fetch_and_inc(&buf->failed_writes); | ||
58 | return 0; | ||
59 | } else { | ||
60 | idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count; | ||
61 | buf->slots[idx] = SLOT_BUSY; | ||
62 | *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size; | ||
63 | return 1; | ||
64 | } | ||
65 | } | ||
66 | |||
67 | static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr) | ||
68 | { | ||
69 | unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size; | ||
70 | buf->slots[idx] = SLOT_READY; | ||
71 | } | ||
72 | |||
73 | |||
74 | /* exclusive reader access is assumed */ | ||
75 | static inline int ft_buffer_read(struct ft_buffer* buf, void* dest) | ||
76 | { | ||
77 | unsigned int idx; | ||
78 | if (buf->free_count == buf->slot_count) | ||
79 | /* nothing available */ | ||
80 | return 0; | ||
81 | idx = buf->read_idx % buf->slot_count; | ||
82 | if (buf->slots[idx] == SLOT_READY) { | ||
83 | memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size, | ||
84 | buf->slot_size); | ||
85 | buf->slots[idx] = SLOT_FREE; | ||
86 | buf->read_idx++; | ||
87 | fetch_and_inc(&buf->free_count); | ||
88 | return 1; | ||
89 | } else | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | |||
94 | #endif | ||
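A small user-space check (not LITMUS^RT code) of the init_ft_buffer() guard above: it accepts exactly those slot counts that divide UINT_MAX + 1, i.e. powers of two, so that "idx % slot_count" stays consistent when the unsigned write index wraps around.

#include <stdio.h>
#include <limits.h>

static int acceptable(unsigned int slot_count)
{
	return slot_count && UINT_MAX % slot_count == slot_count - 1;
}

int main(void)
{
	unsigned int candidates[] = { 1, 3, 64, 100, 4096 };
	unsigned int i;

	/* assuming 32-bit unsigned int: 2^32 is a multiple of 64 and 4096,
	 * but not of 3 or 100, so only the powers of two are accepted */
	for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++)
		printf("slot_count=%u -> %s\n", candidates[i],
		       acceptable(candidates[i]) ? "ok" : "rejected");
	return 0;
}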
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h new file mode 100644 index 000000000000..7d27e763406f --- /dev/null +++ b/include/litmus/feather_trace.h | |||
@@ -0,0 +1,49 @@ | |||
1 | #ifndef _FEATHER_TRACE_H_ | ||
2 | #define _FEATHER_TRACE_H_ | ||
3 | |||
4 | #include <asm/atomic.h> | ||
5 | #include <asm/feather_trace.h> | ||
6 | |||
7 | int ft_enable_event(unsigned long id); | ||
8 | int ft_disable_event(unsigned long id); | ||
9 | int ft_is_event_enabled(unsigned long id); | ||
10 | int ft_disable_all_events(void); | ||
11 | |||
12 | /* atomic_* functions are inline anyway */ | ||
13 | static inline int fetch_and_inc(int *val) | ||
14 | { | ||
15 | return atomic_add_return(1, (atomic_t*) val) - 1; | ||
16 | } | ||
17 | |||
18 | static inline int fetch_and_dec(int *val) | ||
19 | { | ||
20 | return atomic_sub_return(1, (atomic_t*) val) + 1; | ||
21 | } | ||
22 | |||
23 | #ifndef __ARCH_HAS_FEATHER_TRACE | ||
24 | /* provide default implementation */ | ||
25 | |||
26 | #define feather_callback | ||
27 | |||
28 | #define MAX_EVENTS 1024 | ||
29 | |||
30 | extern int ft_events[MAX_EVENTS]; | ||
31 | |||
32 | #define ft_event(id, callback) \ | ||
33 | if (ft_events[id]) callback(); | ||
34 | |||
35 | #define ft_event0(id, callback) \ | ||
36 | if (ft_events[id]) callback(id); | ||
37 | |||
38 | #define ft_event1(id, callback, param) \ | ||
39 | if (ft_events[id]) callback(id, param); | ||
40 | |||
41 | #define ft_event2(id, callback, param, param2) \ | ||
42 | if (ft_events[id]) callback(id, param, param2); | ||
43 | |||
44 | #define ft_event3(id, callback, p, p2, p3) \ | ||
45 | if (ft_events[id]) callback(id, p, p2, p3); | ||
46 | |||
47 | #endif | ||
48 | |||
49 | #endif | ||
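A hedged user-space sketch (not LITMUS^RT code) of the generic, non-architecture fallback above: an event fires only while its slot in ft_events[] is non-zero.

#include <stdio.h>

#define MAX_EVENTS 1024
static int ft_events[MAX_EVENTS];

#define ft_event1(id, callback, param) \
	do { if (ft_events[id]) callback(id, param); } while (0)

static void on_event(unsigned long id, unsigned long param)
{
	printf("event %lu fired with param %lu\n", id, param);
}

int main(void)
{
	ft_event1(17, on_event, 5UL);	/* disabled: nothing happens */
	ft_events[17] = 1;		/* what ft_enable_event(17) amounts to here */
	ft_event1(17, on_event, 5UL);	/* now the callback runs */
	return 0;
}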
diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h new file mode 100644 index 000000000000..7697b4616699 --- /dev/null +++ b/include/litmus/ftdev.h | |||
@@ -0,0 +1,49 @@ | |||
1 | #ifndef _LITMUS_FTDEV_H_ | ||
2 | #define _LITMUS_FTDEV_H_ | ||
3 | |||
4 | #include <litmus/feather_trace.h> | ||
5 | #include <litmus/feather_buffer.h> | ||
6 | #include <linux/mutex.h> | ||
7 | #include <linux/cdev.h> | ||
8 | |||
9 | #define MAX_FTDEV_MINORS NR_CPUS | ||
10 | |||
11 | #define FTDEV_ENABLE_CMD 0 | ||
12 | #define FTDEV_DISABLE_CMD 1 | ||
13 | |||
14 | struct ftdev; | ||
15 | |||
16 | /* return 0 if buffer can be opened, otherwise -$REASON */ | ||
17 | typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no); | ||
18 | /* return 0 on success, otherwise -$REASON */ | ||
19 | typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no); | ||
20 | typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no); | ||
21 | |||
22 | |||
23 | struct ftdev_event; | ||
24 | |||
25 | struct ftdev_minor { | ||
26 | struct ft_buffer* buf; | ||
27 | unsigned int readers; | ||
28 | struct mutex lock; | ||
29 | /* FIXME: filter for authorized events */ | ||
30 | struct ftdev_event* events; | ||
31 | }; | ||
32 | |||
33 | struct ftdev { | ||
34 | struct cdev cdev; | ||
35 | /* FIXME: don't waste memory, allocate dynamically */ | ||
36 | struct ftdev_minor minor[MAX_FTDEV_MINORS]; | ||
37 | unsigned int minor_cnt; | ||
38 | ftdev_alloc_t alloc; | ||
39 | ftdev_free_t free; | ||
40 | ftdev_can_open_t can_open; | ||
41 | }; | ||
42 | |||
43 | struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size); | ||
44 | void free_ft_buffer(struct ft_buffer* buf); | ||
45 | |||
46 | void ftdev_init(struct ftdev* ftdev, struct module* owner); | ||
47 | int register_ftdev(struct ftdev* ftdev, const char* name, int major); | ||
48 | |||
49 | #endif | ||
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h new file mode 100644 index 000000000000..9bd361ef3943 --- /dev/null +++ b/include/litmus/jobs.h | |||
@@ -0,0 +1,9 @@ | |||
1 | #ifndef __LITMUS_JOBS_H__ | ||
2 | #define __LITMUS_JOBS_H__ | ||
3 | |||
4 | void prepare_for_next_period(struct task_struct *t); | ||
5 | void release_at(struct task_struct *t, lt_t start); | ||
6 | long complete_job(void); | ||
7 | |||
8 | #endif | ||
9 | |||
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h new file mode 100644 index 000000000000..d515d1af1096 --- /dev/null +++ b/include/litmus/litmus.h | |||
@@ -0,0 +1,254 @@ | |||
1 | /* | ||
2 | * Constant definitions related to | ||
3 | * scheduling policy. | ||
4 | */ | ||
5 | |||
6 | #ifndef _LINUX_LITMUS_H_ | ||
7 | #define _LINUX_LITMUS_H_ | ||
8 | |||
9 | #include <linux/jiffies.h> | ||
10 | #include <litmus/sched_trace.h> | ||
11 | |||
12 | extern atomic_t release_master_cpu; | ||
13 | |||
14 | extern atomic_t __log_seq_no; | ||
15 | |||
16 | #define TRACE(fmt, args...) \ | ||
17 | sched_trace_log_message("%d P%d: " fmt, atomic_add_return(1, &__log_seq_no), \ | ||
18 | raw_smp_processor_id(), ## args) | ||
19 | |||
20 | #define TRACE_TASK(t, fmt, args...) \ | ||
21 | TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args) | ||
22 | |||
23 | #define TRACE_CUR(fmt, args...) \ | ||
24 | TRACE_TASK(current, fmt, ## args) | ||
25 | |||
26 | #define TRACE_BUG_ON(cond) \ | ||
27 | do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \ | ||
28 | "called from %p current=%s/%d state=%d " \ | ||
29 | "flags=%x partition=%d cpu=%d rtflags=%d"\ | ||
30 | " job=%u timeslice=%u\n", \ | ||
31 | #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \ | ||
32 | current->pid, current->state, current->flags, \ | ||
33 | get_partition(current), smp_processor_id(), get_rt_flags(current), \ | ||
34 | current->rt_param.job_params.job_no, \ | ||
35 | current->rt.time_slice\ | ||
36 | ); } while(0) | ||
37 | |||
38 | |||
39 | /* in_list - is a given list_head queued on some list? | ||
40 | */ | ||
41 | static inline int in_list(struct list_head* list) | ||
42 | { | ||
43 | return !( /* case 1: deleted */ | ||
44 | (list->next == LIST_POISON1 && | ||
45 | list->prev == LIST_POISON2) | ||
46 | || | ||
47 | /* case 2: initialized */ | ||
48 | (list->next == list && | ||
49 | list->prev == list) | ||
50 | ); | ||
51 | } | ||
52 | |||
53 | #define NO_CPU 0xffffffff | ||
54 | |||
55 | void litmus_fork(struct task_struct *tsk); | ||
56 | void litmus_exec(void); | ||
57 | /* clean up real-time state of a task */ | ||
58 | void exit_litmus(struct task_struct *dead_tsk); | ||
59 | |||
60 | long litmus_admit_task(struct task_struct *tsk); | ||
61 | void litmus_exit_task(struct task_struct *tsk); | ||
62 | |||
63 | #define is_realtime(t) ((t)->policy == SCHED_LITMUS) | ||
64 | #define rt_transition_pending(t) \ | ||
65 | ((t)->rt_param.transition_pending) | ||
66 | |||
67 | #define tsk_rt(t) (&(t)->rt_param) | ||
68 | |||
69 | /* Realtime utility macros */ | ||
70 | #define get_rt_flags(t) (tsk_rt(t)->flags) | ||
71 | #define set_rt_flags(t,f) (tsk_rt(t)->flags=(f)) | ||
72 | #define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost) | ||
73 | #define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) | ||
74 | #define get_rt_period(t) (tsk_rt(t)->task_params.period) | ||
75 | #define get_rt_phase(t) (tsk_rt(t)->task_params.phase) | ||
76 | #define get_partition(t) (tsk_rt(t)->task_params.cpu) | ||
77 | #define get_deadline(t) (tsk_rt(t)->job_params.deadline) | ||
78 | #define get_release(t) (tsk_rt(t)->job_params.release) | ||
79 | #define get_class(t) (tsk_rt(t)->task_params.cls) | ||
80 | |||
81 | inline static int budget_exhausted(struct task_struct* t) | ||
82 | { | ||
83 | return get_exec_time(t) >= get_exec_cost(t); | ||
84 | } | ||
85 | |||
86 | #define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) | ||
87 | |||
88 | |||
89 | #define is_hrt(t) \ | ||
90 | (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) | ||
91 | #define is_srt(t) \ | ||
92 | (tsk_rt(t)->task_params.cls == RT_CLASS_SOFT) | ||
93 | #define is_be(t) \ | ||
94 | (tsk_rt(t)->task_params.cls == RT_CLASS_BEST_EFFORT) | ||
95 | |||
96 | /* Our notion of time within LITMUS: kernel monotonic time. */ | ||
97 | static inline lt_t litmus_clock(void) | ||
98 | { | ||
99 | return ktime_to_ns(ktime_get()); | ||
100 | } | ||
101 | |||
102 | /* A macro to convert from nanoseconds to ktime_t. */ | ||
103 | #define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t) | ||
104 | |||
105 | #define get_domain(t) (tsk_rt(t)->domain) | ||
106 | |||
107 | /* Honor the flag in the preempt_count variable that is set | ||
108 | * when scheduling is in progress. | ||
109 | */ | ||
110 | #define is_running(t) \ | ||
111 | ((t)->state == TASK_RUNNING || \ | ||
112 | task_thread_info(t)->preempt_count & PREEMPT_ACTIVE) | ||
113 | |||
114 | #define is_blocked(t) \ | ||
115 | (!is_running(t)) | ||
116 | #define is_released(t, now) \ | ||
117 | (lt_before_eq(get_release(t), now)) | ||
118 | #define is_tardy(t, now) \ | ||
119 | (lt_before_eq(tsk_rt(t)->job_params.deadline, now)) | ||
120 | |||
121 | /* real-time comparison macros */ | ||
122 | #define earlier_deadline(a, b) (lt_before(\ | ||
123 | (a)->rt_param.job_params.deadline,\ | ||
124 | (b)->rt_param.job_params.deadline)) | ||
125 | #define earlier_release(a, b) (lt_before(\ | ||
126 | (a)->rt_param.job_params.release,\ | ||
127 | (b)->rt_param.job_params.release)) | ||
128 | |||
129 | void preempt_if_preemptable(struct task_struct* t, int on_cpu); | ||
130 | |||
131 | #ifdef CONFIG_SRP | ||
132 | void srp_ceiling_block(void); | ||
133 | #else | ||
134 | #define srp_ceiling_block() /* nothing */ | ||
135 | #endif | ||
136 | |||
137 | #define bheap2task(hn) ((struct task_struct*) hn->value) | ||
138 | |||
139 | #ifdef CONFIG_NP_SECTION | ||
140 | |||
141 | static inline int is_kernel_np(struct task_struct *t) | ||
142 | { | ||
143 | return tsk_rt(t)->kernel_np; | ||
144 | } | ||
145 | |||
146 | static inline int is_user_np(struct task_struct *t) | ||
147 | { | ||
148 | return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->np_flag : 0; | ||
149 | } | ||
150 | |||
151 | static inline void request_exit_np(struct task_struct *t) | ||
152 | { | ||
153 | if (is_user_np(t)) { | ||
154 | /* Set the flag that tells user space to call | ||
155 | * into the kernel at the end of a critical section. */ | ||
156 | if (likely(tsk_rt(t)->ctrl_page)) { | ||
157 | TRACE_TASK(t, "setting delayed_preemption flag\n"); | ||
158 | tsk_rt(t)->ctrl_page->delayed_preemption = 1; | ||
159 | } | ||
160 | } | ||
161 | } | ||
162 | |||
163 | static inline void clear_exit_np(struct task_struct *t) | ||
164 | { | ||
165 | if (likely(tsk_rt(t)->ctrl_page)) | ||
166 | tsk_rt(t)->ctrl_page->delayed_preemption = 0; | ||
167 | } | ||
168 | |||
169 | static inline void make_np(struct task_struct *t) | ||
170 | { | ||
171 | tsk_rt(t)->kernel_np++; | ||
172 | } | ||
173 | |||
174 | /* Caller should check if preemption is necessary when | ||
175 | * the function returns 0. | ||
176 | */ | ||
177 | static inline int take_np(struct task_struct *t) | ||
178 | { | ||
179 | return --tsk_rt(t)->kernel_np; | ||
180 | } | ||
181 | |||
182 | #else | ||
183 | |||
184 | static inline int is_kernel_np(struct task_struct* t) | ||
185 | { | ||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | static inline int is_user_np(struct task_struct* t) | ||
190 | { | ||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | static inline void request_exit_np(struct task_struct *t) | ||
195 | { | ||
196 | /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */ | ||
197 | BUG(); | ||
198 | } | ||
199 | |||
200 | static inline void clear_exit_np(struct task_struct* t) | ||
201 | { | ||
202 | } | ||
203 | |||
204 | #endif | ||
205 | |||
206 | static inline int is_np(struct task_struct *t) | ||
207 | { | ||
208 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
209 | int kernel, user; | ||
210 | kernel = is_kernel_np(t); | ||
211 | user = is_user_np(t); | ||
212 | if (kernel || user) | ||
213 | TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n", | ||
214 | kernel, user); | ||
215 | |||
216 | return kernel || user; | ||
217 | #else | ||
218 | return unlikely(is_kernel_np(t) || is_user_np(t)); | ||
219 | #endif | ||
220 | } | ||
221 | |||
222 | static inline int is_present(struct task_struct* t) | ||
223 | { | ||
224 | return t && tsk_rt(t)->present; | ||
225 | } | ||
226 | |||
227 | |||
228 | /* make the unit explicit */ | ||
229 | typedef unsigned long quanta_t; | ||
230 | |||
231 | enum round { | ||
232 | FLOOR, | ||
233 | CEIL | ||
234 | }; | ||
235 | |||
236 | |||
237 | /* Tick period is used to convert ns-specified execution | ||
238 | * costs and periods into tick-based equivalents. | ||
239 | */ | ||
240 | extern ktime_t tick_period; | ||
241 | |||
242 | static inline quanta_t time2quanta(lt_t time, enum round round) | ||
243 | { | ||
244 | s64 quantum_length = ktime_to_ns(tick_period); | ||
245 | |||
246 | if (do_div(time, quantum_length) && round == CEIL) | ||
247 | time++; | ||
248 | return (quanta_t) time; | ||
249 | } | ||
250 | |||
251 | /* By how much is cpu staggered behind CPU 0? */ | ||
252 | u64 cpu_stagger_offset(int cpu); | ||
253 | |||
254 | #endif | ||
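To make the FLOOR/CEIL rounding in time2quanta() concrete, here is a small user-space analogue (do_div() is kernel-only and modifies its first argument in place); the 1 ms quantum length is an assumption made for the example.

#include <stdio.h>

typedef unsigned long long lt_t;
typedef unsigned long quanta_t;
enum round_demo { DEMO_FLOOR, DEMO_CEIL };

static quanta_t time2quanta_demo(lt_t time_ns, enum round_demo round)
{
	const lt_t quantum_ns = 1000000ULL;	/* assume a 1 ms tick */
	quanta_t q = (quanta_t)(time_ns / quantum_ns);
	if ((time_ns % quantum_ns) && round == DEMO_CEIL)
		q++;
	return q;
}

int main(void)
{
	/* 2.5 ms of budget: FLOOR gives 2 quanta, CEIL gives 3 */
	printf("%lu %lu\n",
	       time2quanta_demo(2500000ULL, DEMO_FLOOR),
	       time2quanta_demo(2500000ULL, DEMO_CEIL));
	return 0;
}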
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h new file mode 100644 index 000000000000..b452be1d2256 --- /dev/null +++ b/include/litmus/rt_domain.h | |||
@@ -0,0 +1,162 @@ | |||
1 | /* CLEANUP: Add comments and make it less messy. | ||
2 | * | ||
3 | */ | ||
4 | |||
5 | #ifndef __UNC_RT_DOMAIN_H__ | ||
6 | #define __UNC_RT_DOMAIN_H__ | ||
7 | |||
8 | #include <litmus/bheap.h> | ||
9 | |||
10 | #define RELEASE_QUEUE_SLOTS 127 /* prime */ | ||
11 | |||
12 | struct _rt_domain; | ||
13 | |||
14 | typedef int (*check_resched_needed_t)(struct _rt_domain *rt); | ||
15 | typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks); | ||
16 | |||
17 | struct release_queue { | ||
18 | /* each slot maintains a list of release heaps sorted | ||
19 | * by release time */ | ||
20 | struct list_head slot[RELEASE_QUEUE_SLOTS]; | ||
21 | }; | ||
22 | |||
23 | typedef struct _rt_domain { | ||
24 | /* runnable rt tasks are in here */ | ||
25 | spinlock_t ready_lock; | ||
26 | struct bheap ready_queue; | ||
27 | |||
28 | /* real-time tasks waiting for release are in here */ | ||
29 | spinlock_t release_lock; | ||
30 | struct release_queue release_queue; | ||
31 | int release_master; | ||
32 | |||
33 | /* for moving tasks to the release queue */ | ||
34 | spinlock_t tobe_lock; | ||
35 | struct list_head tobe_released; | ||
36 | |||
37 | /* how do we check if we need to kick another CPU? */ | ||
38 | check_resched_needed_t check_resched; | ||
39 | |||
40 | /* how do we release jobs? */ | ||
41 | release_jobs_t release_jobs; | ||
42 | |||
43 | /* how are tasks ordered in the ready queue? */ | ||
44 | bheap_prio_t order; | ||
45 | } rt_domain_t; | ||
46 | |||
47 | struct release_heap { | ||
48 | /* list_head for per-time-slot list */ | ||
49 | struct list_head list; | ||
50 | lt_t release_time; | ||
51 | /* all tasks to be released at release_time */ | ||
52 | struct bheap heap; | ||
53 | /* used to trigger the release */ | ||
54 | struct hrtimer timer; | ||
55 | /* used to delegate releases */ | ||
56 | struct hrtimer_start_on_info info; | ||
57 | /* required for the timer callback */ | ||
58 | rt_domain_t* dom; | ||
59 | }; | ||
60 | |||
61 | |||
62 | static inline struct task_struct* __next_ready(rt_domain_t* rt) | ||
63 | { | ||
64 | struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue); | ||
65 | if (hn) | ||
66 | return bheap2task(hn); | ||
67 | else | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
71 | void rt_domain_init(rt_domain_t *rt, bheap_prio_t order, | ||
72 | check_resched_needed_t check, | ||
73 | release_jobs_t release); | ||
74 | |||
75 | void __add_ready(rt_domain_t* rt, struct task_struct *new); | ||
76 | void __merge_ready(rt_domain_t* rt, struct bheap *tasks); | ||
77 | void __add_release(rt_domain_t* rt, struct task_struct *task); | ||
78 | |||
79 | static inline struct task_struct* __take_ready(rt_domain_t* rt) | ||
80 | { | ||
81 | struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue); | ||
82 | if (hn) | ||
83 | return bheap2task(hn); | ||
84 | else | ||
85 | return NULL; | ||
86 | } | ||
87 | |||
88 | static inline struct task_struct* __peek_ready(rt_domain_t* rt) | ||
89 | { | ||
90 | struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue); | ||
91 | if (hn) | ||
92 | return bheap2task(hn); | ||
93 | else | ||
94 | return NULL; | ||
95 | } | ||
96 | |||
97 | static inline int is_queued(struct task_struct *t) | ||
98 | { | ||
99 | BUG_ON(!tsk_rt(t)->heap_node); | ||
100 | return bheap_node_in_heap(tsk_rt(t)->heap_node); | ||
101 | } | ||
102 | |||
103 | static inline void remove(rt_domain_t* rt, struct task_struct *t) | ||
104 | { | ||
105 | bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node); | ||
106 | } | ||
107 | |||
108 | static inline void add_ready(rt_domain_t* rt, struct task_struct *new) | ||
109 | { | ||
110 | unsigned long flags; | ||
111 | /* first we need the write lock for rt_ready_queue */ | ||
112 | spin_lock_irqsave(&rt->ready_lock, flags); | ||
113 | __add_ready(rt, new); | ||
114 | spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
115 | } | ||
116 | |||
117 | static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks) | ||
118 | { | ||
119 | unsigned long flags; | ||
120 | spin_lock_irqsave(&rt->ready_lock, flags); | ||
121 | __merge_ready(rt, tasks); | ||
122 | spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
123 | } | ||
124 | |||
125 | static inline struct task_struct* take_ready(rt_domain_t* rt) | ||
126 | { | ||
127 | unsigned long flags; | ||
128 | struct task_struct* ret; | ||
129 | /* first we need the write lock for rt_ready_queue */ | ||
130 | spin_lock_irqsave(&rt->ready_lock, flags); | ||
131 | ret = __take_ready(rt); | ||
132 | spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
133 | return ret; | ||
134 | } | ||
135 | |||
136 | |||
137 | static inline void add_release(rt_domain_t* rt, struct task_struct *task) | ||
138 | { | ||
139 | unsigned long flags; | ||
141 | /* first we need the lock for the to-be-released queue */ | ||
141 | spin_lock_irqsave(&rt->tobe_lock, flags); | ||
142 | __add_release(rt, task); | ||
143 | spin_unlock_irqrestore(&rt->tobe_lock, flags); | ||
144 | } | ||
145 | |||
146 | static inline int __jobs_pending(rt_domain_t* rt) | ||
147 | { | ||
148 | return !bheap_empty(&rt->ready_queue); | ||
149 | } | ||
150 | |||
151 | static inline int jobs_pending(rt_domain_t* rt) | ||
152 | { | ||
153 | unsigned long flags; | ||
154 | int ret; | ||
155 | /* first we need the write lock for rt_ready_queue */ | ||
156 | spin_lock_irqsave(&rt->ready_lock, flags); | ||
157 | ret = !bheap_empty(&rt->ready_queue); | ||
158 | spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
159 | return ret; | ||
160 | } | ||
161 | |||
162 | #endif | ||
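The naming convention above is that the double-underscore helpers expect the caller to already hold the matching lock, while the plain wrappers take it themselves. A hypothetical plugin-side sketch, assuming bheap_prio_t compares two bheap_node pointers and using placeholder resched/release callbacks:

/* Hypothetical sketch; demo_order() would implement e.g. an EDF
 * comparison, and the other callbacks are placeholders. */
static int demo_order(struct bheap_node *a, struct bheap_node *b);
static int demo_check_resched(rt_domain_t *rt);
static void demo_release_jobs(rt_domain_t *rt, struct bheap *tasks);

static rt_domain_t demo_domain;

static void demo_domain_setup(void)
{
	rt_domain_init(&demo_domain, demo_order,
	               demo_check_resched, demo_release_jobs);
}

static struct task_struct *demo_pick_next(void)
{
	unsigned long flags;
	struct task_struct *next;

	spin_lock_irqsave(&demo_domain.ready_lock, flags);
	/* the ready_lock is held here, so use the __ variant */
	next = __take_ready(&demo_domain);
	spin_unlock_irqrestore(&demo_domain.ready_lock, flags);
	return next;
}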
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h new file mode 100644 index 000000000000..a7a183f34a80 --- /dev/null +++ b/include/litmus/rt_param.h | |||
@@ -0,0 +1,196 @@ | |||
1 | /* | ||
2 | * Definition of the scheduler plugin interface. | ||
3 | * | ||
4 | */ | ||
5 | #ifndef _LINUX_RT_PARAM_H_ | ||
6 | #define _LINUX_RT_PARAM_H_ | ||
7 | |||
8 | /* Litmus time type. */ | ||
9 | typedef unsigned long long lt_t; | ||
10 | |||
11 | static inline int lt_after(lt_t a, lt_t b) | ||
12 | { | ||
13 | return ((long long) b) - ((long long) a) < 0; | ||
14 | } | ||
15 | #define lt_before(a, b) lt_after(b, a) | ||
16 | |||
17 | static inline int lt_after_eq(lt_t a, lt_t b) | ||
18 | { | ||
19 | return ((long long) a) - ((long long) b) >= 0; | ||
20 | } | ||
21 | #define lt_before_eq(a, b) lt_after_eq(b, a) | ||
22 | |||
23 | /* different types of clients */ | ||
24 | typedef enum { | ||
25 | RT_CLASS_HARD, | ||
26 | RT_CLASS_SOFT, | ||
27 | RT_CLASS_BEST_EFFORT | ||
28 | } task_class_t; | ||
29 | |||
30 | typedef enum { | ||
31 | NO_ENFORCEMENT, /* job may overrun unhindered */ | ||
32 | QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */ | ||
33 | PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */ | ||
34 | } budget_policy_t; | ||
35 | |||
36 | struct rt_task { | ||
37 | lt_t exec_cost; | ||
38 | lt_t period; | ||
39 | lt_t phase; | ||
40 | unsigned int cpu; | ||
41 | task_class_t cls; | ||
42 | budget_policy_t budget_policy; /* ignored by pfair */ | ||
43 | }; | ||
44 | |||
45 | /* The definition of the data that is shared between the kernel and real-time | ||
46 | * tasks via a shared page (see litmus/ctrldev.c). | ||
47 | * | ||
48 | * WARNING: User space can write to this, so don't trust | ||
49 | * the correctness of the fields! | ||
50 | * | ||
51 | * This serves two purposes: to enable efficient signaling | ||
52 | * of non-preemptive sections (user->kernel) and | ||
53 | * delayed preemptions (kernel->user), and to export | ||
54 | * some real-time relevant statistics such as preemption and | ||
55 | * migration data to user space. We can't use a device to export | ||
56 | * statistics because we want to avoid system call overhead when | ||
57 | * determining preemption/migration overheads. | ||
58 | */ | ||
59 | struct control_page { | ||
60 | /* Is the task currently in a non-preemptive section? */ | ||
61 | int np_flag; | ||
62 | /* Should the task call into the kernel when it leaves | ||
63 | * its non-preemptive section? */ | ||
64 | int delayed_preemption; | ||
65 | |||
66 | /* to be extended */ | ||
67 | }; | ||
68 | |||
69 | /* don't export internal data structures to user space (liblitmus) */ | ||
70 | #ifdef __KERNEL__ | ||
71 | |||
72 | struct _rt_domain; | ||
73 | struct bheap_node; | ||
74 | struct release_heap; | ||
75 | |||
76 | struct rt_job { | ||
77 | /* Time instant the job was or will be released. */ | ||
78 | lt_t release; | ||
79 | /* What is the current deadline? */ | ||
80 | lt_t deadline; | ||
81 | |||
82 | /* How much service has this job received so far? */ | ||
83 | lt_t exec_time; | ||
84 | |||
85 | /* Which job is this? This is used to let user space | ||
86 | * specify which job to wait for, which is important if jobs | ||
87 | * overrun. If we just call sys_sleep_next_period() then we | ||
88 | * will unintentionally miss jobs after an overrun. | ||
89 | * | ||
90 | * Increase this sequence number when a job is released. | ||
91 | */ | ||
92 | unsigned int job_no; | ||
93 | }; | ||
94 | |||
95 | struct pfair_param; | ||
96 | |||
97 | /* RT task parameters for scheduling extensions | ||
98 | * These parameters are inherited during clone and therefore must | ||
99 | * be explicitly set up before the task set is launched. | ||
100 | */ | ||
101 | struct rt_param { | ||
102 | /* is the task sleeping? */ | ||
103 | unsigned int flags:8; | ||
104 | |||
105 | /* do we need to check for srp blocking? */ | ||
106 | unsigned int srp_non_recurse:1; | ||
107 | |||
108 | /* is the task present? (true if it can be scheduled) */ | ||
109 | unsigned int present:1; | ||
110 | |||
111 | /* user controlled parameters */ | ||
112 | struct rt_task task_params; | ||
113 | |||
114 | /* timing parameters */ | ||
115 | struct rt_job job_params; | ||
116 | |||
117 | /* task representing the current "inherited" task | ||
118 | * priority, assigned by inherit_priority and | ||
119 | * return_priority in the scheduler plugins. | ||
120 | * Could point to self if PI does not result in | ||
121 | * an increased task priority. | ||
122 | */ | ||
123 | struct task_struct* inh_task; | ||
124 | |||
125 | #ifdef CONFIG_NP_SECTION | ||
126 | /* For the FMLP under PSN-EDF, it is required to make the task | ||
127 | * non-preemptive from kernel space. In order not to interfere with | ||
128 | * user space, this counter indicates the kernel space np setting. | ||
129 | * kernel_np > 0 => task is non-preemptive | ||
130 | */ | ||
131 | unsigned int kernel_np; | ||
132 | #endif | ||
133 | |||
134 | /* This field can be used by plugins to store where the task | ||
135 | * is currently scheduled. It is the responsibility of the | ||
136 | * plugin to avoid race conditions. | ||
137 | * | ||
139 | * This is used by GSN-EDF and PFAIR. | ||
139 | */ | ||
140 | volatile int scheduled_on; | ||
141 | |||
142 | /* Is the stack of the task currently in use? This is updated by | ||
143 | * the LITMUS core. | ||
144 | * | ||
145 | * Be careful to avoid deadlocks! | ||
146 | */ | ||
147 | volatile int stack_in_use; | ||
148 | |||
149 | /* This field can be used by plugins to store where the task | ||
150 | * is currently linked. It is the responsibility of the plugin | ||
151 | * to avoid race conditions. | ||
152 | * | ||
153 | * Used by GSN-EDF. | ||
154 | */ | ||
155 | volatile int linked_on; | ||
156 | |||
157 | /* PFAIR/PD^2 state. Allocated on demand. */ | ||
158 | struct pfair_param* pfair; | ||
159 | |||
160 | /* Fields saved before BE->RT transition. | ||
161 | */ | ||
162 | int old_policy; | ||
163 | int old_prio; | ||
164 | |||
165 | /* ready queue for this task */ | ||
166 | struct _rt_domain* domain; | ||
167 | |||
168 | /* heap element for this task | ||
169 | * | ||
170 | * Warning: Don't statically allocate this node. The heap | ||
171 | * implementation swaps these between tasks, thus after | ||
172 | * dequeuing from a heap you may end up with a different node | ||
173 | * than the one you had when enqueuing the task. For the same | ||
174 | * reason, don't obtain and store references to this node | ||
175 | * other than this pointer (which is updated by the heap | ||
176 | * implementation). | ||
177 | */ | ||
178 | struct bheap_node* heap_node; | ||
179 | struct release_heap* rel_heap; | ||
180 | |||
181 | /* Used by rt_domain to queue task in release list. | ||
182 | */ | ||
183 | struct list_head list; | ||
184 | |||
185 | /* Pointer to the page shared between userspace and kernel. */ | ||
186 | struct control_page * ctrl_page; | ||
187 | }; | ||
188 | |||
189 | /* Possible RT flags */ | ||
190 | #define RT_F_RUNNING 0x00000000 | ||
191 | #define RT_F_SLEEP 0x00000001 | ||
192 | #define RT_F_EXIT_SEM 0x00000008 | ||
193 | |||
194 | #endif | ||
195 | |||
196 | #endif | ||
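For orientation, user space would typically fill a struct rt_task like this before admitting a task; all lt_t values are in nanoseconds. The set_rt_task_param() call mentioned at the end is assumed to be issued through a wrapper library such as liblitmus rather than shown here.

#include <string.h>

/* In user space these definitions come from headers mirroring
 * litmus/rt_param.h; gettid()/set_rt_task_param() are assumed to be
 * provided by a library such as liblitmus. */
static struct rt_task params;

static void demo_setup_params(void)
{
	memset(&params, 0, sizeof(params));
	params.exec_cost     = 10  * 1000000ULL;  /* 10 ms worst-case budget */
	params.period        = 100 * 1000000ULL;  /* released every 100 ms   */
	params.phase         = 0;                 /* no release offset       */
	params.cpu           = 0;                 /* partition, if any       */
	params.cls           = RT_CLASS_SOFT;
	params.budget_policy = NO_ENFORCEMENT;
	/* then: set_rt_task_param(gettid(), &params); */
}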
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h new file mode 100644 index 000000000000..9c1c9f28ba79 --- /dev/null +++ b/include/litmus/sched_plugin.h | |||
@@ -0,0 +1,162 @@ | |||
1 | /* | ||
2 | * Definition of the scheduler plugin interface. | ||
3 | * | ||
4 | */ | ||
5 | #ifndef _LINUX_SCHED_PLUGIN_H_ | ||
6 | #define _LINUX_SCHED_PLUGIN_H_ | ||
7 | |||
8 | #include <linux/sched.h> | ||
9 | |||
10 | /* struct for semaphore with priority inheritance */ | ||
11 | struct pi_semaphore { | ||
12 | atomic_t count; | ||
13 | int sleepers; | ||
14 | wait_queue_head_t wait; | ||
15 | struct { | ||
16 | /* highest-prio holder/waiter */ | ||
17 | struct task_struct *task; | ||
18 | struct task_struct* cpu_task[NR_CPUS]; | ||
19 | } hp; | ||
20 | /* current lock holder */ | ||
21 | struct task_struct *holder; | ||
22 | }; | ||
23 | |||
24 | /************************ setup/tear down ********************/ | ||
25 | |||
26 | typedef long (*activate_plugin_t) (void); | ||
27 | typedef long (*deactivate_plugin_t) (void); | ||
28 | |||
29 | |||
30 | |||
31 | /********************* scheduler invocation ******************/ | ||
32 | |||
33 | /* Plugin-specific realtime tick handler */ | ||
34 | typedef void (*scheduler_tick_t) (struct task_struct *cur); | ||
35 | /* Plugin-specific scheduling decision function */ | ||
36 | typedef struct task_struct* (*schedule_t)(struct task_struct * prev); | ||
37 | /* Clean up after the task switch has occurred. | ||
38 | * This function is called after every (even non-rt) task switch. | ||
39 | */ | ||
40 | typedef void (*finish_switch_t)(struct task_struct *prev); | ||
41 | |||
42 | |||
43 | /********************* task state changes ********************/ | ||
44 | |||
45 | /* Called to setup a new real-time task. | ||
46 | * Release the first job, enqueue, etc. | ||
47 | * Task may already be running. | ||
48 | */ | ||
49 | typedef void (*task_new_t) (struct task_struct *task, | ||
50 | int on_rq, | ||
51 | int running); | ||
52 | |||
53 | /* Called to re-introduce a task after blocking. | ||
54 | * Can potentially be called multiple times. | ||
55 | */ | ||
56 | typedef void (*task_wake_up_t) (struct task_struct *task); | ||
57 | /* Called to notify the plugin of a blocking real-time task. | ||
58 | * It will only be called for real-time tasks and before schedule() is called. */ | ||
59 | typedef void (*task_block_t) (struct task_struct *task); | ||
60 | /* Called when a real-time task exits or changes to a different scheduling | ||
61 | * class. | ||
62 | * Free any allocated resources | ||
63 | */ | ||
64 | typedef void (*task_exit_t) (struct task_struct *); | ||
65 | |||
66 | /* Called when the new_owner is released from the wait queue; | ||
67 | * it should now inherit the priority from sem, _before_ it gets re-added | ||
68 | * to any queue. | ||
69 | */ | ||
70 | typedef long (*inherit_priority_t) (struct pi_semaphore *sem, | ||
71 | struct task_struct *new_owner); | ||
72 | |||
73 | /* Called when the current task releases a semaphore from which it | ||
74 | * might have inherited a priority. | ||
75 | */ | ||
76 | typedef long (*return_priority_t) (struct pi_semaphore *sem); | ||
77 | |||
78 | /* Called when a task tries to acquire a semaphore and fails. Check if its | ||
79 | * priority is higher than that of the current holder. | ||
80 | */ | ||
81 | typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t); | ||
82 | |||
83 | |||
84 | |||
85 | |||
86 | /********************* sys call backends ********************/ | ||
87 | /* This function causes the caller to sleep until the next release */ | ||
88 | typedef long (*complete_job_t) (void); | ||
89 | |||
90 | typedef long (*admit_task_t)(struct task_struct* tsk); | ||
91 | |||
92 | typedef void (*release_at_t)(struct task_struct *t, lt_t start); | ||
93 | |||
94 | struct sched_plugin { | ||
95 | struct list_head list; | ||
96 | /* basic info */ | ||
97 | char *plugin_name; | ||
98 | |||
99 | /* setup */ | ||
100 | activate_plugin_t activate_plugin; | ||
101 | deactivate_plugin_t deactivate_plugin; | ||
102 | |||
103 | #ifdef CONFIG_SRP | ||
104 | unsigned int srp_active; | ||
105 | #endif | ||
106 | |||
107 | /* scheduler invocation */ | ||
108 | scheduler_tick_t tick; | ||
109 | schedule_t schedule; | ||
110 | finish_switch_t finish_switch; | ||
111 | |||
112 | /* syscall backend */ | ||
113 | complete_job_t complete_job; | ||
114 | release_at_t release_at; | ||
115 | |||
116 | /* task state changes */ | ||
117 | admit_task_t admit_task; | ||
118 | |||
119 | task_new_t task_new; | ||
120 | task_wake_up_t task_wake_up; | ||
121 | task_block_t task_block; | ||
122 | task_exit_t task_exit; | ||
123 | |||
124 | #ifdef CONFIG_FMLP | ||
125 | /* priority inheritance */ | ||
126 | unsigned int fmlp_active; | ||
127 | inherit_priority_t inherit_priority; | ||
128 | return_priority_t return_priority; | ||
129 | pi_block_t pi_block; | ||
130 | #endif | ||
131 | } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); | ||
132 | |||
133 | |||
134 | extern struct sched_plugin *litmus; | ||
135 | |||
136 | /* cluster size: cache_index = 2 clusters around L2, cache_index = 3 around L3 */ | ||
137 | extern int cluster_cache_index; | ||
138 | |||
139 | int register_sched_plugin(struct sched_plugin* plugin); | ||
140 | struct sched_plugin* find_sched_plugin(const char* name); | ||
141 | int print_sched_plugins(char* buf, int max); | ||
142 | |||
143 | static inline int srp_active(void) | ||
144 | { | ||
145 | #ifdef CONFIG_SRP | ||
146 | return litmus->srp_active; | ||
147 | #else | ||
148 | return 0; | ||
149 | #endif | ||
150 | } | ||
151 | static inline int fmlp_active(void) | ||
152 | { | ||
153 | #ifdef CONFIG_FMLP | ||
154 | return litmus->fmlp_active; | ||
155 | #else | ||
156 | return 0; | ||
157 | #endif | ||
158 | } | ||
159 | |||
160 | extern struct sched_plugin linux_sched_plugin; | ||
161 | |||
162 | #endif | ||
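A bare-bones plugin skeleton against this interface might look as follows. The scheduling logic is omitted, only a subset of callbacks is filled in, and the assumption that returning NULL from the schedule callback lets the stock Linux classes run is illustrative rather than taken from this patch.

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	/* no real-time work to do; NULL is assumed to defer to Linux */
	return NULL;
}

static void demo_tick(struct task_struct *cur)          { }
static void demo_finish_switch(struct task_struct *p)   { }
static long demo_admit_task(struct task_struct *t)      { return 0; }

static struct sched_plugin demo_plugin = {
	.plugin_name   = "DEMO",
	.schedule      = demo_schedule,
	.tick          = demo_tick,
	.finish_switch = demo_finish_switch,
	.admit_task    = demo_admit_task,
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}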
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h new file mode 100644 index 000000000000..e1b0c9712b5f --- /dev/null +++ b/include/litmus/sched_trace.h | |||
@@ -0,0 +1,192 @@ | |||
1 | /* | ||
2 | * sched_trace.h -- record scheduler events to a byte stream for offline analysis. | ||
3 | */ | ||
4 | #ifndef _LINUX_SCHED_TRACE_H_ | ||
5 | #define _LINUX_SCHED_TRACE_H_ | ||
6 | |||
7 | /* all times in nanoseconds */ | ||
8 | |||
9 | struct st_trace_header { | ||
10 | u8 type; /* Of what type is this record? */ | ||
11 | u8 cpu; /* On which CPU was it recorded? */ | ||
12 | u16 pid; /* PID of the task. */ | ||
13 | u32 job; /* The job sequence number. */ | ||
14 | }; | ||
15 | |||
16 | #define ST_NAME_LEN 16 | ||
17 | struct st_name_data { | ||
18 | char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ | ||
19 | }; | ||
20 | |||
21 | struct st_param_data { /* regular params */ | ||
22 | u32 wcet; | ||
23 | u32 period; | ||
24 | u32 phase; | ||
25 | u8 partition; | ||
26 | u8 __unused[3]; | ||
27 | }; | ||
28 | |||
29 | struct st_release_data { /* A job was/is going to be released. */ | ||
30 | u64 release; /* What's the release time? */ | ||
31 | u64 deadline; /* By when must it finish? */ | ||
32 | }; | ||
33 | |||
34 | struct st_assigned_data { /* A job was assigned to a CPU. */ | ||
35 | u64 when; | ||
36 | u8 target; /* Where should it execute? */ | ||
37 | u8 __unused[3]; | ||
38 | }; | ||
39 | |||
40 | struct st_switch_to_data { /* A process was switched to on a given CPU. */ | ||
41 | u64 when; /* When did this occur? */ | ||
42 | u32 exec_time; /* Time the current job has executed. */ | ||
43 | |||
44 | }; | ||
45 | |||
46 | struct st_switch_away_data { /* A process was switched away from on a given CPU. */ | ||
47 | u64 when; | ||
48 | u64 exec_time; | ||
49 | }; | ||
50 | |||
51 | struct st_completion_data { /* A job completed. */ | ||
52 | u64 when; | ||
53 | u8 forced:1; /* Set to 1 if job overran and kernel advanced to the | ||
54 | * next task automatically; set to 0 otherwise. | ||
55 | */ | ||
56 | u8 __uflags:7; | ||
57 | u8 __unused[3]; | ||
58 | }; | ||
59 | |||
60 | struct st_block_data { /* A task blocks. */ | ||
61 | u64 when; | ||
62 | u64 __unused; | ||
63 | }; | ||
64 | |||
65 | struct st_resume_data { /* A task resumes. */ | ||
66 | u64 when; | ||
67 | u64 __unused; | ||
68 | }; | ||
69 | |||
70 | struct st_sys_release_data { | ||
71 | u64 when; | ||
72 | u64 release; | ||
73 | }; | ||
74 | |||
75 | #define DATA(x) struct st_ ## x ## _data x; | ||
76 | |||
77 | typedef enum { | ||
78 | ST_NAME = 1, /* Start at one, so that we can spot | ||
79 | * uninitialized records. */ | ||
80 | ST_PARAM, | ||
81 | ST_RELEASE, | ||
82 | ST_ASSIGNED, | ||
83 | ST_SWITCH_TO, | ||
84 | ST_SWITCH_AWAY, | ||
85 | ST_COMPLETION, | ||
86 | ST_BLOCK, | ||
87 | ST_RESUME, | ||
88 | ST_SYS_RELEASE, | ||
89 | } st_event_record_type_t; | ||
90 | |||
91 | struct st_event_record { | ||
92 | struct st_trace_header hdr; | ||
93 | union { | ||
94 | u64 raw[2]; | ||
95 | |||
96 | DATA(name); | ||
97 | DATA(param); | ||
98 | DATA(release); | ||
99 | DATA(assigned); | ||
100 | DATA(switch_to); | ||
101 | DATA(switch_away); | ||
102 | DATA(completion); | ||
103 | DATA(block); | ||
104 | DATA(resume); | ||
105 | DATA(sys_release); | ||
106 | |||
107 | } data; | ||
108 | }; | ||
109 | |||
110 | #undef DATA | ||
111 | |||
112 | #ifdef __KERNEL__ | ||
113 | |||
114 | #include <linux/sched.h> | ||
115 | #include <litmus/feather_trace.h> | ||
116 | |||
117 | #ifdef CONFIG_SCHED_TASK_TRACE | ||
118 | |||
119 | #define SCHED_TRACE(id, callback, task) \ | ||
120 | ft_event1(id, callback, task) | ||
121 | #define SCHED_TRACE2(id, callback, task, xtra) \ | ||
122 | ft_event2(id, callback, task, xtra) | ||
123 | |||
124 | /* provide prototypes; needed on sparc64 */ | ||
125 | #ifndef NO_TASK_TRACE_DECLS | ||
126 | feather_callback void do_sched_trace_task_name(unsigned long id, | ||
127 | struct task_struct* task); | ||
128 | feather_callback void do_sched_trace_task_param(unsigned long id, | ||
129 | struct task_struct* task); | ||
130 | feather_callback void do_sched_trace_task_release(unsigned long id, | ||
131 | struct task_struct* task); | ||
132 | feather_callback void do_sched_trace_task_switch_to(unsigned long id, | ||
133 | struct task_struct* task); | ||
134 | feather_callback void do_sched_trace_task_switch_away(unsigned long id, | ||
135 | struct task_struct* task); | ||
136 | feather_callback void do_sched_trace_task_completion(unsigned long id, | ||
137 | struct task_struct* task, | ||
138 | unsigned long forced); | ||
139 | feather_callback void do_sched_trace_task_block(unsigned long id, | ||
140 | struct task_struct* task); | ||
141 | feather_callback void do_sched_trace_task_resume(unsigned long id, | ||
142 | struct task_struct* task); | ||
143 | feather_callback void do_sched_trace_sys_release(unsigned long id, | ||
144 | lt_t* start); | ||
145 | #endif | ||
146 | |||
147 | #else | ||
148 | |||
149 | #define SCHED_TRACE(id, callback, task) /* no tracing */ | ||
150 | #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ | ||
151 | |||
152 | #endif | ||
153 | |||
154 | |||
155 | #define SCHED_TRACE_BASE_ID 500 | ||
156 | |||
157 | |||
158 | #define sched_trace_task_name(t) \ | ||
159 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t) | ||
160 | #define sched_trace_task_param(t) \ | ||
161 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t) | ||
162 | #define sched_trace_task_release(t) \ | ||
163 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t) | ||
164 | #define sched_trace_task_switch_to(t) \ | ||
165 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t) | ||
166 | #define sched_trace_task_switch_away(t) \ | ||
167 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t) | ||
168 | #define sched_trace_task_completion(t, forced) \ | ||
169 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \ | ||
170 | (unsigned long) forced) | ||
171 | #define sched_trace_task_block(t) \ | ||
172 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, do_sched_trace_task_block, t) | ||
173 | #define sched_trace_task_resume(t) \ | ||
174 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t) | ||
175 | /* when is a pointer, it does not need an explicit cast to unsigned long */ | ||
176 | #define sched_trace_sys_release(when) \ | ||
177 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 9, do_sched_trace_sys_release, when) | ||
178 | |||
179 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ | ||
180 | |||
181 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
182 | void sched_trace_log_message(const char* fmt, ...); | ||
183 | void dump_trace_buffer(int max); | ||
184 | #else | ||
185 | |||
186 | #define sched_trace_log_message(fmt, ...) | ||
187 | |||
188 | #endif | ||
189 | |||
190 | #endif /* __KERNEL__ */ | ||
191 | |||
192 | #endif | ||
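To show where these hooks typically sit, here is a hypothetical plugin-side completion path; job_completion() is a made-up helper, while prepare_for_next_period() is the declaration from litmus/jobs.h above.

/* job_completion() is hypothetical; the trace hooks record the end of the
 * current job and the release parameters of the next one. */
static void job_completion(struct task_struct *t, int forced)
{
	sched_trace_task_completion(t, forced);
	prepare_for_next_period(t);	/* from litmus/jobs.h */
	sched_trace_task_release(t);
}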
diff --git a/include/litmus/trace.h b/include/litmus/trace.h new file mode 100644 index 000000000000..b32c71180774 --- /dev/null +++ b/include/litmus/trace.h | |||
@@ -0,0 +1,113 @@ | |||
1 | #ifndef _SYS_TRACE_H_ | ||
2 | #define _SYS_TRACE_H_ | ||
3 | |||
4 | #ifdef CONFIG_SCHED_OVERHEAD_TRACE | ||
5 | |||
6 | #include <litmus/feather_trace.h> | ||
7 | #include <litmus/feather_buffer.h> | ||
8 | |||
9 | |||
10 | /*********************** TIMESTAMPS ************************/ | ||
11 | |||
12 | enum task_type_marker { | ||
13 | TSK_BE, | ||
14 | TSK_RT, | ||
15 | TSK_UNKNOWN | ||
16 | }; | ||
17 | |||
18 | struct timestamp { | ||
19 | uint64_t timestamp; | ||
20 | uint32_t seq_no; | ||
21 | uint8_t cpu; | ||
22 | uint8_t event; | ||
23 | uint8_t task_type; | ||
24 | }; | ||
25 | |||
26 | /* tracing callbacks */ | ||
27 | feather_callback void save_timestamp(unsigned long event); | ||
28 | feather_callback void save_timestamp_def(unsigned long event, unsigned long type); | ||
29 | feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr); | ||
30 | feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu); | ||
31 | |||
32 | |||
33 | #define TIMESTAMP(id) ft_event0(id, save_timestamp) | ||
34 | |||
35 | #define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def) | ||
36 | |||
37 | #define TTIMESTAMP(id, task) \ | ||
38 | ft_event1(id, save_timestamp_task, (unsigned long) task) | ||
39 | |||
40 | #define CTIMESTAMP(id, cpu) \ | ||
41 | ft_event1(id, save_timestamp_cpu, (unsigned long) cpu) | ||
42 | |||
43 | #else /* !CONFIG_SCHED_OVERHEAD_TRACE */ | ||
44 | |||
45 | #define TIMESTAMP(id) /* no tracing */ | ||
46 | |||
47 | #define DTIMESTAMP(id, def) /* no tracing */ | ||
48 | |||
49 | #define TTIMESTAMP(id, task) /* no tracing */ | ||
50 | |||
51 | #define CTIMESTAMP(id, cpu) /* no tracing */ | ||
52 | |||
53 | #endif | ||
54 | |||
55 | |||
56 | /* Convention for timestamps | ||
57 | * ========================= | ||
58 | * | ||
59 | * In order to process the trace files with a common tool, we use the following | ||
60 | * convention to measure execution times: The end time id of a code segment is | ||
61 | * always the next number after the start time event id. | ||
62 | */ | ||
63 | |||
64 | #define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only | ||
65 | * care | ||
66 | * about | ||
67 | * next */ | ||
68 | #define TS_SCHED_END(t) TTIMESTAMP(101, t) | ||
69 | #define TS_SCHED2_START(t) TTIMESTAMP(102, t) | ||
70 | #define TS_SCHED2_END(t) TTIMESTAMP(103, t) | ||
71 | |||
72 | #define TS_CXS_START(t) TTIMESTAMP(104, t) | ||
73 | #define TS_CXS_END(t) TTIMESTAMP(105, t) | ||
74 | |||
75 | #define TS_RELEASE_START DTIMESTAMP(106, TSK_RT) | ||
76 | #define TS_RELEASE_END DTIMESTAMP(107, TSK_RT) | ||
77 | |||
78 | #define TS_TICK_START(t) TTIMESTAMP(110, t) | ||
79 | #define TS_TICK_END(t) TTIMESTAMP(111, t) | ||
80 | |||
81 | |||
82 | #define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ | ||
83 | #define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ | ||
84 | |||
85 | #define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */ | ||
86 | #define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */ | ||
87 | |||
88 | #define TS_ENTER_NP_START TIMESTAMP(140) | ||
89 | #define TS_ENTER_NP_END TIMESTAMP(141) | ||
90 | |||
91 | #define TS_EXIT_NP_START TIMESTAMP(150) | ||
92 | #define TS_EXIT_NP_END TIMESTAMP(151) | ||
93 | |||
94 | #define TS_SRP_UP_START TIMESTAMP(160) | ||
95 | #define TS_SRP_UP_END TIMESTAMP(161) | ||
96 | #define TS_SRP_DOWN_START TIMESTAMP(162) | ||
97 | #define TS_SRP_DOWN_END TIMESTAMP(163) | ||
98 | |||
99 | #define TS_PI_UP_START TIMESTAMP(170) | ||
100 | #define TS_PI_UP_END TIMESTAMP(171) | ||
101 | #define TS_PI_DOWN_START TIMESTAMP(172) | ||
102 | #define TS_PI_DOWN_END TIMESTAMP(173) | ||
103 | |||
104 | #define TS_FIFO_UP_START TIMESTAMP(180) | ||
105 | #define TS_FIFO_UP_END TIMESTAMP(181) | ||
106 | #define TS_FIFO_DOWN_START TIMESTAMP(182) | ||
107 | #define TS_FIFO_DOWN_END TIMESTAMP(183) | ||
108 | |||
109 | #define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) | ||
110 | #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) | ||
111 | |||
112 | |||
113 | #endif /* !_SYS_TRACE_H_ */ | ||
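Following the pairing convention described above (end id = start id + 1), a new overhead measurement would be added like this; the 200/201 ids and the code being measured are made up for illustration.

/* do_something_expensive() stands in for the code being timed. */
static void do_something_expensive(void);

#define TS_DEMO_START	TIMESTAMP(200)
#define TS_DEMO_END	TIMESTAMP(201)	/* always start id + 1 */

static void demo_measured_section(void)
{
	TS_DEMO_START;
	do_something_expensive();
	TS_DEMO_END;
}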
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h new file mode 100644 index 000000000000..dbddc6523f8e --- /dev/null +++ b/include/litmus/unistd_32.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * included from arch/x86/include/asm/unistd_32.h | ||
3 | * | ||
4 | * LITMUS^RT syscalls with "relative" numbers | ||
5 | */ | ||
6 | #define __LSC(x) (__NR_LITMUS + x) | ||
7 | |||
8 | #define __NR_set_rt_task_param __LSC(0) | ||
9 | #define __NR_get_rt_task_param __LSC(1) | ||
10 | #define __NR_complete_job __LSC(2) | ||
11 | #define __NR_od_open __LSC(3) | ||
12 | #define __NR_od_close __LSC(4) | ||
13 | #define __NR_fmlp_down __LSC(5) | ||
14 | #define __NR_fmlp_up __LSC(6) | ||
15 | #define __NR_srp_down __LSC(7) | ||
16 | #define __NR_srp_up __LSC(8) | ||
17 | #define __NR_query_job_no __LSC(9) | ||
18 | #define __NR_wait_for_job_release __LSC(10) | ||
19 | #define __NR_wait_for_ts_release __LSC(11) | ||
20 | #define __NR_release_ts __LSC(12) | ||
21 | #define __NR_null_call __LSC(13) | ||
22 | |||
23 | #define NR_litmus_syscalls 14 | ||
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h new file mode 100644 index 000000000000..f0618e75348d --- /dev/null +++ b/include/litmus/unistd_64.h | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * included from arch/x86/include/asm/unistd_64.h | ||
3 | * | ||
4 | * LITMUS^RT syscalls with "relative" numbers | ||
5 | */ | ||
6 | #define __LSC(x) (__NR_LITMUS + x) | ||
7 | |||
8 | #define __NR_set_rt_task_param __LSC(0) | ||
9 | __SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param) | ||
10 | #define __NR_get_rt_task_param __LSC(1) | ||
11 | __SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param) | ||
12 | #define __NR_complete_job __LSC(2) | ||
13 | __SYSCALL(__NR_complete_job, sys_complete_job) | ||
14 | #define __NR_od_open __LSC(3) | ||
15 | __SYSCALL(__NR_od_open, sys_od_open) | ||
16 | #define __NR_od_close __LSC(4) | ||
17 | __SYSCALL(__NR_od_close, sys_od_close) | ||
18 | #define __NR_fmlp_down __LSC(5) | ||
19 | __SYSCALL(__NR_fmlp_down, sys_fmlp_down) | ||
20 | #define __NR_fmlp_up __LSC(6) | ||
21 | __SYSCALL(__NR_fmlp_up, sys_fmlp_up) | ||
22 | #define __NR_srp_down __LSC(7) | ||
23 | __SYSCALL(__NR_srp_down, sys_srp_down) | ||
24 | #define __NR_srp_up __LSC(8) | ||
25 | __SYSCALL(__NR_srp_up, sys_srp_up) | ||
26 | #define __NR_query_job_no __LSC(9) | ||
27 | __SYSCALL(__NR_query_job_no, sys_query_job_no) | ||
28 | #define __NR_wait_for_job_release __LSC(10) | ||
29 | __SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release) | ||
30 | #define __NR_wait_for_ts_release __LSC(11) | ||
31 | __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) | ||
32 | #define __NR_release_ts __LSC(12) | ||
33 | __SYSCALL(__NR_release_ts, sys_release_ts) | ||
34 | #define __NR_null_call __LSC(13) | ||
35 | __SYSCALL(__NR_null_call, sys_null_call) | ||
36 | |||
37 | #define NR_litmus_syscalls 14 | ||
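From user space, the relative numbering resolves against __NR_LITMUS from the arch unistd headers. A minimal raw-syscall wrapper for complete_job() might look like the sketch below, which is roughly what liblitmus wraps for its users; __NR_LITMUS itself is not reproduced here and is assumed to be visible via the installed kernel headers.

#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_complete_job
#define __NR_complete_job (__NR_LITMUS + 2)	/* __LSC(2), as above */
#endif

/* block until the next job release (job/period boundary) */
static inline long demo_complete_job(void)
{
	return syscall(__NR_complete_job);
}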
diff --git a/kernel/exit.c b/kernel/exit.c index 7f2683a10ac4..256ce8c2ebc8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -57,6 +57,8 @@ | |||
57 | #include <asm/mmu_context.h> | 57 | #include <asm/mmu_context.h> |
58 | #include "cred-internals.h" | 58 | #include "cred-internals.h" |
59 | 59 | ||
60 | extern void exit_od_table(struct task_struct *t); | ||
61 | |||
60 | static void exit_mm(struct task_struct * tsk); | 62 | static void exit_mm(struct task_struct * tsk); |
61 | 63 | ||
62 | static void __unhash_process(struct task_struct *p) | 64 | static void __unhash_process(struct task_struct *p) |
@@ -968,6 +970,8 @@ NORET_TYPE void do_exit(long code) | |||
968 | if (unlikely(tsk->audit_context)) | 970 | if (unlikely(tsk->audit_context)) |
969 | audit_free(tsk); | 971 | audit_free(tsk); |
970 | 972 | ||
973 | exit_od_table(tsk); | ||
974 | |||
971 | tsk->exit_code = code; | 975 | tsk->exit_code = code; |
972 | taskstats_exit(tsk, group_dead); | 976 | taskstats_exit(tsk, group_dead); |
973 | 977 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 4c14942a0ee3..166eb780dd7d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -75,6 +75,9 @@ | |||
75 | 75 | ||
76 | #include <trace/events/sched.h> | 76 | #include <trace/events/sched.h> |
77 | 77 | ||
78 | #include <litmus/litmus.h> | ||
79 | #include <litmus/sched_plugin.h> | ||
80 | |||
78 | /* | 81 | /* |
79 | * Protected counters by write_lock_irq(&tasklist_lock) | 82 | * Protected counters by write_lock_irq(&tasklist_lock) |
80 | */ | 83 | */ |
@@ -171,6 +174,7 @@ void __put_task_struct(struct task_struct *tsk) | |||
171 | WARN_ON(atomic_read(&tsk->usage)); | 174 | WARN_ON(atomic_read(&tsk->usage)); |
172 | WARN_ON(tsk == current); | 175 | WARN_ON(tsk == current); |
173 | 176 | ||
177 | exit_litmus(tsk); | ||
174 | exit_creds(tsk); | 178 | exit_creds(tsk); |
175 | delayacct_tsk_free(tsk); | 179 | delayacct_tsk_free(tsk); |
176 | 180 | ||
@@ -253,6 +257,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
253 | 257 | ||
254 | tsk->stack = ti; | 258 | tsk->stack = ti; |
255 | 259 | ||
260 | /* Don't let the new task be a real-time task. */ | ||
261 | litmus_fork(tsk); | ||
262 | |||
256 | err = prop_local_init_single(&tsk->dirties); | 263 | err = prop_local_init_single(&tsk->dirties); |
257 | if (err) | 264 | if (err) |
258 | goto out; | 265 | goto out; |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0086628b6e97..c0b440b1f6ee 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -46,6 +46,8 @@ | |||
46 | #include <linux/sched.h> | 46 | #include <linux/sched.h> |
47 | #include <linux/timer.h> | 47 | #include <linux/timer.h> |
48 | 48 | ||
49 | #include <litmus/litmus.h> | ||
50 | |||
49 | #include <asm/uaccess.h> | 51 | #include <asm/uaccess.h> |
50 | 52 | ||
51 | #include <trace/events/timer.h> | 53 | #include <trace/events/timer.h> |
@@ -1041,6 +1043,85 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
1041 | } | 1043 | } |
1042 | EXPORT_SYMBOL_GPL(hrtimer_start); | 1044 | EXPORT_SYMBOL_GPL(hrtimer_start); |
1043 | 1045 | ||
1046 | /** | ||
1047 | * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu | ||
1048 | */ | ||
1049 | void hrtimer_pull(void) | ||
1050 | { | ||
1051 | struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); | ||
1052 | struct hrtimer_start_on_info *info; | ||
1053 | struct list_head *pos, *safe, list; | ||
1054 | |||
1055 | spin_lock(&base->lock); | ||
1056 | list_replace_init(&base->to_pull, &list); | ||
1057 | spin_unlock(&base->lock); | ||
1058 | |||
1059 | list_for_each_safe(pos, safe, &list) { | ||
1060 | info = list_entry(pos, struct hrtimer_start_on_info, list); | ||
1061 | TRACE("pulled timer 0x%x\n", info->timer); | ||
1062 | list_del(pos); | ||
1063 | hrtimer_start(info->timer, info->time, info->mode); | ||
1064 | } | ||
1065 | } | ||
1066 | |||
1067 | /** | ||
1068 | * hrtimer_start_on - trigger timer arming on remote cpu | ||
1069 | * @cpu: remote cpu | ||
1070 | * @info: saved timer information for enqueuing on the remote cpu | ||
1071 | * @timer: timer to be pulled | ||
1072 | * @time: expire time | ||
1073 | * @mode: timer mode | ||
1074 | */ | ||
1075 | int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info, | ||
1076 | struct hrtimer *timer, ktime_t time, | ||
1077 | const enum hrtimer_mode mode) | ||
1078 | { | ||
1079 | unsigned long flags; | ||
1080 | struct hrtimer_cpu_base* base; | ||
1081 | int in_use = 0, was_empty; | ||
1082 | |||
1083 | /* serialize access to info through the timer base */ | ||
1084 | lock_hrtimer_base(timer, &flags); | ||
1085 | |||
1086 | in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE); | ||
1087 | if (!in_use) { | ||
1088 | INIT_LIST_HEAD(&info->list); | ||
1089 | info->timer = timer; | ||
1090 | info->time = time; | ||
1091 | info->mode = mode; | ||
1092 | /* mark as in use */ | ||
1093 | atomic_set(&info->state, HRTIMER_START_ON_QUEUED); | ||
1094 | } | ||
1095 | |||
1096 | unlock_hrtimer_base(timer, &flags); | ||
1097 | |||
1098 | if (!in_use) { | ||
1099 | /* initiate pull */ | ||
1100 | preempt_disable(); | ||
1101 | if (cpu == smp_processor_id()) { | ||
1102 | /* start timer locally; we may get called | ||
1103 | * with rq->lock held, do not wake up anything | ||
1104 | */ | ||
1105 | TRACE("hrtimer_start_on: starting on local CPU\n"); | ||
1106 | __hrtimer_start_range_ns(info->timer, info->time, | ||
1107 | 0, info->mode, 0); | ||
1108 | } else { | ||
1109 | TRACE("hrtimer_start_on: pulling to remote CPU\n"); | ||
1110 | base = &per_cpu(hrtimer_bases, cpu); | ||
1111 | spin_lock_irqsave(&base->lock, flags); | ||
1112 | was_empty = list_empty(&base->to_pull); | ||
1113 | list_add(&info->list, &base->to_pull); | ||
1114 | spin_unlock_irqrestore(&base->lock, flags); | ||
1115 | if (was_empty) | ||
1116 | /* only send IPI if no one else | ||
1117 | * has done so already | ||
1118 | */ | ||
1119 | smp_send_pull_timers(cpu); | ||
1120 | } | ||
1121 | preempt_enable(); | ||
1122 | } | ||
1123 | return in_use; | ||
1124 | } | ||
1044 | 1125 | ||
1045 | /** | 1126 | /** |
1046 | * hrtimer_try_to_cancel - try to deactivate a timer | 1127 | * hrtimer_try_to_cancel - try to deactivate a timer |
@@ -1631,6 +1712,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) | |||
1631 | cpu_base->clock_base[i].cpu_base = cpu_base; | 1712 | cpu_base->clock_base[i].cpu_base = cpu_base; |
1632 | 1713 | ||
1633 | hrtimer_init_hres(cpu_base); | 1714 | hrtimer_init_hres(cpu_base); |
1715 | INIT_LIST_HEAD(&cpu_base->to_pull); | ||
1634 | } | 1716 | } |
1635 | 1717 | ||
1636 | #ifdef CONFIG_HOTPLUG_CPU | 1718 | #ifdef CONFIG_HOTPLUG_CPU |
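A caller-side sketch of the new hrtimer_start_on() path: arm a release timer on a designated CPU, noting that a non-zero return means the info block was still in use. HRTIMER_START_ON_INACTIVE and the atomic state field are assumed to be defined next to struct hrtimer_start_on_info in hrtimer.h, as the code above implies.

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;
static struct hrtimer_start_on_info demo_info;

static enum hrtimer_restart demo_fire(struct hrtimer *timer)
{
	/* release work would go here */
	return HRTIMER_NORESTART;
}

static void demo_arm_on_cpu(int cpu, ktime_t expires)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	demo_timer.function = demo_fire;
	/* mark the info block as free before handing it to hrtimer_start_on() */
	atomic_set(&demo_info.state, HRTIMER_START_ON_INACTIVE);
	/* non-zero return: info was still in use, arming was skipped */
	hrtimer_start_on(cpu, &demo_info, &demo_timer, expires, HRTIMER_MODE_ABS);
}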
diff --git a/kernel/printk.c b/kernel/printk.c index 75077ad0b537..ee54355cfdf1 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -71,6 +71,13 @@ int console_printk[4] = { | |||
71 | }; | 71 | }; |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * divert printk() messages when there is a LITMUS^RT debug listener | ||
75 | */ | ||
76 | #include <litmus/litmus.h> | ||
77 | int trace_override = 0; | ||
78 | int trace_recurse = 0; | ||
79 | |||
80 | /* | ||
74 | * Low level drivers may need that to know if they can schedule in | 81 | * Low level drivers may need that to know if they can schedule in |
75 | * their unblank() callback or not. So let's export it. | 82 | * their unblank() callback or not. So let's export it. |
76 | */ | 83 | */ |
@@ -708,6 +715,9 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
708 | /* Emit the output into the temporary buffer */ | 715 | /* Emit the output into the temporary buffer */ |
709 | printed_len += vscnprintf(printk_buf + printed_len, | 716 | printed_len += vscnprintf(printk_buf + printed_len, |
710 | sizeof(printk_buf) - printed_len, fmt, args); | 717 | sizeof(printk_buf) - printed_len, fmt, args); |
718 | /* if the LITMUS^RT tracer is active, divert printk() msgs */ | ||
719 | if (trace_override && !trace_recurse) | ||
720 | TRACE("%s", printk_buf); | ||
711 | 721 | ||
712 | 722 | ||
713 | p = printk_buf; | 723 | p = printk_buf; |
@@ -777,7 +787,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
777 | * Try to acquire and then immediately release the | 787 | * Try to acquire and then immediately release the |
778 | * console semaphore. The release will do all the | 788 | * console semaphore. The release will do all the |
779 | * actual magic (print out buffers, wake up klogd, | 789 | * actual magic (print out buffers, wake up klogd, |
780 | * etc). | 790 | * etc). |
781 | * | 791 | * |
782 | * The acquire_console_semaphore_for_printk() function | 792 | * The acquire_console_semaphore_for_printk() function |
783 | * will release 'logbuf_lock' regardless of whether it | 793 | * will release 'logbuf_lock' regardless of whether it |
@@ -1014,7 +1024,7 @@ int printk_needs_cpu(int cpu) | |||
1014 | 1024 | ||
1015 | void wake_up_klogd(void) | 1025 | void wake_up_klogd(void) |
1016 | { | 1026 | { |
1017 | if (waitqueue_active(&log_wait)) | 1027 | if (!trace_override && waitqueue_active(&log_wait)) |
1018 | __raw_get_cpu_var(printk_pending) = 1; | 1028 | __raw_get_cpu_var(printk_pending) = 1; |
1019 | } | 1029 | } |
1020 | 1030 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 3c2a54f70ffe..5e3c509e0efe 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -78,6 +78,9 @@ | |||
78 | 78 | ||
79 | #include "sched_cpupri.h" | 79 | #include "sched_cpupri.h" |
80 | 80 | ||
81 | #include <litmus/sched_trace.h> | ||
82 | #include <litmus/trace.h> | ||
83 | |||
81 | #define CREATE_TRACE_POINTS | 84 | #define CREATE_TRACE_POINTS |
82 | #include <trace/events/sched.h> | 85 | #include <trace/events/sched.h> |
83 | 86 | ||
@@ -450,6 +453,12 @@ struct rt_rq { | |||
450 | #endif | 453 | #endif |
451 | }; | 454 | }; |
452 | 455 | ||
456 | /* Litmus related fields in a runqueue */ | ||
457 | struct litmus_rq { | ||
458 | unsigned long nr_running; | ||
459 | struct task_struct *prev; | ||
460 | }; | ||
461 | |||
453 | #ifdef CONFIG_SMP | 462 | #ifdef CONFIG_SMP |
454 | 463 | ||
455 | /* | 464 | /* |
@@ -512,6 +521,7 @@ struct rq { | |||
512 | 521 | ||
513 | struct cfs_rq cfs; | 522 | struct cfs_rq cfs; |
514 | struct rt_rq rt; | 523 | struct rt_rq rt; |
524 | struct litmus_rq litmus; | ||
515 | 525 | ||
516 | #ifdef CONFIG_FAIR_GROUP_SCHED | 526 | #ifdef CONFIG_FAIR_GROUP_SCHED |
517 | /* list of leaf cfs_rq on this cpu: */ | 527 | /* list of leaf cfs_rq on this cpu: */ |
@@ -1833,7 +1843,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
1833 | 1843 | ||
1834 | static const struct sched_class rt_sched_class; | 1844 | static const struct sched_class rt_sched_class; |
1835 | 1845 | ||
1836 | #define sched_class_highest (&rt_sched_class) | 1846 | #define sched_class_highest (&litmus_sched_class) |
1837 | #define for_each_class(class) \ | 1847 | #define for_each_class(class) \ |
1838 | for (class = sched_class_highest; class; class = class->next) | 1848 | for (class = sched_class_highest; class; class = class->next) |
1839 | 1849 | ||
@@ -1932,6 +1942,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | |||
1932 | #include "sched_idletask.c" | 1942 | #include "sched_idletask.c" |
1933 | #include "sched_fair.c" | 1943 | #include "sched_fair.c" |
1934 | #include "sched_rt.c" | 1944 | #include "sched_rt.c" |
1945 | #include "../litmus/sched_litmus.c" | ||
1935 | #ifdef CONFIG_SCHED_DEBUG | 1946 | #ifdef CONFIG_SCHED_DEBUG |
1936 | # include "sched_debug.c" | 1947 | # include "sched_debug.c" |
1937 | #endif | 1948 | #endif |
@@ -2372,6 +2383,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2372 | unsigned long flags; | 2383 | unsigned long flags; |
2373 | struct rq *rq; | 2384 | struct rq *rq; |
2374 | 2385 | ||
2386 | if (is_realtime(p)) | ||
2387 | TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); | ||
2388 | |||
2375 | if (!sched_feat(SYNC_WAKEUPS)) | 2389 | if (!sched_feat(SYNC_WAKEUPS)) |
2376 | wake_flags &= ~WF_SYNC; | 2390 | wake_flags &= ~WF_SYNC; |
2377 | 2391 | ||
@@ -2390,7 +2404,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2390 | orig_cpu = cpu; | 2404 | orig_cpu = cpu; |
2391 | 2405 | ||
2392 | #ifdef CONFIG_SMP | 2406 | #ifdef CONFIG_SMP |
2393 | if (unlikely(task_running(rq, p))) | 2407 | if (unlikely(task_running(rq, p)) || is_realtime(p)) |
2394 | goto out_activate; | 2408 | goto out_activate; |
2395 | 2409 | ||
2396 | /* | 2410 | /* |
@@ -2497,6 +2511,8 @@ out_running: | |||
2497 | } | 2511 | } |
2498 | #endif | 2512 | #endif |
2499 | out: | 2513 | out: |
2514 | if (is_realtime(p)) | ||
2515 | TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state); | ||
2500 | task_rq_unlock(rq, &flags); | 2516 | task_rq_unlock(rq, &flags); |
2501 | put_cpu(); | 2517 | put_cpu(); |
2502 | 2518 | ||
@@ -2814,6 +2830,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
2814 | */ | 2830 | */ |
2815 | prev_state = prev->state; | 2831 | prev_state = prev->state; |
2816 | finish_arch_switch(prev); | 2832 | finish_arch_switch(prev); |
2833 | litmus->finish_switch(prev); | ||
2834 | prev->rt_param.stack_in_use = NO_CPU; | ||
2817 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 2835 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
2818 | local_irq_disable(); | 2836 | local_irq_disable(); |
2819 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 2837 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ |
@@ -2843,6 +2861,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev) | |||
2843 | { | 2861 | { |
2844 | if (prev->sched_class->pre_schedule) | 2862 | if (prev->sched_class->pre_schedule) |
2845 | prev->sched_class->pre_schedule(rq, prev); | 2863 | prev->sched_class->pre_schedule(rq, prev); |
2864 | |||
2865 | /* LITMUS^RT hack (not very clean): we need to save the prev task | ||
2866 | * because our scheduling decisions rely on it (as we drop the rq lock, | ||
2867 | * something in prev can change...); there is no way to escape | ||
2868 | * this hack apart from modifying pick_next_task(rq, _prev_) or | ||
2869 | * falling back on the previous solution of decoupling | ||
2870 | * scheduling decisions. | ||
2871 | */ | ||
2872 | rq->litmus.prev = prev; | ||
2846 | } | 2873 | } |
2847 | 2874 | ||
2848 | /* rq->lock is NOT held, but preemption is disabled */ | 2875 | /* rq->lock is NOT held, but preemption is disabled */ |
@@ -3520,18 +3547,26 @@ void scheduler_tick(void) | |||
3520 | 3547 | ||
3521 | sched_clock_tick(); | 3548 | sched_clock_tick(); |
3522 | 3549 | ||
3550 | TS_TICK_START(current); | ||
3551 | |||
3523 | raw_spin_lock(&rq->lock); | 3552 | raw_spin_lock(&rq->lock); |
3524 | update_rq_clock(rq); | 3553 | update_rq_clock(rq); |
3525 | update_cpu_load(rq); | 3554 | update_cpu_load(rq); |
3526 | curr->sched_class->task_tick(rq, curr, 0); | 3555 | curr->sched_class->task_tick(rq, curr, 0); |
3556 | |||
3557 | /* litmus_tick may force current to resched */ | ||
3558 | litmus_tick(rq, curr); | ||
3559 | |||
3527 | raw_spin_unlock(&rq->lock); | 3560 | raw_spin_unlock(&rq->lock); |
3528 | 3561 | ||
3529 | perf_event_task_tick(curr); | 3562 | perf_event_task_tick(curr); |
3530 | 3563 | ||
3531 | #ifdef CONFIG_SMP | 3564 | #ifdef CONFIG_SMP |
3532 | rq->idle_at_tick = idle_cpu(cpu); | 3565 | rq->idle_at_tick = idle_cpu(cpu); |
3533 | trigger_load_balance(rq, cpu); | 3566 | if (!is_realtime(current)) |
3567 | trigger_load_balance(rq, cpu); | ||
3534 | #endif | 3568 | #endif |
3569 | TS_TICK_END(current); | ||
3535 | } | 3570 | } |
3536 | 3571 | ||
3537 | notrace unsigned long get_parent_ip(unsigned long addr) | 3572 | notrace unsigned long get_parent_ip(unsigned long addr) |
@@ -3672,12 +3707,20 @@ pick_next_task(struct rq *rq) | |||
3672 | /* | 3707 | /* |
3673 | * Optimization: we know that if all tasks are in | 3708 | * Optimization: we know that if all tasks are in |
3674 | * the fair class we can call that function directly: | 3709 | * the fair class we can call that function directly: |
3675 | */ | 3710 | |
3676 | if (likely(rq->nr_running == rq->cfs.nr_running)) { | 3711 | * NOT IN LITMUS^RT! |
3712 | |||
3713 | * This breaks many assumptions in the plugins. | ||
3714 | * Do not uncomment without thinking long and hard | ||
3715 | * about how this affects global plugins such as GSN-EDF. | ||
3716 | |||
3717 | if (rq->nr_running == rq->cfs.nr_running) { | ||
3718 | TRACE("taking shortcut in pick_next_task()\n"); | ||
3677 | p = fair_sched_class.pick_next_task(rq); | 3719 | p = fair_sched_class.pick_next_task(rq); |
3678 | if (likely(p)) | 3720 | if (likely(p)) |
3679 | return p; | 3721 | return p; |
3680 | } | 3722 | } |
3723 | */ | ||
3681 | 3724 | ||
3682 | class = sched_class_highest; | 3725 | class = sched_class_highest; |
3683 | for ( ; ; ) { | 3726 | for ( ; ; ) { |
@@ -3712,6 +3755,8 @@ need_resched: | |||
3712 | 3755 | ||
3713 | release_kernel_lock(prev); | 3756 | release_kernel_lock(prev); |
3714 | need_resched_nonpreemptible: | 3757 | need_resched_nonpreemptible: |
3758 | TS_SCHED_START; | ||
3759 | sched_trace_task_switch_away(prev); | ||
3715 | 3760 | ||
3716 | schedule_debug(prev); | 3761 | schedule_debug(prev); |
3717 | 3762 | ||
@@ -3746,15 +3791,22 @@ need_resched_nonpreemptible: | |||
3746 | rq->curr = next; | 3791 | rq->curr = next; |
3747 | ++*switch_count; | 3792 | ++*switch_count; |
3748 | 3793 | ||
3794 | TS_SCHED_END(next); | ||
3795 | TS_CXS_START(next); | ||
3749 | context_switch(rq, prev, next); /* unlocks the rq */ | 3796 | context_switch(rq, prev, next); /* unlocks the rq */ |
3797 | TS_CXS_END(current); | ||
3750 | /* | 3798 | /* |
3751 | * the context switch might have flipped the stack from under | 3799 | * the context switch might have flipped the stack from under |
3752 | * us, hence refresh the local variables. | 3800 | * us, hence refresh the local variables. |
3753 | */ | 3801 | */ |
3754 | cpu = smp_processor_id(); | 3802 | cpu = smp_processor_id(); |
3755 | rq = cpu_rq(cpu); | 3803 | rq = cpu_rq(cpu); |
3756 | } else | 3804 | } else { |
3805 | TS_SCHED_END(prev); | ||
3757 | raw_spin_unlock_irq(&rq->lock); | 3806 | raw_spin_unlock_irq(&rq->lock); |
3807 | } | ||
3808 | |||
3809 | sched_trace_task_switch_to(current); | ||
3758 | 3810 | ||
3759 | post_schedule(rq); | 3811 | post_schedule(rq); |
3760 | 3812 | ||
@@ -3767,6 +3819,9 @@ need_resched_nonpreemptible: | |||
3767 | preempt_enable_no_resched(); | 3819 | preempt_enable_no_resched(); |
3768 | if (need_resched()) | 3820 | if (need_resched()) |
3769 | goto need_resched; | 3821 | goto need_resched; |
3822 | |||
3823 | if (srp_active()) | ||
3824 | srp_ceiling_block(); | ||
3770 | } | 3825 | } |
3771 | EXPORT_SYMBOL(schedule); | 3826 | EXPORT_SYMBOL(schedule); |
3772 | 3827 | ||
@@ -4043,6 +4098,17 @@ void complete_all(struct completion *x) | |||
4043 | } | 4098 | } |
4044 | EXPORT_SYMBOL(complete_all); | 4099 | EXPORT_SYMBOL(complete_all); |
4045 | 4100 | ||
4101 | void complete_n(struct completion *x, int n) | ||
4102 | { | ||
4103 | unsigned long flags; | ||
4104 | |||
4105 | spin_lock_irqsave(&x->wait.lock, flags); | ||
4106 | x->done += n; | ||
4107 | __wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL); | ||
4108 | spin_unlock_irqrestore(&x->wait.lock, flags); | ||
4109 | } | ||
4110 | EXPORT_SYMBOL(complete_n); | ||
4111 | |||
4046 | static inline long __sched | 4112 | static inline long __sched |
4047 | do_wait_for_common(struct completion *x, long timeout, int state) | 4113 | do_wait_for_common(struct completion *x, long timeout, int state) |
4048 | { | 4114 | { |
@@ -4471,7 +4537,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
4471 | p->normal_prio = normal_prio(p); | 4537 | p->normal_prio = normal_prio(p); |
4472 | /* we are holding p->pi_lock already */ | 4538 | /* we are holding p->pi_lock already */ |
4473 | p->prio = rt_mutex_getprio(p); | 4539 | p->prio = rt_mutex_getprio(p); |
4474 | if (rt_prio(p->prio)) | 4540 | if (p->policy == SCHED_LITMUS) |
4541 | p->sched_class = &litmus_sched_class; | ||
4542 | else if (rt_prio(p->prio)) | ||
4475 | p->sched_class = &rt_sched_class; | 4543 | p->sched_class = &rt_sched_class; |
4476 | else | 4544 | else |
4477 | p->sched_class = &fair_sched_class; | 4545 | p->sched_class = &fair_sched_class; |
@@ -4516,7 +4584,7 @@ recheck: | |||
4516 | 4584 | ||
4517 | if (policy != SCHED_FIFO && policy != SCHED_RR && | 4585 | if (policy != SCHED_FIFO && policy != SCHED_RR && |
4518 | policy != SCHED_NORMAL && policy != SCHED_BATCH && | 4586 | policy != SCHED_NORMAL && policy != SCHED_BATCH && |
4519 | policy != SCHED_IDLE) | 4587 | policy != SCHED_IDLE && policy != SCHED_LITMUS) |
4520 | return -EINVAL; | 4588 | return -EINVAL; |
4521 | } | 4589 | } |
4522 | 4590 | ||
@@ -4531,6 +4599,8 @@ recheck: | |||
4531 | return -EINVAL; | 4599 | return -EINVAL; |
4532 | if (rt_policy(policy) != (param->sched_priority != 0)) | 4600 | if (rt_policy(policy) != (param->sched_priority != 0)) |
4533 | return -EINVAL; | 4601 | return -EINVAL; |
4602 | if (policy == SCHED_LITMUS && policy == p->policy) | ||
4603 | return -EINVAL; | ||
4534 | 4604 | ||
4535 | /* | 4605 | /* |
4536 | * Allow unprivileged RT tasks to decrease priority: | 4606 | * Allow unprivileged RT tasks to decrease priority: |
@@ -4585,6 +4655,12 @@ recheck: | |||
4585 | return retval; | 4655 | return retval; |
4586 | } | 4656 | } |
4587 | 4657 | ||
4658 | if (policy == SCHED_LITMUS) { | ||
4659 | retval = litmus_admit_task(p); | ||
4660 | if (retval) | ||
4661 | return retval; | ||
4662 | } | ||
4663 | |||
4588 | /* | 4664 | /* |
4589 | * make sure no PI-waiters arrive (or leave) while we are | 4665 | * make sure no PI-waiters arrive (or leave) while we are |
4590 | * changing the priority of the task: | 4666 | * changing the priority of the task: |
@@ -4612,10 +4688,19 @@ recheck: | |||
4612 | 4688 | ||
4613 | p->sched_reset_on_fork = reset_on_fork; | 4689 | p->sched_reset_on_fork = reset_on_fork; |
4614 | 4690 | ||
4691 | if (p->policy == SCHED_LITMUS) | ||
4692 | litmus_exit_task(p); | ||
4693 | |||
4615 | oldprio = p->prio; | 4694 | oldprio = p->prio; |
4616 | prev_class = p->sched_class; | 4695 | prev_class = p->sched_class; |
4617 | __setscheduler(rq, p, policy, param->sched_priority); | 4696 | __setscheduler(rq, p, policy, param->sched_priority); |
4618 | 4697 | ||
4698 | if (policy == SCHED_LITMUS) { | ||
4699 | p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU; | ||
4700 | p->rt_param.present = running; | ||
4701 | litmus->task_new(p, on_rq, running); | ||
4702 | } | ||
4703 | |||
4619 | if (running) | 4704 | if (running) |
4620 | p->sched_class->set_curr_task(rq); | 4705 | p->sched_class->set_curr_task(rq); |
4621 | if (on_rq) { | 4706 | if (on_rq) { |
@@ -4785,10 +4870,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
4785 | rcu_read_lock(); | 4870 | rcu_read_lock(); |
4786 | 4871 | ||
4787 | p = find_process_by_pid(pid); | 4872 | p = find_process_by_pid(pid); |
4788 | if (!p) { | 4873 | /* Don't set affinity if the task was not found or is a LITMUS task */ |
4874 | if (!p || is_realtime(p)) { | ||
4789 | rcu_read_unlock(); | 4875 | rcu_read_unlock(); |
4790 | put_online_cpus(); | 4876 | put_online_cpus(); |
4791 | return -ESRCH; | 4877 | return p ? -EPERM : -ESRCH; |
4792 | } | 4878 | } |
4793 | 4879 | ||
4794 | /* Prevent p going away */ | 4880 | /* Prevent p going away */ |
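Taken together, the kernel/sched.c hooks above define how a task enters LITMUS^RT scheduling: user space switches the task's policy to SCHED_LITMUS via the ordinary sched_setscheduler() path, litmus_admit_task() must accept it, __setscheduler() then installs litmus_sched_class, and subsequent sched_setaffinity() calls are refused with -EPERM. A minimal user-space sketch of that switch follows; the numeric value of SCHED_LITMUS and the need to configure the task's LITMUS^RT parameters beforehand (normally done through liblitmus) are assumptions not shown in this patch.

#include <sched.h>
#include <stdio.h>

#ifndef SCHED_LITMUS
#define SCHED_LITMUS 6	/* assumption: value provided by the patched headers */
#endif

int become_litmus_task(void)
{
	/* sched_priority stays 0: assuming rt_policy() still covers only
	 * SCHED_FIFO/SCHED_RR, the priority sanity check in
	 * sched_setscheduler() rejects non-zero priorities here. */
	struct sched_param param = { .sched_priority = 0 };

	/* Fails with whatever litmus_admit_task() returns if the task's
	 * rt_param have not been set up first. */
	if (sched_setscheduler(0, SCHED_LITMUS, &param) < 0) {
		perror("sched_setscheduler(SCHED_LITMUS)");
		return -1;
	}
	return 0;
}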
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5a5ea2cd924f..a4a741dfebfe 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1708,7 +1708,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1708 | int sync = wake_flags & WF_SYNC; | 1708 | int sync = wake_flags & WF_SYNC; |
1709 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1709 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
1710 | 1710 | ||
1711 | if (unlikely(rt_prio(p->prio))) | 1711 | if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS) { |
1712 | goto preempt; | 1712 | goto preempt; |
1713 | 1713 | ||
1714 | if (unlikely(p->sched_class != &fair_sched_class)) | 1714 | if (unlikely(p->sched_class != &fair_sched_class)) |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index b5b920ae2ea7..c2fbb02c1b54 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1014,7 +1014,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
1014 | */ | 1014 | */ |
1015 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) | 1015 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) |
1016 | { | 1016 | { |
1017 | if (p->prio < rq->curr->prio) { | 1017 | if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) { |
1018 | resched_task(rq->curr); | 1018 | resched_task(rq->curr); |
1019 | return; | 1019 | return; |
1020 | } | 1020 | } |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f992762d7f51..0adc54bd7c7c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -721,6 +721,46 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
721 | } | 721 | } |
722 | 722 | ||
723 | /** | 723 | /** |
724 | * tick_set_quanta_type - set the quanta type via a boot option | ||
725 | * Default is standard setup with ticks staggered over first | ||
726 | * half of tick period. | ||
727 | */ | ||
728 | int quanta_type = LINUX_DEFAULT_TICKS; | ||
729 | static int __init tick_set_quanta_type(char *str) | ||
730 | { | ||
731 | if (strcmp("aligned", str) == 0) { | ||
732 | quanta_type = LITMUS_ALIGNED_TICKS; | ||
733 | printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n"); | ||
734 | } | ||
735 | else if (strcmp("staggered", str) == 0) { | ||
736 | quanta_type = LITMUS_STAGGERED_TICKS; | ||
737 | printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n"); | ||
738 | } | ||
739 | return 1; | ||
740 | } | ||
741 | __setup("quanta=", tick_set_quanta_type); | ||
742 | |||
743 | u64 cpu_stagger_offset(int cpu) | ||
744 | { | ||
745 | u64 offset = 0; | ||
746 | switch (quanta_type) { | ||
747 | case LITMUS_ALIGNED_TICKS: | ||
748 | offset = 0; | ||
749 | break; | ||
750 | case LITMUS_STAGGERED_TICKS: | ||
751 | offset = ktime_to_ns(tick_period); | ||
752 | do_div(offset, num_possible_cpus()); | ||
753 | offset *= cpu; | ||
754 | break; | ||
755 | default: | ||
756 | offset = ktime_to_ns(tick_period) >> 1; | ||
757 | do_div(offset, num_possible_cpus()); | ||
758 | offset *= cpu; | ||
759 | } | ||
760 | return offset; | ||
761 | } | ||
762 | |||
763 | /** | ||
724 | * tick_setup_sched_timer - setup the tick emulation timer | 764 | * tick_setup_sched_timer - setup the tick emulation timer |
725 | */ | 765 | */ |
726 | void tick_setup_sched_timer(void) | 766 | void tick_setup_sched_timer(void) |
@@ -737,9 +777,11 @@ void tick_setup_sched_timer(void) | |||
737 | 777 | ||
738 | /* Get the next period (per cpu) */ | 778 | /* Get the next period (per cpu) */ |
739 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); | 779 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); |
740 | offset = ktime_to_ns(tick_period) >> 1; | 780 | |
741 | do_div(offset, num_possible_cpus()); | 781 | /* Offset must be set correctly to achieve desired quanta type. */ |
742 | offset *= smp_processor_id(); | 782 | offset = cpu_stagger_offset(smp_processor_id()); |
783 | |||
784 | /* Add the correct offset to expiration time */ | ||
743 | hrtimer_add_expires_ns(&ts->sched_timer, offset); | 785 | hrtimer_add_expires_ns(&ts->sched_timer, offset); |
744 | 786 | ||
745 | for (;;) { | 787 | for (;;) { |
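The quanta= boot option added here only changes where each CPU's tick timer sits inside the tick period. As a worked example, assume HZ=1000 (tick_period = 1 ms) and 4 possible CPUs: quanta=aligned gives every CPU an offset of 0, so all ticks fire simultaneously; quanta=staggered gives CPU k an offset of k * 1 ms / 4, i.e. 0, 250, 500 and 750 microseconds; and without the option the stock behaviour is kept, spreading the ticks over the first half of the period at k * 0.5 ms / 4, i.e. 0, 125, 250 and 375 microseconds.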
diff --git a/litmus/Kconfig b/litmus/Kconfig new file mode 100644 index 000000000000..874794f64af1 --- /dev/null +++ b/litmus/Kconfig | |||
@@ -0,0 +1,85 @@ | |||
1 | menu "LITMUS^RT" | ||
2 | |||
3 | menu "Real-Time Synchronization" | ||
4 | |||
5 | config NP_SECTION | ||
6 | bool "Non-preemptive section support" | ||
7 | default n | ||
8 | help | ||
9 | Allow tasks to become non-preemptable. | ||
10 | Note that plugins still need to explicitly support non-preemptivity. | ||
11 | Currently, only GSN-EDF and PSN-EDF have such support. | ||
12 | |||
13 | This is required to support the FMLP. | ||
14 | If disabled, all tasks will be considered preemptable at all times. | ||
15 | |||
16 | config SRP | ||
17 | bool "Stack Resource Policy (SRP)" | ||
18 | default n | ||
19 | help | ||
20 | Include support for Baker's Stack Resource Policy. | ||
21 | |||
22 | Say Yes if you want FMLP local long critical section | ||
23 | synchronization support. | ||
24 | |||
25 | config FMLP | ||
26 | bool "FMLP support" | ||
27 | depends on NP_SECTION | ||
28 | default n | ||
29 | help | ||
30 | Include support for deterministic multiprocessor real-time | ||
31 | synchronization. | ||
32 | |||
33 | Say Yes if you want FMLP long critical section | ||
34 | synchronization support. | ||
35 | |||
36 | endmenu | ||
37 | |||
38 | menu "Tracing" | ||
39 | |||
40 | config FEATHER_TRACE | ||
41 | bool "Feather-Trace Infrastructure" | ||
42 | default y | ||
43 | help | ||
44 | Feather-Trace basic tracing infrastructure. Includes device file | ||
45 | driver and instrumentation point support. | ||
46 | |||
47 | |||
48 | config SCHED_TASK_TRACE | ||
49 | bool "Trace real-time tasks" | ||
50 | depends on FEATHER_TRACE | ||
51 | default y | ||
52 | help | ||
53 | Include support for the sched_trace_XXX() tracing functions. This | ||
54 | allows the collection of real-time task events such as job | ||
55 | completions, job releases, early completions, etc. This results in a | ||
56 | small overhead in the scheduling code. Disable if the overhead is not | ||
57 | acceptable (e.g., benchmarking). | ||
58 | |||
59 | Say Yes for debugging. | ||
60 | Say No for overhead tracing. | ||
61 | |||
62 | config SCHED_OVERHEAD_TRACE | ||
63 | bool "Record timestamps for overhead measurements" | ||
64 | depends on FEATHER_TRACE | ||
65 | default n | ||
66 | help | ||
67 | Export event stream for overhead tracing. | ||
68 | Say Yes for overhead tracing. | ||
69 | |||
70 | config SCHED_DEBUG_TRACE | ||
71 | bool "TRACE() debugging" | ||
72 | default y | ||
73 | help | ||
74 | Include support for sched_trace_log_message(), which is used to | ||
75 | implement TRACE(). If disabled, no TRACE() messages will be included | ||
76 | in the kernel, and no overheads due to debugging statements will be | ||
77 | incurred by the scheduler. Disable if the overhead is not acceptable | ||
78 | (e.g. benchmarking). | ||
79 | |||
80 | Say Yes for debugging. | ||
81 | Say No for overhead tracing. | ||
82 | |||
83 | endmenu | ||
84 | |||
85 | endmenu | ||
diff --git a/litmus/Makefile b/litmus/Makefile new file mode 100644 index 000000000000..0cc33e8bee51 --- /dev/null +++ b/litmus/Makefile | |||
@@ -0,0 +1,23 @@ | |||
1 | # | ||
2 | # Makefile for LITMUS^RT | ||
3 | # | ||
4 | |||
5 | obj-y = sched_plugin.o litmus.o \ | ||
6 | jobs.o \ | ||
7 | sync.o \ | ||
8 | rt_domain.o \ | ||
9 | edf_common.o \ | ||
10 | fdso.o \ | ||
11 | srp.o \ | ||
12 | fmlp.o \ | ||
13 | bheap.o \ | ||
14 | ctrldev.o \ | ||
15 | sched_gsn_edf.o \ | ||
16 | sched_psn_edf.o \ | ||
17 | sched_cedf.o \ | ||
18 | sched_pfair.o | ||
19 | |||
20 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o | ||
21 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o | ||
22 | obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o | ||
23 | obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o | ||
diff --git a/litmus/bheap.c b/litmus/bheap.c new file mode 100644 index 000000000000..528af97f18a6 --- /dev/null +++ b/litmus/bheap.c | |||
@@ -0,0 +1,314 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <litmus/bheap.h> | ||
3 | |||
4 | void bheap_init(struct bheap* heap) | ||
5 | { | ||
6 | heap->head = NULL; | ||
7 | heap->min = NULL; | ||
8 | } | ||
9 | |||
10 | void bheap_node_init(struct bheap_node** _h, void* value) | ||
11 | { | ||
12 | struct bheap_node* h = *_h; | ||
13 | h->parent = NULL; | ||
14 | h->next = NULL; | ||
15 | h->child = NULL; | ||
16 | h->degree = NOT_IN_HEAP; | ||
17 | h->value = value; | ||
18 | h->ref = _h; | ||
19 | } | ||
20 | |||
21 | |||
22 | /* make child a subtree of root */ | ||
23 | static void __bheap_link(struct bheap_node* root, | ||
24 | struct bheap_node* child) | ||
25 | { | ||
26 | child->parent = root; | ||
27 | child->next = root->child; | ||
28 | root->child = child; | ||
29 | root->degree++; | ||
30 | } | ||
31 | |||
32 | /* merge root lists */ | ||
33 | static struct bheap_node* __bheap_merge(struct bheap_node* a, | ||
34 | struct bheap_node* b) | ||
35 | { | ||
36 | struct bheap_node* head = NULL; | ||
37 | struct bheap_node** pos = &head; | ||
38 | |||
39 | while (a && b) { | ||
40 | if (a->degree < b->degree) { | ||
41 | *pos = a; | ||
42 | a = a->next; | ||
43 | } else { | ||
44 | *pos = b; | ||
45 | b = b->next; | ||
46 | } | ||
47 | pos = &(*pos)->next; | ||
48 | } | ||
49 | if (a) | ||
50 | *pos = a; | ||
51 | else | ||
52 | *pos = b; | ||
53 | return head; | ||
54 | } | ||
55 | |||
56 | /* reverse a linked list of nodes. also clears parent pointer */ | ||
57 | static struct bheap_node* __bheap_reverse(struct bheap_node* h) | ||
58 | { | ||
59 | struct bheap_node* tail = NULL; | ||
60 | struct bheap_node* next; | ||
61 | |||
62 | if (!h) | ||
63 | return h; | ||
64 | |||
65 | h->parent = NULL; | ||
66 | while (h->next) { | ||
67 | next = h->next; | ||
68 | h->next = tail; | ||
69 | tail = h; | ||
70 | h = next; | ||
71 | h->parent = NULL; | ||
72 | } | ||
73 | h->next = tail; | ||
74 | return h; | ||
75 | } | ||
76 | |||
77 | static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap, | ||
78 | struct bheap_node** prev, struct bheap_node** node) | ||
79 | { | ||
80 | struct bheap_node *_prev, *cur; | ||
81 | *prev = NULL; | ||
82 | |||
83 | if (!heap->head) { | ||
84 | *node = NULL; | ||
85 | return; | ||
86 | } | ||
87 | |||
88 | *node = heap->head; | ||
89 | _prev = heap->head; | ||
90 | cur = heap->head->next; | ||
91 | while (cur) { | ||
92 | if (higher_prio(cur, *node)) { | ||
93 | *node = cur; | ||
94 | *prev = _prev; | ||
95 | } | ||
96 | _prev = cur; | ||
97 | cur = cur->next; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap, | ||
102 | struct bheap_node* h2) | ||
103 | { | ||
104 | struct bheap_node* h1; | ||
105 | struct bheap_node *prev, *x, *next; | ||
106 | if (!h2) | ||
107 | return; | ||
108 | h1 = heap->head; | ||
109 | if (!h1) { | ||
110 | heap->head = h2; | ||
111 | return; | ||
112 | } | ||
113 | h1 = __bheap_merge(h1, h2); | ||
114 | prev = NULL; | ||
115 | x = h1; | ||
116 | next = x->next; | ||
117 | while (next) { | ||
118 | if (x->degree != next->degree || | ||
119 | (next->next && next->next->degree == x->degree)) { | ||
120 | /* nothing to do, advance */ | ||
121 | prev = x; | ||
122 | x = next; | ||
123 | } else if (higher_prio(x, next)) { | ||
124 | /* x becomes the root of next */ | ||
125 | x->next = next->next; | ||
126 | __bheap_link(x, next); | ||
127 | } else { | ||
128 | /* next becomes the root of x */ | ||
129 | if (prev) | ||
130 | prev->next = next; | ||
131 | else | ||
132 | h1 = next; | ||
133 | __bheap_link(next, x); | ||
134 | x = next; | ||
135 | } | ||
136 | next = x->next; | ||
137 | } | ||
138 | heap->head = h1; | ||
139 | } | ||
140 | |||
141 | static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio, | ||
142 | struct bheap* heap) | ||
143 | { | ||
144 | struct bheap_node *prev, *node; | ||
145 | __bheap_min(higher_prio, heap, &prev, &node); | ||
146 | if (!node) | ||
147 | return NULL; | ||
148 | if (prev) | ||
149 | prev->next = node->next; | ||
150 | else | ||
151 | heap->head = node->next; | ||
152 | __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); | ||
153 | return node; | ||
154 | } | ||
155 | |||
156 | /* insert (and reinitialize) a node into the heap */ | ||
157 | void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap, | ||
158 | struct bheap_node* node) | ||
159 | { | ||
160 | struct bheap_node *min; | ||
161 | node->child = NULL; | ||
162 | node->parent = NULL; | ||
163 | node->next = NULL; | ||
164 | node->degree = 0; | ||
165 | if (heap->min && higher_prio(node, heap->min)) { | ||
166 | /* swap min cache */ | ||
167 | min = heap->min; | ||
168 | min->child = NULL; | ||
169 | min->parent = NULL; | ||
170 | min->next = NULL; | ||
171 | min->degree = 0; | ||
172 | __bheap_union(higher_prio, heap, min); | ||
173 | heap->min = node; | ||
174 | } else | ||
175 | __bheap_union(higher_prio, heap, node); | ||
176 | } | ||
177 | |||
178 | void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap) | ||
179 | { | ||
180 | struct bheap_node* min; | ||
181 | if (heap->min) { | ||
182 | min = heap->min; | ||
183 | heap->min = NULL; | ||
184 | bheap_insert(higher_prio, heap, min); | ||
185 | } | ||
186 | } | ||
187 | |||
188 | /* merge addition into target */ | ||
189 | void bheap_union(bheap_prio_t higher_prio, | ||
190 | struct bheap* target, struct bheap* addition) | ||
191 | { | ||
192 | /* first insert any cached minima, if necessary */ | ||
193 | bheap_uncache_min(higher_prio, target); | ||
194 | bheap_uncache_min(higher_prio, addition); | ||
195 | __bheap_union(higher_prio, target, addition->head); | ||
196 | /* this is a destructive merge */ | ||
197 | addition->head = NULL; | ||
198 | } | ||
199 | |||
200 | struct bheap_node* bheap_peek(bheap_prio_t higher_prio, | ||
201 | struct bheap* heap) | ||
202 | { | ||
203 | if (!heap->min) | ||
204 | heap->min = __bheap_extract_min(higher_prio, heap); | ||
205 | return heap->min; | ||
206 | } | ||
207 | |||
208 | struct bheap_node* bheap_take(bheap_prio_t higher_prio, | ||
209 | struct bheap* heap) | ||
210 | { | ||
211 | struct bheap_node *node; | ||
212 | if (!heap->min) | ||
213 | heap->min = __bheap_extract_min(higher_prio, heap); | ||
214 | node = heap->min; | ||
215 | heap->min = NULL; | ||
216 | if (node) | ||
217 | node->degree = NOT_IN_HEAP; | ||
218 | return node; | ||
219 | } | ||
220 | |||
221 | int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node) | ||
222 | { | ||
223 | struct bheap_node *parent; | ||
224 | struct bheap_node** tmp_ref; | ||
225 | void* tmp; | ||
226 | |||
227 | /* bubble up */ | ||
228 | parent = node->parent; | ||
229 | while (parent && higher_prio(node, parent)) { | ||
230 | /* swap parent and node */ | ||
231 | tmp = parent->value; | ||
232 | parent->value = node->value; | ||
233 | node->value = tmp; | ||
234 | /* swap references */ | ||
235 | *(parent->ref) = node; | ||
236 | *(node->ref) = parent; | ||
237 | tmp_ref = parent->ref; | ||
238 | parent->ref = node->ref; | ||
239 | node->ref = tmp_ref; | ||
240 | /* step up */ | ||
241 | node = parent; | ||
242 | parent = node->parent; | ||
243 | } | ||
244 | |||
245 | return parent != NULL; | ||
246 | } | ||
247 | |||
248 | void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, | ||
249 | struct bheap_node* node) | ||
250 | { | ||
251 | struct bheap_node *parent, *prev, *pos; | ||
252 | struct bheap_node** tmp_ref; | ||
253 | void* tmp; | ||
254 | |||
255 | if (heap->min != node) { | ||
256 | /* bubble up */ | ||
257 | parent = node->parent; | ||
258 | while (parent) { | ||
259 | /* swap parent and node */ | ||
260 | tmp = parent->value; | ||
261 | parent->value = node->value; | ||
262 | node->value = tmp; | ||
263 | /* swap references */ | ||
264 | *(parent->ref) = node; | ||
265 | *(node->ref) = parent; | ||
266 | tmp_ref = parent->ref; | ||
267 | parent->ref = node->ref; | ||
268 | node->ref = tmp_ref; | ||
269 | /* step up */ | ||
270 | node = parent; | ||
271 | parent = node->parent; | ||
272 | } | ||
273 | /* now delete: | ||
274 | * first find prev */ | ||
275 | prev = NULL; | ||
276 | pos = heap->head; | ||
277 | while (pos != node) { | ||
278 | prev = pos; | ||
279 | pos = pos->next; | ||
280 | } | ||
281 | /* we have prev, now remove node */ | ||
282 | if (prev) | ||
283 | prev->next = node->next; | ||
284 | else | ||
285 | heap->head = node->next; | ||
286 | __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); | ||
287 | } else | ||
288 | heap->min = NULL; | ||
289 | node->degree = NOT_IN_HEAP; | ||
290 | } | ||
291 | |||
292 | /* allocate a heap node for value and insert into the heap */ | ||
293 | int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, | ||
294 | void* value, int gfp_flags) | ||
295 | { | ||
296 | struct bheap_node* hn = bheap_node_alloc(gfp_flags); | ||
297 | if (likely(hn)) { | ||
298 | bheap_node_init(&hn, value); | ||
299 | bheap_insert(higher_prio, heap, hn); | ||
300 | } | ||
301 | return hn != NULL; | ||
302 | } | ||
303 | |||
304 | void* bheap_take_del(bheap_prio_t higher_prio, | ||
305 | struct bheap* heap) | ||
306 | { | ||
307 | struct bheap_node* hn = bheap_take(higher_prio, heap); | ||
308 | void* ret = NULL; | ||
309 | if (hn) { | ||
310 | ret = hn->value; | ||
311 | bheap_node_free(hn); | ||
312 | } | ||
313 | return ret; | ||
314 | } | ||
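litmus/bheap.c implements a binomial heap with a lazily maintained minimum: bheap_peek() extracts the minimum once and caches it in heap->min, bheap_insert() may displace the cached node back into the heap, and bheap_decrease()/bheap_delete() bubble a node up through its back-reference (->ref). A brief usage sketch follows; the integer ordering function is purely illustrative (plugins pass comparators such as edf_ready_order()), and bheap_node_alloc()/bheap_node_free() are assumed to be defined elsewhere in litmus/, as bheap_add() and bheap_take_del() already depend on them.

#include <linux/gfp.h>
#include <litmus/bheap.h>

/* Illustrative ordering: a smaller value means a higher priority. */
static int demo_higher_prio(struct bheap_node *a, struct bheap_node *b)
{
	return (long) a->value < (long) b->value;
}

static void bheap_demo(void)
{
	struct bheap heap;
	struct bheap_node *node;

	bheap_init(&heap);

	/* bheap_add() allocates a node, initializes it, and inserts it. */
	bheap_add(demo_higher_prio, &heap, (void *) 3L, GFP_ATOMIC);
	bheap_add(demo_higher_prio, &heap, (void *) 1L, GFP_ATOMIC);
	bheap_add(demo_higher_prio, &heap, (void *) 2L, GFP_ATOMIC);

	node = bheap_peek(demo_higher_prio, &heap); /* caches the node holding 1 */
	node = bheap_take(demo_higher_prio, &heap); /* removes it (degree = NOT_IN_HEAP) */
	(void) node;
}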
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c new file mode 100644 index 000000000000..6677a67cc945 --- /dev/null +++ b/litmus/ctrldev.c | |||
@@ -0,0 +1,150 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/mm.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/miscdevice.h> | ||
5 | #include <linux/module.h> | ||
6 | |||
7 | #include <litmus/litmus.h> | ||
8 | |||
9 | /* only one page for now, but we might want to add an RO version at some point */ | ||
10 | |||
11 | #define CTRL_NAME "litmus/ctrl" | ||
12 | |||
13 | /* allocate t->rt_param.ctrl_page*/ | ||
14 | static int alloc_ctrl_page(struct task_struct *t) | ||
15 | { | ||
16 | int err = 0; | ||
17 | |||
18 | /* only allocate if the task doesn't have one yet */ | ||
19 | if (!tsk_rt(t)->ctrl_page) { | ||
20 | tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL); | ||
21 | if (!tsk_rt(t)->ctrl_page) | ||
22 | err = -ENOMEM; | ||
23 | /* will get de-allocated in task teardown */ | ||
24 | TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__, | ||
25 | tsk_rt(t)->ctrl_page); | ||
26 | } | ||
27 | return err; | ||
28 | } | ||
29 | |||
30 | static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma) | ||
31 | { | ||
32 | int err; | ||
33 | unsigned long pfn; | ||
34 | |||
35 | struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page); | ||
36 | |||
37 | /* Increase ref count. Is decreased when vma is destroyed. */ | ||
38 | get_page(ctrl); | ||
39 | |||
40 | /* compute page frame number */ | ||
41 | pfn = page_to_pfn(ctrl); | ||
42 | |||
43 | TRACE_CUR(CTRL_NAME | ||
44 | ": mapping %p (pfn:%lx, %lx) to 0x%lx (prot:%lx)\n", | ||
45 | tsk_rt(t)->ctrl_page, pfn, page_to_pfn(ctrl), vma->vm_start, | ||
46 | vma->vm_page_prot); | ||
47 | |||
48 | /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise | ||
49 | * userspace actually gets a copy-on-write page. */ | ||
50 | err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED); | ||
51 | |||
52 | if (err) | ||
53 | TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err); | ||
54 | |||
55 | return err; | ||
56 | } | ||
57 | |||
58 | static void litmus_ctrl_vm_close(struct vm_area_struct* vma) | ||
59 | { | ||
60 | TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__, | ||
61 | vma->vm_flags, vma->vm_page_prot); | ||
62 | |||
63 | TRACE_CUR(CTRL_NAME | ||
64 | ": %p:%p vma:%p vma->vm_private_data:%p closed by %s/%d.\n", | ||
65 | (void*) vma->vm_start, (void*) vma->vm_end, vma, | ||
66 | vma->vm_private_data, current->comm, | ||
67 | current->pid); | ||
68 | } | ||
69 | |||
70 | static int litmus_ctrl_vm_fault(struct vm_area_struct* vma, | ||
71 | struct vm_fault* vmf) | ||
72 | { | ||
73 | /* This function should never be called, since | ||
74 | * all pages should have been mapped by mmap() | ||
75 | * already. */ | ||
76 | TRACE_CUR("%s flags=0x%x\n", __FUNCTION__, vma->vm_flags); | ||
77 | |||
78 | /* nope, you only get one page */ | ||
79 | return VM_FAULT_SIGBUS; | ||
80 | } | ||
81 | |||
82 | static struct vm_operations_struct litmus_ctrl_vm_ops = { | ||
83 | .close = litmus_ctrl_vm_close, | ||
84 | .fault = litmus_ctrl_vm_fault, | ||
85 | }; | ||
86 | |||
87 | static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma) | ||
88 | { | ||
89 | int err = 0; | ||
90 | |||
91 | /* first make sure mapper knows what he's doing */ | ||
92 | |||
93 | /* you can only get one page */ | ||
94 | if (vma->vm_end - vma->vm_start != PAGE_SIZE) | ||
95 | return -EINVAL; | ||
96 | |||
97 | /* you can only map the "first" page */ | ||
98 | if (vma->vm_pgoff != 0) | ||
99 | return -EINVAL; | ||
100 | |||
101 | /* you can't share it with anyone */ | ||
102 | if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) | ||
103 | return -EINVAL; | ||
104 | |||
105 | vma->vm_ops = &litmus_ctrl_vm_ops; | ||
106 | /* this mapping should not be kept across forks, | ||
107 | * and cannot be expanded */ | ||
108 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; | ||
109 | |||
110 | err = alloc_ctrl_page(current); | ||
111 | if (!err) | ||
112 | err = map_ctrl_page(current, vma); | ||
113 | |||
114 | TRACE_CUR("%s flags=0x%x prot=0x%lx\n", | ||
115 | __FUNCTION__, vma->vm_flags, vma->vm_page_prot); | ||
116 | |||
117 | return err; | ||
118 | } | ||
119 | |||
120 | static struct file_operations litmus_ctrl_fops = { | ||
121 | .owner = THIS_MODULE, | ||
122 | .mmap = litmus_ctrl_mmap, | ||
123 | }; | ||
124 | |||
125 | static struct miscdevice litmus_ctrl_dev = { | ||
126 | .name = CTRL_NAME, | ||
127 | .minor = MISC_DYNAMIC_MINOR, | ||
128 | .fops = &litmus_ctrl_fops, | ||
129 | }; | ||
130 | |||
131 | static int __init init_litmus_ctrl_dev(void) | ||
132 | { | ||
133 | int err; | ||
134 | |||
135 | BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE); | ||
136 | |||
137 | printk("Initializing LITMUS^RT control device.\n"); | ||
138 | err = misc_register(&litmus_ctrl_dev); | ||
139 | if (err) | ||
140 | printk("Could not allocate %s device (%d).\n", CTRL_NAME, err); | ||
141 | return err; | ||
142 | } | ||
143 | |||
144 | static void __exit exit_litmus_ctrl_dev(void) | ||
145 | { | ||
146 | misc_deregister(&litmus_ctrl_dev); | ||
147 | } | ||
148 | |||
149 | module_init(init_litmus_ctrl_dev); | ||
150 | module_exit(exit_litmus_ctrl_dev); | ||
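litmus_ctrl_mmap() accepts exactly one kind of mapping: a single page at offset 0 that is not shared (VM_SHARED/VM_MAYSHARE are rejected, so user space must pass MAP_PRIVATE; the page is still mapped with PAGE_SHARED through remap_pfn_range(), so writes reach the real page rather than a copy-on-write duplicate). A minimal user-space sketch is below; the device path /dev/litmus/ctrl and the fact that the page contents follow struct control_page (declared in a header not shown in this patch) are assumptions.

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

/* Returns a pointer to the calling task's control page, or NULL on error.
 * The page layout (struct control_page) comes from the kernel headers and
 * is treated as opaque here. */
static void *map_litmus_ctrl_page(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	void *page;
	int fd = open("/dev/litmus/ctrl", O_RDWR);	/* assumed device node */

	if (fd < 0) {
		perror("open(/dev/litmus/ctrl)");
		return NULL;
	}

	/* MAP_SHARED would be rejected by litmus_ctrl_mmap(). */
	page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE, fd, 0);
	close(fd);

	return page == MAP_FAILED ? NULL : page;
}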
diff --git a/litmus/edf_common.c b/litmus/edf_common.c new file mode 100644 index 000000000000..06daec66c984 --- /dev/null +++ b/litmus/edf_common.c | |||
@@ -0,0 +1,102 @@ | |||
1 | /* | ||
2 | * kernel/edf_common.c | ||
3 | * | ||
4 | * Common functions for EDF based scheduler. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #include <litmus/edf_common.h> | ||
16 | |||
17 | /* edf_higher_prio - returns true if first has a higher EDF priority | ||
18 | * than second. Deadline ties are broken by PID. | ||
19 | * | ||
20 | * both first and second may be NULL | ||
21 | */ | ||
22 | int edf_higher_prio(struct task_struct* first, | ||
23 | struct task_struct* second) | ||
24 | { | ||
25 | struct task_struct *first_task = first; | ||
26 | struct task_struct *second_task = second; | ||
27 | |||
28 | /* There is no point in comparing a task to itself. */ | ||
29 | if (first && first == second) { | ||
30 | TRACE_TASK(first, | ||
31 | "WARNING: pointless edf priority comparison.\n"); | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | |||
36 | /* Check for inherited priorities. Change task | ||
37 | * used for comparison in such a case. | ||
38 | */ | ||
39 | if (first && first->rt_param.inh_task) | ||
40 | first_task = first->rt_param.inh_task; | ||
41 | if (second && second->rt_param.inh_task) | ||
42 | second_task = second->rt_param.inh_task; | ||
43 | |||
44 | return | ||
45 | /* it has to exist in order to have higher priority */ | ||
46 | first_task && ( | ||
47 | /* does the second task exist and is it a real-time task? If | ||
48 | * not, the first task (which is a RT task) has higher | ||
49 | * priority. | ||
50 | */ | ||
51 | !second_task || !is_realtime(second_task) || | ||
52 | |||
53 | /* is the deadline of the first task earlier? | ||
54 | * Then it has higher priority. | ||
55 | */ | ||
56 | earlier_deadline(first_task, second_task) || | ||
57 | |||
58 | /* Do we have a deadline tie? | ||
59 | * Then break by PID. | ||
60 | */ | ||
61 | (get_deadline(first_task) == get_deadline(second_task) && | ||
62 | (first_task->pid < second_task->pid || | ||
63 | |||
64 | /* If the PIDs are the same then the task with the inherited | ||
65 | * priority wins. | ||
66 | */ | ||
67 | (first_task->pid == second_task->pid && | ||
68 | !second->rt_param.inh_task)))); | ||
69 | } | ||
70 | |||
71 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
72 | { | ||
73 | return edf_higher_prio(bheap2task(a), bheap2task(b)); | ||
74 | } | ||
75 | |||
76 | void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
77 | release_jobs_t release) | ||
78 | { | ||
79 | rt_domain_init(rt, edf_ready_order, resched, release); | ||
80 | } | ||
81 | |||
82 | /* need_to_preempt - check whether the task t needs to be preempted | ||
83 | * call only with irqs disabled and with ready_lock acquired | ||
84 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
85 | */ | ||
86 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
87 | { | ||
88 | /* we need the read lock for edf_ready_queue */ | ||
89 | /* no need to preempt if there is nothing pending */ | ||
90 | if (!__jobs_pending(rt)) | ||
91 | return 0; | ||
92 | /* we need to reschedule if t doesn't exist */ | ||
93 | if (!t) | ||
94 | return 1; | ||
95 | |||
96 | /* NOTE: We cannot check for non-preemptibility since we | ||
97 | * don't know what address space we're currently in. | ||
98 | */ | ||
99 | |||
100 | /* make sure to get non-rt stuff out of the way */ | ||
101 | return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t); | ||
102 | } | ||
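As a concrete reading of edf_higher_prio(): given tasks A (deadline 10 ms, PID 42) and B (deadline 12 ms, PID 41), A has higher priority because its deadline is earlier; if both deadlines are 10 ms, the tie is broken by PID and B (PID 41) wins; and when either task has an inh_task, the comparison is carried out with the inherited task's parameters, with the final clause favouring the side that does not inherit when even the PIDs coincide.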
diff --git a/litmus/fdso.c b/litmus/fdso.c new file mode 100644 index 000000000000..85be716941d8 --- /dev/null +++ b/litmus/fdso.c | |||
@@ -0,0 +1,281 @@ | |||
1 | /* fdso.c - file descriptor attached shared objects | ||
2 | * | ||
3 | * (c) 2007 B. Brandenburg, LITMUS^RT project | ||
4 | * | ||
5 | * Notes: | ||
6 | * - object descriptor (OD) tables are not cloned during a fork. | ||
7 | * - objects are created on-demand, and freed after the last reference | ||
8 | * is dropped. | ||
9 | * - for now, object types are hard coded. | ||
10 | * - As long as we have live objects, we keep a reference to the inode. | ||
11 | */ | ||
12 | |||
13 | #include <linux/errno.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/mutex.h> | ||
16 | #include <linux/file.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | |||
19 | #include <litmus/fdso.h> | ||
20 | |||
21 | extern struct fdso_ops fmlp_sem_ops; | ||
22 | extern struct fdso_ops srp_sem_ops; | ||
23 | |||
24 | static const struct fdso_ops* fdso_ops[] = { | ||
25 | &fmlp_sem_ops, | ||
26 | &srp_sem_ops, | ||
27 | }; | ||
28 | |||
29 | static void* fdso_create(obj_type_t type) | ||
30 | { | ||
31 | if (fdso_ops[type]->create) | ||
32 | return fdso_ops[type]->create(); | ||
33 | else | ||
34 | return NULL; | ||
35 | } | ||
36 | |||
37 | static void fdso_destroy(obj_type_t type, void* obj) | ||
38 | { | ||
39 | fdso_ops[type]->destroy(obj); | ||
40 | } | ||
41 | |||
42 | static int fdso_open(struct od_table_entry* entry, void* __user config) | ||
43 | { | ||
44 | if (fdso_ops[entry->obj->type]->open) | ||
45 | return fdso_ops[entry->obj->type]->open(entry, config); | ||
46 | else | ||
47 | return 0; | ||
48 | } | ||
49 | |||
50 | static int fdso_close(struct od_table_entry* entry) | ||
51 | { | ||
52 | if (fdso_ops[entry->obj->type]->close) | ||
53 | return fdso_ops[entry->obj->type]->close(entry); | ||
54 | else | ||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | /* inode must be locked already */ | ||
59 | static struct inode_obj_id* alloc_inode_obj(struct inode* inode, | ||
60 | obj_type_t type, | ||
61 | unsigned int id) | ||
62 | { | ||
63 | struct inode_obj_id* obj; | ||
64 | void* raw_obj; | ||
65 | |||
66 | raw_obj = fdso_create(type); | ||
67 | if (!raw_obj) | ||
68 | return NULL; | ||
69 | |||
70 | obj = kmalloc(sizeof(*obj), GFP_KERNEL); | ||
71 | if (!obj) | ||
72 | return NULL; | ||
73 | INIT_LIST_HEAD(&obj->list); | ||
74 | atomic_set(&obj->count, 1); | ||
75 | obj->type = type; | ||
76 | obj->id = id; | ||
77 | obj->obj = raw_obj; | ||
78 | obj->inode = inode; | ||
79 | |||
80 | list_add(&obj->list, &inode->i_obj_list); | ||
81 | atomic_inc(&inode->i_count); | ||
82 | |||
83 | printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id); | ||
84 | return obj; | ||
85 | } | ||
86 | |||
87 | /* inode must be locked already */ | ||
88 | static struct inode_obj_id* get_inode_obj(struct inode* inode, | ||
89 | obj_type_t type, | ||
90 | unsigned int id) | ||
91 | { | ||
92 | struct list_head* pos; | ||
93 | struct inode_obj_id* obj = NULL; | ||
94 | |||
95 | list_for_each(pos, &inode->i_obj_list) { | ||
96 | obj = list_entry(pos, struct inode_obj_id, list); | ||
97 | if (obj->id == id && obj->type == type) { | ||
98 | atomic_inc(&obj->count); | ||
99 | return obj; | ||
100 | } | ||
101 | } | ||
102 | printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id); | ||
103 | return NULL; | ||
104 | } | ||
105 | |||
106 | |||
107 | static void put_inode_obj(struct inode_obj_id* obj) | ||
108 | { | ||
109 | struct inode* inode; | ||
110 | int let_go = 0; | ||
111 | |||
112 | inode = obj->inode; | ||
113 | if (atomic_dec_and_test(&obj->count)) { | ||
114 | |||
115 | mutex_lock(&inode->i_obj_mutex); | ||
116 | /* no new references can be obtained */ | ||
117 | if (!atomic_read(&obj->count)) { | ||
118 | list_del(&obj->list); | ||
119 | fdso_destroy(obj->type, obj->obj); | ||
120 | kfree(obj); | ||
121 | let_go = 1; | ||
122 | } | ||
123 | mutex_unlock(&inode->i_obj_mutex); | ||
124 | if (let_go) | ||
125 | iput(inode); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static struct od_table_entry* get_od_entry(struct task_struct* t) | ||
130 | { | ||
131 | struct od_table_entry* table; | ||
132 | int i; | ||
133 | |||
134 | |||
135 | table = t->od_table; | ||
136 | if (!table) { | ||
137 | table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS, | ||
138 | GFP_KERNEL); | ||
139 | t->od_table = table; | ||
140 | } | ||
141 | |||
142 | for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++) | ||
143 | if (!table[i].used) { | ||
144 | table[i].used = 1; | ||
145 | return table + i; | ||
146 | } | ||
147 | return NULL; | ||
148 | } | ||
149 | |||
150 | static int put_od_entry(struct od_table_entry* od) | ||
151 | { | ||
152 | put_inode_obj(od->obj); | ||
153 | od->used = 0; | ||
154 | return 0; | ||
155 | } | ||
156 | |||
157 | void exit_od_table(struct task_struct* t) | ||
158 | { | ||
159 | int i; | ||
160 | |||
161 | if (t->od_table) { | ||
162 | for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) | ||
163 | if (t->od_table[i].used) | ||
164 | put_od_entry(t->od_table + i); | ||
165 | kfree(t->od_table); | ||
166 | t->od_table = NULL; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static int do_sys_od_open(struct file* file, obj_type_t type, int id, | ||
171 | void* __user config) | ||
172 | { | ||
173 | int idx = 0, err; | ||
174 | struct inode* inode; | ||
175 | struct inode_obj_id* obj = NULL; | ||
176 | struct od_table_entry* entry; | ||
177 | |||
178 | inode = file->f_dentry->d_inode; | ||
179 | |||
180 | entry = get_od_entry(current); | ||
181 | if (!entry) | ||
182 | return -ENOMEM; | ||
183 | |||
184 | mutex_lock(&inode->i_obj_mutex); | ||
185 | obj = get_inode_obj(inode, type, id); | ||
186 | if (!obj) | ||
187 | obj = alloc_inode_obj(inode, type, id); | ||
188 | if (!obj) { | ||
189 | idx = -ENOMEM; | ||
190 | entry->used = 0; | ||
191 | } else { | ||
192 | entry->obj = obj; | ||
193 | entry->extra = NULL; | ||
194 | idx = entry - current->od_table; | ||
195 | } | ||
196 | |||
197 | mutex_unlock(&inode->i_obj_mutex); | ||
198 | |||
199 | err = fdso_open(entry, config); | ||
200 | if (err < 0) { | ||
201 | /* The class rejected the open call. | ||
202 | * We need to clean up and tell user space. | ||
203 | */ | ||
204 | put_od_entry(entry); | ||
205 | idx = err; | ||
206 | } | ||
207 | |||
208 | return idx; | ||
209 | } | ||
210 | |||
211 | |||
212 | struct od_table_entry* __od_lookup(int od) | ||
213 | { | ||
214 | struct task_struct *t = current; | ||
215 | |||
216 | if (!t->od_table) | ||
217 | return NULL; | ||
218 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
219 | return NULL; | ||
220 | if (!t->od_table[od].used) | ||
221 | return NULL; | ||
222 | return t->od_table + od; | ||
223 | } | ||
224 | |||
225 | |||
226 | asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config) | ||
227 | { | ||
228 | int ret = 0; | ||
229 | struct file* file; | ||
230 | |||
231 | /* | ||
232 | 1) get file from fd, get inode from file | ||
233 | 2) lock inode | ||
234 | 3) try to lookup object | ||
235 | 4) if not present create and enqueue object, inc inode refcnt | ||
236 | 5) increment refcnt of object | ||
237 | 6) alloc od_table_entry, setup ptrs | ||
238 | 7) unlock inode | ||
239 | 8) return offset in od_table as OD | ||
240 | */ | ||
241 | |||
242 | if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) { | ||
243 | ret = -EINVAL; | ||
244 | goto out; | ||
245 | } | ||
246 | |||
247 | file = fget(fd); | ||
248 | if (!file) { | ||
249 | ret = -EBADF; | ||
250 | goto out; | ||
251 | } | ||
252 | |||
253 | ret = do_sys_od_open(file, type, obj_id, config); | ||
254 | |||
255 | fput(file); | ||
256 | |||
257 | out: | ||
258 | return ret; | ||
259 | } | ||
260 | |||
261 | |||
262 | asmlinkage long sys_od_close(int od) | ||
263 | { | ||
264 | int ret = -EINVAL; | ||
265 | struct task_struct *t = current; | ||
266 | |||
267 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
268 | return ret; | ||
269 | |||
270 | if (!t->od_table || !t->od_table[od].used) | ||
271 | return ret; | ||
272 | |||
273 | |||
274 | /* give the class a chance to reject the close | ||
275 | */ | ||
276 | ret = fdso_close(t->od_table + od); | ||
277 | if (ret == 0) | ||
278 | ret = put_od_entry(t->od_table + od); | ||
279 | |||
280 | return ret; | ||
281 | } | ||
diff --git a/litmus/fmlp.c b/litmus/fmlp.c new file mode 100644 index 000000000000..03fa7358d5eb --- /dev/null +++ b/litmus/fmlp.c | |||
@@ -0,0 +1,268 @@ | |||
1 | /* | ||
2 | * FMLP implementation. | ||
3 | * Much of the code here is borrowed from include/asm-i386/semaphore.h | ||
4 | */ | ||
5 | |||
6 | #include <asm/atomic.h> | ||
7 | |||
8 | #include <linux/semaphore.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/wait.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | |||
13 | #include <litmus/litmus.h> | ||
14 | #include <litmus/sched_plugin.h> | ||
15 | #include <litmus/edf_common.h> | ||
16 | |||
17 | #include <litmus/fdso.h> | ||
18 | |||
19 | #include <litmus/trace.h> | ||
20 | |||
21 | #ifdef CONFIG_FMLP | ||
22 | |||
23 | static void* create_fmlp_semaphore(void) | ||
24 | { | ||
25 | struct pi_semaphore* sem; | ||
26 | int i; | ||
27 | |||
28 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
29 | if (!sem) | ||
30 | return NULL; | ||
31 | atomic_set(&sem->count, 1); | ||
32 | sem->sleepers = 0; | ||
33 | init_waitqueue_head(&sem->wait); | ||
34 | sem->hp.task = NULL; | ||
35 | sem->holder = NULL; | ||
36 | for (i = 0; i < NR_CPUS; i++) | ||
37 | sem->hp.cpu_task[i] = NULL; | ||
38 | return sem; | ||
39 | } | ||
40 | |||
41 | static int open_fmlp_semaphore(struct od_table_entry* entry, void* __user arg) | ||
42 | { | ||
43 | if (!fmlp_active()) | ||
44 | return -EBUSY; | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | static void destroy_fmlp_semaphore(void* sem) | ||
49 | { | ||
50 | /* XXX assert invariants */ | ||
51 | kfree(sem); | ||
52 | } | ||
53 | |||
54 | struct fdso_ops fmlp_sem_ops = { | ||
55 | .create = create_fmlp_semaphore, | ||
56 | .open = open_fmlp_semaphore, | ||
57 | .destroy = destroy_fmlp_semaphore | ||
58 | }; | ||
59 | |||
60 | struct wq_pair { | ||
61 | struct task_struct* tsk; | ||
62 | struct pi_semaphore* sem; | ||
63 | }; | ||
64 | |||
65 | static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync, | ||
66 | void *key) | ||
67 | { | ||
68 | struct wq_pair* wqp = (struct wq_pair*) wait->private; | ||
69 | set_rt_flags(wqp->tsk, RT_F_EXIT_SEM); | ||
70 | litmus->inherit_priority(wqp->sem, wqp->tsk); | ||
71 | TRACE_TASK(wqp->tsk, | ||
72 | "woken up by rt_pi_wake_up() (RT_F_SEM_EXIT, PI)\n"); | ||
73 | /* point to task for default_wake_function() */ | ||
74 | wait->private = wqp->tsk; | ||
75 | default_wake_function(wait, mode, sync, key); | ||
76 | |||
77 | /* Always return true since we know that if we encountered a task | ||
78 | * that was already running the wake_up raced with the schedule in | ||
79 | * rt_pi_down(). In that case the task in rt_pi_down() will be scheduled | ||
80 | * immediately and own the lock. We must not wake up another task in | ||
81 | * any case. | ||
82 | */ | ||
83 | return 1; | ||
84 | } | ||
85 | |||
86 | /* caller is responsible for locking */ | ||
87 | int edf_set_hp_task(struct pi_semaphore *sem) | ||
88 | { | ||
89 | struct list_head *tmp, *next; | ||
90 | struct task_struct *queued; | ||
91 | int ret = 0; | ||
92 | |||
93 | sem->hp.task = NULL; | ||
94 | list_for_each_safe(tmp, next, &sem->wait.task_list) { | ||
95 | queued = ((struct wq_pair*) | ||
96 | list_entry(tmp, wait_queue_t, | ||
97 | task_list)->private)->tsk; | ||
98 | |||
99 | /* Compare task prios, find high prio task. */ | ||
100 | if (edf_higher_prio(queued, sem->hp.task)) { | ||
101 | sem->hp.task = queued; | ||
102 | ret = 1; | ||
103 | } | ||
104 | } | ||
105 | return ret; | ||
106 | } | ||
107 | |||
108 | /* caller is responsible for locking */ | ||
109 | int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu) | ||
110 | { | ||
111 | struct list_head *tmp, *next; | ||
112 | struct task_struct *queued; | ||
113 | int ret = 0; | ||
114 | |||
115 | sem->hp.cpu_task[cpu] = NULL; | ||
116 | list_for_each_safe(tmp, next, &sem->wait.task_list) { | ||
117 | queued = ((struct wq_pair*) | ||
118 | list_entry(tmp, wait_queue_t, | ||
119 | task_list)->private)->tsk; | ||
120 | |||
121 | /* Compare task prios, find high prio task. */ | ||
122 | if (get_partition(queued) == cpu && | ||
123 | edf_higher_prio(queued, sem->hp.cpu_task[cpu])) { | ||
124 | sem->hp.cpu_task[cpu] = queued; | ||
125 | ret = 1; | ||
126 | } | ||
127 | } | ||
128 | return ret; | ||
129 | } | ||
130 | |||
131 | static int do_fmlp_down(struct pi_semaphore* sem) | ||
132 | { | ||
133 | unsigned long flags; | ||
134 | struct task_struct *tsk = current; | ||
135 | struct wq_pair pair; | ||
136 | int suspended = 1; | ||
137 | wait_queue_t wait = { | ||
138 | .private = &pair, | ||
139 | .func = rt_pi_wake_up, | ||
140 | .task_list = {NULL, NULL} | ||
141 | }; | ||
142 | |||
143 | pair.tsk = tsk; | ||
144 | pair.sem = sem; | ||
145 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
146 | |||
147 | if (atomic_dec_return(&sem->count) < 0 || | ||
148 | waitqueue_active(&sem->wait)) { | ||
149 | /* we need to suspend */ | ||
150 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
151 | add_wait_queue_exclusive_locked(&sem->wait, &wait); | ||
152 | |||
153 | TRACE_CUR("suspends on PI lock %p\n", sem); | ||
154 | litmus->pi_block(sem, tsk); | ||
155 | |||
156 | /* release lock before sleeping */ | ||
157 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
158 | |||
159 | TS_PI_DOWN_END; | ||
160 | preempt_enable_no_resched(); | ||
161 | |||
162 | |||
163 | /* we depend on the FIFO order. | ||
164 | * Thus, we don't need to recheck when we wake up, we | ||
165 | * are guaranteed to have the lock since there is only one | ||
166 | * wake up per release | ||
167 | */ | ||
168 | schedule(); | ||
169 | |||
170 | TRACE_CUR("woke up, now owns PI lock %p\n", sem); | ||
171 | |||
172 | /* try_to_wake_up() set our state to TASK_RUNNING, | ||
173 | * all we need to do is to remove our wait queue entry | ||
174 | */ | ||
175 | remove_wait_queue(&sem->wait, &wait); | ||
176 | } else { | ||
177 | /* no priority inheritance necessary, since there are no queued | ||
178 | * tasks. | ||
179 | */ | ||
180 | suspended = 0; | ||
181 | TRACE_CUR("acquired PI lock %p, no contention\n", sem); | ||
182 | sem->holder = tsk; | ||
183 | |||
184 | /* don't know if we're global or partitioned. */ | ||
185 | sem->hp.task = tsk; | ||
186 | sem->hp.cpu_task[get_partition(tsk)] = tsk; | ||
187 | |||
188 | litmus->inherit_priority(sem, tsk); | ||
189 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
190 | } | ||
191 | return suspended; | ||
192 | } | ||
193 | |||
194 | static void do_fmlp_up(struct pi_semaphore* sem) | ||
195 | { | ||
196 | unsigned long flags; | ||
197 | |||
198 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
199 | |||
200 | TRACE_CUR("releases PI lock %p\n", sem); | ||
201 | litmus->return_priority(sem); | ||
202 | sem->holder = NULL; | ||
203 | if (atomic_inc_return(&sem->count) < 1) | ||
204 | /* there is a task queued */ | ||
205 | wake_up_locked(&sem->wait); | ||
206 | |||
207 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
208 | } | ||
209 | |||
210 | asmlinkage long sys_fmlp_down(int sem_od) | ||
211 | { | ||
212 | long ret = 0; | ||
213 | struct pi_semaphore * sem; | ||
214 | int suspended = 0; | ||
215 | |||
216 | preempt_disable(); | ||
217 | TS_PI_DOWN_START; | ||
218 | |||
219 | sem = lookup_fmlp_sem(sem_od); | ||
220 | if (sem) | ||
221 | suspended = do_fmlp_down(sem); | ||
222 | else | ||
223 | ret = -EINVAL; | ||
224 | |||
225 | if (!suspended) { | ||
226 | TS_PI_DOWN_END; | ||
227 | preempt_enable(); | ||
228 | } | ||
229 | |||
230 | return ret; | ||
231 | } | ||
232 | |||
233 | asmlinkage long sys_fmlp_up(int sem_od) | ||
234 | { | ||
235 | long ret = 0; | ||
236 | struct pi_semaphore * sem; | ||
237 | |||
238 | preempt_disable(); | ||
239 | TS_PI_UP_START; | ||
240 | |||
241 | sem = lookup_fmlp_sem(sem_od); | ||
242 | if (sem) | ||
243 | do_fmlp_up(sem); | ||
244 | else | ||
245 | ret = -EINVAL; | ||
246 | |||
247 | |||
248 | TS_PI_UP_END; | ||
249 | preempt_enable(); | ||
250 | |||
251 | return ret; | ||
252 | } | ||
253 | |||
254 | #else | ||
255 | |||
256 | struct fdso_ops fmlp_sem_ops = {}; | ||
257 | |||
258 | asmlinkage long sys_fmlp_down(int sem_od) | ||
259 | { | ||
260 | return -ENOSYS; | ||
261 | } | ||
262 | |||
263 | asmlinkage long sys_fmlp_up(int sem_od) | ||
264 | { | ||
265 | return -ENOSYS; | ||
266 | } | ||
267 | |||
268 | #endif | ||
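From user space, an FMLP semaphore is reached through the fdso layer: a regular file serves as the namespace, sys_od_open() attaches the semaphore object to its inode and returns an object descriptor, and sys_fmlp_down()/sys_fmlp_up() then acquire and release the lock with FIFO queueing and priority inheritance. The sketch below uses hypothetical liblitmus-style wrappers around these syscalls and assumes FMLP_SEM is the object type index that selects fmlp_sem_ops; neither the wrappers nor the constant appear in this patch.

#include <fcntl.h>
#include <unistd.h>

/* Hypothetical wrappers around sys_od_open(), sys_fmlp_down(),
 * sys_fmlp_up() and sys_od_close(); prototypes only. */
extern int od_open(int fd, int obj_type, int obj_id, void *config);
extern int od_close(int od);
extern int fmlp_down(int od);
extern int fmlp_up(int od);

#define FMLP_SEM 0	/* assumption: index of fmlp_sem_ops in fdso_ops[] */

static void fmlp_critical_section(void)
{
	int fd = open("/tmp/fmlp-ns", O_RDONLY | O_CREAT, 0666);
	int od = od_open(fd, FMLP_SEM, 0 /* obj_id */, NULL);

	fmlp_down(od);	/* may suspend; see do_fmlp_down() */
	/* ... outermost critical section ... */
	fmlp_up(od);	/* wakes at most one FIFO-queued waiter */

	od_close(od);
	close(fd);
}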
diff --git a/litmus/ft_event.c b/litmus/ft_event.c new file mode 100644 index 000000000000..6084b6d6b364 --- /dev/null +++ b/litmus/ft_event.c | |||
@@ -0,0 +1,43 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include <litmus/feather_trace.h> | ||
4 | |||
5 | #ifndef __ARCH_HAS_FEATHER_TRACE | ||
6 | /* provide dummy implementation */ | ||
7 | |||
8 | int ft_events[MAX_EVENTS]; | ||
9 | |||
10 | int ft_enable_event(unsigned long id) | ||
11 | { | ||
12 | if (id < MAX_EVENTS) { | ||
13 | ft_events[id]++; | ||
14 | return 1; | ||
15 | } else | ||
16 | return 0; | ||
17 | } | ||
18 | |||
19 | int ft_disable_event(unsigned long id) | ||
20 | { | ||
21 | if (id < MAX_EVENTS && ft_events[id]) { | ||
22 | ft_events[id]--; | ||
23 | return 1; | ||
24 | } else | ||
25 | return 0; | ||
26 | } | ||
27 | |||
28 | int ft_disable_all_events(void) | ||
29 | { | ||
30 | int i; | ||
31 | |||
32 | for (i = 0; i < MAX_EVENTS; i++) | ||
33 | ft_events[i] = 0; | ||
34 | |||
35 | return MAX_EVENTS; | ||
36 | } | ||
37 | |||
38 | int ft_is_event_enabled(unsigned long id) | ||
39 | { | ||
40 | return id < MAX_EVENTS && ft_events[id]; | ||
41 | } | ||
42 | |||
43 | #endif | ||
diff --git a/litmus/ftdev.c b/litmus/ftdev.c new file mode 100644 index 000000000000..8b2d74d816a2 --- /dev/null +++ b/litmus/ftdev.c | |||
@@ -0,0 +1,359 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/fs.h> | ||
3 | #include <linux/cdev.h> | ||
4 | #include <asm/uaccess.h> | ||
5 | #include <linux/module.h> | ||
6 | |||
7 | #include <litmus/litmus.h> | ||
8 | #include <litmus/feather_trace.h> | ||
9 | #include <litmus/ftdev.h> | ||
10 | |||
11 | struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size) | ||
12 | { | ||
13 | struct ft_buffer* buf; | ||
14 | size_t total = (size + 1) * count; | ||
15 | char* mem; | ||
16 | int order = 0, pages = 1; | ||
17 | |||
18 | buf = kmalloc(sizeof(*buf), GFP_KERNEL); | ||
19 | if (!buf) | ||
20 | return NULL; | ||
21 | |||
22 | total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0); | ||
23 | while (pages < total) { | ||
24 | order++; | ||
25 | pages *= 2; | ||
26 | } | ||
27 | |||
28 | mem = (char*) __get_free_pages(GFP_KERNEL, order); | ||
29 | if (!mem) { | ||
30 | kfree(buf); | ||
31 | return NULL; | ||
32 | } | ||
33 | |||
34 | if (!init_ft_buffer(buf, count, size, | ||
35 | mem + (count * size), /* markers at the end */ | ||
36 | mem)) { /* buffer objects */ | ||
37 | free_pages((unsigned long) mem, order); | ||
38 | kfree(buf); | ||
39 | return NULL; | ||
40 | } | ||
41 | return buf; | ||
42 | } | ||
43 | |||
44 | void free_ft_buffer(struct ft_buffer* buf) | ||
45 | { | ||
46 | int order = 0, pages = 1; | ||
47 | size_t total; | ||
48 | |||
49 | if (buf) { | ||
50 | total = (buf->slot_size + 1) * buf->slot_count; | ||
51 | total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0); | ||
52 | while (pages < total) { | ||
53 | order++; | ||
54 | pages *= 2; | ||
55 | } | ||
56 | free_pages((unsigned long) buf->buffer_mem, order); | ||
57 | kfree(buf); | ||
58 | } | ||
59 | } | ||
60 | |||
61 | struct ftdev_event { | ||
62 | int id; | ||
63 | struct ftdev_event* next; | ||
64 | }; | ||
65 | |||
66 | static int activate(struct ftdev_event** chain, int id) | ||
67 | { | ||
68 | struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL); | ||
69 | if (ev) { | ||
70 | printk(KERN_INFO | ||
71 | "Enabling feather-trace event %d.\n", (int) id); | ||
72 | ft_enable_event(id); | ||
73 | ev->id = id; | ||
74 | ev->next = *chain; | ||
75 | *chain = ev; | ||
76 | } | ||
77 | return ev ? 0 : -ENOMEM; | ||
78 | } | ||
79 | |||
80 | static void deactivate(struct ftdev_event** chain, int id) | ||
81 | { | ||
82 | struct ftdev_event **cur = chain; | ||
83 | struct ftdev_event *nxt; | ||
84 | while (*cur) { | ||
85 | if ((*cur)->id == id) { | ||
86 | nxt = (*cur)->next; | ||
87 | kfree(*cur); | ||
88 | *cur = nxt; | ||
89 | printk(KERN_INFO | ||
90 | "Disabling feather-trace event %d.\n", (int) id); | ||
91 | ft_disable_event(id); | ||
92 | break; | ||
93 | } | ||
94 | cur = &(*cur)->next; | ||
95 | } | ||
96 | } | ||
97 | |||
98 | static int ftdev_open(struct inode *in, struct file *filp) | ||
99 | { | ||
100 | struct ftdev* ftdev; | ||
101 | struct ftdev_minor* ftdm; | ||
102 | unsigned int buf_idx = iminor(in); | ||
103 | int err = 0; | ||
104 | |||
105 | ftdev = container_of(in->i_cdev, struct ftdev, cdev); | ||
106 | |||
107 | if (buf_idx >= ftdev->minor_cnt) { | ||
108 | err = -ENODEV; | ||
109 | goto out; | ||
110 | } | ||
111 | if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx))) | ||
112 | goto out; | ||
113 | |||
114 | ftdm = ftdev->minor + buf_idx; | ||
115 | filp->private_data = ftdm; | ||
116 | |||
117 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
118 | err = -ERESTARTSYS; | ||
119 | goto out; | ||
120 | } | ||
121 | |||
122 | if (!ftdm->readers && ftdev->alloc) | ||
123 | err = ftdev->alloc(ftdev, buf_idx); | ||
124 | if (0 == err) | ||
125 | ftdm->readers++; | ||
126 | |||
127 | mutex_unlock(&ftdm->lock); | ||
128 | out: | ||
129 | return err; | ||
130 | } | ||
131 | |||
132 | static int ftdev_release(struct inode *in, struct file *filp) | ||
133 | { | ||
134 | struct ftdev* ftdev; | ||
135 | struct ftdev_minor* ftdm; | ||
136 | unsigned int buf_idx = iminor(in); | ||
137 | int err = 0; | ||
138 | |||
139 | ftdev = container_of(in->i_cdev, struct ftdev, cdev); | ||
140 | |||
141 | if (buf_idx >= ftdev->minor_cnt) { | ||
142 | err = -ENODEV; | ||
143 | goto out; | ||
144 | } | ||
145 | ftdm = ftdev->minor + buf_idx; | ||
146 | |||
147 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
148 | err = -ERESTARTSYS; | ||
149 | goto out; | ||
150 | } | ||
151 | |||
152 | if (ftdm->readers == 1) { | ||
153 | while (ftdm->events) | ||
154 | deactivate(&ftdm->events, ftdm->events->id); | ||
155 | |||
156 | /* wait for any pending events to complete */ | ||
157 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
158 | schedule_timeout(HZ); | ||
159 | |||
160 | printk(KERN_ALERT "Failed trace writes: %u\n", | ||
161 | ftdm->buf->failed_writes); | ||
162 | |||
163 | if (ftdev->free) | ||
164 | ftdev->free(ftdev, buf_idx); | ||
165 | } | ||
166 | |||
167 | ftdm->readers--; | ||
168 | mutex_unlock(&ftdm->lock); | ||
169 | out: | ||
170 | return err; | ||
171 | } | ||
172 | |||
173 | /* based on ft_buffer_read | ||
174 | * @returns < 0 : page fault | ||
175 | * = 0 : no data available | ||
176 | * = 1 : one slot copied | ||
177 | */ | ||
178 | static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest) | ||
179 | { | ||
180 | unsigned int idx; | ||
181 | int err = 0; | ||
182 | if (buf->free_count != buf->slot_count) { | ||
183 | /* data available */ | ||
184 | idx = buf->read_idx % buf->slot_count; | ||
185 | if (buf->slots[idx] == SLOT_READY) { | ||
186 | err = copy_to_user(dest, ((char*) buf->buffer_mem) + | ||
187 | idx * buf->slot_size, | ||
188 | buf->slot_size); | ||
189 | if (err == 0) { | ||
190 | /* copy ok */ | ||
191 | buf->slots[idx] = SLOT_FREE; | ||
192 | buf->read_idx++; | ||
193 | fetch_and_inc(&buf->free_count); | ||
194 | err = 1; | ||
195 | } | ||
196 | } | ||
197 | } | ||
198 | return err; | ||
199 | } | ||
200 | |||
201 | static ssize_t ftdev_read(struct file *filp, | ||
202 | char __user *to, size_t len, loff_t *f_pos) | ||
203 | { | ||
204 | /* we ignore f_pos, this is strictly sequential */ | ||
205 | |||
206 | ssize_t err = 0; | ||
207 | size_t chunk; | ||
208 | int copied; | ||
209 | struct ftdev_minor* ftdm = filp->private_data; | ||
210 | |||
211 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
212 | err = -ERESTARTSYS; | ||
213 | goto out; | ||
214 | } | ||
215 | |||
216 | |||
217 | chunk = ftdm->buf->slot_size; | ||
218 | while (len >= chunk) { | ||
219 | copied = ft_buffer_copy_to_user(ftdm->buf, to); | ||
220 | if (copied == 1) { | ||
221 | len -= chunk; | ||
222 | to += chunk; | ||
223 | err += chunk; | ||
224 | } else if (err == 0 && copied == 0 && ftdm->events) { | ||
225 | /* Only wait if there are any events enabled and only | ||
226 | * if we haven't copied some data yet. We cannot wait | ||
227 | * here with copied data because that data would get | ||
228 | * lost if the task is interrupted (e.g., killed). | ||
229 | */ | ||
230 | set_current_state(TASK_INTERRUPTIBLE); | ||
231 | schedule_timeout(50); | ||
232 | if (signal_pending(current)) { | ||
233 | if (err == 0) | ||
234 | /* nothing read yet, signal problem */ | ||
235 | err = -ERESTARTSYS; | ||
236 | break; | ||
237 | } | ||
238 | } else if (copied < 0) { | ||
239 | /* page fault */ | ||
240 | err = copied; | ||
241 | break; | ||
242 | } else | ||
243 | /* nothing left to get, return to user space */ | ||
244 | break; | ||
245 | } | ||
246 | mutex_unlock(&ftdm->lock); | ||
247 | out: | ||
248 | return err; | ||
249 | } | ||
250 | |||
251 | typedef uint32_t cmd_t; | ||
252 | |||
253 | static ssize_t ftdev_write(struct file *filp, const char __user *from, | ||
254 | size_t len, loff_t *f_pos) | ||
255 | { | ||
256 | struct ftdev_minor* ftdm = filp->private_data; | ||
257 | ssize_t err = -EINVAL; | ||
258 | cmd_t cmd; | ||
259 | cmd_t id; | ||
260 | |||
261 | if (len % sizeof(cmd) || len < 2 * sizeof(cmd)) | ||
262 | goto out; | ||
263 | |||
264 | if (copy_from_user(&cmd, from, sizeof(cmd))) { | ||
265 | err = -EFAULT; | ||
266 | goto out; | ||
267 | } | ||
268 | len -= sizeof(cmd); | ||
269 | from += sizeof(cmd); | ||
270 | |||
271 | if (cmd != FTDEV_ENABLE_CMD && cmd != FTDEV_DISABLE_CMD) | ||
272 | goto out; | ||
273 | |||
274 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
275 | err = -ERESTARTSYS; | ||
276 | goto out; | ||
277 | } | ||
278 | |||
279 | err = sizeof(cmd); | ||
280 | while (len) { | ||
281 | if (copy_from_user(&id, from, sizeof(cmd))) { | ||
282 | err = -EFAULT; | ||
283 | goto out_unlock; | ||
284 | } | ||
285 | /* FIXME: check id against list of acceptable events */ | ||
286 | len -= sizeof(cmd); | ||
287 | from += sizeof(cmd); | ||
288 | if (cmd == FTDEV_DISABLE_CMD) | ||
289 | deactivate(&ftdm->events, id); | ||
290 | else if (activate(&ftdm->events, id) != 0) { | ||
291 | err = -ENOMEM; | ||
292 | goto out_unlock; | ||
293 | } | ||
294 | err += sizeof(cmd); | ||
295 | } | ||
296 | |||
297 | out_unlock: | ||
298 | mutex_unlock(&ftdm->lock); | ||
299 | out: | ||
300 | return err; | ||
301 | } | ||
302 | |||
303 | struct file_operations ftdev_fops = { | ||
304 | .owner = THIS_MODULE, | ||
305 | .open = ftdev_open, | ||
306 | .release = ftdev_release, | ||
307 | .write = ftdev_write, | ||
308 | .read = ftdev_read, | ||
309 | }; | ||
310 | |||
311 | |||
312 | void ftdev_init(struct ftdev* ftdev, struct module* owner) | ||
313 | { | ||
314 | int i; | ||
315 | cdev_init(&ftdev->cdev, &ftdev_fops); | ||
316 | ftdev->cdev.owner = owner; | ||
317 | ftdev->cdev.ops = &ftdev_fops; | ||
318 | ftdev->minor_cnt = 0; | ||
319 | for (i = 0; i < MAX_FTDEV_MINORS; i++) { | ||
320 | mutex_init(&ftdev->minor[i].lock); | ||
321 | ftdev->minor[i].readers = 0; | ||
322 | ftdev->minor[i].buf = NULL; | ||
323 | ftdev->minor[i].events = NULL; | ||
324 | } | ||
325 | ftdev->alloc = NULL; | ||
326 | ftdev->free = NULL; | ||
327 | ftdev->can_open = NULL; | ||
328 | } | ||
329 | |||
330 | int register_ftdev(struct ftdev* ftdev, const char* name, int major) | ||
331 | { | ||
332 | dev_t trace_dev; | ||
333 | int error = 0; | ||
334 | |||
335 | if(major) { | ||
336 | trace_dev = MKDEV(major, 0); | ||
337 | error = register_chrdev_region(trace_dev, ftdev->minor_cnt, | ||
338 | name); | ||
339 | } else { | ||
340 | error = alloc_chrdev_region(&trace_dev, 0, ftdev->minor_cnt, | ||
341 | name); | ||
342 | major = MAJOR(trace_dev); | ||
343 | } | ||
344 | if (error) | ||
345 | { | ||
346 | printk(KERN_WARNING "ftdev(%s): " | ||
347 | "Could not register major/minor number %d/%u\n", | ||
348 | name, major, ftdev->minor_cnt); | ||
349 | return error; | ||
350 | } | ||
351 | error = cdev_add(&ftdev->cdev, trace_dev, ftdev->minor_cnt); | ||
352 | if (error) { | ||
353 | printk(KERN_WARNING "ftdev(%s): " | ||
354 | "Could not add cdev for major/minor = %d/%u.\n", | ||
355 | name, major, ftdev->minor_cnt); | ||
356 | return error; | ||
357 | } | ||
358 | return error; | ||
359 | } | ||
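The write()/read() protocol implemented by ftdev_write() and ftdev_read() above boils down to this: user space writes a stream of 32-bit words -- one command word (FTDEV_ENABLE_CMD or FTDEV_DISABLE_CMD) followed by one or more event IDs -- and then reads back whole slots of slot_size bytes. A minimal, hedged user-space sketch follows; the numeric command value, the event ID, and the device path are assumptions that come from litmus/ftdev.h and the device setup, not from this hunk.

	/* Hedged user-space sketch of the ftdev protocol above.
	 * The command value, the event ID, and the device path are assumptions. */
	#include <stdint.h>
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		uint32_t cmd[2] = { 1 /* FTDEV_ENABLE_CMD (assumed value) */,
				    501 /* some event ID */ };
		char slot[4096];        /* must be at least the minor's slot_size */
		ssize_t n;
		int fd = open("/dev/ft_trace0", O_RDWR); /* device path is an assumption */

		if (fd < 0)
			return 1;
		/* ftdev_write(): one command word followed by one or more event IDs */
		if (write(fd, cmd, sizeof(cmd)) < 0)
			return 1;
		/* ftdev_read(): returns complete slots; naps 50 jiffies while empty */
		n = read(fd, slot, sizeof(slot));
		printf("read %zd bytes\n", n);
		close(fd);              /* ftdev_release() disables leftover events */
		return 0;
	}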
diff --git a/litmus/jobs.c b/litmus/jobs.c new file mode 100644 index 000000000000..36e314625d86 --- /dev/null +++ b/litmus/jobs.c | |||
@@ -0,0 +1,43 @@ | |||
1 | /* litmus/jobs.c - common job control code | ||
2 | */ | ||
3 | |||
4 | #include <linux/sched.h> | ||
5 | |||
6 | #include <litmus/litmus.h> | ||
7 | #include <litmus/jobs.h> | ||
8 | |||
9 | void prepare_for_next_period(struct task_struct *t) | ||
10 | { | ||
11 | BUG_ON(!t); | ||
12 | /* prepare next release */ | ||
13 | t->rt_param.job_params.release = t->rt_param.job_params.deadline; | ||
14 | t->rt_param.job_params.deadline += get_rt_period(t); | ||
15 | t->rt_param.job_params.exec_time = 0; | ||
16 | /* update job sequence number */ | ||
17 | t->rt_param.job_params.job_no++; | ||
18 | |||
19 | /* don't confuse Linux */ | ||
20 | t->rt.time_slice = 1; | ||
21 | } | ||
22 | |||
23 | void release_at(struct task_struct *t, lt_t start) | ||
24 | { | ||
25 | t->rt_param.job_params.deadline = start; | ||
26 | prepare_for_next_period(t); | ||
27 | set_rt_flags(t, RT_F_RUNNING); | ||
28 | } | ||
29 | |||
30 | |||
31 | /* | ||
32 | * Deactivate current task until the beginning of the next period. | ||
33 | */ | ||
34 | long complete_job(void) | ||
35 | { | ||
36 | /* Mark that we do not execute anymore */ | ||
37 | set_rt_flags(current, RT_F_SLEEP); | ||
38 | /* Call schedule(); this will return when a new job arrives. | ||
39 | * It also takes care of preparing for the next release. | ||
40 | */ | ||
41 | schedule(); | ||
42 | return 0; | ||
43 | } | ||
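In user space, complete_job() is the back end of a plain periodic job loop: do the work for the current job, then invoke the corresponding system call and block until the next release. A hedged sketch, assuming a liblitmus-style wrapper name sleep_next_period() that does not appear in this diff:

	/* Hypothetical periodic job loop; do_one_job() and the wrapper
	 * name sleep_next_period() are assumptions, not part of this diff. */
	for (;;) {
		do_one_job();         /* application work for the current job */
		sleep_next_period();  /* wraps sys_complete_job(): sets RT_F_SLEEP,
		                       * calls schedule(), returns at the next release */
	}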
diff --git a/litmus/litmus.c b/litmus/litmus.c new file mode 100644 index 000000000000..5bf848386e1c --- /dev/null +++ b/litmus/litmus.c | |||
@@ -0,0 +1,786 @@ | |||
1 | /* | ||
2 | * litmus.c -- Implementation of the LITMUS syscalls, | ||
3 | * the LITMUS initialization code, | ||
4 | * and the procfs interface. | ||
5 | */ | ||
6 | #include <asm/uaccess.h> | ||
7 | #include <linux/uaccess.h> | ||
8 | #include <linux/sysrq.h> | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/proc_fs.h> | ||
12 | #include <linux/slab.h> | ||
13 | |||
14 | #include <litmus/litmus.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <litmus/sched_plugin.h> | ||
17 | |||
18 | #include <litmus/bheap.h> | ||
19 | |||
20 | #include <litmus/trace.h> | ||
21 | |||
22 | #include <litmus/rt_domain.h> | ||
23 | |||
24 | /* Number of RT tasks that exist in the system */ | ||
25 | atomic_t rt_task_count = ATOMIC_INIT(0); | ||
26 | static DEFINE_SPINLOCK(task_transition_lock); | ||
27 | /* synchronize plugin switching */ | ||
28 | atomic_t cannot_use_plugin = ATOMIC_INIT(0); | ||
29 | |||
30 | /* Give log messages sequential IDs. */ | ||
31 | atomic_t __log_seq_no = ATOMIC_INIT(0); | ||
32 | |||
33 | /* current master CPU for handling timer IRQs */ | ||
34 | atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); | ||
35 | |||
36 | static struct kmem_cache * bheap_node_cache; | ||
37 | extern struct kmem_cache * release_heap_cache; | ||
38 | |||
39 | struct bheap_node* bheap_node_alloc(int gfp_flags) | ||
40 | { | ||
41 | return kmem_cache_alloc(bheap_node_cache, gfp_flags); | ||
42 | } | ||
43 | |||
44 | void bheap_node_free(struct bheap_node* hn) | ||
45 | { | ||
46 | kmem_cache_free(bheap_node_cache, hn); | ||
47 | } | ||
48 | |||
49 | struct release_heap* release_heap_alloc(int gfp_flags); | ||
50 | void release_heap_free(struct release_heap* rh); | ||
51 | |||
52 | /* | ||
53 | * sys_set_rt_task_param | ||
54 | * @pid: Pid of the task whose scheduling parameters are to be changed | ||
55 | * @param: New real-time extension parameters such as the execution cost and | ||
56 | * period | ||
57 | * Syscall for manipulating a task's rt extension params | ||
58 | * Returns EFAULT if param is NULL. | ||
59 | * ESRCH if pid does not correspond | ||
60 | * to a valid task. | ||
61 | * EINVAL if either period or execution cost is <= 0 | ||
62 | * EBUSY if the task is already a real-time task | ||
63 | * 0 on success | ||
64 | * | ||
65 | * Only non-real-time tasks may be configured with this system call | ||
66 | * to avoid races with the scheduler. In practice, this means that a | ||
67 | * task's parameters must be set _before_ calling sys_prepare_rt_task() | ||
68 | * | ||
69 | * find_task_by_vpid() assumes that we are in the same namespace as the | ||
70 | * target. | ||
71 | */ | ||
72 | asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
73 | { | ||
74 | struct rt_task tp; | ||
75 | struct task_struct *target; | ||
76 | int retval = -EINVAL; | ||
77 | |||
78 | printk("Setting up rt task parameters for process %d.\n", pid); | ||
79 | |||
80 | if (pid < 0 || param == 0) { | ||
81 | goto out; | ||
82 | } | ||
83 | if (copy_from_user(&tp, param, sizeof(tp))) { | ||
84 | retval = -EFAULT; | ||
85 | goto out; | ||
86 | } | ||
87 | |||
88 | /* Task search and manipulation must be protected */ | ||
89 | read_lock_irq(&tasklist_lock); | ||
90 | if (!(target = find_task_by_vpid(pid))) { | ||
91 | retval = -ESRCH; | ||
92 | goto out_unlock; | ||
93 | } | ||
94 | |||
95 | if (is_realtime(target)) { | ||
96 | /* The task is already a real-time task. | ||
97 | * We cannot allow parameter changes at this point. | ||
98 | */ | ||
99 | retval = -EBUSY; | ||
100 | goto out_unlock; | ||
101 | } | ||
102 | |||
103 | if (tp.exec_cost <= 0) | ||
104 | goto out_unlock; | ||
105 | if (tp.period <= 0) | ||
106 | goto out_unlock; | ||
107 | if (!cpu_online(tp.cpu)) | ||
108 | goto out_unlock; | ||
109 | if (tp.period < tp.exec_cost) | ||
110 | { | ||
111 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
112 | "because wcet > period\n", pid); | ||
113 | goto out_unlock; | ||
114 | } | ||
115 | if (tp.budget_policy != NO_ENFORCEMENT && | ||
116 | tp.budget_policy != QUANTUM_ENFORCEMENT) | ||
117 | { | ||
118 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
119 | "because unsupported budget enforcement policy specified\n", pid); | ||
120 | goto out_unlock; | ||
121 | } | ||
122 | |||
123 | target->rt_param.task_params = tp; | ||
124 | |||
125 | retval = 0; | ||
126 | out_unlock: | ||
127 | read_unlock_irq(&tasklist_lock); | ||
128 | out: | ||
129 | return retval; | ||
130 | } | ||
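For orientation, the checks above imply the following caller-side setup. The field names are taken from the validation code in sys_set_rt_task_param(); the nanosecond time base and the wrapper name set_rt_task_param() are assumptions:

	/* Hypothetical caller-side sketch; units and wrapper name are assumptions. */
	struct rt_task tp = {
		.exec_cost     = 10000000ULL,    /* 10 ms worst-case execution cost */
		.period        = 100000000ULL,   /* 100 ms period; must be >= exec_cost */
		.cpu           = 0,              /* must be online */
		.budget_policy = NO_ENFORCEMENT, /* or QUANTUM_ENFORCEMENT */
	};

	/* must be issued while the target is still a best-effort (non-RT) task */
	set_rt_task_param(getpid(), &tp);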
131 | |||
132 | /* | ||
133 | * Getter of task's RT params | ||
134 | * returns EINVAL if param is NULL or pid is negative | ||
135 | * returns ESRCH if pid does not correspond to a valid task | ||
136 | * returns EFAULT if copying of parameters has failed. | ||
137 | * | ||
138 | * find_task_by_vpid() assumes that we are in the same namespace as the | ||
139 | * target. | ||
140 | */ | ||
141 | asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
142 | { | ||
143 | int retval = -EINVAL; | ||
144 | struct task_struct *source; | ||
145 | struct rt_task lp; | ||
146 | if (param == 0 || pid < 0) | ||
147 | goto out; | ||
148 | read_lock(&tasklist_lock); | ||
149 | if (!(source = find_task_by_vpid(pid))) { | ||
150 | retval = -ESRCH; | ||
151 | goto out_unlock; | ||
152 | } | ||
153 | lp = source->rt_param.task_params; | ||
154 | read_unlock(&tasklist_lock); | ||
155 | /* Do copying outside the lock */ | ||
156 | retval = | ||
157 | copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0; | ||
158 | return retval; | ||
159 | out_unlock: | ||
160 | read_unlock(&tasklist_lock); | ||
161 | out: | ||
162 | return retval; | ||
163 | |||
164 | } | ||
165 | |||
166 | /* | ||
167 | * This is the crucial function for the periodic task implementation. | ||
168 | * It checks whether the task is periodic and whether such a sleep | ||
169 | * is permitted, and then calls the plugin-specific sleep, which puts | ||
170 | * the task into a wait array. | ||
171 | * returns 0 on successful wakeup | ||
172 | * returns EPERM if current conditions do not permit such sleep | ||
173 | * returns EINVAL if current task is not able to go to sleep | ||
174 | */ | ||
175 | asmlinkage long sys_complete_job(void) | ||
176 | { | ||
177 | int retval = -EPERM; | ||
178 | if (!is_realtime(current)) { | ||
179 | retval = -EINVAL; | ||
180 | goto out; | ||
181 | } | ||
182 | /* Task with negative or zero period cannot sleep */ | ||
183 | if (get_rt_period(current) <= 0) { | ||
184 | retval = -EINVAL; | ||
185 | goto out; | ||
186 | } | ||
187 | /* The plugin has to put the task into an | ||
188 | * appropriate queue and call schedule | ||
189 | */ | ||
190 | retval = litmus->complete_job(); | ||
191 | out: | ||
192 | return retval; | ||
193 | } | ||
194 | |||
195 | /* This is an "improved" version of sys_complete_job that | ||
196 | * addresses the problem of unintentionally missing a job after | ||
197 | * an overrun. | ||
198 | * | ||
199 | * returns 0 on successful wakeup | ||
200 | * returns EPERM if current conditions do not permit such sleep | ||
201 | * returns EINVAL if current task is not able to go to sleep | ||
202 | */ | ||
203 | asmlinkage long sys_wait_for_job_release(unsigned int job) | ||
204 | { | ||
205 | int retval = -EPERM; | ||
206 | if (!is_realtime(current)) { | ||
207 | retval = -EINVAL; | ||
208 | goto out; | ||
209 | } | ||
210 | |||
211 | /* Task with negative or zero period cannot sleep */ | ||
212 | if (get_rt_period(current) <= 0) { | ||
213 | retval = -EINVAL; | ||
214 | goto out; | ||
215 | } | ||
216 | |||
217 | retval = 0; | ||
218 | |||
219 | /* first wait until we have "reached" the desired job | ||
220 | * | ||
221 | * This implementation has at least two problems: | ||
222 | * | ||
223 | * 1) It doesn't gracefully handle the wrap around of | ||
224 | * job_no. Since LITMUS is a prototype, this is not much | ||
225 | * of a problem right now. | ||
226 | * | ||
227 | * 2) It is theoretically racy if a job release occurs | ||
228 | * between checking job_no and calling sleep_next_period(). | ||
229 | * A proper solution would require adding another callback | ||
230 | * in the plugin structure and testing the condition with | ||
231 | * interrupts disabled. | ||
232 | * | ||
233 | * FIXME: At least problem 2 should be taken care of eventually. | ||
234 | */ | ||
235 | while (!retval && job > current->rt_param.job_params.job_no) | ||
236 | /* If the last job overran then job <= job_no and we | ||
237 | * don't send the task to sleep. | ||
238 | */ | ||
239 | retval = litmus->complete_job(); | ||
240 | out: | ||
241 | return retval; | ||
242 | } | ||
243 | |||
244 | /* This is a helper syscall to query the current job sequence number. | ||
245 | * | ||
246 | * returns 0 on successful query | ||
247 | * returns EPERM if task is not a real-time task. | ||
248 | * returns EFAULT if &job is not a valid pointer. | ||
249 | */ | ||
250 | asmlinkage long sys_query_job_no(unsigned int __user *job) | ||
251 | { | ||
252 | int retval = -EPERM; | ||
253 | if (is_realtime(current)) | ||
254 | retval = put_user(current->rt_param.job_params.job_no, job); | ||
255 | |||
256 | return retval; | ||
257 | } | ||
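Taken together, sys_query_job_no() and sys_wait_for_job_release() allow a task to resynchronize with its release pattern after an overrun instead of blindly sleeping one period at a time. A hedged sketch, assuming liblitmus-style wrappers get_job_no() and wait_for_job_release():

	/* Hypothetical overrun-tolerant job loop; wrapper names are assumptions. */
	unsigned int job;
	for (;;) {
		do_one_job();
		get_job_no(&job);              /* wraps sys_query_job_no()           */
		wait_for_job_release(job + 1); /* returns immediately after overruns */
	}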
258 | |||
259 | /* sys_null_call() is only used for determining raw system call | ||
260 | * overheads (kernel entry, kernel exit). It has no useful side effects. | ||
261 | * If ts is non-NULL, then the current Feather-Trace time is recorded. | ||
262 | */ | ||
263 | asmlinkage long sys_null_call(cycles_t __user *ts) | ||
264 | { | ||
265 | long ret = 0; | ||
266 | cycles_t now; | ||
267 | |||
268 | if (ts) { | ||
269 | now = get_cycles(); | ||
270 | ret = put_user(now, ts); | ||
271 | } | ||
272 | |||
273 | return ret; | ||
274 | } | ||
275 | |||
276 | /* p is a real-time task. Re-init its state as a best-effort task. */ | ||
277 | static void reinit_litmus_state(struct task_struct* p, int restore) | ||
278 | { | ||
279 | struct rt_task user_config = {}; | ||
280 | void* ctrl_page = NULL; | ||
281 | |||
282 | if (restore) { | ||
283 | /* Save user-space provided configuration data | ||
284 | * and the allocated control page. */ | ||
285 | user_config = p->rt_param.task_params; | ||
286 | ctrl_page = p->rt_param.ctrl_page; | ||
287 | } | ||
288 | |||
289 | /* We probably should not be inheriting any task's priority | ||
290 | * at this point in time. | ||
291 | */ | ||
292 | WARN_ON(p->rt_param.inh_task); | ||
293 | |||
294 | /* We need to restore the priority of the task. */ | ||
295 | // __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio); XXX why is this commented? | ||
296 | |||
297 | /* Cleanup everything else. */ | ||
298 | memset(&p->rt_param, 0, sizeof(p->rt_param)); | ||
299 | |||
300 | /* Restore preserved fields. */ | ||
301 | if (restore) { | ||
302 | p->rt_param.task_params = user_config; | ||
303 | p->rt_param.ctrl_page = ctrl_page; | ||
304 | } | ||
305 | } | ||
306 | |||
307 | long litmus_admit_task(struct task_struct* tsk) | ||
308 | { | ||
309 | long retval = 0; | ||
310 | unsigned long flags; | ||
311 | |||
312 | BUG_ON(is_realtime(tsk)); | ||
313 | |||
314 | if (get_rt_period(tsk) == 0 || | ||
315 | get_exec_cost(tsk) > get_rt_period(tsk)) { | ||
316 | TRACE_TASK(tsk, "litmus admit: invalid task parameters " | ||
317 | "(%lu, %lu)\n", | ||
318 | get_exec_cost(tsk), get_rt_period(tsk)); | ||
319 | retval = -EINVAL; | ||
320 | goto out; | ||
321 | } | ||
322 | |||
323 | if (!cpu_online(get_partition(tsk))) { | ||
324 | TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", | ||
325 | get_partition(tsk)); | ||
326 | retval = -EINVAL; | ||
327 | goto out; | ||
328 | } | ||
329 | |||
330 | INIT_LIST_HEAD(&tsk_rt(tsk)->list); | ||
331 | |||
332 | /* avoid scheduler plugin changing underneath us */ | ||
333 | spin_lock_irqsave(&task_transition_lock, flags); | ||
334 | |||
335 | /* allocate heap node for this task */ | ||
336 | tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC); | ||
337 | tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC); | ||
338 | |||
339 | if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) { | ||
340 | printk(KERN_WARNING "litmus: no more heap node memory!?\n"); | ||
341 | |||
342 | bheap_node_free(tsk_rt(tsk)->heap_node); | ||
343 | release_heap_free(tsk_rt(tsk)->rel_heap); | ||
344 | |||
345 | retval = -ENOMEM; | ||
346 | goto out_unlock; | ||
347 | } else { | ||
348 | bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); | ||
349 | } | ||
350 | |||
351 | retval = litmus->admit_task(tsk); | ||
352 | |||
353 | if (!retval) { | ||
354 | sched_trace_task_name(tsk); | ||
355 | sched_trace_task_param(tsk); | ||
356 | atomic_inc(&rt_task_count); | ||
357 | } | ||
358 | |||
359 | out_unlock: | ||
360 | spin_unlock_irqrestore(&task_transition_lock, flags); | ||
361 | out: | ||
362 | return retval; | ||
363 | } | ||
364 | |||
365 | void litmus_exit_task(struct task_struct* tsk) | ||
366 | { | ||
367 | if (is_realtime(tsk)) { | ||
368 | sched_trace_task_completion(tsk, 1); | ||
369 | |||
370 | litmus->task_exit(tsk); | ||
371 | |||
372 | BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); | ||
373 | bheap_node_free(tsk_rt(tsk)->heap_node); | ||
374 | release_heap_free(tsk_rt(tsk)->rel_heap); | ||
375 | |||
376 | atomic_dec(&rt_task_count); | ||
377 | reinit_litmus_state(tsk, 1); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | /* IPI callback to synchronize plugin switching */ | ||
382 | static void synch_on_plugin_switch(void* info) | ||
383 | { | ||
384 | while (atomic_read(&cannot_use_plugin)) | ||
385 | cpu_relax(); | ||
386 | } | ||
387 | |||
388 | /* Switching a plugin in use is tricky. | ||
389 | * We must watch out that no real-time tasks exist | ||
390 | * (and that none is created in parallel) and that the plugin is not | ||
391 | * currently in use on any processor (in theory). | ||
392 | */ | ||
393 | int switch_sched_plugin(struct sched_plugin* plugin) | ||
394 | { | ||
395 | unsigned long flags; | ||
396 | int ret = 0; | ||
397 | |||
398 | BUG_ON(!plugin); | ||
399 | |||
400 | /* forbid other cpus to use the plugin */ | ||
401 | atomic_set(&cannot_use_plugin, 1); | ||
402 | /* send IPI to force other CPUs to synch with us */ | ||
403 | smp_call_function(synch_on_plugin_switch, NULL, 0); | ||
404 | |||
405 | /* stop task transitions */ | ||
406 | spin_lock_irqsave(&task_transition_lock, flags); | ||
407 | |||
408 | /* don't switch if there are active real-time tasks */ | ||
409 | if (atomic_read(&rt_task_count) == 0) { | ||
410 | ret = litmus->deactivate_plugin(); | ||
411 | if (0 != ret) | ||
412 | goto out; | ||
413 | ret = plugin->activate_plugin(); | ||
414 | if (0 != ret) { | ||
415 | printk(KERN_INFO "Can't activate %s (%d).\n", | ||
416 | plugin->plugin_name, ret); | ||
417 | plugin = &linux_sched_plugin; | ||
418 | } | ||
419 | printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); | ||
420 | litmus = plugin; | ||
421 | } else | ||
422 | ret = -EBUSY; | ||
423 | out: | ||
424 | spin_unlock_irqrestore(&task_transition_lock, flags); | ||
425 | atomic_set(&cannot_use_plugin, 0); | ||
426 | return ret; | ||
427 | } | ||
428 | |||
429 | /* Called upon fork. | ||
430 | * p is the newly forked task. | ||
431 | */ | ||
432 | void litmus_fork(struct task_struct* p) | ||
433 | { | ||
434 | if (is_realtime(p)) | ||
435 | /* clean out any litmus related state, don't preserve anything */ | ||
436 | reinit_litmus_state(p, 0); | ||
437 | else | ||
438 | /* non-rt tasks might have ctrl_page set */ | ||
439 | tsk_rt(p)->ctrl_page = NULL; | ||
440 | |||
441 | /* od tables are never inherited across a fork */ | ||
442 | p->od_table = NULL; | ||
443 | } | ||
444 | |||
445 | /* Called upon execve(). | ||
446 | * current is doing the exec. | ||
447 | * Don't let address space specific stuff leak. | ||
448 | */ | ||
449 | void litmus_exec(void) | ||
450 | { | ||
451 | struct task_struct* p = current; | ||
452 | |||
453 | if (is_realtime(p)) { | ||
454 | WARN_ON(p->rt_param.inh_task); | ||
455 | if (tsk_rt(p)->ctrl_page) { | ||
456 | free_page((unsigned long) tsk_rt(p)->ctrl_page); | ||
457 | tsk_rt(p)->ctrl_page = NULL; | ||
458 | } | ||
459 | } | ||
460 | } | ||
461 | |||
462 | void exit_litmus(struct task_struct *dead_tsk) | ||
463 | { | ||
464 | /* We allow non-RT tasks to allocate | ||
465 | * control pages as well, so that overheads | ||
466 | * can be measured with non-RT tasks. | ||
467 | * Hence, check in any case whether the | ||
468 | * page needs to be freed. | ||
469 | */ | ||
470 | if (tsk_rt(dead_tsk)->ctrl_page) { | ||
471 | TRACE_TASK(dead_tsk, | ||
472 | "freeing ctrl_page %p\n", | ||
473 | tsk_rt(dead_tsk)->ctrl_page); | ||
474 | free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); | ||
475 | } | ||
476 | |||
477 | /* main cleanup only for RT tasks */ | ||
478 | if (is_realtime(dead_tsk)) | ||
479 | litmus_exit_task(dead_tsk); | ||
480 | } | ||
481 | |||
482 | |||
483 | #ifdef CONFIG_MAGIC_SYSRQ | ||
484 | int sys_kill(int pid, int sig); | ||
485 | |||
486 | static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty) | ||
487 | { | ||
488 | struct task_struct *t; | ||
489 | read_lock(&tasklist_lock); | ||
490 | for_each_process(t) { | ||
491 | if (is_realtime(t)) { | ||
492 | sys_kill(t->pid, SIGKILL); | ||
493 | } | ||
494 | } | ||
495 | read_unlock(&tasklist_lock); | ||
496 | } | ||
497 | |||
498 | static struct sysrq_key_op sysrq_kill_rt_tasks_op = { | ||
499 | .handler = sysrq_handle_kill_rt_tasks, | ||
500 | .help_msg = "quit-rt-tasks(X)", | ||
501 | .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks", | ||
502 | }; | ||
503 | #endif | ||
504 | |||
505 | /* in litmus/sync.c */ | ||
506 | int count_tasks_waiting_for_release(void); | ||
507 | |||
508 | static int proc_read_stats(char *page, char **start, | ||
509 | off_t off, int count, | ||
510 | int *eof, void *data) | ||
511 | { | ||
512 | int len; | ||
513 | |||
514 | len = snprintf(page, PAGE_SIZE, | ||
515 | "real-time tasks = %d\n" | ||
516 | "ready for release = %d\n", | ||
517 | atomic_read(&rt_task_count), | ||
518 | count_tasks_waiting_for_release()); | ||
519 | return len; | ||
520 | } | ||
521 | |||
522 | static int proc_read_plugins(char *page, char **start, | ||
523 | off_t off, int count, | ||
524 | int *eof, void *data) | ||
525 | { | ||
526 | int len; | ||
527 | |||
528 | len = print_sched_plugins(page, PAGE_SIZE); | ||
529 | return len; | ||
530 | } | ||
531 | |||
532 | static int proc_read_curr(char *page, char **start, | ||
533 | off_t off, int count, | ||
534 | int *eof, void *data) | ||
535 | { | ||
536 | int len; | ||
537 | |||
538 | len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name); | ||
539 | return len; | ||
540 | } | ||
541 | |||
542 | static int proc_write_curr(struct file *file, | ||
543 | const char *buffer, | ||
544 | unsigned long count, | ||
545 | void *data) | ||
546 | { | ||
547 | int len, ret; | ||
548 | char name[65]; | ||
549 | struct sched_plugin* found; | ||
550 | |||
551 | if(count > 64) | ||
552 | len = 64; | ||
553 | else | ||
554 | len = count; | ||
555 | |||
556 | if(copy_from_user(name, buffer, len)) | ||
557 | return -EFAULT; | ||
558 | |||
559 | name[len] = '\0'; | ||
560 | /* chomp name */ | ||
561 | if (len > 1 && name[len - 1] == '\n') | ||
562 | name[len - 1] = '\0'; | ||
563 | |||
564 | found = find_sched_plugin(name); | ||
565 | |||
566 | if (found) { | ||
567 | ret = switch_sched_plugin(found); | ||
568 | if (ret != 0) | ||
569 | printk(KERN_INFO "Could not switch plugin: %d\n", ret); | ||
570 | } else | ||
571 | printk(KERN_INFO "Plugin '%s' is unknown.\n", name); | ||
572 | |||
573 | return len; | ||
574 | } | ||
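In other words, the active plugin is selected by writing its name to the active_plugin file created in init_litmus_proc() below, and the switch succeeds only while no real-time tasks exist. A small user-space sketch; the plugin name "GSN-EDF" is an assumption (the plugins file lists the registered names):

	/* Hypothetical user-space fragment; the plugin name is an assumption. */
	FILE *f = fopen("/proc/litmus/active_plugin", "w");
	if (f) {
		fputs("GSN-EDF\n", f); /* rejected with -EBUSY while rt_task_count != 0 */
		fclose(f);
	}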
575 | |||
576 | static int proc_read_cluster_size(char *page, char **start, | ||
577 | off_t off, int count, | ||
578 | int *eof, void *data) | ||
579 | { | ||
580 | int len; | ||
581 | if (cluster_cache_index == 2) | ||
582 | len = snprintf(page, PAGE_SIZE, "L2\n"); | ||
583 | else if (cluster_cache_index == 3) | ||
584 | len = snprintf(page, PAGE_SIZE, "L3\n"); | ||
585 | else if (cluster_cache_index == 1) | ||
586 | len = snprintf(page, PAGE_SIZE, "L1\n"); | ||
587 | else | ||
588 | len = snprintf(page, PAGE_SIZE, "ALL\n"); | ||
589 | |||
590 | return len; | ||
591 | } | ||
592 | |||
593 | static int proc_write_cluster_size(struct file *file, | ||
594 | const char *buffer, | ||
595 | unsigned long count, | ||
596 | void *data) | ||
597 | { | ||
598 | int len; | ||
599 | /* L2, L3 */ | ||
600 | char cache_name[33]; | ||
601 | |||
602 | if(count > 32) | ||
603 | len = 32; | ||
604 | else | ||
605 | len = count; | ||
606 | |||
607 | if(copy_from_user(cache_name, buffer, len)) | ||
608 | return -EFAULT; | ||
609 | |||
610 | cache_name[len] = '\0'; | ||
611 | /* chomp name */ | ||
612 | if (len > 1 && cache_name[len - 1] == '\n') | ||
613 | cache_name[len - 1] = '\0'; | ||
614 | |||
615 | /* do a quick and dirty comparison to find the cluster size */ | ||
616 | if (!strcmp(cache_name, "L2")) | ||
617 | cluster_cache_index = 2; | ||
618 | else if (!strcmp(cache_name, "L3")) | ||
619 | cluster_cache_index = 3; | ||
620 | else if (!strcmp(cache_name, "L1")) | ||
621 | cluster_cache_index = 1; | ||
622 | else if (!strcmp(cache_name, "ALL")) | ||
623 | cluster_cache_index = num_online_cpus(); | ||
624 | else | ||
625 | printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name); | ||
626 | |||
627 | return len; | ||
628 | } | ||
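The cluster size used by cluster-aware plugins is configured the same way, via the cluster_cache file; only the strings compared above are accepted. A short sketch (choosing L3 here is an arbitrary assumption):

	/* Hypothetical user-space fragment; accepted values: "L1", "L2", "L3", "ALL". */
	FILE *f = fopen("/proc/litmus/cluster_cache", "w");
	if (f) {
		fputs("L3\n", f);  /* sets cluster_cache_index = 3 */
		fclose(f);
	}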
629 | |||
630 | static int proc_read_release_master(char *page, char **start, | ||
631 | off_t off, int count, | ||
632 | int *eof, void *data) | ||
633 | { | ||
634 | int len, master; | ||
635 | master = atomic_read(&release_master_cpu); | ||
636 | if (master == NO_CPU) | ||
637 | len = snprintf(page, PAGE_SIZE, "NO_CPU\n"); | ||
638 | else | ||
639 | len = snprintf(page, PAGE_SIZE, "%d\n", master); | ||
640 | return len; | ||
641 | } | ||
642 | |||
643 | static int proc_write_release_master(struct file *file, | ||
644 | const char *buffer, | ||
645 | unsigned long count, | ||
646 | void *data) | ||
647 | { | ||
648 | int cpu, err, online = 0; | ||
649 | char msg[64]; | ||
650 | |||
651 | if (count > 63) | ||
652 | return -EINVAL; | ||
653 | |||
654 | if (copy_from_user(msg, buffer, count)) | ||
655 | return -EFAULT; | ||
656 | |||
657 | /* terminate */ | ||
658 | msg[count] = '\0'; | ||
659 | /* chomp */ | ||
660 | if (count > 1 && msg[count - 1] == '\n') | ||
661 | msg[count - 1] = '\0'; | ||
662 | |||
663 | if (strcmp(msg, "NO_CPU") == 0) { | ||
664 | atomic_set(&release_master_cpu, NO_CPU); | ||
665 | return count; | ||
666 | } else { | ||
667 | err = sscanf(msg, "%d", &cpu); | ||
668 | if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) { | ||
669 | atomic_set(&release_master_cpu, cpu); | ||
670 | return count; | ||
671 | } else { | ||
672 | TRACE("invalid release master: '%s' " | ||
673 | "(err:%d cpu:%d online:%d)\n", | ||
674 | msg, err, cpu, online); | ||
675 | return -EINVAL; | ||
676 | } | ||
677 | } | ||
678 | } | ||
679 | |||
680 | static struct proc_dir_entry *litmus_dir = NULL, | ||
681 | *curr_file = NULL, | ||
682 | *stat_file = NULL, | ||
683 | *plugs_file = NULL, | ||
684 | *clus_cache_idx_file = NULL, | ||
685 | *release_master_file = NULL; | ||
686 | |||
687 | static int __init init_litmus_proc(void) | ||
688 | { | ||
689 | litmus_dir = proc_mkdir("litmus", NULL); | ||
690 | if (!litmus_dir) { | ||
691 | printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); | ||
692 | return -ENOMEM; | ||
693 | } | ||
694 | |||
695 | curr_file = create_proc_entry("active_plugin", | ||
696 | 0644, litmus_dir); | ||
697 | if (!curr_file) { | ||
698 | printk(KERN_ERR "Could not allocate active_plugin " | ||
699 | "procfs entry.\n"); | ||
700 | return -ENOMEM; | ||
701 | } | ||
702 | curr_file->read_proc = proc_read_curr; | ||
703 | curr_file->write_proc = proc_write_curr; | ||
704 | |||
705 | release_master_file = create_proc_entry("release_master", | ||
706 | 0644, litmus_dir); | ||
707 | if (!release_master_file) { | ||
708 | printk(KERN_ERR "Could not allocate release_master " | ||
709 | "procfs entry.\n"); | ||
710 | return -ENOMEM; | ||
711 | } | ||
712 | release_master_file->read_proc = proc_read_release_master; | ||
713 | release_master_file->write_proc = proc_write_release_master; | ||
714 | |||
715 | clus_cache_idx_file = create_proc_entry("cluster_cache", | ||
716 | 0644, litmus_dir); | ||
717 | if (!clus_cache_idx_file) { | ||
718 | printk(KERN_ERR "Could not allocate cluster_cache " | ||
719 | "procfs entry.\n"); | ||
720 | return -ENOMEM; | ||
721 | } | ||
722 | clus_cache_idx_file->read_proc = proc_read_cluster_size; | ||
723 | clus_cache_idx_file->write_proc = proc_write_cluster_size; | ||
724 | |||
725 | stat_file = create_proc_read_entry("stats", 0444, litmus_dir, | ||
726 | proc_read_stats, NULL); | ||
727 | |||
728 | plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir, | ||
729 | proc_read_plugins, NULL); | ||
730 | |||
731 | return 0; | ||
732 | } | ||
733 | |||
734 | static void exit_litmus_proc(void) | ||
735 | { | ||
736 | if (plugs_file) | ||
737 | remove_proc_entry("plugins", litmus_dir); | ||
738 | if (stat_file) | ||
739 | remove_proc_entry("stats", litmus_dir); | ||
740 | if (curr_file) | ||
741 | remove_proc_entry("active_plugin", litmus_dir); | ||
742 | if (clus_cache_idx_file) | ||
743 | remove_proc_entry("cluster_cache", litmus_dir); | ||
744 | if (release_master_file) | ||
745 | remove_proc_entry("release_master", litmus_dir); | ||
746 | if (litmus_dir) | ||
747 | remove_proc_entry("litmus", NULL); | ||
748 | } | ||
749 | |||
750 | extern struct sched_plugin linux_sched_plugin; | ||
751 | |||
752 | static int __init _init_litmus(void) | ||
753 | { | ||
754 | /* Common initializers, | ||
755 | * mode change lock is used to enforce single mode change | ||
756 | * operation. | ||
757 | */ | ||
758 | printk("Starting LITMUS^RT kernel\n"); | ||
759 | |||
760 | register_sched_plugin(&linux_sched_plugin); | ||
761 | |||
762 | bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); | ||
763 | release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC); | ||
764 | |||
765 | #ifdef CONFIG_MAGIC_SYSRQ | ||
766 | /* offer some debugging help */ | ||
767 | if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op)) | ||
768 | printk("Registered kill rt tasks magic sysrq.\n"); | ||
769 | else | ||
770 | printk("Could not register kill rt tasks magic sysrq.\n"); | ||
771 | #endif | ||
772 | |||
773 | init_litmus_proc(); | ||
774 | |||
775 | return 0; | ||
776 | } | ||
777 | |||
778 | static void _exit_litmus(void) | ||
779 | { | ||
780 | exit_litmus_proc(); | ||
781 | kmem_cache_destroy(bheap_node_cache); | ||
782 | kmem_cache_destroy(release_heap_cache); | ||
783 | } | ||
784 | |||
785 | module_init(_init_litmus); | ||
786 | module_exit(_exit_litmus); | ||
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c new file mode 100644 index 000000000000..609ff0f82abb --- /dev/null +++ b/litmus/rt_domain.c | |||
@@ -0,0 +1,310 @@ | |||
1 | /* | ||
2 | * litmus/rt_domain.c | ||
3 | * | ||
4 | * LITMUS real-time infrastructure. This file contains the | ||
5 | * functions that manipulate RT domains. RT domains are an abstraction | ||
6 | * of a ready queue and a release queue. | ||
7 | */ | ||
8 | |||
9 | #include <linux/percpu.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/list.h> | ||
12 | #include <linux/slab.h> | ||
13 | |||
14 | #include <litmus/litmus.h> | ||
15 | #include <litmus/sched_plugin.h> | ||
16 | #include <litmus/sched_trace.h> | ||
17 | |||
18 | #include <litmus/rt_domain.h> | ||
19 | |||
20 | #include <litmus/trace.h> | ||
21 | |||
22 | #include <litmus/bheap.h> | ||
23 | |||
24 | static int dummy_resched(rt_domain_t *rt) | ||
25 | { | ||
26 | return 0; | ||
27 | } | ||
28 | |||
29 | static int dummy_order(struct bheap_node* a, struct bheap_node* b) | ||
30 | { | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | /* default implementation: use default lock */ | ||
35 | static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
36 | { | ||
37 | merge_ready(rt, tasks); | ||
38 | } | ||
39 | |||
40 | static unsigned int time2slot(lt_t time) | ||
41 | { | ||
42 | return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS; | ||
43 | } | ||
44 | |||
45 | static enum hrtimer_restart on_release_timer(struct hrtimer *timer) | ||
46 | { | ||
47 | unsigned long flags; | ||
48 | struct release_heap* rh; | ||
49 | |||
50 | TRACE("on_release_timer(0x%p) starts.\n", timer); | ||
51 | |||
52 | TS_RELEASE_START; | ||
53 | |||
54 | rh = container_of(timer, struct release_heap, timer); | ||
55 | |||
56 | spin_lock_irqsave(&rh->dom->release_lock, flags); | ||
57 | TRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); | ||
58 | /* remove from release queue */ | ||
59 | list_del(&rh->list); | ||
60 | spin_unlock_irqrestore(&rh->dom->release_lock, flags); | ||
61 | TRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock); | ||
62 | |||
63 | /* call release callback */ | ||
64 | rh->dom->release_jobs(rh->dom, &rh->heap); | ||
65 | /* WARNING: rh can be referenced from other CPUs from now on. */ | ||
66 | |||
67 | TS_RELEASE_END; | ||
68 | |||
69 | TRACE("on_release_timer(0x%p) ends.\n", timer); | ||
70 | |||
71 | return HRTIMER_NORESTART; | ||
72 | } | ||
73 | |||
74 | /* allocated in litmus.c */ | ||
75 | struct kmem_cache * release_heap_cache; | ||
76 | |||
77 | struct release_heap* release_heap_alloc(int gfp_flags) | ||
78 | { | ||
79 | struct release_heap* rh; | ||
80 | rh= kmem_cache_alloc(release_heap_cache, gfp_flags); | ||
81 | if (rh) { | ||
82 | /* initialize timer */ | ||
83 | hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
84 | rh->timer.function = on_release_timer; | ||
85 | } | ||
86 | return rh; | ||
87 | } | ||
88 | |||
89 | void release_heap_free(struct release_heap* rh) | ||
90 | { | ||
91 | /* make sure timer is no longer in use */ | ||
92 | hrtimer_cancel(&rh->timer); | ||
93 | kmem_cache_free(release_heap_cache, rh); | ||
94 | } | ||
95 | |||
96 | /* Caller must hold release lock. | ||
97 | * Will return heap for given time. If no such heap exists prior to | ||
98 | * the invocation it will be created. | ||
99 | */ | ||
100 | static struct release_heap* get_release_heap(rt_domain_t *rt, | ||
101 | struct task_struct* t, | ||
102 | int use_task_heap) | ||
103 | { | ||
104 | struct list_head* pos; | ||
105 | struct release_heap* heap = NULL; | ||
106 | struct release_heap* rh; | ||
107 | lt_t release_time = get_release(t); | ||
108 | unsigned int slot = time2slot(release_time); | ||
109 | |||
110 | /* initialize pos for the case that the list is empty */ | ||
111 | pos = rt->release_queue.slot[slot].next; | ||
112 | list_for_each(pos, &rt->release_queue.slot[slot]) { | ||
113 | rh = list_entry(pos, struct release_heap, list); | ||
114 | if (release_time == rh->release_time) { | ||
115 | /* perfect match -- this happens on hyperperiod | ||
116 | * boundaries | ||
117 | */ | ||
118 | heap = rh; | ||
119 | break; | ||
120 | } else if (lt_before(release_time, rh->release_time)) { | ||
121 | /* we need to insert a new node since rh is | ||
122 | * already in the future | ||
123 | */ | ||
124 | break; | ||
125 | } | ||
126 | } | ||
127 | if (!heap && use_task_heap) { | ||
128 | /* use pre-allocated release heap */ | ||
129 | rh = tsk_rt(t)->rel_heap; | ||
130 | |||
131 | rh->dom = rt; | ||
132 | rh->release_time = release_time; | ||
133 | |||
134 | /* add to release queue */ | ||
135 | list_add(&rh->list, pos->prev); | ||
136 | heap = rh; | ||
137 | } | ||
138 | return heap; | ||
139 | } | ||
140 | |||
141 | static void reinit_release_heap(struct task_struct* t) | ||
142 | { | ||
143 | struct release_heap* rh; | ||
144 | |||
145 | /* use pre-allocated release heap */ | ||
146 | rh = tsk_rt(t)->rel_heap; | ||
147 | |||
148 | /* Make sure it is safe to use. The timer callback could still | ||
149 | * be executing on another CPU; hrtimer_cancel() will wait | ||
150 | * until the timer callback has completed. However, under no | ||
151 | * circumstances should the timer be active (= yet to be | ||
152 | * triggered). | ||
153 | * | ||
154 | * WARNING: If the CPU still holds the release_lock at this point, | ||
155 | * deadlock may occur! | ||
156 | */ | ||
157 | BUG_ON(hrtimer_cancel(&rh->timer)); | ||
158 | |||
159 | /* initialize */ | ||
160 | bheap_init(&rh->heap); | ||
161 | atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE); | ||
162 | } | ||
163 | /* arm_release_timer() - start local release timer or trigger | ||
164 | * remote timer (pull timer) | ||
165 | * | ||
166 | * Called by add_release() with: | ||
167 | * - tobe_lock taken | ||
168 | * - IRQ disabled | ||
169 | */ | ||
170 | static void arm_release_timer(rt_domain_t *_rt) | ||
171 | { | ||
172 | rt_domain_t *rt = _rt; | ||
173 | struct list_head list; | ||
174 | struct list_head *pos, *safe; | ||
175 | struct task_struct* t; | ||
176 | struct release_heap* rh; | ||
177 | |||
178 | TRACE("arm_release_timer() at %llu\n", litmus_clock()); | ||
179 | list_replace_init(&rt->tobe_released, &list); | ||
180 | |||
181 | list_for_each_safe(pos, safe, &list) { | ||
182 | /* pick task off the work list */ | ||
183 | t = list_entry(pos, struct task_struct, rt_param.list); | ||
184 | sched_trace_task_release(t); | ||
185 | list_del(pos); | ||
186 | |||
187 | /* put into release heap while holding release_lock */ | ||
188 | spin_lock(&rt->release_lock); | ||
189 | TRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock); | ||
190 | |||
191 | rh = get_release_heap(rt, t, 0); | ||
192 | if (!rh) { | ||
193 | /* need to use our own, but drop lock first */ | ||
194 | spin_unlock(&rt->release_lock); | ||
195 | TRACE_TASK(t, "Dropped release_lock 0x%p\n", | ||
196 | &rt->release_lock); | ||
197 | |||
198 | reinit_release_heap(t); | ||
199 | TRACE_TASK(t, "release_heap ready\n"); | ||
200 | |||
201 | spin_lock(&rt->release_lock); | ||
202 | TRACE_TASK(t, "Re-acquired release_lock 0x%p\n", | ||
203 | &rt->release_lock); | ||
204 | |||
205 | rh = get_release_heap(rt, t, 1); | ||
206 | } | ||
207 | bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node); | ||
208 | TRACE_TASK(t, "arm_release_timer(): added to release heap\n"); | ||
209 | |||
210 | spin_unlock(&rt->release_lock); | ||
211 | TRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock); | ||
212 | |||
213 | /* To avoid arming the timer multiple times, we only let the | ||
214 | * owner do the arming (which is the "first" task to reference | ||
215 | * this release_heap anyway). | ||
216 | */ | ||
217 | if (rh == tsk_rt(t)->rel_heap) { | ||
218 | TRACE_TASK(t, "arming timer 0x%p\n", &rh->timer); | ||
219 | /* we cannot arm the timer using hrtimer_start() | ||
220 | * as it may deadlock on rq->lock | ||
221 | * | ||
222 | * PINNED mode is ok on both local and remote CPU | ||
223 | */ | ||
224 | if (rt->release_master == NO_CPU) | ||
225 | __hrtimer_start_range_ns(&rh->timer, | ||
226 | ns_to_ktime(rh->release_time), | ||
227 | 0, HRTIMER_MODE_ABS_PINNED, 0); | ||
228 | else | ||
229 | hrtimer_start_on(rt->release_master, | ||
230 | &rh->info, &rh->timer, | ||
231 | ns_to_ktime(rh->release_time), | ||
232 | HRTIMER_MODE_ABS_PINNED); | ||
233 | } else | ||
234 | TRACE_TASK(t, "0x%p is not my timer\n", &rh->timer); | ||
235 | } | ||
236 | } | ||
237 | |||
238 | void rt_domain_init(rt_domain_t *rt, | ||
239 | bheap_prio_t order, | ||
240 | check_resched_needed_t check, | ||
241 | release_jobs_t release | ||
242 | ) | ||
243 | { | ||
244 | int i; | ||
245 | |||
246 | BUG_ON(!rt); | ||
247 | if (!check) | ||
248 | check = dummy_resched; | ||
249 | if (!release) | ||
250 | release = default_release_jobs; | ||
251 | if (!order) | ||
252 | order = dummy_order; | ||
253 | |||
254 | rt->release_master = NO_CPU; | ||
255 | |||
256 | bheap_init(&rt->ready_queue); | ||
257 | INIT_LIST_HEAD(&rt->tobe_released); | ||
258 | for (i = 0; i < RELEASE_QUEUE_SLOTS; i++) | ||
259 | INIT_LIST_HEAD(&rt->release_queue.slot[i]); | ||
260 | |||
261 | spin_lock_init(&rt->ready_lock); | ||
262 | spin_lock_init(&rt->release_lock); | ||
263 | spin_lock_init(&rt->tobe_lock); | ||
264 | |||
265 | rt->check_resched = check; | ||
266 | rt->release_jobs = release; | ||
267 | rt->order = order; | ||
268 | } | ||
269 | |||
270 | /* add_ready - add a real-time task to the rt ready queue. It must be runnable. | ||
271 | * @new: the newly released task | ||
272 | */ | ||
273 | void __add_ready(rt_domain_t* rt, struct task_struct *new) | ||
274 | { | ||
275 | TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n", | ||
276 | new->comm, new->pid, get_exec_cost(new), get_rt_period(new), | ||
277 | get_release(new), litmus_clock()); | ||
278 | |||
279 | BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); | ||
280 | |||
281 | bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node); | ||
282 | rt->check_resched(rt); | ||
283 | } | ||
284 | |||
285 | /* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable. | ||
286 | * @tasks - the newly released tasks | ||
287 | */ | ||
288 | void __merge_ready(rt_domain_t* rt, struct bheap* tasks) | ||
289 | { | ||
290 | bheap_union(rt->order, &rt->ready_queue, tasks); | ||
291 | rt->check_resched(rt); | ||
292 | } | ||
293 | |||
294 | /* add_release - add a real-time task to the rt release queue. | ||
295 | * @task: the sleeping task | ||
296 | */ | ||
297 | void __add_release(rt_domain_t* rt, struct task_struct *task) | ||
298 | { | ||
299 | TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task)); | ||
300 | list_add(&tsk_rt(task)->list, &rt->tobe_released); | ||
301 | task->rt_param.domain = rt; | ||
302 | |||
303 | /* start release timer */ | ||
304 | TS_SCHED2_START(task); | ||
305 | |||
306 | arm_release_timer(rt); | ||
307 | |||
308 | TS_SCHED2_END(task); | ||
309 | } | ||
310 | |||
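To summarize the API exported by this file: a plugin embeds an rt_domain_t, initializes it with its priority order and callbacks, and then feeds tasks in through __add_ready()/__merge_ready() (with ready_lock held) or add_release()/__add_release() (with tobe_lock held and IRQs off). A plugin-side sketch under those assumptions; the my_* names are placeholders, not LITMUS code:

	/* Hypothetical plugin-side sketch; my_* names are placeholders. */
	static rt_domain_t my_domain;

	static int my_prio_order(struct bheap_node *a, struct bheap_node *b)
	{
		/* return nonzero iff a has higher priority than b (e.g., earlier deadline) */
		return 0;
	}

	static int my_check_resched(rt_domain_t *rt)
	{
		/* called after a task was added to the ready queue; kick a CPU if needed */
		return 0;
	}

	static void my_plugin_init(void)
	{
		/* NULL callbacks fall back to dummy_order/dummy_resched/default_release_jobs */
		rt_domain_init(&my_domain, my_prio_order, my_check_resched, NULL);
	}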
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c new file mode 100644 index 000000000000..e57a11afda16 --- /dev/null +++ b/litmus/sched_cedf.c | |||
@@ -0,0 +1,772 @@ | |||
1 | /* | ||
2 | * litmus/sched_cedf.c | ||
3 | * | ||
4 | * Implementation of the C-EDF scheduling algorithm. | ||
5 | * | ||
6 | * This implementation is based on G-EDF: | ||
7 | * - CPUs are clustered around L2 or L3 caches. | ||
8 | * - Cluster topology is automatically detected (this is arch-dependent | ||
9 | * and currently works only on x86 --- and only with modern | ||
10 | * cpus that export cpuid4 information) | ||
11 | * - The plugin _does not_ attempt to put tasks in the right cluster, i.e. | ||
12 | * the programmer needs to be aware of the topology to place tasks | ||
13 | * in the desired cluster | ||
14 | * - default clustering is around L2 cache (cache index = 2) | ||
15 | * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all | ||
16 | * online_cpus are placed in a single cluster). | ||
17 | * | ||
18 | * For details on functions, take a look at sched_gsn_edf.c | ||
19 | * | ||
20 | * Currently, we do not support changes in the number of online cpus. | ||
21 | * If the num_online_cpus() dynamically changes, the plugin is broken. | ||
22 | * | ||
23 | * This version uses the simple approach and serializes all scheduling | ||
24 | * decisions by the use of a queue lock. This is probably not the | ||
25 | * best way to do it, but it should suffice for now. | ||
26 | */ | ||
27 | |||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/sched.h> | ||
31 | |||
32 | #include <litmus/litmus.h> | ||
33 | #include <litmus/jobs.h> | ||
34 | #include <litmus/sched_plugin.h> | ||
35 | #include <litmus/edf_common.h> | ||
36 | #include <litmus/sched_trace.h> | ||
37 | |||
38 | #include <litmus/bheap.h> | ||
39 | |||
40 | #include <linux/module.h> | ||
41 | |||
42 | /* forward declaration... a funny thing with C ;) */ | ||
43 | struct clusterdomain; | ||
44 | |||
45 | /* cpu_entry_t - maintain the linked and scheduled state | ||
46 | * | ||
47 | * A cpu also contains a pointer to the cedf_domain_t cluster | ||
48 | * that owns it (struct clusterdomain*) | ||
49 | */ | ||
50 | typedef struct { | ||
51 | int cpu; | ||
52 | struct clusterdomain* cluster; /* owning cluster */ | ||
53 | struct task_struct* linked; /* only RT tasks */ | ||
54 | struct task_struct* scheduled; /* only RT tasks */ | ||
55 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
56 | struct bheap_node* hn; | ||
57 | } cpu_entry_t; | ||
58 | |||
59 | /* one cpu_entry_t per CPU */ | ||
60 | DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); | ||
61 | |||
62 | #define set_will_schedule() \ | ||
63 | (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1)) | ||
64 | #define clear_will_schedule() \ | ||
65 | (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0)) | ||
66 | #define test_will_schedule(cpu) \ | ||
67 | (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) | ||
68 | |||
69 | /* | ||
70 | * In C-EDF there is a cedf domain _per_ cluster | ||
71 | * The number of clusters is dynamically determined according to the | ||
72 | * total cpu number and the cluster size | ||
73 | */ | ||
74 | typedef struct clusterdomain { | ||
75 | /* rt_domain for this cluster */ | ||
76 | rt_domain_t domain; | ||
77 | /* cpus in this cluster */ | ||
78 | cpu_entry_t* *cpus; | ||
79 | /* map of this cluster cpus */ | ||
80 | cpumask_var_t cpu_map; | ||
81 | /* the cpus queue themselves according to priority in here */ | ||
82 | struct bheap_node *heap_node; | ||
83 | struct bheap cpu_heap; | ||
84 | /* lock for this cluster */ | ||
85 | #define lock domain.ready_lock | ||
86 | } cedf_domain_t; | ||
87 | |||
88 | /* a cedf_domain per cluster; allocation is done at init/activation time */ | ||
89 | cedf_domain_t *cedf; | ||
90 | |||
91 | #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) | ||
92 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
93 | |||
94 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | ||
95 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | ||
96 | * information during the initialization of the plugin (e.g., topology) | ||
97 | #define WANT_ALL_SCHED_EVENTS | ||
98 | */ | ||
99 | #define VERBOSE_INIT | ||
100 | |||
101 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
102 | { | ||
103 | cpu_entry_t *a, *b; | ||
104 | a = _a->value; | ||
105 | b = _b->value; | ||
106 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
107 | * the top of the heap. | ||
108 | */ | ||
109 | return edf_higher_prio(b->linked, a->linked); | ||
110 | } | ||
111 | |||
112 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
113 | * order in the cpu queue. Caller must hold cedf lock. | ||
114 | */ | ||
115 | static void update_cpu_position(cpu_entry_t *entry) | ||
116 | { | ||
117 | cedf_domain_t *cluster = entry->cluster; | ||
118 | |||
119 | if (likely(bheap_node_in_heap(entry->hn))) | ||
120 | bheap_delete(cpu_lower_prio, | ||
121 | &cluster->cpu_heap, | ||
122 | entry->hn); | ||
123 | |||
124 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); | ||
125 | } | ||
126 | |||
127 | /* caller must hold cedf lock */ | ||
128 | static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) | ||
129 | { | ||
130 | struct bheap_node* hn; | ||
131 | hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); | ||
132 | return hn->value; | ||
133 | } | ||
134 | |||
135 | |||
136 | /* link_task_to_cpu - Update the link of a CPU. | ||
137 | * Handles the case where the to-be-linked task is already | ||
138 | * scheduled on a different CPU. | ||
139 | */ | ||
140 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
141 | cpu_entry_t *entry) | ||
142 | { | ||
143 | cpu_entry_t *sched; | ||
144 | struct task_struct* tmp; | ||
145 | int on_cpu; | ||
146 | |||
147 | BUG_ON(linked && !is_realtime(linked)); | ||
148 | |||
149 | /* Currently linked task is set to be unlinked. */ | ||
150 | if (entry->linked) { | ||
151 | entry->linked->rt_param.linked_on = NO_CPU; | ||
152 | } | ||
153 | |||
154 | /* Link new task to CPU. */ | ||
155 | if (linked) { | ||
156 | set_rt_flags(linked, RT_F_RUNNING); | ||
157 | /* handle the case that the task is already scheduled somewhere! */ | ||
158 | on_cpu = linked->rt_param.scheduled_on; | ||
159 | if (on_cpu != NO_CPU) { | ||
160 | sched = &per_cpu(cedf_cpu_entries, on_cpu); | ||
161 | /* this should only happen if not linked already */ | ||
162 | BUG_ON(sched->linked == linked); | ||
163 | |||
164 | /* If we are already scheduled on the CPU to which we | ||
165 | * wanted to link, we don't need to do the swap -- | ||
166 | * we just link ourselves to the CPU and depend on | ||
167 | * the caller to get things right. | ||
168 | */ | ||
169 | if (entry != sched) { | ||
170 | TRACE_TASK(linked, | ||
171 | "already scheduled on %d, updating link.\n", | ||
172 | sched->cpu); | ||
173 | tmp = sched->linked; | ||
174 | linked->rt_param.linked_on = sched->cpu; | ||
175 | sched->linked = linked; | ||
176 | update_cpu_position(sched); | ||
177 | linked = tmp; | ||
178 | } | ||
179 | } | ||
180 | if (linked) /* might be NULL due to swap */ | ||
181 | linked->rt_param.linked_on = entry->cpu; | ||
182 | } | ||
183 | entry->linked = linked; | ||
184 | #ifdef WANT_ALL_SCHED_EVENTS | ||
185 | if (linked) | ||
186 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
187 | else | ||
188 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
189 | #endif | ||
190 | update_cpu_position(entry); | ||
191 | } | ||
192 | |||
193 | /* unlink - Make sure a task is not linked any longer to an entry | ||
194 | * where it was linked before. Must hold cedf_lock. | ||
195 | */ | ||
196 | static noinline void unlink(struct task_struct* t) | ||
197 | { | ||
198 | cpu_entry_t *entry; | ||
199 | |||
200 | if (unlikely(!t)) { | ||
201 | TRACE_BUG_ON(!t); | ||
202 | return; | ||
203 | } | ||
204 | |||
205 | |||
206 | if (t->rt_param.linked_on != NO_CPU) { | ||
207 | /* unlink */ | ||
208 | entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on); | ||
209 | t->rt_param.linked_on = NO_CPU; | ||
210 | link_task_to_cpu(NULL, entry); | ||
211 | } else if (is_queued(t)) { | ||
212 | /* This is an interesting situation: t is scheduled, | ||
213 | * but was just recently unlinked. It cannot be | ||
214 | * linked anywhere else (because then it would have | ||
215 | * been relinked to this CPU), thus it must be in some | ||
216 | * queue. We must remove it from the list in this | ||
217 | * case. | ||
218 | * | ||
219 | * In the C-EDF case it should be somewhere in the queue of | ||
220 | * its domain; therefore we can get the domain using | ||
221 | * task_cpu_cluster(). | ||
222 | */ | ||
223 | remove(&(task_cpu_cluster(t))->domain, t); | ||
224 | } | ||
225 | } | ||
226 | |||
227 | |||
228 | /* preempt - force a CPU to reschedule | ||
229 | */ | ||
230 | static void preempt(cpu_entry_t *entry) | ||
231 | { | ||
232 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
233 | } | ||
234 | |||
235 | /* requeue - Put an unlinked task into its c-edf domain. | ||
236 | * Caller must hold cedf_lock. | ||
237 | */ | ||
238 | static noinline void requeue(struct task_struct* task) | ||
239 | { | ||
240 | cedf_domain_t *cluster = task_cpu_cluster(task); | ||
241 | BUG_ON(!task); | ||
242 | /* sanity check before insertion */ | ||
243 | BUG_ON(is_queued(task)); | ||
244 | |||
245 | if (is_released(task, litmus_clock())) | ||
246 | __add_ready(&cluster->domain, task); | ||
247 | else { | ||
248 | /* it has got to wait */ | ||
249 | add_release(&cluster->domain, task); | ||
250 | } | ||
251 | } | ||
252 | |||
253 | /* check for any necessary preemptions */ | ||
254 | static void check_for_preemptions(cedf_domain_t *cluster) | ||
255 | { | ||
256 | struct task_struct *task; | ||
257 | cpu_entry_t* last; | ||
258 | |||
259 | for(last = lowest_prio_cpu(cluster); | ||
260 | edf_preemption_needed(&cluster->domain, last->linked); | ||
261 | last = lowest_prio_cpu(cluster)) { | ||
262 | /* preemption necessary */ | ||
263 | task = __take_ready(&cluster->domain); | ||
264 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | ||
265 | task->pid, last->cpu); | ||
266 | if (last->linked) | ||
267 | requeue(last->linked); | ||
268 | link_task_to_cpu(task, last); | ||
269 | preempt(last); | ||
270 | } | ||
271 | } | ||
272 | |||
273 | /* cedf_job_arrival: task is either resumed or released */ | ||
274 | static noinline void cedf_job_arrival(struct task_struct* task) | ||
275 | { | ||
276 | cedf_domain_t *cluster = task_cpu_cluster(task); | ||
277 | BUG_ON(!task); | ||
278 | |||
279 | requeue(task); | ||
280 | check_for_preemptions(cluster); | ||
281 | } | ||
282 | |||
283 | static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
284 | { | ||
285 | cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); | ||
286 | unsigned long flags; | ||
287 | |||
288 | spin_lock_irqsave(&cluster->lock, flags); | ||
289 | |||
290 | __merge_ready(&cluster->domain, tasks); | ||
291 | check_for_preemptions(cluster); | ||
292 | |||
293 | spin_unlock_irqrestore(&cluster->lock, flags); | ||
294 | } | ||
295 | |||
296 | /* caller holds the cluster lock */ | ||
297 | static noinline void job_completion(struct task_struct *t, int forced) | ||
298 | { | ||
299 | BUG_ON(!t); | ||
300 | |||
301 | sched_trace_task_completion(t, forced); | ||
302 | |||
303 | TRACE_TASK(t, "job_completion().\n"); | ||
304 | |||
305 | /* set flags */ | ||
306 | set_rt_flags(t, RT_F_SLEEP); | ||
307 | /* prepare for next period */ | ||
308 | prepare_for_next_period(t); | ||
309 | if (is_released(t, litmus_clock())) | ||
310 | sched_trace_task_release(t); | ||
311 | /* unlink */ | ||
312 | unlink(t); | ||
313 | /* requeue | ||
314 | * But don't requeue a blocking task. */ | ||
315 | if (is_running(t)) | ||
316 | cedf_job_arrival(t); | ||
317 | } | ||
318 | |||
319 | /* cedf_tick - this function is called for every local timer | ||
320 | * interrupt. | ||
321 | * | ||
322 | * checks whether the current task has expired and checks | ||
323 | * whether we need to preempt it if it has not expired | ||
324 | */ | ||
325 | static void cedf_tick(struct task_struct* t) | ||
326 | { | ||
327 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
328 | if (!is_np(t)) { | ||
329 | /* np tasks will be preempted when they become | ||
330 | * preemptable again | ||
331 | */ | ||
332 | set_tsk_need_resched(t); | ||
333 | set_will_schedule(); | ||
334 | TRACE("cedf_scheduler_tick: " | ||
335 | "%d is preemptable " | ||
336 | " => FORCE_RESCHED\n", t->pid); | ||
337 | } else if (is_user_np(t)) { | ||
338 | TRACE("cedf_scheduler_tick: " | ||
339 | "%d is non-preemptable, " | ||
340 | "preemption delayed.\n", t->pid); | ||
341 | request_exit_np(t); | ||
342 | } | ||
343 | } | ||
344 | } | ||
345 | |||
346 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
347 | * assumptions on the state of the current task since it may be called for a | ||
348 | * number of reasons: a scheduler_tick() may have determined that it | ||
349 | * was necessary, sys_exit_np() may have been called, some Linux | ||
350 | * subsystem may have determined so, or even (in the worst case) there is a bug | ||
351 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
352 | * current state is. | ||
353 | * | ||
354 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
355 | * | ||
356 | * The following assertions for the scheduled task could hold: | ||
357 | * | ||
358 | * - !is_running(scheduled) // the job blocks | ||
359 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
360 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
361 | * - linked != scheduled // we need to reschedule (for any reason) | ||
362 | * - is_np(scheduled) // rescheduling must be delayed, | ||
363 | * sys_exit_np must be requested | ||
364 | * | ||
365 | * Any of these can occur together. | ||
366 | */ | ||
367 | static struct task_struct* cedf_schedule(struct task_struct * prev) | ||
368 | { | ||
369 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | ||
370 | cedf_domain_t *cluster = entry->cluster; | ||
371 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
372 | struct task_struct* next = NULL; | ||
373 | |||
374 | spin_lock(&cluster->lock); | ||
375 | clear_will_schedule(); | ||
376 | |||
377 | /* sanity checking */ | ||
378 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
379 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
380 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
381 | |||
382 | /* (0) Determine state */ | ||
383 | exists = entry->scheduled != NULL; | ||
384 | blocks = exists && !is_running(entry->scheduled); | ||
385 | out_of_time = exists && | ||
386 | budget_enforced(entry->scheduled) && | ||
387 | budget_exhausted(entry->scheduled); | ||
388 | np = exists && is_np(entry->scheduled); | ||
389 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; | ||
390 | preempt = entry->scheduled != entry->linked; | ||
391 | |||
392 | #ifdef WANT_ALL_SCHED_EVENTS | ||
393 | TRACE_TASK(prev, "invoked cedf_schedule.\n"); | ||
394 | #endif | ||
395 | |||
396 | if (exists) | ||
397 | TRACE_TASK(prev, | ||
398 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
399 | "state:%d sig:%d\n", | ||
400 | blocks, out_of_time, np, sleep, preempt, | ||
401 | prev->state, signal_pending(prev)); | ||
402 | if (entry->linked && preempt) | ||
403 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
404 | entry->linked->comm, entry->linked->pid); | ||
405 | |||
406 | |||
407 | /* If a task blocks we have no choice but to reschedule. | ||
408 | */ | ||
409 | if (blocks) | ||
410 | unlink(entry->scheduled); | ||
411 | |||
412 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
413 | * We need to make sure to update the link structure anyway in case | ||
414 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
415 | * hurt. | ||
416 | */ | ||
417 | if (np && (out_of_time || preempt || sleep)) { | ||
418 | unlink(entry->scheduled); | ||
419 | request_exit_np(entry->scheduled); | ||
420 | } | ||
421 | |||
422 | /* Any task that is preemptable and either exhausts its execution | ||
423 | * budget or wants to sleep completes. We may have to reschedule after | ||
424 | * this. Don't do a job completion if we block (can't have timers running | ||
425 | * for blocked jobs). Preemptions go first for the same reason. | ||
426 | */ | ||
427 | if (!np && (out_of_time || sleep) && !blocks && !preempt) | ||
428 | job_completion(entry->scheduled, !sleep); | ||
429 | |||
430 | /* Link pending task if we became unlinked. | ||
431 | */ | ||
432 | if (!entry->linked) | ||
433 | link_task_to_cpu(__take_ready(&cluster->domain), entry); | ||
434 | |||
435 | /* The final scheduling decision. Do we need to switch for some reason? | ||
436 | * If linked is different from scheduled, then select linked as next. | ||
437 | */ | ||
438 | if ((!np || blocks) && | ||
439 | entry->linked != entry->scheduled) { | ||
440 | /* Schedule a linked job? */ | ||
441 | if (entry->linked) { | ||
442 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
443 | next = entry->linked; | ||
444 | } | ||
445 | if (entry->scheduled) { | ||
446 | /* not gonna be scheduled soon */ | ||
447 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
448 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
449 | } | ||
450 | } else | ||
451 | /* Only override Linux scheduler if we have a real-time task | ||
452 | * scheduled that needs to continue. | ||
453 | */ | ||
454 | if (exists) | ||
455 | next = prev; | ||
456 | |||
457 | spin_unlock(&cluster->lock); | ||
458 | |||
459 | #ifdef WANT_ALL_SCHED_EVENTS | ||
460 | TRACE("cedf_lock released, next=0x%p\n", next); | ||
461 | |||
462 | if (next) | ||
463 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
464 | else if (exists && !next) | ||
465 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
466 | #endif | ||
467 | |||
468 | |||
469 | return next; | ||
470 | } | ||
471 | |||
472 | |||
473 | /* _finish_switch - we just finished the switch away from prev | ||
474 | */ | ||
475 | static void cedf_finish_switch(struct task_struct *prev) | ||
476 | { | ||
477 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | ||
478 | |||
479 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
480 | #ifdef WANT_ALL_SCHED_EVENTS | ||
481 | TRACE_TASK(prev, "switched away from\n"); | ||
482 | #endif | ||
483 | } | ||
484 | |||
485 | |||
486 | /* Prepare a task for running in RT mode | ||
487 | */ | ||
488 | static void cedf_task_new(struct task_struct * t, int on_rq, int running) | ||
489 | { | ||
490 | unsigned long flags; | ||
491 | cpu_entry_t* entry; | ||
492 | cedf_domain_t* cluster; | ||
493 | |||
494 | TRACE("C-EDF: task new %d\n", t->pid); | ||
495 | |||
496 | /* the cluster doesn't change even if t is running */ | ||
497 | cluster = task_cpu_cluster(t); | ||
498 | |||
499 | spin_lock_irqsave(&cluster->domain.ready_lock, flags); | ||
500 | |||
501 | /* setup job params */ | ||
502 | release_at(t, litmus_clock()); | ||
503 | |||
504 | if (running) { | ||
505 | entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); | ||
506 | BUG_ON(entry->scheduled); | ||
507 | |||
508 | entry->scheduled = t; | ||
509 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
510 | } else { | ||
511 | t->rt_param.scheduled_on = NO_CPU; | ||
512 | } | ||
513 | t->rt_param.linked_on = NO_CPU; | ||
514 | |||
515 | cedf_job_arrival(t); | ||
516 | spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags); | ||
517 | } | ||
518 | |||
519 | static void cedf_task_wake_up(struct task_struct *task) | ||
520 | { | ||
521 | unsigned long flags; | ||
522 | lt_t now; | ||
523 | cedf_domain_t *cluster; | ||
524 | |||
525 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
526 | |||
527 | cluster = task_cpu_cluster(task); | ||
528 | |||
529 | spin_lock_irqsave(&cluster->lock, flags); | ||
530 | /* We need to take suspensions because of semaphores into | ||
531 | * account! If a job resumes after being suspended due to acquiring | ||
532 | * a semaphore, it should never be treated as a new job release. | ||
533 | */ | ||
534 | if (get_rt_flags(task) == RT_F_EXIT_SEM) { | ||
535 | set_rt_flags(task, RT_F_RUNNING); | ||
536 | } else { | ||
537 | now = litmus_clock(); | ||
538 | if (is_tardy(task, now)) { | ||
539 | /* new sporadic release */ | ||
540 | release_at(task, now); | ||
541 | sched_trace_task_release(task); | ||
542 | } | ||
543 | else { | ||
544 | if (task->rt.time_slice) { | ||
545 | /* came back in time before deadline | ||
546 | */ | ||
547 | set_rt_flags(task, RT_F_RUNNING); | ||
548 | } | ||
549 | } | ||
550 | } | ||
551 | cedf_job_arrival(task); | ||
552 | spin_unlock_irqrestore(&cluster->lock, flags); | ||
553 | } | ||
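
The wake-up path distinguishes three cases: a resumption after blocking on a semaphore (RT_F_EXIT_SEM, the current job simply continues), a tardy resumption past the deadline (treated as a new sporadic release), and a resumption before the deadline (the job also continues). A minimal sketch of that classification follows; the field names are invented, and a fixed relative deadline stands in for release_at().

#include <stdio.h>

enum rt_flag { RT_F_RUNNING, RT_F_SLEEP, RT_F_EXIT_SEM };

/* Invented task state for illustration only. */
struct toy_task {
	enum rt_flag flag;
	unsigned long long deadline;
};

static void on_wake_up(struct toy_task *t, unsigned long long now)
{
	if (t->flag == RT_F_EXIT_SEM) {
		/* resumed after blocking on a semaphore: same job continues */
		t->flag = RT_F_RUNNING;
		printf("continue current job\n");
	} else if (now > t->deadline) {
		/* tardy: treat the wake-up as a new sporadic release */
		t->deadline = now + 100;   /* stand-in for release_at()       */
		printf("new sporadic release, deadline %llu\n", t->deadline);
	} else {
		/* woke up before its deadline: keep running the current job  */
		t->flag = RT_F_RUNNING;
		printf("resume before deadline\n");
	}
	/* in every case the task then re-enters the scheduler (job arrival) */
}

int main(void)
{
	struct toy_task t = { RT_F_SLEEP, 50 };
	on_wake_up(&t, 80);                /* past the deadline: new release */
	return 0;
}
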
554 | |||
555 | static void cedf_task_block(struct task_struct *t) | ||
556 | { | ||
557 | unsigned long flags; | ||
558 | cedf_domain_t *cluster; | ||
559 | |||
560 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
561 | |||
562 | cluster = task_cpu_cluster(t); | ||
563 | |||
564 | /* unlink if necessary */ | ||
565 | spin_lock_irqsave(&cluster->lock, flags); | ||
566 | unlink(t); | ||
567 | spin_unlock_irqrestore(&cluster->lock, flags); | ||
568 | |||
569 | BUG_ON(!is_realtime(t)); | ||
570 | } | ||
571 | |||
572 | |||
573 | static void cedf_task_exit(struct task_struct * t) | ||
574 | { | ||
575 | unsigned long flags; | ||
576 | cedf_domain_t *cluster = task_cpu_cluster(t); | ||
577 | |||
578 | /* unlink if necessary */ | ||
579 | spin_lock_irqsave(&cluster->lock, flags); | ||
580 | unlink(t); | ||
581 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
582 | cluster->cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; | ||
583 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
584 | } | ||
585 | spin_unlock_irqrestore(&cluster->lock, flags); | ||
586 | |||
587 | BUG_ON(!is_realtime(t)); | ||
588 | TRACE_TASK(t, "RIP\n"); | ||
589 | } | ||
590 | |||
591 | static long cedf_admit_task(struct task_struct* tsk) | ||
592 | { | ||
593 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; | ||
594 | } | ||
595 | |||
596 | /* total number of clusters */ | ||
597 | static int num_clusters; | ||
598 | /* we do not support clusters of different sizes */ | ||
599 | static unsigned int cluster_size; | ||
600 | |||
601 | #ifdef VERBOSE_INIT | ||
602 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | ||
603 | { | ||
604 | int chk; | ||
605 | char buf[255]; | ||
606 | |||
607 | chk = cpulist_scnprintf(buf, 254, mask); | ||
608 | buf[chk] = '\0'; | ||
609 | printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); | ||
610 | |||
611 | } | ||
612 | #endif | ||
613 | |||
614 | static int clusters_allocated = 0; | ||
615 | |||
616 | static void cleanup_cedf(void) | ||
617 | { | ||
618 | int i; | ||
619 | |||
620 | if (clusters_allocated) { | ||
621 | for (i = 0; i < num_clusters; i++) { | ||
622 | kfree(cedf[i].cpus); | ||
623 | kfree(cedf[i].heap_node); | ||
624 | free_cpumask_var(cedf[i].cpu_map); | ||
625 | } | ||
626 | |||
627 | kfree(cedf); | ||
628 | } | ||
629 | } | ||
630 | |||
631 | static long cedf_activate_plugin(void) | ||
632 | { | ||
633 | int i, j, cpu, ccpu, cpu_count; | ||
634 | cpu_entry_t *entry; | ||
635 | |||
636 | cpumask_var_t mask; | ||
637 | int chk = 0; | ||
638 | |||
639 | /* de-allocate old clusters, if any */ | ||
640 | cleanup_cedf(); | ||
641 | |||
642 | printk(KERN_INFO "C-EDF: Activate Plugin, cache index = %d\n", | ||
643 | cluster_cache_index); | ||
644 | |||
645 | /* need to get cluster_size first */ | ||
646 | if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
647 | return -ENOMEM; | ||
648 | |||
649 | if (unlikely(cluster_cache_index == num_online_cpus())) { | ||
650 | |||
651 | cluster_size = num_online_cpus(); | ||
652 | } else { | ||
653 | |||
654 | chk = get_shared_cpu_map(mask, 0, cluster_cache_index); | ||
655 | if (chk) { | ||
656 | /* if chk != 0 then it is the max allowed index */ | ||
657 | printk(KERN_INFO "C-EDF: Cannot support cache index = %d\n", | ||
658 | cluster_cache_index); | ||
659 | printk(KERN_INFO "C-EDF: Using cache index = %d\n", | ||
660 | chk); | ||
661 | cluster_cache_index = chk; | ||
662 | } | ||
663 | |||
664 | cluster_size = cpumask_weight(mask); | ||
665 | } | ||
666 | |||
667 | if ((num_online_cpus() % cluster_size) != 0) { | ||
668 | /* this can't be right, some cpus are left out */ | ||
669 | printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n", | ||
670 | num_online_cpus(), cluster_size); | ||
671 | return -1; | ||
672 | } | ||
673 | |||
674 | num_clusters = num_online_cpus() / cluster_size; | ||
675 | printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", | ||
676 | num_clusters, cluster_size); | ||
677 | |||
678 | /* initialize clusters */ | ||
679 | cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); | ||
680 | for (i = 0; i < num_clusters; i++) { | ||
681 | |||
682 | cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), | ||
683 | GFP_ATOMIC); | ||
684 | cedf[i].heap_node = kmalloc( | ||
685 | cluster_size * sizeof(struct bheap_node), | ||
686 | GFP_ATOMIC); | ||
687 | bheap_init(&(cedf[i].cpu_heap)); | ||
688 | edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); | ||
689 | |||
690 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) | ||
691 | return -ENOMEM; | ||
692 | } | ||
693 | |||
694 | /* cycle through clusters and add cpus to them */ | ||
695 | for (i = 0; i < num_clusters; i++) { | ||
696 | |||
697 | for_each_online_cpu(cpu) { | ||
698 | /* check if the cpu is already in a cluster */ | ||
699 | for (j = 0; j < num_clusters; j++) | ||
700 | if (cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
701 | break; | ||
702 | /* if it is in a cluster go to next cpu */ | ||
703 | if (cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
704 | continue; | ||
705 | |||
706 | /* this cpu isn't in any cluster */ | ||
707 | /* get the shared cpus */ | ||
708 | if (unlikely(cluster_cache_index == num_online_cpus())) | ||
709 | cpumask_copy(mask, cpu_online_mask); | ||
710 | else | ||
711 | get_shared_cpu_map(mask, cpu, cluster_cache_index); | ||
712 | |||
713 | cpumask_copy(cedf[i].cpu_map, mask); | ||
714 | #ifdef VERBOSE_INIT | ||
715 | print_cluster_topology(mask, cpu); | ||
716 | #endif | ||
717 | /* add cpus to current cluster and init cpu_entry_t */ | ||
718 | cpu_count = 0; | ||
719 | for_each_cpu(ccpu, cedf[i].cpu_map) { | ||
720 | |||
721 | entry = &per_cpu(cedf_cpu_entries, ccpu); | ||
722 | cedf[i].cpus[cpu_count] = entry; | ||
723 | atomic_set(&entry->will_schedule, 0); | ||
724 | entry->cpu = ccpu; | ||
725 | entry->cluster = &cedf[i]; | ||
726 | entry->hn = &(cedf[i].heap_node[cpu_count]); | ||
727 | bheap_node_init(&entry->hn, entry); | ||
728 | |||
729 | cpu_count++; | ||
730 | |||
731 | entry->linked = NULL; | ||
732 | entry->scheduled = NULL; | ||
733 | update_cpu_position(entry); | ||
734 | } | ||
735 | /* done with this cluster */ | ||
736 | break; | ||
737 | } | ||
738 | } | ||
739 | |||
740 | free_cpumask_var(mask); | ||
741 | clusters_allocated = 1; | ||
742 | return 0; | ||
743 | } | ||
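
cedf_activate_plugin() derives the cluster layout from the selected cache index: cluster_size is the number of CPUs that share that cache level (or all online CPUs when the index equals num_online_cpus()), and the online CPU count must be an exact multiple of it. For example, 8 online CPUs with pairwise-shared L2 caches yield cluster_size = 2 and num_clusters = 4. Below is a sketch of just that arithmetic, with hard-coded values standing in for num_online_cpus() and get_shared_cpu_map().

#include <stdio.h>

int main(void)
{
	int online_cpus = 8;       /* assumed value of num_online_cpus()            */
	int cluster_size = 2;      /* assumed: CPUs sharing the chosen cache level  */

	if (online_cpus % cluster_size != 0) {
		fprintf(stderr, "cannot group %d cpus into clusters of %d\n",
		        online_cpus, cluster_size);
		return 1;
	}
	printf("%d cluster(s) of size %d\n",
	       online_cpus / cluster_size, cluster_size);
	return 0;
}
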
744 | |||
745 | /* Plugin object */ | ||
746 | static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { | ||
747 | .plugin_name = "C-EDF", | ||
748 | .finish_switch = cedf_finish_switch, | ||
749 | .tick = cedf_tick, | ||
750 | .task_new = cedf_task_new, | ||
751 | .complete_job = complete_job, | ||
752 | .task_exit = cedf_task_exit, | ||
753 | .schedule = cedf_schedule, | ||
754 | .task_wake_up = cedf_task_wake_up, | ||
755 | .task_block = cedf_task_block, | ||
756 | .admit_task = cedf_admit_task, | ||
757 | .activate_plugin = cedf_activate_plugin, | ||
758 | }; | ||
759 | |||
760 | |||
761 | static int __init init_cedf(void) | ||
762 | { | ||
763 | return register_sched_plugin(&cedf_plugin); | ||
764 | } | ||
765 | |||
766 | static void clean_cedf(void) | ||
767 | { | ||
768 | cleanup_cedf(); | ||
769 | } | ||
770 | |||
771 | module_init(init_cedf); | ||
772 | module_exit(clean_cedf); | ||
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c new file mode 100644 index 000000000000..6137c74729cb --- /dev/null +++ b/litmus/sched_gsn_edf.c | |||
@@ -0,0 +1,830 @@ | |||
1 | /* | ||
2 | * litmus/sched_gsn_edf.c | ||
3 | * | ||
4 | * Implementation of the GSN-EDF scheduling algorithm. | ||
5 | * | ||
6 | * This version uses the simple approach and serializes all scheduling | ||
7 | * decisions by the use of a queue lock. This is probably not the | ||
8 | * best way to do it, but it should suffice for now. | ||
9 | */ | ||
10 | |||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/percpu.h> | ||
13 | #include <linux/sched.h> | ||
14 | |||
15 | #include <litmus/litmus.h> | ||
16 | #include <litmus/jobs.h> | ||
17 | #include <litmus/sched_plugin.h> | ||
18 | #include <litmus/edf_common.h> | ||
19 | #include <litmus/sched_trace.h> | ||
20 | |||
21 | #include <litmus/bheap.h> | ||
22 | |||
23 | #include <linux/module.h> | ||
24 | |||
25 | /* Overview of GSN-EDF operations. | ||
26 | * | ||
27 | * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This | ||
28 | * description only covers how the individual operations are implemented in | ||
29 | * LITMUS. | ||
30 | * | ||
31 | * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage | ||
32 | * structure (NOT the actually scheduled | ||
33 | * task). If there is another linked task To | ||
34 | * already it will set To->linked_on = NO_CPU | ||
35 | * (thereby removing its association with this | ||
36 | * CPU). However, it will not requeue the | ||
37 | * previously linked task (if any). It will set | ||
38 | * T's state to RT_F_RUNNING and check whether | ||
39 | * it is already running somewhere else. If T | ||
40 | * is scheduled somewhere else it will link | ||
41 | * it to that CPU instead (and pull the linked | ||
42 | * task to cpu). T may be NULL. | ||
43 | * | ||
44 | * unlink(T) - Unlink removes T from all scheduler data | ||
45 | * structures. If it is linked to some CPU it | ||
46 | * will link NULL to that CPU. If it is | ||
47 | * currently queued in the gsnedf queue it will | ||
48 | * be removed from the rt_domain. It is safe to | ||
49 | * call unlink(T) if T is not linked. T may not | ||
50 | * be NULL. | ||
51 | * | ||
52 | * requeue(T) - Requeue will insert T into the appropriate | ||
53 | * queue. If the system is in real-time mode and | ||
54 | * T is released already, it will go into the | ||
55 | * ready queue. If the system is not in | ||
56 | * real-time mode, then T will go into the | ||
57 | * release queue. If T's release time is in the | ||
58 | * future, it will go into the release | ||
59 | * queue. That means that T's release time/job | ||
60 | * no/etc. has to be updated before requeue(T) is | ||
61 | * called. It is not safe to call requeue(T) | ||
62 | * when T is already queued. T may not be NULL. | ||
63 | * | ||
64 | * gsnedf_job_arrival(T) - This is the catch all function when T enters | ||
65 | * the system after either a suspension or at a | ||
66 | * job release. It will queue T (which means it | ||
67 | * is not safe to call gsnedf_job_arrival(T) if | ||
68 | * T is already queued) and then check whether a | ||
69 | * preemption is necessary. If a preemption is | ||
70 | * necessary it will update the linkage | ||
71 | * accordingly and cause scheduled to be called | ||
72 | * (either with an IPI or need_resched). It is | ||
73 | * safe to call gsnedf_job_arrival(T) if T's | ||
74 | * next job has not been actually released yet | ||
75 | * (release time in the future). T will be put | ||
76 | * on the release queue in that case. | ||
77 | * | ||
78 | * job_completion(T) - Take care of everything that needs to be done | ||
79 | * to prepare T for its next release and place | ||
80 | * it in the right queue with | ||
81 | * gsnedf_job_arrival(). | ||
82 | * | ||
83 | * | ||
84 | * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is | ||
85 | * equivalent to unlink(T). Note that if you unlink a task from a CPU none of | ||
86 | * the functions will automatically propagate a pending task from the ready queue | ||
87 | * to the unlinked CPU. This is the job of the calling function (by means of | ||
88 | * __take_ready). | ||
89 | */ | ||
90 | |||
91 | |||
92 | /* cpu_entry_t - maintain the linked and scheduled state | ||
93 | */ | ||
94 | typedef struct { | ||
95 | int cpu; | ||
96 | struct task_struct* linked; /* only RT tasks */ | ||
97 | struct task_struct* scheduled; /* only RT tasks */ | ||
98 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
99 | struct bheap_node* hn; | ||
100 | } cpu_entry_t; | ||
101 | DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); | ||
102 | |||
103 | cpu_entry_t* gsnedf_cpus[NR_CPUS]; | ||
104 | |||
105 | #define set_will_schedule() \ | ||
106 | (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1)) | ||
107 | #define clear_will_schedule() \ | ||
108 | (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0)) | ||
109 | #define test_will_schedule(cpu) \ | ||
110 | (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule)) | ||
111 | |||
112 | |||
113 | /* the cpus queue themselves according to priority in here */ | ||
114 | static struct bheap_node gsnedf_heap_node[NR_CPUS]; | ||
115 | static struct bheap gsnedf_cpu_heap; | ||
116 | |||
117 | static rt_domain_t gsnedf; | ||
118 | #define gsnedf_lock (gsnedf.ready_lock) | ||
119 | |||
120 | |||
121 | /* Uncomment this if you want to see all scheduling decisions in the | ||
122 | * TRACE() log. | ||
123 | #define WANT_ALL_SCHED_EVENTS | ||
124 | */ | ||
125 | |||
126 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
127 | { | ||
128 | cpu_entry_t *a, *b; | ||
129 | a = _a->value; | ||
130 | b = _b->value; | ||
131 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
132 | * the top of the heap. | ||
133 | */ | ||
134 | return edf_higher_prio(b->linked, a->linked); | ||
135 | } | ||
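
Note the deliberate inversion in cpu_lower_prio(): the heap is peeked to obtain the lowest-priority CPU, so the two arguments are passed to edf_higher_prio() in reverse order. The same trick in a self-contained form, using qsort() on a plain array instead of the bheap (purely illustrative):

#include <stdio.h>
#include <stdlib.h>

struct cpu { int id; unsigned long long linked_deadline; };

/* EDF: an earlier deadline means higher priority.  To get the
 * lowest-priority CPU at the front, compare in reverse order. */
static int cpu_lower_prio(const void *pa, const void *pb)
{
	const struct cpu *a = pa, *b = pb;
	if (b->linked_deadline < a->linked_deadline)
		return -1;                 /* b has higher priority, so a sorts first */
	if (b->linked_deadline > a->linked_deadline)
		return 1;
	return 0;
}

int main(void)
{
	struct cpu cpus[3] = { {0, 10}, {1, 500}, {2, 70} };
	qsort(cpus, 3, sizeof(cpus[0]), cpu_lower_prio);
	/* cpus[0] now holds the latest deadline, i.e. the lowest-priority CPU */
	printf("lowest-priority CPU: %d (deadline %llu)\n",
	       cpus[0].id, cpus[0].linked_deadline);
	return 0;
}
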
136 | |||
137 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
138 | * order in the cpu queue. Caller must hold gsnedf lock. | ||
139 | */ | ||
140 | static void update_cpu_position(cpu_entry_t *entry) | ||
141 | { | ||
142 | if (likely(bheap_node_in_heap(entry->hn))) | ||
143 | bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); | ||
144 | bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); | ||
145 | } | ||
146 | |||
147 | /* caller must hold gsnedf lock */ | ||
148 | static cpu_entry_t* lowest_prio_cpu(void) | ||
149 | { | ||
150 | struct bheap_node* hn; | ||
151 | hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap); | ||
152 | return hn->value; | ||
153 | } | ||
154 | |||
155 | |||
156 | /* link_task_to_cpu - Update the link of a CPU. | ||
157 | * Handles the case where the to-be-linked task is already | ||
158 | * scheduled on a different CPU. | ||
159 | */ | ||
160 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
161 | cpu_entry_t *entry) | ||
162 | { | ||
163 | cpu_entry_t *sched; | ||
164 | struct task_struct* tmp; | ||
165 | int on_cpu; | ||
166 | |||
167 | BUG_ON(linked && !is_realtime(linked)); | ||
168 | |||
169 | /* Currently linked task is set to be unlinked. */ | ||
170 | if (entry->linked) { | ||
171 | entry->linked->rt_param.linked_on = NO_CPU; | ||
172 | } | ||
173 | |||
174 | /* Link new task to CPU. */ | ||
175 | if (linked) { | ||
176 | set_rt_flags(linked, RT_F_RUNNING); | ||
177 | /* handle task is already scheduled somewhere! */ | ||
178 | on_cpu = linked->rt_param.scheduled_on; | ||
179 | if (on_cpu != NO_CPU) { | ||
180 | sched = &per_cpu(gsnedf_cpu_entries, on_cpu); | ||
181 | /* this should only happen if not linked already */ | ||
182 | BUG_ON(sched->linked == linked); | ||
183 | |||
184 | /* If we are already scheduled on the CPU to which we | ||
185 | * wanted to link, we don't need to do the swap -- | ||
186 | * we just link ourselves to the CPU and depend on | ||
187 | * the caller to get things right. | ||
188 | */ | ||
189 | if (entry != sched) { | ||
190 | TRACE_TASK(linked, | ||
191 | "already scheduled on %d, updating link.\n", | ||
192 | sched->cpu); | ||
193 | tmp = sched->linked; | ||
194 | linked->rt_param.linked_on = sched->cpu; | ||
195 | sched->linked = linked; | ||
196 | update_cpu_position(sched); | ||
197 | linked = tmp; | ||
198 | } | ||
199 | } | ||
200 | if (linked) /* might be NULL due to swap */ | ||
201 | linked->rt_param.linked_on = entry->cpu; | ||
202 | } | ||
203 | entry->linked = linked; | ||
204 | #ifdef WANT_ALL_SCHED_EVENTS | ||
205 | if (linked) | ||
206 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
207 | else | ||
208 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
209 | #endif | ||
210 | update_cpu_position(entry); | ||
211 | } | ||
212 | |||
213 | /* unlink - Make sure a task is not linked any longer to an entry | ||
214 | * where it was linked before. Must hold gsnedf_lock. | ||
215 | */ | ||
216 | static noinline void unlink(struct task_struct* t) | ||
217 | { | ||
218 | cpu_entry_t *entry; | ||
219 | |||
220 | if (unlikely(!t)) { | ||
221 | TRACE_BUG_ON(!t); | ||
222 | return; | ||
223 | } | ||
224 | |||
225 | if (t->rt_param.linked_on != NO_CPU) { | ||
226 | /* unlink */ | ||
227 | entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on); | ||
228 | t->rt_param.linked_on = NO_CPU; | ||
229 | link_task_to_cpu(NULL, entry); | ||
230 | } else if (is_queued(t)) { | ||
231 | /* This is an interesting situation: t is scheduled, | ||
232 | * but was just recently unlinked. It cannot be | ||
233 | * linked anywhere else (because then it would have | ||
234 | * been relinked to this CPU), thus it must be in some | ||
235 | * queue. We must remove it from the list in this | ||
236 | * case. | ||
237 | */ | ||
238 | remove(&gsnedf, t); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | |||
243 | /* preempt - force a CPU to reschedule | ||
244 | */ | ||
245 | static void preempt(cpu_entry_t *entry) | ||
246 | { | ||
247 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
248 | } | ||
249 | |||
250 | /* requeue - Put an unlinked task into the gsn-edf domain. | ||
251 | * Caller must hold gsnedf_lock. | ||
252 | */ | ||
253 | static noinline void requeue(struct task_struct* task) | ||
254 | { | ||
255 | BUG_ON(!task); | ||
256 | /* sanity check before insertion */ | ||
257 | BUG_ON(is_queued(task)); | ||
258 | |||
259 | if (is_released(task, litmus_clock())) | ||
260 | __add_ready(&gsnedf, task); | ||
261 | else { | ||
262 | /* it has got to wait */ | ||
263 | add_release(&gsnedf, task); | ||
264 | } | ||
265 | } | ||
266 | |||
267 | /* check for any necessary preemptions */ | ||
268 | static void check_for_preemptions(void) | ||
269 | { | ||
270 | struct task_struct *task; | ||
271 | cpu_entry_t* last; | ||
272 | |||
273 | for(last = lowest_prio_cpu(); | ||
274 | edf_preemption_needed(&gsnedf, last->linked); | ||
275 | last = lowest_prio_cpu()) { | ||
276 | /* preemption necessary */ | ||
277 | task = __take_ready(&gsnedf); | ||
278 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | ||
279 | task->pid, last->cpu); | ||
280 | if (last->linked) | ||
281 | requeue(last->linked); | ||
282 | link_task_to_cpu(task, last); | ||
283 | preempt(last); | ||
284 | } | ||
285 | } | ||
286 | |||
287 | /* gsnedf_job_arrival: task is either resumed or released */ | ||
288 | static noinline void gsnedf_job_arrival(struct task_struct* task) | ||
289 | { | ||
290 | BUG_ON(!task); | ||
291 | |||
292 | requeue(task); | ||
293 | check_for_preemptions(); | ||
294 | } | ||
295 | |||
296 | static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
297 | { | ||
298 | unsigned long flags; | ||
299 | |||
300 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
301 | |||
302 | __merge_ready(rt, tasks); | ||
303 | check_for_preemptions(); | ||
304 | |||
305 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
306 | } | ||
307 | |||
308 | /* caller holds gsnedf_lock */ | ||
309 | static noinline void job_completion(struct task_struct *t, int forced) | ||
310 | { | ||
311 | BUG_ON(!t); | ||
312 | |||
313 | sched_trace_task_completion(t, forced); | ||
314 | |||
315 | TRACE_TASK(t, "job_completion().\n"); | ||
316 | |||
317 | /* set flags */ | ||
318 | set_rt_flags(t, RT_F_SLEEP); | ||
319 | /* prepare for next period */ | ||
320 | prepare_for_next_period(t); | ||
321 | if (is_released(t, litmus_clock())) | ||
322 | sched_trace_task_release(t); | ||
323 | /* unlink */ | ||
324 | unlink(t); | ||
325 | /* requeue | ||
326 | * But don't requeue a blocking task. */ | ||
327 | if (is_running(t)) | ||
328 | gsnedf_job_arrival(t); | ||
329 | } | ||
330 | |||
331 | /* gsnedf_tick - this function is called for every local timer | ||
332 | * interrupt. | ||
333 | * | ||
334 | * checks whether the current task has expired and checks | ||
335 | * whether we need to preempt it if it has not expired | ||
336 | */ | ||
337 | static void gsnedf_tick(struct task_struct* t) | ||
338 | { | ||
339 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
340 | if (!is_np(t)) { | ||
341 | /* np tasks will be preempted when they become | ||
342 | * preemptable again | ||
343 | */ | ||
344 | set_tsk_need_resched(t); | ||
345 | set_will_schedule(); | ||
346 | TRACE("gsnedf_scheduler_tick: " | ||
347 | "%d is preemptable " | ||
348 | " => FORCE_RESCHED\n", t->pid); | ||
349 | } else if (is_user_np(t)) { | ||
350 | TRACE("gsnedf_scheduler_tick: " | ||
351 | "%d is non-preemptable, " | ||
352 | "preemption delayed.\n", t->pid); | ||
353 | request_exit_np(t); | ||
354 | } | ||
355 | } | ||
356 | } | ||
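
gsnedf_tick() only forces a reschedule when the task is real-time, budget enforcement is enabled, and the current job's budget is exhausted; a task inside a non-preemptive section merely gets an exit-np request. Here is a compilable sketch of the budget test itself; the field names are assumptions that mirror the idea of comparing consumed execution time against the per-job budget, not the LITMUS ABI.

#include <stdio.h>

/* Invented per-job accounting; the names mirror the idea only. */
struct toy_rt_task {
	unsigned long long exec_cost;      /* budget per job (worst-case exec time) */
	unsigned long long exec_time;      /* consumed so far by the current job    */
	int budget_enforced;
	int non_preemptive;
};

static void tick(struct toy_rt_task *t)
{
	if (t->budget_enforced && t->exec_time >= t->exec_cost) {
		if (!t->non_preemptive)
			printf("budget exhausted -> set_tsk_need_resched()\n");
		else
			printf("budget exhausted in np-section -> request_exit_np()\n");
	}
}

int main(void)
{
	struct toy_rt_task t = { 1000000, 1200000, 1, 0 };
	tick(&t);                          /* over budget and preemptable: reschedule */
	return 0;
}
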
357 | |||
358 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
359 | * assumptions on the state of the current task since it may be called for a | ||
360 | * number of reasons: a scheduler_tick() may have determined that it | ||
361 | * was necessary, sys_exit_np() may have been called, some Linux | ||
362 | * subsystem may have determined so, or even (in the worst case) there is a bug | ||
363 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
364 | * current state is. | ||
365 | * | ||
366 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
367 | * | ||
368 | * The following assertions for the scheduled task could hold: | ||
369 | * | ||
370 | * - !is_running(scheduled) // the job blocks | ||
371 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
372 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
373 | * - linked != scheduled // we need to reschedule (for any reason) | ||
374 | * - is_np(scheduled) // rescheduling must be delayed, | ||
375 | * sys_exit_np must be requested | ||
376 | * | ||
377 | * Any of these can occur together. | ||
378 | */ | ||
379 | static struct task_struct* gsnedf_schedule(struct task_struct * prev) | ||
380 | { | ||
381 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | ||
382 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
383 | struct task_struct* next = NULL; | ||
384 | |||
385 | /* Bail out early if we are the release master. | ||
386 | * The release master never schedules any real-time tasks. | ||
387 | */ | ||
388 | if (gsnedf.release_master == entry->cpu) | ||
389 | return NULL; | ||
390 | |||
391 | spin_lock(&gsnedf_lock); | ||
392 | clear_will_schedule(); | ||
393 | |||
394 | /* sanity checking */ | ||
395 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
396 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
397 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
398 | |||
399 | /* (0) Determine state */ | ||
400 | exists = entry->scheduled != NULL; | ||
401 | blocks = exists && !is_running(entry->scheduled); | ||
402 | out_of_time = exists && | ||
403 | budget_enforced(entry->scheduled) && | ||
404 | budget_exhausted(entry->scheduled); | ||
405 | np = exists && is_np(entry->scheduled); | ||
406 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; | ||
407 | preempt = entry->scheduled != entry->linked; | ||
408 | |||
409 | #ifdef WANT_ALL_SCHED_EVENTS | ||
410 | TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); | ||
411 | #endif | ||
412 | |||
413 | if (exists) | ||
414 | TRACE_TASK(prev, | ||
415 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
416 | "state:%d sig:%d\n", | ||
417 | blocks, out_of_time, np, sleep, preempt, | ||
418 | prev->state, signal_pending(prev)); | ||
419 | if (entry->linked && preempt) | ||
420 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
421 | entry->linked->comm, entry->linked->pid); | ||
422 | |||
423 | |||
424 | /* If a task blocks we have no choice but to reschedule. | ||
425 | */ | ||
426 | if (blocks) | ||
427 | unlink(entry->scheduled); | ||
428 | |||
429 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
430 | * We need to make sure to update the link structure anyway in case | ||
431 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
432 | * hurt. | ||
433 | */ | ||
434 | if (np && (out_of_time || preempt || sleep)) { | ||
435 | unlink(entry->scheduled); | ||
436 | request_exit_np(entry->scheduled); | ||
437 | } | ||
438 | |||
439 | /* Any task that is preemptable and either exhausts its execution | ||
440 | * budget or wants to sleep completes. We may have to reschedule after | ||
441 | * this. Don't do a job completion if we block (can't have timers running | ||
442 | * for blocked jobs). Preemptions go first for the same reason. | ||
443 | */ | ||
444 | if (!np && (out_of_time || sleep) && !blocks && !preempt) | ||
445 | job_completion(entry->scheduled, !sleep); | ||
446 | |||
447 | /* Link pending task if we became unlinked. | ||
448 | */ | ||
449 | if (!entry->linked) | ||
450 | link_task_to_cpu(__take_ready(&gsnedf), entry); | ||
451 | |||
452 | /* The final scheduling decision. Do we need to switch for some reason? | ||
453 | * If linked is different from scheduled, then select linked as next. | ||
454 | */ | ||
455 | if ((!np || blocks) && | ||
456 | entry->linked != entry->scheduled) { | ||
457 | /* Schedule a linked job? */ | ||
458 | if (entry->linked) { | ||
459 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
460 | next = entry->linked; | ||
461 | } | ||
462 | if (entry->scheduled) { | ||
463 | /* not gonna be scheduled soon */ | ||
464 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
465 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
466 | } | ||
467 | } else | ||
468 | /* Only override Linux scheduler if we have a real-time task | ||
469 | * scheduled that needs to continue. | ||
470 | */ | ||
471 | if (exists) | ||
472 | next = prev; | ||
473 | |||
474 | spin_unlock(&gsnedf_lock); | ||
475 | |||
476 | #ifdef WANT_ALL_SCHED_EVENTS | ||
477 | TRACE("gsnedf_lock released, next=0x%p\n", next); | ||
478 | |||
479 | if (next) | ||
480 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
481 | else if (exists && !next) | ||
482 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
483 | #endif | ||
484 | |||
485 | |||
486 | return next; | ||
487 | } | ||
488 | |||
489 | |||
490 | /* _finish_switch - we just finished the switch away from prev | ||
491 | */ | ||
492 | static void gsnedf_finish_switch(struct task_struct *prev) | ||
493 | { | ||
494 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | ||
495 | |||
496 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
497 | #ifdef WANT_ALL_SCHED_EVENTS | ||
498 | TRACE_TASK(prev, "switched away from\n"); | ||
499 | #endif | ||
500 | } | ||
501 | |||
502 | |||
503 | /* Prepare a task for running in RT mode | ||
504 | */ | ||
505 | static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) | ||
506 | { | ||
507 | unsigned long flags; | ||
508 | cpu_entry_t* entry; | ||
509 | |||
510 | TRACE("gsn edf: task new %d\n", t->pid); | ||
511 | |||
512 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
513 | |||
514 | /* setup job params */ | ||
515 | release_at(t, litmus_clock()); | ||
516 | |||
517 | if (running) { | ||
518 | entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t)); | ||
519 | BUG_ON(entry->scheduled); | ||
520 | |||
521 | if (entry->cpu != gsnedf.release_master) { | ||
522 | entry->scheduled = t; | ||
523 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
524 | } else { | ||
525 | /* do not schedule on release master */ | ||
526 | preempt(entry); /* force resched */ | ||
527 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
528 | } | ||
529 | } else { | ||
530 | t->rt_param.scheduled_on = NO_CPU; | ||
531 | } | ||
532 | t->rt_param.linked_on = NO_CPU; | ||
533 | |||
534 | gsnedf_job_arrival(t); | ||
535 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
536 | } | ||
537 | |||
538 | static void gsnedf_task_wake_up(struct task_struct *task) | ||
539 | { | ||
540 | unsigned long flags; | ||
541 | lt_t now; | ||
542 | |||
543 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
544 | |||
545 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
546 | /* We need to take suspensions because of semaphores into | ||
547 | * account! If a job resumes after being suspended due to acquiring | ||
548 | * a semaphore, it should never be treated as a new job release. | ||
549 | */ | ||
550 | if (get_rt_flags(task) == RT_F_EXIT_SEM) { | ||
551 | set_rt_flags(task, RT_F_RUNNING); | ||
552 | } else { | ||
553 | now = litmus_clock(); | ||
554 | if (is_tardy(task, now)) { | ||
555 | /* new sporadic release */ | ||
556 | release_at(task, now); | ||
557 | sched_trace_task_release(task); | ||
558 | } | ||
559 | else { | ||
560 | if (task->rt.time_slice) { | ||
561 | /* came back in time before deadline | ||
562 | */ | ||
563 | set_rt_flags(task, RT_F_RUNNING); | ||
564 | } | ||
565 | } | ||
566 | } | ||
567 | gsnedf_job_arrival(task); | ||
568 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
569 | } | ||
570 | |||
571 | static void gsnedf_task_block(struct task_struct *t) | ||
572 | { | ||
573 | unsigned long flags; | ||
574 | |||
575 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
576 | |||
577 | /* unlink if necessary */ | ||
578 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
579 | unlink(t); | ||
580 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
581 | |||
582 | BUG_ON(!is_realtime(t)); | ||
583 | } | ||
584 | |||
585 | |||
586 | static void gsnedf_task_exit(struct task_struct * t) | ||
587 | { | ||
588 | unsigned long flags; | ||
589 | |||
590 | /* unlink if necessary */ | ||
591 | spin_lock_irqsave(&gsnedf_lock, flags); | ||
592 | unlink(t); | ||
593 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
594 | gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; | ||
595 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
596 | } | ||
597 | spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
598 | |||
599 | BUG_ON(!is_realtime(t)); | ||
600 | TRACE_TASK(t, "RIP\n"); | ||
601 | } | ||
602 | |||
603 | #ifdef CONFIG_FMLP | ||
604 | |||
605 | /* Update the queue position of a task that got its priority boosted via | ||
606 | * priority inheritance. */ | ||
607 | static void update_queue_position(struct task_struct *holder) | ||
608 | { | ||
609 | /* We don't know whether holder is in the ready queue. It should be, but | ||
610 | * on a budget overrun it may already be in a release queue. Hence, | ||
611 | * calling unlink() is not possible since it assumes that the task is | ||
612 | * not in a release queue. However, we can safely check whether | ||
613 | * sem->holder is currently in a queue or scheduled after locking both | ||
614 | * the release and the ready queue lock. */ | ||
615 | |||
616 | /* Assumption: caller holds gsnedf_lock */ | ||
617 | |||
618 | int check_preempt = 0; | ||
619 | |||
620 | if (tsk_rt(holder)->linked_on != NO_CPU) { | ||
621 | TRACE_TASK(holder, "%s: linked on %d\n", | ||
622 | __FUNCTION__, tsk_rt(holder)->linked_on); | ||
623 | /* Holder is scheduled; need to re-order CPUs. | ||
624 | * We can't use heap_decrease() here since | ||
625 | * the cpu_heap is ordered in reverse direction, so | ||
626 | * it is actually an increase. */ | ||
627 | bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, | ||
628 | gsnedf_cpus[tsk_rt(holder)->linked_on]->hn); | ||
629 | bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, | ||
630 | gsnedf_cpus[tsk_rt(holder)->linked_on]->hn); | ||
631 | } else { | ||
632 | /* holder may be queued: first stop queue changes */ | ||
633 | spin_lock(&gsnedf.release_lock); | ||
634 | if (is_queued(holder)) { | ||
635 | TRACE_TASK(holder, "%s: is queued\n", | ||
636 | __FUNCTION__); | ||
637 | /* We need to update the position | ||
638 | * of holder in some heap. Note that this | ||
639 | * may be a release heap. */ | ||
640 | check_preempt = | ||
641 | !bheap_decrease(edf_ready_order, | ||
642 | tsk_rt(holder)->heap_node); | ||
643 | } else { | ||
644 | /* Nothing to do: if it is not queued and not linked | ||
645 | * then it is currently being moved by other code | ||
646 | * (e.g., a timer interrupt handler) that will use the | ||
647 | * correct priority when enqueuing the task. */ | ||
648 | TRACE_TASK(holder, "%s: is NOT queued => Done.\n", | ||
649 | __FUNCTION__); | ||
650 | } | ||
651 | spin_unlock(&gsnedf.release_lock); | ||
652 | |||
653 | /* If holder was enqueued in a release heap, then the following | ||
654 | * preemption check is pointless, but we can't easily detect | ||
655 | * that case. If you want to fix this, then consider that | ||
656 | * simply adding a state flag requires O(n) time to update when | ||
657 | * releasing n tasks, which conflicts with the goal to have | ||
658 | * O(log n) merges. */ | ||
659 | if (check_preempt) { | ||
660 | /* heap_decrease() hit the top level of the heap: make | ||
661 | * sure preemption checks get the right task, not the | ||
662 | * potentially stale cache. */ | ||
663 | bheap_uncache_min(edf_ready_order, | ||
664 | &gsnedf.ready_queue); | ||
665 | check_for_preemptions(); | ||
666 | } | ||
667 | } | ||
668 | } | ||
669 | |||
670 | static long gsnedf_pi_block(struct pi_semaphore *sem, | ||
671 | struct task_struct *new_waiter) | ||
672 | { | ||
673 | /* This callback has to handle the situation where a new waiter is | ||
674 | * added to the wait queue of the semaphore. | ||
675 | * | ||
676 | * We must check if it has a higher priority than the currently | ||
677 | * highest-priority task, and then potentially reschedule. | ||
678 | */ | ||
679 | |||
680 | BUG_ON(!new_waiter); | ||
681 | |||
682 | if (edf_higher_prio(new_waiter, sem->hp.task)) { | ||
683 | TRACE_TASK(new_waiter, " boosts priority via %p\n", sem); | ||
684 | /* called with IRQs disabled */ | ||
685 | spin_lock(&gsnedf_lock); | ||
686 | /* store new highest-priority task */ | ||
687 | sem->hp.task = new_waiter; | ||
688 | if (sem->holder) { | ||
689 | TRACE_TASK(sem->holder, | ||
690 | " holds %p and will inherit from %s/%d\n", | ||
691 | sem, | ||
692 | new_waiter->comm, new_waiter->pid); | ||
693 | /* let holder inherit */ | ||
694 | sem->holder->rt_param.inh_task = new_waiter; | ||
695 | update_queue_position(sem->holder); | ||
696 | } | ||
697 | spin_unlock(&gsnedf_lock); | ||
698 | } | ||
699 | |||
700 | return 0; | ||
701 | } | ||
702 | |||
703 | static long gsnedf_inherit_priority(struct pi_semaphore *sem, | ||
704 | struct task_struct *new_owner) | ||
705 | { | ||
706 | /* We don't need to acquire the gsnedf_lock since at the time of this | ||
707 | * call new_owner isn't actually scheduled yet (it's still sleeping) | ||
708 | * and since the calling function already holds sem->wait.lock, which | ||
709 | * prevents concurrent sem->hp.task changes. | ||
710 | */ | ||
711 | |||
712 | if (sem->hp.task && sem->hp.task != new_owner) { | ||
713 | new_owner->rt_param.inh_task = sem->hp.task; | ||
714 | TRACE_TASK(new_owner, "inherited priority from %s/%d\n", | ||
715 | sem->hp.task->comm, sem->hp.task->pid); | ||
716 | } else | ||
717 | TRACE_TASK(new_owner, | ||
718 | "cannot inherit priority, " | ||
719 | "no higher priority job waits.\n"); | ||
720 | return 0; | ||
721 | } | ||
722 | |||
723 | /* This function is called on a semaphore release, and assumes that | ||
724 | * the current task is also the semaphore holder. | ||
725 | */ | ||
726 | static long gsnedf_return_priority(struct pi_semaphore *sem) | ||
727 | { | ||
728 | struct task_struct* t = current; | ||
729 | int ret = 0; | ||
730 | |||
731 | /* Find new highest-priority semaphore task | ||
732 | * if holder task is the current hp.task. | ||
733 | * | ||
734 | * Calling function holds sem->wait.lock. | ||
735 | */ | ||
736 | if (t == sem->hp.task) | ||
737 | edf_set_hp_task(sem); | ||
738 | |||
739 | TRACE_CUR("gsnedf_return_priority for lock %p\n", sem); | ||
740 | |||
741 | if (t->rt_param.inh_task) { | ||
742 | /* interrupts already disabled by PI code */ | ||
743 | spin_lock(&gsnedf_lock); | ||
744 | |||
745 | /* Reset inh_task to NULL. */ | ||
746 | t->rt_param.inh_task = NULL; | ||
747 | |||
748 | /* Check if rescheduling is necessary */ | ||
749 | unlink(t); | ||
750 | gsnedf_job_arrival(t); | ||
751 | spin_unlock(&gsnedf_lock); | ||
752 | } | ||
753 | |||
754 | return ret; | ||
755 | } | ||
756 | |||
757 | #endif | ||
758 | |||
759 | static long gsnedf_admit_task(struct task_struct* tsk) | ||
760 | { | ||
761 | return 0; | ||
762 | } | ||
763 | |||
764 | static long gsnedf_activate_plugin(void) | ||
765 | { | ||
766 | int cpu; | ||
767 | cpu_entry_t *entry; | ||
768 | |||
769 | bheap_init(&gsnedf_cpu_heap); | ||
770 | gsnedf.release_master = atomic_read(&release_master_cpu); | ||
771 | |||
772 | for_each_online_cpu(cpu) { | ||
773 | entry = &per_cpu(gsnedf_cpu_entries, cpu); | ||
774 | bheap_node_init(&entry->hn, entry); | ||
775 | atomic_set(&entry->will_schedule, 0); | ||
776 | entry->linked = NULL; | ||
777 | entry->scheduled = NULL; | ||
778 | if (cpu != gsnedf.release_master) { | ||
779 | TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu); | ||
780 | update_cpu_position(entry); | ||
781 | } else { | ||
782 | TRACE("GSN-EDF: CPU %d is release master.\n", cpu); | ||
783 | } | ||
784 | } | ||
785 | return 0; | ||
786 | } | ||
787 | |||
788 | /* Plugin object */ | ||
789 | static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { | ||
790 | .plugin_name = "GSN-EDF", | ||
791 | .finish_switch = gsnedf_finish_switch, | ||
792 | .tick = gsnedf_tick, | ||
793 | .task_new = gsnedf_task_new, | ||
794 | .complete_job = complete_job, | ||
795 | .task_exit = gsnedf_task_exit, | ||
796 | .schedule = gsnedf_schedule, | ||
797 | .task_wake_up = gsnedf_task_wake_up, | ||
798 | .task_block = gsnedf_task_block, | ||
799 | #ifdef CONFIG_FMLP | ||
800 | .fmlp_active = 1, | ||
801 | .pi_block = gsnedf_pi_block, | ||
802 | .inherit_priority = gsnedf_inherit_priority, | ||
803 | .return_priority = gsnedf_return_priority, | ||
804 | #endif | ||
805 | .admit_task = gsnedf_admit_task, | ||
806 | .activate_plugin = gsnedf_activate_plugin, | ||
807 | }; | ||
808 | |||
809 | |||
810 | static int __init init_gsn_edf(void) | ||
811 | { | ||
812 | int cpu; | ||
813 | cpu_entry_t *entry; | ||
814 | |||
815 | bheap_init(&gsnedf_cpu_heap); | ||
816 | /* initialize CPU state */ | ||
817 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
818 | entry = &per_cpu(gsnedf_cpu_entries, cpu); | ||
819 | gsnedf_cpus[cpu] = entry; | ||
820 | atomic_set(&entry->will_schedule, 0); | ||
821 | entry->cpu = cpu; | ||
822 | entry->hn = &gsnedf_heap_node[cpu]; | ||
823 | bheap_node_init(&entry->hn, entry); | ||
824 | } | ||
825 | edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); | ||
826 | return register_sched_plugin(&gsn_edf_plugin); | ||
827 | } | ||
828 | |||
829 | |||
830 | module_init(init_gsn_edf); | ||
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c new file mode 100644 index 000000000000..c1fc7748e590 --- /dev/null +++ b/litmus/sched_litmus.c | |||
@@ -0,0 +1,318 @@ | |||
1 | /* This file is included from kernel/sched.c */ | ||
2 | |||
3 | #include <litmus/litmus.h> | ||
4 | #include <litmus/sched_plugin.h> | ||
5 | |||
6 | static void update_time_litmus(struct rq *rq, struct task_struct *p) | ||
7 | { | ||
8 | u64 delta = rq->clock - p->se.exec_start; | ||
9 | if (unlikely((s64)delta < 0)) | ||
10 | delta = 0; | ||
11 | /* per job counter */ | ||
12 | p->rt_param.job_params.exec_time += delta; | ||
13 | /* task counter */ | ||
14 | p->se.sum_exec_runtime += delta; | ||
15 | /* sched_clock() */ | ||
16 | p->se.exec_start = rq->clock; | ||
17 | cpuacct_charge(p, delta); | ||
18 | } | ||
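
update_time_litmus() charges the runqueue-clock delta since the task last started executing to both the per-job counter and the task's cumulative runtime, clamping negative deltas, and then restarts the measurement from the current clock. The same bookkeeping in isolation (a hedged user-space sketch with simplified fields):

#include <stdio.h>

struct toy_task {
	long long exec_start;              /* rq->clock when put on the CPU */
	long long job_exec_time;           /* per-job counter               */
	long long sum_exec_runtime;        /* lifetime counter              */
};

static void update_time(struct toy_task *p, long long rq_clock)
{
	long long delta = rq_clock - p->exec_start;
	if (delta < 0)                     /* guard against a clock anomaly */
		delta = 0;
	p->job_exec_time += delta;         /* charged to the current job    */
	p->sum_exec_runtime += delta;      /* and to the task's total       */
	p->exec_start = rq_clock;          /* next delta starts from here   */
}

int main(void)
{
	struct toy_task p = { 100, 0, 0 };
	update_time(&p, 350);
	printf("job=%lld total=%lld\n", p.job_exec_time, p.sum_exec_runtime);
	return 0;
}
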
19 | |||
20 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
21 | static void double_rq_unlock(struct rq *rq1, struct rq *rq2); | ||
22 | |||
23 | /* | ||
24 | * litmus_tick gets called by scheduler_tick() with HZ freq | ||
25 | * Interrupts are disabled | ||
26 | */ | ||
27 | static void litmus_tick(struct rq *rq, struct task_struct *p) | ||
28 | { | ||
29 | TS_PLUGIN_TICK_START; | ||
30 | |||
31 | if (is_realtime(p)) | ||
32 | update_time_litmus(rq, p); | ||
33 | |||
34 | /* plugin tick */ | ||
35 | litmus->tick(p); | ||
36 | |||
37 | return; | ||
38 | } | ||
39 | |||
40 | static struct task_struct * | ||
41 | litmus_schedule(struct rq *rq, struct task_struct *prev) | ||
42 | { | ||
43 | struct rq* other_rq; | ||
44 | struct task_struct *next; | ||
45 | |||
46 | long was_running; | ||
47 | lt_t _maybe_deadlock = 0; | ||
48 | |||
49 | /* let the plugin schedule */ | ||
50 | next = litmus->schedule(prev); | ||
51 | |||
52 | /* check if a global plugin pulled a task from a different RQ */ | ||
53 | if (next && task_rq(next) != rq) { | ||
54 | /* we need to migrate the task */ | ||
55 | other_rq = task_rq(next); | ||
56 | TRACE_TASK(next, "migrate from %d\n", other_rq->cpu); | ||
57 | |||
58 | /* while we drop the lock, the prev task could change its | ||
59 | * state | ||
60 | */ | ||
61 | was_running = is_running(prev); | ||
62 | mb(); | ||
63 | spin_unlock(&rq->lock); | ||
64 | |||
65 | /* Don't race with a concurrent switch. This could deadlock in | ||
66 | * the case of cross or circular migrations. It's the job of | ||
67 | * the plugin to make sure that doesn't happen. | ||
68 | */ | ||
69 | TRACE_TASK(next, "stack_in_use=%d\n", | ||
70 | next->rt_param.stack_in_use); | ||
71 | if (next->rt_param.stack_in_use != NO_CPU) { | ||
72 | TRACE_TASK(next, "waiting to deschedule\n"); | ||
73 | _maybe_deadlock = litmus_clock(); | ||
74 | } | ||
75 | while (next->rt_param.stack_in_use != NO_CPU) { | ||
76 | cpu_relax(); | ||
77 | mb(); | ||
78 | if (next->rt_param.stack_in_use == NO_CPU) | ||
79 | TRACE_TASK(next,"descheduled. Proceeding.\n"); | ||
80 | |||
81 | if (lt_before(_maybe_deadlock + 10000000, | ||
82 | litmus_clock())) { | ||
83 | /* We've been spinning for 10ms. | ||
84 | * Something can't be right! | ||
85 | * Let's abandon the task and bail out; at least | ||
86 | * we will have debug info instead of a hard | ||
87 | * deadlock. | ||
88 | */ | ||
89 | TRACE_TASK(next,"stack too long in use. " | ||
90 | "Deadlock?\n"); | ||
91 | next = NULL; | ||
92 | |||
93 | /* bail out */ | ||
94 | spin_lock(&rq->lock); | ||
95 | return next; | ||
96 | } | ||
97 | } | ||
98 | #ifdef __ARCH_WANT_UNLOCKED_CTXSW | ||
99 | if (next->oncpu) | ||
100 | TRACE_TASK(next, "waiting for !oncpu"); | ||
101 | while (next->oncpu) { | ||
102 | cpu_relax(); | ||
103 | mb(); | ||
104 | } | ||
105 | #endif | ||
106 | double_rq_lock(rq, other_rq); | ||
107 | mb(); | ||
108 | if (is_realtime(prev) && is_running(prev) != was_running) { | ||
109 | TRACE_TASK(prev, | ||
110 | "state changed while we dropped" | ||
111 | " the lock: is_running=%d, was_running=%d\n", | ||
112 | is_running(prev), was_running); | ||
113 | if (is_running(prev) && !was_running) { | ||
114 | /* prev task became unblocked | ||
115 | * we need to simulate normal sequence of events | ||
116 | * to scheduler plugins. | ||
117 | */ | ||
118 | litmus->task_block(prev); | ||
119 | litmus->task_wake_up(prev); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | set_task_cpu(next, smp_processor_id()); | ||
124 | |||
125 | /* DEBUG: now that we have the lock we need to make sure a | ||
126 | * couple of things still hold: | ||
127 | * - it is still a real-time task | ||
128 | * - it is still runnable (could have been stopped) | ||
129 | * If either is violated, then the active plugin is | ||
130 | * doing something wrong. | ||
131 | */ | ||
132 | if (!is_realtime(next) || !is_running(next)) { | ||
133 | /* BAD BAD BAD */ | ||
134 | TRACE_TASK(next,"BAD: migration invariant FAILED: " | ||
135 | "rt=%d running=%d\n", | ||
136 | is_realtime(next), | ||
137 | is_running(next)); | ||
138 | /* drop the task */ | ||
139 | next = NULL; | ||
140 | } | ||
141 | /* release the other CPU's runqueue, but keep ours */ | ||
142 | spin_unlock(&other_rq->lock); | ||
143 | } | ||
144 | if (next) { | ||
145 | next->rt_param.stack_in_use = rq->cpu; | ||
146 | next->se.exec_start = rq->clock; | ||
147 | } | ||
148 | |||
149 | return next; | ||
150 | } | ||
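
The migration path above spins until the incoming task's stack is released on its previous CPU, but bounds the wait at roughly 10 ms (10,000,000 ns of litmus_clock()); if the stack is still in use after that, the task is dropped with a trace message so a plugin bug shows up as debug output rather than a hard deadlock. A stand-alone sketch of such a bounded spin-wait follows (user-space C with a monotonic clock; the flag merely stands in for rt_param.stack_in_use).

#include <stdio.h>
#include <time.h>

static long long now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/* Spin until *flag drops to 0 or ~10 ms elapse; returns 0 on success,
 * -1 if we gave up (the "maybe deadlock" path). */
static int wait_for_deschedule(volatile int *flag)
{
	long long start = now_ns();
	while (*flag) {
		if (now_ns() - start > 10000000LL) {
			fprintf(stderr, "stack too long in use, bailing out\n");
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	volatile int stack_in_use = 1;     /* never cleared: exercises the timeout */
	return wait_for_deschedule(&stack_in_use) ? 1 : 0;
}
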
151 | |||
152 | static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, | ||
153 | int wakeup) | ||
154 | { | ||
155 | if (wakeup) { | ||
156 | sched_trace_task_resume(p); | ||
157 | tsk_rt(p)->present = 1; | ||
158 | litmus->task_wake_up(p); | ||
159 | |||
160 | rq->litmus.nr_running++; | ||
161 | } else | ||
162 | TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); | ||
163 | } | ||
164 | |||
165 | static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, int sleep) | ||
166 | { | ||
167 | if (sleep) { | ||
168 | litmus->task_block(p); | ||
169 | tsk_rt(p)->present = 0; | ||
170 | sched_trace_task_block(p); | ||
171 | |||
172 | rq->litmus.nr_running--; | ||
173 | } else | ||
174 | TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n"); | ||
175 | } | ||
176 | |||
177 | static void yield_task_litmus(struct rq *rq) | ||
178 | { | ||
179 | BUG_ON(rq->curr != current); | ||
180 | /* sched_yield() is called to trigger delayed preemptions. | ||
181 | * Thus, mark the current task as needing to be rescheduled. | ||
182 | * This will cause the scheduler plugin to be invoked, which can | ||
183 | * then determine if a preemption is still required. | ||
184 | */ | ||
185 | clear_exit_np(current); | ||
186 | set_tsk_need_resched(current); | ||
187 | } | ||
188 | |||
189 | /* Plugins are responsible for this. | ||
190 | */ | ||
191 | static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags) | ||
192 | { | ||
193 | } | ||
194 | |||
195 | static void put_prev_task_litmus(struct rq *rq, struct task_struct *p) | ||
196 | { | ||
197 | } | ||
198 | |||
199 | static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev) | ||
200 | { | ||
201 | update_time_litmus(rq, prev); | ||
202 | if (!is_running(prev)) | ||
203 | tsk_rt(prev)->present = 0; | ||
204 | } | ||
205 | |||
206 | /* pick_next_task_litmus() - litmus_schedule() function | ||
207 | * | ||
208 | * return the next task to be scheduled | ||
209 | */ | ||
210 | static struct task_struct *pick_next_task_litmus(struct rq *rq) | ||
211 | { | ||
212 | /* get the to-be-switched-out task (prev) */ | ||
213 | struct task_struct *prev = rq->litmus.prev; | ||
214 | struct task_struct *next; | ||
215 | |||
216 | /* if not called from schedule() but from somewhere | ||
217 | * else (e.g., migration), return now! | ||
218 | */ | ||
219 | if(!rq->litmus.prev) | ||
220 | return NULL; | ||
221 | |||
222 | rq->litmus.prev = NULL; | ||
223 | |||
224 | TS_PLUGIN_SCHED_START; | ||
225 | next = litmus_schedule(rq, prev); | ||
226 | TS_PLUGIN_SCHED_END; | ||
227 | |||
228 | return next; | ||
229 | } | ||
230 | |||
231 | static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued) | ||
232 | { | ||
233 | /* nothing to do; tick related tasks are done by litmus_tick() */ | ||
234 | return; | ||
235 | } | ||
236 | |||
237 | static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running) | ||
238 | { | ||
239 | } | ||
240 | |||
241 | static void prio_changed_litmus(struct rq *rq, struct task_struct *p, | ||
242 | int oldprio, int running) | ||
243 | { | ||
244 | } | ||
245 | |||
246 | unsigned int get_rr_interval_litmus(struct task_struct *p) | ||
247 | { | ||
248 | /* return infinity */ | ||
249 | return 0; | ||
250 | } | ||
251 | |||
252 | /* This is called when a task becomes a real-time task, either due to a SCHED_* | ||
253 | * class transition or due to PI mutex inheritance. We don't handle Linux PI | ||
254 | * mutex inheritance yet (and probably never will). Use LITMUS-provided | ||
255 | * synchronization primitives instead. | ||
256 | */ | ||
257 | static void set_curr_task_litmus(struct rq *rq) | ||
258 | { | ||
259 | rq->curr->se.exec_start = rq->clock; | ||
260 | } | ||
261 | |||
262 | |||
263 | #ifdef CONFIG_SMP | ||
264 | /* execve tries to rebalance task in this scheduling domain */ | ||
265 | static int select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags) | ||
266 | { | ||
267 | /* Preemption is already disabled. | ||
268 | * We don't want to change the CPU here. | ||
269 | */ | ||
270 | return smp_processor_id(); | ||
271 | } | ||
272 | |||
273 | /* we don't repartition at runtime */ | ||
274 | |||
275 | static unsigned long | ||
276 | load_balance_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
277 | unsigned long max_load_move, | ||
278 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
279 | int *all_pinned, int *this_best_prio) | ||
280 | { | ||
281 | return 0; | ||
282 | } | ||
283 | |||
284 | static int | ||
285 | move_one_task_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
286 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
287 | { | ||
288 | return 0; | ||
289 | } | ||
290 | #endif | ||
291 | |||
292 | const struct sched_class litmus_sched_class = { | ||
293 | .next = &rt_sched_class, | ||
294 | .enqueue_task = enqueue_task_litmus, | ||
295 | .dequeue_task = dequeue_task_litmus, | ||
296 | .yield_task = yield_task_litmus, | ||
297 | |||
298 | .check_preempt_curr = check_preempt_curr_litmus, | ||
299 | |||
300 | .pick_next_task = pick_next_task_litmus, | ||
301 | .put_prev_task = put_prev_task_litmus, | ||
302 | |||
303 | #ifdef CONFIG_SMP | ||
304 | .select_task_rq = select_task_rq_litmus, | ||
305 | |||
306 | .load_balance = load_balance_litmus, | ||
307 | .move_one_task = move_one_task_litmus, | ||
308 | .pre_schedule = pre_schedule_litmus, | ||
309 | #endif | ||
310 | |||
311 | .set_curr_task = set_curr_task_litmus, | ||
312 | .task_tick = task_tick_litmus, | ||
313 | |||
314 | .get_rr_interval = get_rr_interval_litmus, | ||
315 | |||
316 | .prio_changed = prio_changed_litmus, | ||
317 | .switched_to = switched_to_litmus, | ||
318 | }; | ||
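The .next pointer chains litmus_sched_class in front of rt_sched_class, so LITMUS tasks take precedence over SCHED_FIFO/SCHED_RR. For orientation, the core scheduler picks the next task by walking this chain; a simplified sketch of that loop is shown below (the real code in kernel/sched.c differs in details, and starting the walk at litmus_sched_class assumes the corresponding hunk of this patch adjusts sched_class_highest accordingly):

    /* Simplified sketch of the class walk performed by the core scheduler. */
    static struct task_struct *pick_next_task_sketch(struct rq *rq)
    {
            const struct sched_class *class;
            struct task_struct *p;

            for (class = &litmus_sched_class; class; class = class->next) {
                    p = class->pick_next_task(rq);
                    if (p)
                            return p;
            }
            /* never reached: the idle class always returns a task */
            return NULL;
    }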
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c new file mode 100644 index 000000000000..2ea39223e7f0 --- /dev/null +++ b/litmus/sched_pfair.c | |||
@@ -0,0 +1,896 @@ | |||
1 | /* | ||
2 | * litmus/sched_pfair.c | ||
3 | * | ||
4 | * Implementation of the (global) Pfair scheduling algorithm. | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | #include <asm/div64.h> | ||
9 | #include <linux/delay.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/percpu.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/list.h> | ||
15 | |||
16 | #include <litmus/litmus.h> | ||
17 | #include <litmus/jobs.h> | ||
18 | #include <litmus/rt_domain.h> | ||
19 | #include <litmus/sched_plugin.h> | ||
20 | #include <litmus/sched_trace.h> | ||
21 | |||
22 | #include <litmus/bheap.h> | ||
23 | |||
24 | struct subtask { | ||
25 | /* measured in quanta relative to job release */ | ||
26 | quanta_t release; | ||
27 | quanta_t deadline; | ||
28 | quanta_t overlap; /* called "b bit" by PD^2 */ | ||
29 | quanta_t group_deadline; | ||
30 | }; | ||
31 | |||
32 | struct pfair_param { | ||
33 | quanta_t quanta; /* number of subtasks */ | ||
34 | quanta_t cur; /* index of current subtask */ | ||
35 | |||
36 | quanta_t release; /* in quanta */ | ||
37 | quanta_t period; /* in quanta */ | ||
38 | |||
39 | quanta_t last_quantum; /* when scheduled last */ | ||
40 | int last_cpu; /* where scheduled last */ | ||
41 | |||
42 | unsigned int sporadic_release; /* On wakeup, new sporadic release? */ | ||
43 | |||
44 | struct subtask subtasks[0]; /* allocate together with pfair_param */ | ||
45 | }; | ||
46 | |||
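The zero-length subtasks[] member is the usual "header plus trailing array in one allocation" idiom; pfair_admit_task() below sizes the allocation as sizeof(*param) + quanta * sizeof(struct subtask). A small stand-alone sketch of the pattern (user space, with malloc() standing in for kmalloc(); demo_param and alloc_demo_param() are made-up names):

    #include <stdlib.h>

    struct demo_param {
            unsigned long quanta;
            struct { unsigned long release, deadline; } subtasks[0];
    };

    static struct demo_param *alloc_demo_param(unsigned long quanta)
    {
            /* one allocation holds the header and `quanta` trailing subtasks */
            struct demo_param *p =
                    malloc(sizeof(*p) + quanta * sizeof(p->subtasks[0]));
            if (p)
                    p->quanta = quanta;
            return p;
    }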
47 | #define tsk_pfair(tsk) ((tsk)->rt_param.pfair) | ||
48 | |||
49 | struct pfair_state { | ||
50 | int cpu; | ||
51 | volatile quanta_t cur_tick; /* updated by the CPU that is advancing | ||
52 | * the time */ | ||
53 | volatile quanta_t local_tick; /* What tick is the local CPU currently | ||
54 | * executing? Updated only by the local | ||
55 | * CPU. In QEMU, this may lag behind the | ||
56 | * current tick. In a real system, with | ||
57 | * proper timers and aligned quanta, | ||
58 | * that should only be the | ||
59 | * case for a very short time after the | ||
60 | * time advanced. With staggered quanta, | ||
61 | * it will lag for the duration of the | ||
62 | * offset. | ||
63 | */ | ||
64 | |||
65 | struct task_struct* linked; /* the task that should be executing */ | ||
66 | struct task_struct* local; /* the local copy of linked */ | ||
67 | struct task_struct* scheduled; /* what is actually scheduled */ | ||
68 | |||
69 | unsigned long missed_quanta; | ||
70 | lt_t offset; /* stagger offset */ | ||
71 | }; | ||
72 | |||
73 | /* Currently, we limit the maximum period of any task to 2000 quanta. | ||
74 | * The reason is that it makes the implementation easier since we do not | ||
75 | * need to reallocate the release wheel on task arrivals. | ||
76 | * In the future, this restriction may be lifted. | ||
77 | */ | ||
78 | #define PFAIR_MAX_PERIOD 2000 | ||
79 | |||
80 | /* This is the release queue wheel. It is indexed by pfair_time % | ||
81 | * PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR priority, so that it can be | ||
82 | * merged with the ready queue. | ||
83 | */ | ||
84 | static struct bheap release_queue[PFAIR_MAX_PERIOD]; | ||
85 | |||
86 | DEFINE_PER_CPU(struct pfair_state, pfair_state); | ||
87 | struct pfair_state* *pstate; /* short cut */ | ||
88 | |||
89 | static quanta_t pfair_time = 0; /* the "official" PFAIR clock */ | ||
90 | static quanta_t merge_time = 0; /* Updated after the release queue has been | ||
91 | * merged. Used by drop_all_references(). | ||
92 | */ | ||
93 | |||
94 | static rt_domain_t pfair; | ||
95 | |||
96 | /* The pfair_lock is used to serialize all scheduling events. | ||
97 | */ | ||
98 | #define pfair_lock pfair.ready_lock | ||
99 | |||
100 | /* Enable for lots of trace info. | ||
101 | * #define PFAIR_DEBUG | ||
102 | */ | ||
103 | |||
104 | #ifdef PFAIR_DEBUG | ||
105 | #define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args) | ||
106 | #define PTRACE(f, args...) TRACE(f, ## args) | ||
107 | #else | ||
108 | #define PTRACE_TASK(t, f, args...) | ||
109 | #define PTRACE(f, args...) | ||
110 | #endif | ||
111 | |||
112 | /* gcc will inline all of these accessor functions... */ | ||
113 | static struct subtask* cur_subtask(struct task_struct* t) | ||
114 | { | ||
115 | return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur; | ||
116 | } | ||
117 | |||
118 | static quanta_t cur_deadline(struct task_struct* t) | ||
119 | { | ||
120 | return cur_subtask(t)->deadline + tsk_pfair(t)->release; | ||
121 | } | ||
122 | |||
123 | |||
124 | static quanta_t cur_sub_release(struct task_struct* t) | ||
125 | { | ||
126 | return cur_subtask(t)->release + tsk_pfair(t)->release; | ||
127 | } | ||
128 | |||
129 | static quanta_t cur_release(struct task_struct* t) | ||
130 | { | ||
131 | #ifdef EARLY_RELEASE | ||
132 | /* only the release of the first subtask counts when we early | ||
133 | * release */ | ||
134 | return tsk_pfair(t)->release; | ||
135 | #else | ||
136 | return cur_sub_release(t); | ||
137 | #endif | ||
138 | } | ||
139 | |||
140 | static quanta_t cur_overlap(struct task_struct* t) | ||
141 | { | ||
142 | return cur_subtask(t)->overlap; | ||
143 | } | ||
144 | |||
145 | static quanta_t cur_group_deadline(struct task_struct* t) | ||
146 | { | ||
147 | quanta_t gdl = cur_subtask(t)->group_deadline; | ||
148 | if (gdl) | ||
149 | return gdl + tsk_pfair(t)->release; | ||
150 | else | ||
151 | return gdl; | ||
152 | } | ||
153 | |||
154 | |||
155 | static int pfair_higher_prio(struct task_struct* first, | ||
156 | struct task_struct* second) | ||
157 | { | ||
158 | return /* first task must exist */ | ||
159 | first && ( | ||
160 | /* Does the second task exist and is it a real-time task? If | ||
161 | * not, the first task (which is a RT task) has higher | ||
162 | * priority. | ||
163 | */ | ||
164 | !second || !is_realtime(second) || | ||
165 | |||
166 | /* Is the (subtask) deadline of the first task earlier? | ||
167 | * Then it has higher priority. | ||
168 | */ | ||
169 | time_before(cur_deadline(first), cur_deadline(second)) || | ||
170 | |||
171 | /* Do we have a deadline tie? | ||
172 | * Then break by B-bit. | ||
173 | */ | ||
174 | (cur_deadline(first) == cur_deadline(second) && | ||
175 | (cur_overlap(first) > cur_overlap(second) || | ||
176 | |||
177 | /* Do we have a B-bit tie? | ||
178 | * Then break by group deadline. | ||
179 | */ | ||
180 | (cur_overlap(first) == cur_overlap(second) && | ||
181 | (time_after(cur_group_deadline(first), | ||
182 | cur_group_deadline(second)) || | ||
183 | |||
184 | /* Do we have a group deadline tie? | ||
185 | * Then break by PID, which are unique. | ||
186 | */ | ||
187 | (cur_group_deadline(first) == | ||
188 | cur_group_deadline(second) && | ||
189 | first->pid < second->pid)))))); | ||
190 | } | ||
191 | |||
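The nested boolean expression above encodes the PD^2 tie-breaking chain: earliest subtask deadline first, then larger b-bit, then later group deadline, then lower PID. An equivalent flattened form, using the same helpers defined in this file, may be easier to follow (illustrative rewrite only, not part of the patch):

    /* Step-by-step equivalent of the PD^2 priority test above. */
    static int pfair_higher_prio_flat(struct task_struct* first,
                                      struct task_struct* second)
    {
            if (!first)
                    return 0;
            if (!second || !is_realtime(second))
                    return 1;
            if (cur_deadline(first) != cur_deadline(second))
                    return time_before(cur_deadline(first), cur_deadline(second));
            if (cur_overlap(first) != cur_overlap(second))
                    return cur_overlap(first) > cur_overlap(second);
            if (cur_group_deadline(first) != cur_group_deadline(second))
                    return time_after(cur_group_deadline(first),
                                      cur_group_deadline(second));
            return first->pid < second->pid;        /* PIDs are unique */
    }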
192 | int pfair_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
193 | { | ||
194 | return pfair_higher_prio(bheap2task(a), bheap2task(b)); | ||
195 | } | ||
196 | |||
197 | /* return the proper release queue for time t */ | ||
198 | static struct bheap* relq(quanta_t t) | ||
199 | { | ||
200 | struct bheap* rq = &release_queue[t % PFAIR_MAX_PERIOD]; | ||
201 | return rq; | ||
202 | } | ||
203 | |||
204 | static void prepare_release(struct task_struct* t, quanta_t at) | ||
205 | { | ||
206 | tsk_pfair(t)->release = at; | ||
207 | tsk_pfair(t)->cur = 0; | ||
208 | } | ||
209 | |||
210 | static void __pfair_add_release(struct task_struct* t, struct bheap* queue) | ||
211 | { | ||
212 | bheap_insert(pfair_ready_order, queue, | ||
213 | tsk_rt(t)->heap_node); | ||
214 | } | ||
215 | |||
216 | static void pfair_add_release(struct task_struct* t) | ||
217 | { | ||
218 | BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node)); | ||
219 | __pfair_add_release(t, relq(cur_release(t))); | ||
220 | } | ||
221 | |||
222 | /* pull released tasks from the release queue */ | ||
223 | static void poll_releases(quanta_t time) | ||
224 | { | ||
225 | __merge_ready(&pfair, relq(time)); | ||
226 | merge_time = time; | ||
227 | } | ||
228 | |||
229 | static void check_preempt(struct task_struct* t) | ||
230 | { | ||
231 | int cpu = NO_CPU; | ||
232 | if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on && | ||
233 | tsk_rt(t)->present) { | ||
234 | /* the task can be scheduled and | ||
235 | * is not scheduled where it ought to be scheduled | ||
236 | */ | ||
237 | cpu = tsk_rt(t)->linked_on != NO_CPU ? | ||
238 | tsk_rt(t)->linked_on : | ||
239 | tsk_rt(t)->scheduled_on; | ||
240 | PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n", | ||
241 | tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on); | ||
242 | /* preempt */ | ||
243 | if (cpu == smp_processor_id()) | ||
244 | set_tsk_need_resched(current); | ||
245 | else { | ||
246 | smp_send_reschedule(cpu); | ||
247 | } | ||
248 | } | ||
249 | } | ||
250 | |||
251 | /* caller must hold pfair_lock */ | ||
252 | static void drop_all_references(struct task_struct *t) | ||
253 | { | ||
254 | int cpu; | ||
255 | struct pfair_state* s; | ||
256 | struct bheap* q; | ||
257 | if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { | ||
258 | /* figure out what queue the node is in */ | ||
259 | if (time_before_eq(cur_release(t), merge_time)) | ||
260 | q = &pfair.ready_queue; | ||
261 | else | ||
262 | q = relq(cur_release(t)); | ||
263 | bheap_delete(pfair_ready_order, q, | ||
264 | tsk_rt(t)->heap_node); | ||
265 | } | ||
266 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | ||
267 | s = &per_cpu(pfair_state, cpu); | ||
268 | if (s->linked == t) | ||
269 | s->linked = NULL; | ||
270 | if (s->local == t) | ||
271 | s->local = NULL; | ||
272 | if (s->scheduled == t) | ||
273 | s->scheduled = NULL; | ||
274 | } | ||
275 | } | ||
276 | |||
277 | /* returns 1 if the task needs to go the release queue */ | ||
278 | static int advance_subtask(quanta_t time, struct task_struct* t, int cpu) | ||
279 | { | ||
280 | struct pfair_param* p = tsk_pfair(t); | ||
281 | int to_relq; | ||
282 | p->cur = (p->cur + 1) % p->quanta; | ||
283 | if (!p->cur) { | ||
284 | sched_trace_task_completion(t, 1); | ||
285 | if (tsk_rt(t)->present) { | ||
286 | /* we start a new job */ | ||
287 | prepare_for_next_period(t); | ||
288 | sched_trace_task_release(t); | ||
289 | get_rt_flags(t) = RT_F_RUNNING; | ||
290 | p->release += p->period; | ||
291 | } else { | ||
292 | /* remove task from system until it wakes */ | ||
293 | drop_all_references(t); | ||
294 | tsk_pfair(t)->sporadic_release = 1; | ||
295 | TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n", | ||
296 | cpu, p->cur); | ||
297 | return 0; | ||
298 | } | ||
299 | } | ||
300 | to_relq = time_after(cur_release(t), time); | ||
301 | TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n", | ||
302 | cpu, p->cur, to_relq); | ||
303 | return to_relq; | ||
304 | } | ||
305 | |||
306 | static void advance_subtasks(quanta_t time) | ||
307 | { | ||
308 | int cpu, missed; | ||
309 | struct task_struct* l; | ||
310 | struct pfair_param* p; | ||
311 | |||
312 | for_each_online_cpu(cpu) { | ||
313 | l = pstate[cpu]->linked; | ||
314 | missed = pstate[cpu]->linked != pstate[cpu]->local; | ||
315 | if (l) { | ||
316 | p = tsk_pfair(l); | ||
317 | p->last_quantum = time; | ||
318 | p->last_cpu = cpu; | ||
319 | if (advance_subtask(time, l, cpu)) { | ||
320 | pstate[cpu]->linked = NULL; | ||
321 | pfair_add_release(l); | ||
322 | } | ||
323 | } | ||
324 | } | ||
325 | } | ||
326 | |||
327 | static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu) | ||
328 | { | ||
329 | int cpu; | ||
330 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
331 | /* always observe scheduled_on linkage */ | ||
332 | default_cpu = tsk_rt(t)->scheduled_on; | ||
333 | } else if (tsk_pfair(t)->last_quantum == time - 1) { | ||
334 | /* back2back quanta */ | ||
335 | /* Only observe last_quantum if no scheduled_on is in the way. | ||
336 | * This should only kick in if a CPU missed quanta, and that | ||
337 | * *should* only happen in QEMU. | ||
338 | */ | ||
339 | cpu = tsk_pfair(t)->last_cpu; | ||
340 | if (!pstate[cpu]->linked || | ||
341 | tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) { | ||
342 | default_cpu = cpu; | ||
343 | } | ||
344 | } | ||
345 | return default_cpu; | ||
346 | } | ||
347 | |||
348 | /* returns one if linking was redirected */ | ||
349 | static int pfair_link(quanta_t time, int cpu, | ||
350 | struct task_struct* t) | ||
351 | { | ||
352 | int target = target_cpu(time, t, cpu); | ||
353 | struct task_struct* prev = pstate[cpu]->linked; | ||
354 | struct task_struct* other; | ||
355 | |||
356 | if (target != cpu) { | ||
357 | other = pstate[target]->linked; | ||
358 | pstate[target]->linked = t; | ||
359 | tsk_rt(t)->linked_on = target; | ||
360 | if (!other) | ||
361 | /* linked ok, but reschedule this CPU */ | ||
362 | return 1; | ||
363 | if (target < cpu) { | ||
364 | /* link other to cpu instead */ | ||
365 | tsk_rt(other)->linked_on = cpu; | ||
366 | pstate[cpu]->linked = other; | ||
367 | if (prev) { | ||
368 | /* prev got pushed back into the ready queue */ | ||
369 | tsk_rt(prev)->linked_on = NO_CPU; | ||
370 | __add_ready(&pfair, prev); | ||
371 | } | ||
372 | /* we are done with this cpu */ | ||
373 | return 0; | ||
374 | } else { | ||
375 | /* re-add other; its original CPU was not considered yet */ | ||
376 | tsk_rt(other)->linked_on = NO_CPU; | ||
377 | __add_ready(&pfair, other); | ||
378 | /* reschedule this CPU */ | ||
379 | return 1; | ||
380 | } | ||
381 | } else { | ||
382 | pstate[cpu]->linked = t; | ||
383 | tsk_rt(t)->linked_on = cpu; | ||
384 | if (prev) { | ||
385 | /* prev got pushed back into the ready queue */ | ||
386 | tsk_rt(prev)->linked_on = NO_CPU; | ||
387 | __add_ready(&pfair, prev); | ||
388 | } | ||
389 | /* we are done with this CPU */ | ||
390 | return 0; | ||
391 | } | ||
392 | } | ||
393 | |||
394 | static void schedule_subtasks(quanta_t time) | ||
395 | { | ||
396 | int cpu, retry; | ||
397 | |||
398 | for_each_online_cpu(cpu) { | ||
399 | retry = 1; | ||
400 | while (retry) { | ||
401 | if (pfair_higher_prio(__peek_ready(&pfair), | ||
402 | pstate[cpu]->linked)) | ||
403 | retry = pfair_link(time, cpu, | ||
404 | __take_ready(&pfair)); | ||
405 | else | ||
406 | retry = 0; | ||
407 | } | ||
408 | } | ||
409 | } | ||
410 | |||
411 | static void schedule_next_quantum(quanta_t time) | ||
412 | { | ||
413 | int cpu; | ||
414 | |||
415 | /* called with interrupts disabled */ | ||
416 | PTRACE("--- Q %lu at %llu PRE-SPIN\n", | ||
417 | time, litmus_clock()); | ||
418 | spin_lock(&pfair_lock); | ||
419 | PTRACE("<<< Q %lu at %llu\n", | ||
420 | time, litmus_clock()); | ||
421 | |||
422 | sched_trace_quantum_boundary(); | ||
423 | |||
424 | advance_subtasks(time); | ||
425 | poll_releases(time); | ||
426 | schedule_subtasks(time); | ||
427 | |||
428 | for (cpu = 0; cpu < num_online_cpus(); cpu++) | ||
429 | if (pstate[cpu]->linked) | ||
430 | PTRACE_TASK(pstate[cpu]->linked, | ||
431 | " linked on %d.\n", cpu); | ||
432 | else | ||
433 | PTRACE("(null) linked on %d.\n", cpu); | ||
434 | |||
435 | /* We are done. Advance time. */ | ||
436 | mb(); | ||
437 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | ||
438 | if (pstate[cpu]->local_tick != pstate[cpu]->cur_tick) { | ||
439 | TRACE("BAD Quantum not acked on %d " | ||
440 | "(l:%lu c:%lu p:%lu)\n", | ||
441 | cpu, | ||
442 | pstate[cpu]->local_tick, | ||
443 | pstate[cpu]->cur_tick, | ||
444 | pfair_time); | ||
445 | pstate[cpu]->missed_quanta++; | ||
446 | } | ||
447 | pstate[cpu]->cur_tick = time; | ||
448 | } | ||
449 | PTRACE(">>> Q %lu at %llu\n", | ||
450 | time, litmus_clock()); | ||
451 | spin_unlock(&pfair_lock); | ||
452 | } | ||
453 | |||
454 | static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state) | ||
455 | { | ||
456 | quanta_t loc; | ||
457 | |||
458 | goto first; /* skip mb() on first iteration */ | ||
459 | do { | ||
460 | cpu_relax(); | ||
461 | mb(); | ||
462 | first: loc = state->cur_tick; | ||
463 | /* FIXME: what if loc > cur? */ | ||
464 | } while (time_before(loc, q)); | ||
465 | PTRACE("observed cur_tick:%lu >= q:%lu\n", | ||
466 | loc, q); | ||
467 | } | ||
468 | |||
469 | static quanta_t current_quantum(struct pfair_state* state) | ||
470 | { | ||
471 | lt_t t = litmus_clock() - state->offset; | ||
472 | return time2quanta(t, FLOOR); | ||
473 | } | ||
474 | |||
475 | static void catchup_quanta(quanta_t from, quanta_t target, | ||
476 | struct pfair_state* state) | ||
477 | { | ||
478 | quanta_t cur = from, time; | ||
479 | TRACE("+++< BAD catching up quanta from %lu to %lu\n", | ||
480 | from, target); | ||
481 | while (time_before(cur, target)) { | ||
482 | wait_for_quantum(cur, state); | ||
483 | cur++; | ||
484 | time = cmpxchg(&pfair_time, | ||
485 | cur - 1, /* expected */ | ||
486 | cur /* next */ | ||
487 | ); | ||
488 | if (time == cur - 1) | ||
489 | schedule_next_quantum(cur); | ||
490 | } | ||
491 | TRACE("+++> catching up done\n"); | ||
492 | } | ||
493 | |||
494 | /* pfair_tick - this function is called for every local timer | ||
495 | * interrupt. | ||
496 | */ | ||
497 | static void pfair_tick(struct task_struct* t) | ||
498 | { | ||
499 | struct pfair_state* state = &__get_cpu_var(pfair_state); | ||
500 | quanta_t time, cur; | ||
501 | int retry = 10; | ||
502 | |||
503 | do { | ||
504 | cur = current_quantum(state); | ||
505 | PTRACE("q %lu at %llu\n", cur, litmus_clock()); | ||
506 | |||
507 | /* Attempt to advance time. First CPU to get here | ||
508 | * will prepare the next quantum. | ||
509 | */ | ||
510 | time = cmpxchg(&pfair_time, | ||
511 | cur - 1, /* expected */ | ||
512 | cur /* next */ | ||
513 | ); | ||
514 | if (time == cur - 1) { | ||
515 | /* exchange succeeded */ | ||
516 | wait_for_quantum(cur - 1, state); | ||
517 | schedule_next_quantum(cur); | ||
518 | retry = 0; | ||
519 | } else if (time_before(time, cur - 1)) { | ||
520 | /* the whole system missed a tick !? */ | ||
521 | catchup_quanta(time, cur, state); | ||
522 | retry--; | ||
523 | } else if (time_after(time, cur)) { | ||
524 | /* our timer is lagging behind!? */ | ||
525 | TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur); | ||
526 | retry--; | ||
527 | } else { | ||
528 | /* Some other CPU already started scheduling | ||
529 | * this quantum. Let it do its job and then update. | ||
530 | */ | ||
531 | retry = 0; | ||
532 | } | ||
533 | } while (retry); | ||
534 | |||
535 | /* Spin locally until time advances. */ | ||
536 | wait_for_quantum(cur, state); | ||
537 | |||
538 | /* copy assignment */ | ||
539 | /* FIXME: what if we race with a future update? Corrupted state? */ | ||
540 | state->local = state->linked; | ||
541 | /* signal that we are done */ | ||
542 | mb(); | ||
543 | state->local_tick = state->cur_tick; | ||
544 | |||
545 | if (state->local != current | ||
546 | && (is_realtime(current) || is_present(state->local))) | ||
547 | set_tsk_need_resched(current); | ||
548 | } | ||
549 | |||
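The cmpxchg() on pfair_time elects exactly one CPU per quantum to run schedule_next_quantum(); every other CPU spins in wait_for_quantum() until cur_tick catches up. A user-space model of the election, with GCC's __sync builtin standing in for the kernel's cmpxchg (pfair_time_model and try_advance() are names made up for this sketch):

    static unsigned long pfair_time_model;

    /* Returns 1 for exactly one caller per quantum: that caller
     * "prepares" quantum cur; everybody else just waits for it.
     */
    static int try_advance(unsigned long cur)
    {
            unsigned long seen =
                    __sync_val_compare_and_swap(&pfair_time_model, cur - 1, cur);
            return seen == cur - 1;
    }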
550 | static int safe_to_schedule(struct task_struct* t, int cpu) | ||
551 | { | ||
552 | int where = tsk_rt(t)->scheduled_on; | ||
553 | if (where != NO_CPU && where != cpu) { | ||
554 | TRACE_TASK(t, "BAD: can't be scheduled on %d, " | ||
555 | "scheduled already on %d.\n", cpu, where); | ||
556 | return 0; | ||
557 | } else | ||
558 | return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING; | ||
559 | } | ||
560 | |||
561 | static struct task_struct* pfair_schedule(struct task_struct * prev) | ||
562 | { | ||
563 | struct pfair_state* state = &__get_cpu_var(pfair_state); | ||
564 | int blocks; | ||
565 | struct task_struct* next = NULL; | ||
566 | |||
567 | spin_lock(&pfair_lock); | ||
568 | |||
569 | blocks = is_realtime(prev) && !is_running(prev); | ||
570 | |||
571 | if (state->local && safe_to_schedule(state->local, state->cpu)) | ||
572 | next = state->local; | ||
573 | |||
574 | if (prev != next) { | ||
575 | tsk_rt(prev)->scheduled_on = NO_CPU; | ||
576 | if (next) | ||
577 | tsk_rt(next)->scheduled_on = state->cpu; | ||
578 | } | ||
579 | |||
580 | spin_unlock(&pfair_lock); | ||
581 | |||
582 | if (next) | ||
583 | TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n", | ||
584 | tsk_pfair(next)->release, pfair_time, litmus_clock()); | ||
585 | else if (is_realtime(prev)) | ||
586 | TRACE("Becomes idle at %lu (%llu)\n", pfair_time, litmus_clock()); | ||
587 | |||
588 | return next; | ||
589 | } | ||
590 | |||
591 | static void pfair_task_new(struct task_struct * t, int on_rq, int running) | ||
592 | { | ||
593 | unsigned long flags; | ||
594 | |||
595 | TRACE("pfair: task new %d state:%d\n", t->pid, t->state); | ||
596 | |||
597 | spin_lock_irqsave(&pfair_lock, flags); | ||
598 | if (running) | ||
599 | t->rt_param.scheduled_on = task_cpu(t); | ||
600 | else | ||
601 | t->rt_param.scheduled_on = NO_CPU; | ||
602 | |||
603 | prepare_release(t, pfair_time + 1); | ||
604 | tsk_pfair(t)->sporadic_release = 0; | ||
605 | pfair_add_release(t); | ||
606 | check_preempt(t); | ||
607 | |||
608 | spin_unlock_irqrestore(&pfair_lock, flags); | ||
609 | } | ||
610 | |||
611 | static void pfair_task_wake_up(struct task_struct *t) | ||
612 | { | ||
613 | unsigned long flags; | ||
614 | lt_t now; | ||
615 | |||
616 | TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n", | ||
617 | litmus_clock(), cur_release(t), pfair_time); | ||
618 | |||
619 | spin_lock_irqsave(&pfair_lock, flags); | ||
620 | |||
621 | /* It is a little unclear how to deal with Pfair | ||
622 | * tasks that block for a while and then wake. For now, | ||
623 | * if a task blocks and wakes before its next job release, | ||
624 | * then it may resume if it is currently linked somewhere | ||
625 | * (as if it never blocked at all). Otherwise, we have a | ||
626 | * new sporadic job release. | ||
627 | */ | ||
628 | if (tsk_pfair(t)->sporadic_release) { | ||
629 | now = litmus_clock(); | ||
630 | release_at(t, now); | ||
631 | prepare_release(t, time2quanta(now, CEIL)); | ||
632 | sched_trace_task_release(t); | ||
633 | /* FIXME: race with pfair_time advancing */ | ||
634 | pfair_add_release(t); | ||
635 | tsk_pfair(t)->sporadic_release = 0; | ||
636 | } | ||
637 | |||
638 | check_preempt(t); | ||
639 | |||
640 | spin_unlock_irqrestore(&pfair_lock, flags); | ||
641 | TRACE_TASK(t, "wake up done at %llu\n", litmus_clock()); | ||
642 | } | ||
643 | |||
644 | static void pfair_task_block(struct task_struct *t) | ||
645 | { | ||
646 | BUG_ON(!is_realtime(t)); | ||
647 | TRACE_TASK(t, "blocks at %llu, state:%d\n", | ||
648 | litmus_clock(), t->state); | ||
649 | } | ||
650 | |||
651 | static void pfair_task_exit(struct task_struct * t) | ||
652 | { | ||
653 | unsigned long flags; | ||
654 | |||
655 | BUG_ON(!is_realtime(t)); | ||
656 | |||
657 | /* Remove the task from the release or ready queue, and ensure | ||
658 | * that it is not the scheduled task for ANY CPU. We | ||
659 | * do this blanket check because occasionally when | ||
660 | * tasks exit while blocked, the task_cpu of the task | ||
661 | * might not be the same as the CPU that the PFAIR scheduler | ||
662 | * has chosen for it. | ||
663 | */ | ||
664 | spin_lock_irqsave(&pfair_lock, flags); | ||
665 | |||
666 | TRACE_TASK(t, "RIP, state:%d\n", t->state); | ||
667 | drop_all_references(t); | ||
668 | |||
669 | spin_unlock_irqrestore(&pfair_lock, flags); | ||
670 | |||
671 | kfree(t->rt_param.pfair); | ||
672 | t->rt_param.pfair = NULL; | ||
673 | } | ||
674 | |||
675 | |||
676 | static void pfair_release_at(struct task_struct* task, lt_t start) | ||
677 | { | ||
678 | unsigned long flags; | ||
679 | quanta_t release; | ||
680 | |||
681 | BUG_ON(!is_realtime(task)); | ||
682 | |||
683 | spin_lock_irqsave(&pfair_lock, flags); | ||
684 | release_at(task, start); | ||
685 | release = time2quanta(start, CEIL); | ||
686 | |||
687 | if (release - pfair_time >= PFAIR_MAX_PERIOD) | ||
688 | release = pfair_time + PFAIR_MAX_PERIOD; | ||
689 | |||
690 | TRACE_TASK(task, "sys release at %lu\n", release); | ||
691 | |||
692 | drop_all_references(task); | ||
693 | prepare_release(task, release); | ||
694 | pfair_add_release(task); | ||
695 | |||
696 | /* Clear sporadic release flag, since this release subsumes any | ||
697 | * sporadic release on wake. | ||
698 | */ | ||
699 | tsk_pfair(task)->sporadic_release = 0; | ||
700 | |||
701 | spin_unlock_irqrestore(&pfair_lock, flags); | ||
702 | } | ||
703 | |||
704 | static void init_subtask(struct subtask* sub, unsigned long i, | ||
705 | lt_t quanta, lt_t period) | ||
706 | { | ||
707 | /* since i is zero-based, the formulas are shifted by one */ | ||
708 | lt_t tmp; | ||
709 | |||
710 | /* release */ | ||
711 | tmp = period * i; | ||
712 | do_div(tmp, quanta); /* floor */ | ||
713 | sub->release = (quanta_t) tmp; | ||
714 | |||
715 | /* deadline */ | ||
716 | tmp = period * (i + 1); | ||
717 | if (do_div(tmp, quanta)) /* ceil */ | ||
718 | tmp++; | ||
719 | sub->deadline = (quanta_t) tmp; | ||
720 | |||
721 | /* next release */ | ||
722 | tmp = period * (i + 1); | ||
723 | do_div(tmp, quanta); /* floor */ | ||
724 | sub->overlap = sub->deadline - (quanta_t) tmp; | ||
725 | |||
726 | /* Group deadline. | ||
727 | * Based on the formula given in Uma's thesis. | ||
728 | */ | ||
729 | if (2 * quanta >= period) { | ||
730 | /* heavy */ | ||
731 | tmp = (sub->deadline - (i + 1)) * period; | ||
732 | if (period > quanta && | ||
733 | do_div(tmp, (period - quanta))) /* ceil */ | ||
734 | tmp++; | ||
735 | sub->group_deadline = (quanta_t) tmp; | ||
736 | } else | ||
737 | sub->group_deadline = 0; | ||
738 | } | ||
739 | |||
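A worked example may help: for a task with quanta = 3 and period = 5, the formulas above give subtask windows (release, deadline) of (0, 2), (1, 4), (3, 5), b-bits 1, 1, 0, and group deadlines 3, 5, 5 (a "heavy" task, since 2 * quanta >= period). The stand-alone sketch below reproduces the arithmetic with plain integer division in place of the kernel's do_div(); it assumes period > quanta, as the group-deadline branch above does:

    #include <stdio.h>

    int main(void)
    {
            unsigned long quanta = 3, period = 5, i;

            for (i = 0; i < quanta; i++) {
                    unsigned long rel = (period * i) / quanta;                    /* floor */
                    unsigned long dl  = (period * (i + 1) + quanta - 1) / quanta; /* ceil  */
                    unsigned long b   = dl - (period * (i + 1)) / quanta;
                    unsigned long gdl = 0;
                    if (2 * quanta >= period)     /* heavy task */
                            gdl = ((dl - (i + 1)) * period + period - quanta - 1)
                                    / (period - quanta);                          /* ceil  */
                    printf("subtask %lu: rel=%lu dl=%lu b=%lu gdl=%lu\n",
                           i + 1, rel, dl, b, gdl);
            }
            return 0;
    }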
740 | static void dump_subtasks(struct task_struct* t) | ||
741 | { | ||
742 | unsigned long i; | ||
743 | for (i = 0; i < t->rt_param.pfair->quanta; i++) | ||
744 | TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n", | ||
745 | i + 1, | ||
746 | t->rt_param.pfair->subtasks[i].release, | ||
747 | t->rt_param.pfair->subtasks[i].deadline, | ||
748 | t->rt_param.pfair->subtasks[i].overlap, | ||
749 | t->rt_param.pfair->subtasks[i].group_deadline); | ||
750 | } | ||
751 | |||
752 | static long pfair_admit_task(struct task_struct* t) | ||
753 | { | ||
754 | lt_t quanta; | ||
755 | lt_t period; | ||
756 | s64 quantum_length = ktime_to_ns(tick_period); | ||
757 | struct pfair_param* param; | ||
758 | unsigned long i; | ||
759 | |||
760 | /* Pfair is a tick-based method, so the time | ||
761 | * of interest is jiffies. Calculate tick-based | ||
762 | * times for everything. | ||
763 | * (Ceiling of exec cost, floor of period.) | ||
764 | */ | ||
765 | |||
766 | quanta = get_exec_cost(t); | ||
767 | period = get_rt_period(t); | ||
768 | |||
769 | quanta = time2quanta(get_exec_cost(t), CEIL); | ||
770 | |||
771 | if (do_div(period, quantum_length)) | ||
772 | printk(KERN_WARNING | ||
773 | "The period of %s/%d is not a multiple of %llu.\n", | ||
774 | t->comm, t->pid, (unsigned long long) quantum_length); | ||
775 | |||
776 | if (period >= PFAIR_MAX_PERIOD) { | ||
777 | printk(KERN_WARNING | ||
778 | "PFAIR: Rejecting task %s/%d; its period is too long.\n", | ||
779 | t->comm, t->pid); | ||
780 | return -EINVAL; | ||
781 | } | ||
782 | |||
783 | if (quanta == period) { | ||
784 | /* special case: task has weight 1.0 */ | ||
785 | printk(KERN_INFO | ||
786 | "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n", | ||
787 | t->comm, t->pid, quanta, period); | ||
788 | quanta = 1; | ||
789 | period = 1; | ||
790 | } | ||
791 | |||
792 | param = kmalloc(sizeof(*param) + | ||
793 | quanta * sizeof(struct subtask), GFP_ATOMIC); | ||
794 | |||
795 | if (!param) | ||
796 | return -ENOMEM; | ||
797 | |||
798 | param->quanta = quanta; | ||
799 | param->cur = 0; | ||
800 | param->release = 0; | ||
801 | param->period = period; | ||
802 | |||
803 | for (i = 0; i < quanta; i++) | ||
804 | init_subtask(param->subtasks + i, i, quanta, period); | ||
805 | |||
806 | if (t->rt_param.pfair) | ||
807 | /* get rid of stale allocation */ | ||
808 | kfree(t->rt_param.pfair); | ||
809 | |||
810 | t->rt_param.pfair = param; | ||
811 | |||
812 | /* spew out some debug info */ | ||
813 | dump_subtasks(t); | ||
814 | |||
815 | return 0; | ||
816 | } | ||
817 | |||
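Admission converts the nanosecond execution cost into quanta with a ceiling and requires the period to be a (floored) multiple of the quantum length. Assuming a 1 ms quantum (HZ = 1000), a task with a 2.3 ms execution cost and a 10 ms period would be admitted with quanta = 3 and period = 10. A minimal sketch of that conversion (QUANTUM_NS and the helper names are assumptions for this sketch; the kernel code uses time2quanta() and ktime_to_ns(tick_period) instead):

    #define QUANTUM_NS 1000000ULL   /* assumes HZ = 1000, i.e. a 1 ms quantum */

    static unsigned long ns_to_quanta_ceil(unsigned long long ns)
    {
            return (ns + QUANTUM_NS - 1) / QUANTUM_NS;   /* execution cost */
    }

    static unsigned long ns_to_quanta_floor(unsigned long long ns)
    {
            return ns / QUANTUM_NS;                      /* period */
    }

    /* ns_to_quanta_ceil(2300000) == 3, ns_to_quanta_floor(10000000) == 10 */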
818 | static long pfair_activate_plugin(void) | ||
819 | { | ||
820 | int cpu; | ||
821 | struct pfair_state* state; | ||
822 | |||
823 | state = &__get_cpu_var(pfair_state); | ||
824 | pfair_time = current_quantum(state); | ||
825 | |||
826 | TRACE("Activating PFAIR at q=%lu\n", pfair_time); | ||
827 | |||
828 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | ||
829 | state = &per_cpu(pfair_state, cpu); | ||
830 | state->cur_tick = pfair_time; | ||
831 | state->local_tick = pfair_time; | ||
832 | state->missed_quanta = 0; | ||
833 | state->offset = cpu_stagger_offset(cpu); | ||
834 | } | ||
835 | |||
836 | return 0; | ||
837 | } | ||
838 | |||
839 | /* Plugin object */ | ||
840 | static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = { | ||
841 | .plugin_name = "PFAIR", | ||
842 | .tick = pfair_tick, | ||
843 | .task_new = pfair_task_new, | ||
844 | .task_exit = pfair_task_exit, | ||
845 | .schedule = pfair_schedule, | ||
846 | .task_wake_up = pfair_task_wake_up, | ||
847 | .task_block = pfair_task_block, | ||
848 | .admit_task = pfair_admit_task, | ||
849 | .release_at = pfair_release_at, | ||
850 | .complete_job = complete_job, | ||
851 | .activate_plugin = pfair_activate_plugin, | ||
852 | }; | ||
853 | |||
854 | static int __init init_pfair(void) | ||
855 | { | ||
856 | int cpu, i; | ||
857 | struct pfair_state *state; | ||
858 | |||
859 | |||
860 | /* | ||
861 | * initialize the shortcut for per-cpu pfair state; | ||
862 | * there may be a problem here if someone removes a cpu | ||
863 | * while we are doing this initialization... and if cpus | ||
864 | * are added / removed later... is it a _real_ problem? | ||
865 | */ | ||
866 | pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL); | ||
867 | |||
868 | /* initialize release queue */ | ||
869 | for (i = 0; i < PFAIR_MAX_PERIOD; i++) | ||
870 | bheap_init(&release_queue[i]); | ||
871 | |||
872 | /* initialize CPU state */ | ||
873 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | ||
874 | state = &per_cpu(pfair_state, cpu); | ||
875 | state->cpu = cpu; | ||
876 | state->cur_tick = 0; | ||
877 | state->local_tick = 0; | ||
878 | state->linked = NULL; | ||
879 | state->local = NULL; | ||
880 | state->scheduled = NULL; | ||
881 | state->missed_quanta = 0; | ||
882 | state->offset = cpu_stagger_offset(cpu); | ||
883 | pstate[cpu] = state; | ||
884 | } | ||
885 | |||
886 | rt_domain_init(&pfair, pfair_ready_order, NULL, NULL); | ||
887 | return register_sched_plugin(&pfair_plugin); | ||
888 | } | ||
889 | |||
890 | static void __exit clean_pfair(void) | ||
891 | { | ||
892 | kfree(pstate); | ||
893 | } | ||
894 | |||
895 | module_init(init_pfair); | ||
896 | module_exit(clean_pfair); | ||
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c new file mode 100644 index 000000000000..3767b30e610a --- /dev/null +++ b/litmus/sched_plugin.c | |||
@@ -0,0 +1,265 @@ | |||
1 | /* sched_plugin.c -- core infrastructure for the scheduler plugin system | ||
2 | * | ||
3 | * This file includes the initialization of the plugin system, the no-op Linux | ||
4 | * scheduler plugin, some dummy functions, and some helper functions. | ||
5 | */ | ||
6 | |||
7 | #include <linux/list.h> | ||
8 | #include <linux/spinlock.h> | ||
9 | |||
10 | #include <litmus/litmus.h> | ||
11 | #include <litmus/sched_plugin.h> | ||
12 | |||
13 | #include <litmus/jobs.h> | ||
14 | |||
15 | /* | ||
16 | * Generic function to trigger preemption on either local or remote cpu | ||
17 | * from scheduler plugins. The key feature is that this function is | ||
18 | * non-preemptive section aware and does not invoke the scheduler / send | ||
19 | * IPIs if the to-be-preempted task is actually non-preemptive. | ||
20 | */ | ||
21 | void preempt_if_preemptable(struct task_struct* t, int on_cpu) | ||
22 | { | ||
23 | /* t is the real-time task executing on CPU on_cpu. If t is NULL, then | ||
24 | * on_cpu is currently scheduling background work. | ||
25 | */ | ||
26 | |||
27 | int send_ipi; | ||
28 | |||
29 | if (smp_processor_id() == on_cpu) { | ||
30 | /* local CPU case */ | ||
31 | if (t) { | ||
32 | /* check if we need to poke userspace */ | ||
33 | if (is_user_np(t)) | ||
34 | /* yes, poke it */ | ||
35 | request_exit_np(t); | ||
36 | else | ||
37 | /* no, see if we are allowed to preempt the | ||
38 | * currently-executing task */ | ||
39 | if (!is_kernel_np(t)) | ||
40 | set_tsk_need_resched(t); | ||
41 | } else | ||
42 | /* move non-real-time task out of the way */ | ||
43 | set_tsk_need_resched(current); | ||
44 | } else { | ||
45 | /* remote CPU case */ | ||
46 | if (!t) | ||
47 | /* currently schedules non-real-time work */ | ||
48 | send_ipi = 1; | ||
49 | else { | ||
50 | /* currently schedules real-time work */ | ||
51 | if (is_user_np(t)) { | ||
52 | /* need to notify user space of delayed | ||
53 | * preemption */ | ||
54 | |||
55 | /* to avoid a race, set the flag, then test | ||
56 | * again */ | ||
57 | request_exit_np(t); | ||
58 | /* make sure it got written */ | ||
59 | mb(); | ||
60 | } | ||
61 | /* Only send an IPI if the remote task might have raced our | ||
62 | * request, i.e., send an IPI to make sure it reschedules once | ||
63 | * it has exited its critical section. | ||
64 | */ | ||
65 | send_ipi = !is_np(t) && !is_kernel_np(t); | ||
66 | } | ||
67 | if (likely(send_ipi)) | ||
68 | smp_send_reschedule(on_cpu); | ||
69 | } | ||
70 | } | ||
71 | |||
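Plugins call this helper whenever a newly eligible task should displace whatever a CPU is currently running; the PSN-EDF plugin later in this patch wraps it in a one-line preempt() helper. A hypothetical usage sketch (plugin_cpu_state and its fields are made up here; the real per-CPU bookkeeping is plugin-specific):

    /* Hypothetical per-CPU plugin state, for illustration only. */
    struct plugin_cpu_state {
            int cpu;
            struct task_struct *scheduled;   /* RT task running on that CPU */
    };

    static void maybe_preempt(struct plugin_cpu_state *st)
    {
            /* no resched / IPI is issued if the running task has declared
             * itself non-preemptive; user-space NP sections are poked via
             * request_exit_np() instead */
            preempt_if_preemptable(st->scheduled, st->cpu);
    }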
72 | |||
73 | /************************************************************* | ||
74 | * Dummy plugin functions * | ||
75 | *************************************************************/ | ||
76 | |||
77 | static void litmus_dummy_finish_switch(struct task_struct * prev) | ||
78 | { | ||
79 | } | ||
80 | |||
81 | static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) | ||
82 | { | ||
83 | return NULL; | ||
84 | } | ||
85 | |||
86 | static void litmus_dummy_tick(struct task_struct* tsk) | ||
87 | { | ||
88 | } | ||
89 | |||
90 | static long litmus_dummy_admit_task(struct task_struct* tsk) | ||
91 | { | ||
92 | printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", | ||
93 | tsk->comm, tsk->pid); | ||
94 | return -EINVAL; | ||
95 | } | ||
96 | |||
97 | static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) | ||
98 | { | ||
99 | } | ||
100 | |||
101 | static void litmus_dummy_task_wake_up(struct task_struct *task) | ||
102 | { | ||
103 | } | ||
104 | |||
105 | static void litmus_dummy_task_block(struct task_struct *task) | ||
106 | { | ||
107 | } | ||
108 | |||
109 | static void litmus_dummy_task_exit(struct task_struct *task) | ||
110 | { | ||
111 | } | ||
112 | |||
113 | static long litmus_dummy_complete_job(void) | ||
114 | { | ||
115 | return -ENOSYS; | ||
116 | } | ||
117 | |||
118 | static long litmus_dummy_activate_plugin(void) | ||
119 | { | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | static long litmus_dummy_deactivate_plugin(void) | ||
124 | { | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | #ifdef CONFIG_FMLP | ||
129 | |||
130 | static long litmus_dummy_inherit_priority(struct pi_semaphore *sem, | ||
131 | struct task_struct *new_owner) | ||
132 | { | ||
133 | return -ENOSYS; | ||
134 | } | ||
135 | |||
136 | static long litmus_dummy_return_priority(struct pi_semaphore *sem) | ||
137 | { | ||
138 | return -ENOSYS; | ||
139 | } | ||
140 | |||
141 | static long litmus_dummy_pi_block(struct pi_semaphore *sem, | ||
142 | struct task_struct *new_waiter) | ||
143 | { | ||
144 | return -ENOSYS; | ||
145 | } | ||
146 | |||
147 | #endif | ||
148 | |||
149 | |||
150 | /* The default scheduler plugin. It doesn't do anything and lets Linux do its | ||
151 | * job. | ||
152 | */ | ||
153 | struct sched_plugin linux_sched_plugin = { | ||
154 | .plugin_name = "Linux", | ||
155 | .tick = litmus_dummy_tick, | ||
156 | .task_new = litmus_dummy_task_new, | ||
157 | .task_exit = litmus_dummy_task_exit, | ||
158 | .task_wake_up = litmus_dummy_task_wake_up, | ||
159 | .task_block = litmus_dummy_task_block, | ||
160 | .complete_job = litmus_dummy_complete_job, | ||
161 | .schedule = litmus_dummy_schedule, | ||
162 | .finish_switch = litmus_dummy_finish_switch, | ||
163 | .activate_plugin = litmus_dummy_activate_plugin, | ||
164 | .deactivate_plugin = litmus_dummy_deactivate_plugin, | ||
165 | #ifdef CONFIG_FMLP | ||
166 | .inherit_priority = litmus_dummy_inherit_priority, | ||
167 | .return_priority = litmus_dummy_return_priority, | ||
168 | .pi_block = litmus_dummy_pi_block, | ||
169 | #endif | ||
170 | .admit_task = litmus_dummy_admit_task | ||
171 | }; | ||
172 | |||
173 | /* | ||
174 | * The cluster size is needed by C-EDF: it makes sense only to cluster | ||
175 | * around the L2 or L3 cache, so if cluster_cache_index = 2 (the default) we | ||
176 | * cluster all the CPUs that share an L2 cache, while if cluster_cache_index = 3 | ||
177 | * we cluster all the CPUs that share an L3 cache. | ||
178 | */ | ||
179 | int cluster_cache_index = 2; | ||
180 | |||
181 | /* | ||
182 | * The reference to current plugin that is used to schedule tasks within | ||
183 | * the system. It stores references to actual function implementations | ||
184 | * Should be initialized by calling "init_***_plugin()" | ||
185 | */ | ||
186 | struct sched_plugin *litmus = &linux_sched_plugin; | ||
187 | |||
188 | /* the list of registered scheduling plugins */ | ||
189 | static LIST_HEAD(sched_plugins); | ||
190 | static DEFINE_SPINLOCK(sched_plugins_lock); | ||
191 | |||
192 | #define CHECK(func) {\ | ||
193 | if (!plugin->func) \ | ||
194 | plugin->func = litmus_dummy_ ## func;} | ||
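For instance, CHECK(schedule) expands (outer braces elided) to the following, which is what allows a plugin to leave optional callbacks unset:

    if (!plugin->schedule)
            plugin->schedule = litmus_dummy_schedule;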
195 | |||
196 | /* FIXME: get reference to module */ | ||
197 | int register_sched_plugin(struct sched_plugin* plugin) | ||
198 | { | ||
199 | printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", | ||
200 | plugin->plugin_name); | ||
201 | |||
202 | /* make sure we don't trip over null pointers later */ | ||
203 | CHECK(finish_switch); | ||
204 | CHECK(schedule); | ||
205 | CHECK(tick); | ||
206 | CHECK(task_wake_up); | ||
207 | CHECK(task_exit); | ||
208 | CHECK(task_block); | ||
209 | CHECK(task_new); | ||
210 | CHECK(complete_job); | ||
211 | CHECK(activate_plugin); | ||
212 | CHECK(deactivate_plugin); | ||
213 | #ifdef CONFIG_FMLP | ||
214 | CHECK(inherit_priority); | ||
215 | CHECK(return_priority); | ||
216 | CHECK(pi_block); | ||
217 | #endif | ||
218 | CHECK(admit_task); | ||
219 | |||
220 | if (!plugin->release_at) | ||
221 | plugin->release_at = release_at; | ||
222 | |||
223 | spin_lock(&sched_plugins_lock); | ||
224 | list_add(&plugin->list, &sched_plugins); | ||
225 | spin_unlock(&sched_plugins_lock); | ||
226 | |||
227 | return 0; | ||
228 | } | ||
229 | |||
230 | |||
231 | /* FIXME: reference counting, etc. */ | ||
232 | struct sched_plugin* find_sched_plugin(const char* name) | ||
233 | { | ||
234 | struct list_head *pos; | ||
235 | struct sched_plugin *plugin; | ||
236 | |||
237 | spin_lock(&sched_plugins_lock); | ||
238 | list_for_each(pos, &sched_plugins) { | ||
239 | plugin = list_entry(pos, struct sched_plugin, list); | ||
240 | if (!strcmp(plugin->plugin_name, name)) | ||
241 | goto out_unlock; | ||
242 | } | ||
243 | plugin = NULL; | ||
244 | |||
245 | out_unlock: | ||
246 | spin_unlock(&sched_plugins_lock); | ||
247 | return plugin; | ||
248 | } | ||
249 | |||
250 | int print_sched_plugins(char* buf, int max) | ||
251 | { | ||
252 | int count = 0; | ||
253 | struct list_head *pos; | ||
254 | struct sched_plugin *plugin; | ||
255 | |||
256 | spin_lock(&sched_plugins_lock); | ||
257 | list_for_each(pos, &sched_plugins) { | ||
258 | plugin = list_entry(pos, struct sched_plugin, list); | ||
259 | count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name); | ||
260 | if (max - count <= 0) | ||
261 | break; | ||
262 | } | ||
263 | spin_unlock(&sched_plugins_lock); | ||
264 | return count; | ||
265 | } | ||
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c new file mode 100644 index 000000000000..af0b30cb8b89 --- /dev/null +++ b/litmus/sched_psn_edf.c | |||
@@ -0,0 +1,480 @@ | |||
1 | /* | ||
2 | * litmus/sched_psn_edf.c | ||
3 | * | ||
4 | * Implementation of the PSN-EDF scheduler plugin. | ||
5 | * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c. | ||
6 | * | ||
7 | * Suspensions and non-preemptable sections are supported. | ||
8 | * Priority inheritance is not supported. | ||
9 | */ | ||
10 | |||
11 | #include <linux/percpu.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/list.h> | ||
14 | #include <linux/spinlock.h> | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | |||
18 | #include <litmus/litmus.h> | ||
19 | #include <litmus/jobs.h> | ||
20 | #include <litmus/sched_plugin.h> | ||
21 | #include <litmus/edf_common.h> | ||
22 | |||
23 | |||
24 | typedef struct { | ||
25 | rt_domain_t domain; | ||
26 | int cpu; | ||
27 | struct task_struct* scheduled; /* only RT tasks */ | ||
28 | /* | ||
29 | * scheduling lock slock | ||
30 | * protects the domain and serializes scheduling decisions | ||
31 | */ | ||
32 | #define slock domain.ready_lock | ||
33 | |||
34 | } psnedf_domain_t; | ||
35 | |||
36 | DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains); | ||
37 | |||
38 | #define local_edf (&__get_cpu_var(psnedf_domains).domain) | ||
39 | #define local_pedf (&__get_cpu_var(psnedf_domains)) | ||
40 | #define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain) | ||
41 | #define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu)) | ||
42 | #define task_edf(task) remote_edf(get_partition(task)) | ||
43 | #define task_pedf(task) remote_pedf(get_partition(task)) | ||
44 | |||
45 | |||
46 | static void psnedf_domain_init(psnedf_domain_t* pedf, | ||
47 | check_resched_needed_t check, | ||
48 | release_jobs_t release, | ||
49 | int cpu) | ||
50 | { | ||
51 | edf_domain_init(&pedf->domain, check, release); | ||
52 | pedf->cpu = cpu; | ||
53 | pedf->scheduled = NULL; | ||
54 | } | ||
55 | |||
56 | static void requeue(struct task_struct* t, rt_domain_t *edf) | ||
57 | { | ||
58 | if (t->state != TASK_RUNNING) | ||
59 | TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); | ||
60 | |||
61 | set_rt_flags(t, RT_F_RUNNING); | ||
62 | if (is_released(t, litmus_clock())) | ||
63 | __add_ready(edf, t); | ||
64 | else | ||
65 | add_release(edf, t); /* it has got to wait */ | ||
66 | } | ||
67 | |||
68 | /* we assume the lock is being held */ | ||
69 | static void preempt(psnedf_domain_t *pedf) | ||
70 | { | ||
71 | preempt_if_preemptable(pedf->scheduled, pedf->cpu); | ||
72 | } | ||
73 | |||
74 | /* This check is trivial in partitioned systems as we only have to consider | ||
75 | * the CPU of the partition. | ||
76 | */ | ||
77 | static int psnedf_check_resched(rt_domain_t *edf) | ||
78 | { | ||
79 | psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain); | ||
80 | |||
81 | /* because this is a callback from rt_domain_t we already hold | ||
82 | * the necessary lock for the ready queue | ||
83 | */ | ||
84 | if (edf_preemption_needed(edf, pedf->scheduled)) { | ||
85 | preempt(pedf); | ||
86 | return 1; | ||
87 | } else | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static void job_completion(struct task_struct* t, int forced) | ||
92 | { | ||
93 | sched_trace_task_completion(t,forced); | ||
94 | TRACE_TASK(t, "job_completion().\n"); | ||
95 | |||
96 | set_rt_flags(t, RT_F_SLEEP); | ||
97 | prepare_for_next_period(t); | ||
98 | } | ||
99 | |||
100 | static void psnedf_tick(struct task_struct *t) | ||
101 | { | ||
102 | psnedf_domain_t *pedf = local_pedf; | ||
103 | |||
104 | /* Check for inconsistency. We don't need the lock for this since | ||
105 | * ->scheduled is only changed in schedule, which obviously is not | ||
106 | * executing in parallel on this CPU | ||
107 | */ | ||
108 | BUG_ON(is_realtime(t) && t != pedf->scheduled); | ||
109 | |||
110 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
111 | if (!is_np(t)) { | ||
112 | set_tsk_need_resched(t); | ||
113 | TRACE("psnedf_scheduler_tick: " | ||
114 | "%d is preemptable " | ||
115 | " => FORCE_RESCHED\n", t->pid); | ||
116 | } else if (is_user_np(t)) { | ||
117 | TRACE("psnedf_scheduler_tick: " | ||
118 | "%d is non-preemptable, " | ||
119 | "preemption delayed.\n", t->pid); | ||
120 | request_exit_np(t); | ||
121 | } | ||
122 | } | ||
123 | } | ||
124 | |||
125 | static struct task_struct* psnedf_schedule(struct task_struct * prev) | ||
126 | { | ||
127 | psnedf_domain_t* pedf = local_pedf; | ||
128 | rt_domain_t* edf = &pedf->domain; | ||
129 | struct task_struct* next; | ||
130 | |||
131 | int out_of_time, sleep, preempt, | ||
132 | np, exists, blocks, resched; | ||
133 | |||
134 | spin_lock(&pedf->slock); | ||
135 | |||
136 | /* Sanity checking: | ||
137 | * differently from GSN-EDF, when a task exits (dead), | ||
138 | * pedf->scheduled may be NULL while prev _is_ a real-time task | ||
139 | */ | ||
140 | BUG_ON(pedf->scheduled && pedf->scheduled != prev); | ||
141 | BUG_ON(pedf->scheduled && !is_realtime(prev)); | ||
142 | |||
143 | /* (0) Determine state */ | ||
144 | exists = pedf->scheduled != NULL; | ||
145 | blocks = exists && !is_running(pedf->scheduled); | ||
146 | out_of_time = exists && | ||
147 | budget_enforced(pedf->scheduled) && | ||
148 | budget_exhausted(pedf->scheduled); | ||
149 | np = exists && is_np(pedf->scheduled); | ||
150 | sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP; | ||
151 | preempt = edf_preemption_needed(edf, prev); | ||
152 | |||
153 | /* If we need to preempt do so. | ||
154 | * The following checks set resched to 1 in case of special | ||
155 | * circumstances. | ||
156 | */ | ||
157 | resched = preempt; | ||
158 | |||
159 | /* If a task blocks we have no choice but to reschedule. | ||
160 | */ | ||
161 | if (blocks) | ||
162 | resched = 1; | ||
163 | |||
164 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
165 | * Multiple calls to request_exit_np() don't hurt. | ||
166 | */ | ||
167 | if (np && (out_of_time || preempt || sleep)) | ||
168 | request_exit_np(pedf->scheduled); | ||
169 | |||
170 | /* Any task that is preemptable and either exhausts its execution | ||
171 | * budget or wants to sleep completes. We may have to reschedule after | ||
172 | * this. | ||
173 | */ | ||
174 | if (!np && (out_of_time || sleep) && !blocks) { | ||
175 | job_completion(pedf->scheduled, !sleep); | ||
176 | resched = 1; | ||
177 | } | ||
178 | |||
179 | /* The final scheduling decision. Do we need to switch for some reason? | ||
180 | * Switch if we are in RT mode and have no task or if we need to | ||
181 | * resched. | ||
182 | */ | ||
183 | next = NULL; | ||
184 | if ((!np || blocks) && (resched || !exists)) { | ||
185 | /* Take care of a previously scheduled | ||
186 | * job by taking it out of the Linux runqueue. | ||
187 | */ | ||
188 | if (pedf->scheduled && !blocks) | ||
189 | requeue(pedf->scheduled, edf); | ||
190 | next = __take_ready(edf); | ||
191 | } else | ||
192 | /* Only override Linux scheduler if we have a real-time task | ||
193 | * scheduled that needs to continue. | ||
194 | */ | ||
195 | if (exists) | ||
196 | next = prev; | ||
197 | |||
198 | if (next) { | ||
199 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
200 | set_rt_flags(next, RT_F_RUNNING); | ||
201 | } else { | ||
202 | TRACE("becoming idle at %llu\n", litmus_clock()); | ||
203 | } | ||
204 | |||
205 | pedf->scheduled = next; | ||
206 | spin_unlock(&pedf->slock); | ||
207 | |||
208 | return next; | ||
209 | } | ||
210 | |||
211 | |||
212 | /* Prepare a task for running in RT mode | ||
213 | */ | ||
214 | static void psnedf_task_new(struct task_struct * t, int on_rq, int running) | ||
215 | { | ||
216 | rt_domain_t* edf = task_edf(t); | ||
217 | psnedf_domain_t* pedf = task_pedf(t); | ||
218 | unsigned long flags; | ||
219 | |||
220 | TRACE_TASK(t, "psn edf: task new, cpu = %d\n", | ||
221 | t->rt_param.task_params.cpu); | ||
222 | |||
223 | /* setup job parameters */ | ||
224 | release_at(t, litmus_clock()); | ||
225 | |||
226 | /* The task should be running in the queue, otherwise signal | ||
227 | * code will try to wake it up with fatal consequences. | ||
228 | */ | ||
229 | spin_lock_irqsave(&pedf->slock, flags); | ||
230 | if (running) { | ||
231 | /* there shouldn't be anything else running at the time */ | ||
232 | BUG_ON(pedf->scheduled); | ||
233 | pedf->scheduled = t; | ||
234 | } else { | ||
235 | requeue(t, edf); | ||
236 | /* maybe we have to reschedule */ | ||
237 | preempt(pedf); | ||
238 | } | ||
239 | spin_unlock_irqrestore(&pedf->slock, flags); | ||
240 | } | ||
241 | |||
242 | static void psnedf_task_wake_up(struct task_struct *task) | ||
243 | { | ||
244 | unsigned long flags; | ||
245 | psnedf_domain_t* pedf = task_pedf(task); | ||
246 | rt_domain_t* edf = task_edf(task); | ||
247 | lt_t now; | ||
248 | |||
249 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
250 | spin_lock_irqsave(&pedf->slock, flags); | ||
251 | BUG_ON(is_queued(task)); | ||
252 | /* We need to take suspensions because of semaphores into | ||
253 | * account! If a job resumes after being suspended due to acquiring | ||
254 | * a semaphore, it should never be treated as a new job release. | ||
255 | * | ||
256 | * FIXME: This should be done in some more predictable and userspace-controlled way. | ||
257 | */ | ||
258 | now = litmus_clock(); | ||
259 | if (is_tardy(task, now) && | ||
260 | get_rt_flags(task) != RT_F_EXIT_SEM) { | ||
261 | /* new sporadic release */ | ||
262 | release_at(task, now); | ||
263 | sched_trace_task_release(task); | ||
264 | } | ||
265 | |||
266 | /* Only add to ready queue if it is not the currently-scheduled | ||
267 | * task. This could be the case if a task was woken up concurrently | ||
268 | * on a remote CPU before the executing CPU got around to actually | ||
269 | * de-scheduling the task, i.e., wake_up() raced with schedule() | ||
270 | * and won. | ||
271 | */ | ||
272 | if (pedf->scheduled != task) | ||
273 | requeue(task, edf); | ||
274 | |||
275 | spin_unlock_irqrestore(&pedf->slock, flags); | ||
276 | TRACE_TASK(task, "wake up done\n"); | ||
277 | } | ||
278 | |||
279 | static void psnedf_task_block(struct task_struct *t) | ||
280 | { | ||
281 | /* only running tasks can block, thus t is in no queue */ | ||
282 | TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); | ||
283 | |||
284 | BUG_ON(!is_realtime(t)); | ||
285 | BUG_ON(is_queued(t)); | ||
286 | } | ||
287 | |||
288 | static void psnedf_task_exit(struct task_struct * t) | ||
289 | { | ||
290 | unsigned long flags; | ||
291 | psnedf_domain_t* pedf = task_pedf(t); | ||
292 | rt_domain_t* edf; | ||
293 | |||
294 | spin_lock_irqsave(&pedf->slock, flags); | ||
295 | if (is_queued(t)) { | ||
296 | /* dequeue */ | ||
297 | edf = task_edf(t); | ||
298 | remove(edf, t); | ||
299 | } | ||
300 | if (pedf->scheduled == t) | ||
301 | pedf->scheduled = NULL; | ||
302 | |||
303 | TRACE_TASK(t, "RIP, now reschedule\n"); | ||
304 | |||
305 | preempt(pedf); | ||
306 | spin_unlock_irqrestore(&pedf->slock, flags); | ||
307 | } | ||
308 | |||
309 | #ifdef CONFIG_FMLP | ||
310 | static long psnedf_pi_block(struct pi_semaphore *sem, | ||
311 | struct task_struct *new_waiter) | ||
312 | { | ||
313 | psnedf_domain_t* pedf; | ||
314 | rt_domain_t* edf; | ||
315 | struct task_struct* t; | ||
316 | int cpu = get_partition(new_waiter); | ||
317 | |||
318 | BUG_ON(!new_waiter); | ||
319 | |||
320 | if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) { | ||
321 | TRACE_TASK(new_waiter, " boosts priority\n"); | ||
322 | pedf = task_pedf(new_waiter); | ||
323 | edf = task_edf(new_waiter); | ||
324 | |||
325 | /* interrupts already disabled */ | ||
326 | spin_lock(&pedf->slock); | ||
327 | |||
328 | /* store new highest-priority task */ | ||
329 | sem->hp.cpu_task[cpu] = new_waiter; | ||
330 | if (sem->holder && | ||
331 | get_partition(sem->holder) == get_partition(new_waiter)) { | ||
332 | /* let holder inherit */ | ||
333 | sem->holder->rt_param.inh_task = new_waiter; | ||
334 | t = sem->holder; | ||
335 | if (is_queued(t)) { | ||
336 | /* queued in domain*/ | ||
337 | remove(edf, t); | ||
338 | /* readd to make priority change take place */ | ||
339 | /* FIXME: this looks outdated */ | ||
340 | if (is_released(t, litmus_clock())) | ||
341 | __add_ready(edf, t); | ||
342 | else | ||
343 | add_release(edf, t); | ||
344 | } | ||
345 | } | ||
346 | |||
347 | /* check if we need to reschedule */ | ||
348 | if (edf_preemption_needed(edf, current)) | ||
349 | preempt(pedf); | ||
350 | |||
351 | spin_unlock(&pedf->slock); | ||
352 | } | ||
353 | |||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | static long psnedf_inherit_priority(struct pi_semaphore *sem, | ||
358 | struct task_struct *new_owner) | ||
359 | { | ||
360 | int cpu = get_partition(new_owner); | ||
361 | |||
362 | new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu]; | ||
363 | if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) { | ||
364 | TRACE_TASK(new_owner, | ||
365 | "inherited priority from %s/%d\n", | ||
366 | sem->hp.cpu_task[cpu]->comm, | ||
367 | sem->hp.cpu_task[cpu]->pid); | ||
368 | } else | ||
369 | TRACE_TASK(new_owner, | ||
370 | "cannot inherit priority: " | ||
371 | "no higher priority job waits on this CPU!\n"); | ||
372 | /* make new owner non-preemptable as required by FMLP under | ||
373 | * PSN-EDF. | ||
374 | */ | ||
375 | make_np(new_owner); | ||
376 | return 0; | ||
377 | } | ||
378 | |||
379 | |||
380 | /* This function is called on a semaphore release, and assumes that | ||
381 | * the current task is also the semaphore holder. | ||
382 | */ | ||
383 | static long psnedf_return_priority(struct pi_semaphore *sem) | ||
384 | { | ||
385 | struct task_struct* t = current; | ||
386 | psnedf_domain_t* pedf = task_pedf(t); | ||
387 | rt_domain_t* edf = task_edf(t); | ||
388 | int ret = 0; | ||
389 | int cpu = get_partition(current); | ||
390 | int still_np; | ||
391 | |||
392 | |||
393 | /* Find new highest-priority semaphore task | ||
394 | * if holder task is the current hp.cpu_task[cpu]. | ||
395 | * | ||
396 | * Calling function holds sem->wait.lock. | ||
397 | */ | ||
398 | if (t == sem->hp.cpu_task[cpu]) | ||
399 | edf_set_hp_cpu_task(sem, cpu); | ||
400 | |||
401 | still_np = take_np(current); | ||
402 | |||
403 | /* Since we don't nest resources, this | ||
404 | * should always be zero */ | ||
405 | BUG_ON(still_np); | ||
406 | |||
407 | if (current->rt_param.inh_task) { | ||
408 | TRACE_CUR("return priority of %s/%d\n", | ||
409 | current->rt_param.inh_task->comm, | ||
410 | current->rt_param.inh_task->pid); | ||
411 | } else | ||
412 | TRACE_CUR(" no priority to return %p\n", sem); | ||
413 | |||
414 | |||
415 | /* Always check for delayed preemptions that might have become | ||
416 | * necessary due to non-preemptive execution. | ||
417 | */ | ||
418 | spin_lock(&pedf->slock); | ||
419 | |||
420 | /* Reset inh_task to NULL. */ | ||
421 | current->rt_param.inh_task = NULL; | ||
422 | |||
423 | /* check if we need to reschedule */ | ||
424 | if (edf_preemption_needed(edf, current)) | ||
425 | preempt(pedf); | ||
426 | |||
427 | spin_unlock(&pedf->slock); | ||
428 | |||
429 | |||
430 | return ret; | ||
431 | } | ||
432 | |||
433 | #endif | ||
434 | |||
435 | static long psnedf_admit_task(struct task_struct* tsk) | ||
436 | { | ||
437 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; | ||
438 | } | ||
439 | |||
440 | /* Plugin object */ | ||
441 | static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = { | ||
442 | .plugin_name = "PSN-EDF", | ||
443 | #ifdef CONFIG_SRP | ||
444 | .srp_active = 1, | ||
445 | #endif | ||
446 | .tick = psnedf_tick, | ||
447 | .task_new = psnedf_task_new, | ||
448 | .complete_job = complete_job, | ||
449 | .task_exit = psnedf_task_exit, | ||
450 | .schedule = psnedf_schedule, | ||
451 | .task_wake_up = psnedf_task_wake_up, | ||
452 | .task_block = psnedf_task_block, | ||
453 | #ifdef CONFIG_FMLP | ||
454 | .fmlp_active = 1, | ||
455 | .pi_block = psnedf_pi_block, | ||
456 | .inherit_priority = psnedf_inherit_priority, | ||
457 | .return_priority = psnedf_return_priority, | ||
458 | #endif | ||
459 | .admit_task = psnedf_admit_task | ||
460 | }; | ||
461 | |||
462 | |||
463 | static int __init init_psn_edf(void) | ||
464 | { | ||
465 | int i; | ||
466 | |||
467 | /* We do not really want to support cpu hotplug, do we? ;) | ||
468 | * However, if we are so crazy to do so, | ||
469 | * we cannot use num_online_cpus() | ||
470 | */ | ||
471 | for (i = 0; i < num_online_cpus(); i++) { | ||
472 | psnedf_domain_init(remote_pedf(i), | ||
473 | psnedf_check_resched, | ||
474 | NULL, i); | ||
475 | } | ||
476 | return register_sched_plugin(&psn_edf_plugin); | ||
477 | } | ||
478 | |||
479 | module_init(init_psn_edf); | ||
480 | |||
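
The FMLP hooks above boost the lock holder to the priority of the highest-priority waiter on its own partition and re-queue it so that the new priority takes effect. A stand-alone sketch of just that boost decision (not kernel code; the task fields, deadlines, and PID tie-break below are invented for illustration):

/* Stand-alone sketch (not kernel code) of the FMLP boost decision made in
 * psnedf_pi_block() above: the holder inherits the priority of the
 * highest-priority waiter on its own partition. Deadlines, partitions and
 * the PID tie-break below are invented for illustration. */
#include <stdio.h>

struct fake_task {
	int pid;
	int partition;            /* CPU the task is assigned to */
	unsigned long deadline;   /* absolute deadline; smaller = higher prio */
};

/* earlier deadline wins; PID breaks ties (assumed EDF ordering) */
static int edf_higher(const struct fake_task *a, const struct fake_task *b)
{
	if (!b)
		return 1;
	return a->deadline < b->deadline ||
	       (a->deadline == b->deadline && a->pid < b->pid);
}

int main(void)
{
	struct fake_task holder = { .pid = 100, .partition = 1, .deadline = 900 };
	struct fake_task waiters[] = {
		{ .pid = 101, .partition = 1, .deadline = 500 },
		{ .pid = 102, .partition = 0, .deadline = 100 },  /* remote partition */
		{ .pid = 103, .partition = 1, .deadline = 700 },
	};
	const struct fake_task *inh = NULL;
	size_t i;

	for (i = 0; i < sizeof(waiters) / sizeof(waiters[0]); i++) {
		/* only waiters on the holder's partition may boost it */
		if (waiters[i].partition == holder.partition &&
		    edf_higher(&waiters[i], inh ? inh : &holder))
			inh = &waiters[i];
	}

	if (inh)
		printf("holder %d inherits priority of waiter %d (deadline %lu)\n",
		       holder.pid, inh->pid, inh->deadline);
	else
		printf("holder %d keeps its own priority\n", holder.pid);
	return 0;
}

Waiter 102 has the earliest deadline overall but sits on a different partition, so it never boosts this holder.
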
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c new file mode 100644 index 000000000000..39a543e22d41 --- /dev/null +++ b/litmus/sched_task_trace.c | |||
@@ -0,0 +1,204 @@ | |||
1 | /* | ||
2 | * sched_task_trace.c -- record scheduling events to a byte stream | ||
3 | */ | ||
4 | |||
5 | #define NO_TASK_TRACE_DECLS | ||
6 | |||
7 | #include <linux/module.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/percpu.h> | ||
10 | |||
11 | #include <litmus/ftdev.h> | ||
12 | #include <litmus/litmus.h> | ||
13 | |||
14 | #include <litmus/sched_trace.h> | ||
15 | #include <litmus/feather_trace.h> | ||
16 | #include <litmus/ftdev.h> | ||
17 | |||
18 | |||
19 | /* set MAJOR to 0 to have it dynamically assigned */ | ||
20 | #define FT_TASK_TRACE_MAJOR 253 | ||
21 | #define NO_EVENTS 4096 /* this is a buffer of 12 4k pages per CPU */ | ||
22 | |||
23 | #define now() litmus_clock() | ||
24 | |||
25 | struct local_buffer { | ||
26 | struct st_event_record record[NO_EVENTS]; | ||
27 | char flag[NO_EVENTS]; | ||
28 | struct ft_buffer ftbuf; | ||
29 | }; | ||
30 | |||
31 | DEFINE_PER_CPU(struct local_buffer, st_event_buffer); | ||
32 | |||
33 | static struct ftdev st_dev; | ||
34 | |||
35 | static int st_dev_can_open(struct ftdev *dev, unsigned int cpu) | ||
36 | { | ||
37 | return cpu_online(cpu) ? 0 : -ENODEV; | ||
38 | } | ||
39 | |||
40 | static int __init init_sched_task_trace(void) | ||
41 | { | ||
42 | struct local_buffer* buf; | ||
43 | int i, ok = 0; | ||
44 | ftdev_init(&st_dev, THIS_MODULE); | ||
45 | for (i = 0; i < NR_CPUS; i++) { | ||
46 | buf = &per_cpu(st_event_buffer, i); | ||
47 | ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, | ||
48 | sizeof(struct st_event_record), | ||
49 | buf->flag, | ||
50 | buf->record); | ||
51 | st_dev.minor[i].buf = &buf->ftbuf; | ||
52 | } | ||
53 | if (ok == NR_CPUS) { | ||
54 | st_dev.minor_cnt = NR_CPUS; | ||
55 | st_dev.can_open = st_dev_can_open; | ||
56 | return register_ftdev(&st_dev, "sched_trace", FT_TASK_TRACE_MAJOR); | ||
57 | } else { | ||
58 | return -EINVAL; | ||
59 | } | ||
60 | } | ||
61 | |||
62 | module_init(init_sched_task_trace); | ||
63 | |||
64 | |||
65 | static inline struct st_event_record* get_record(u8 type, struct task_struct* t) | ||
66 | { | ||
67 | struct st_event_record* rec = NULL; | ||
68 | struct local_buffer* buf; | ||
69 | |||
70 | buf = &get_cpu_var(st_event_buffer); | ||
71 | if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) { | ||
72 | rec->hdr.type = type; | ||
73 | rec->hdr.cpu = smp_processor_id(); | ||
74 | rec->hdr.pid = t ? t->pid : 0; | ||
75 | rec->hdr.job = t ? t->rt_param.job_params.job_no : 0; | ||
76 | } else { | ||
77 | put_cpu_var(st_event_buffer); | ||
78 | } | ||
79 | /* rec will be NULL if it failed */ | ||
80 | return rec; | ||
81 | } | ||
82 | |||
83 | static inline void put_record(struct st_event_record* rec) | ||
84 | { | ||
85 | struct local_buffer* buf; | ||
86 | buf = &__get_cpu_var(st_event_buffer); | ||
87 | ft_buffer_finish_write(&buf->ftbuf, rec); | ||
88 | put_cpu_var(st_event_buffer); | ||
89 | } | ||
90 | |||
91 | feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task) | ||
92 | { | ||
93 | struct task_struct *t = (struct task_struct*) _task; | ||
94 | struct st_event_record* rec = get_record(ST_NAME, t); | ||
95 | int i; | ||
96 | if (rec) { | ||
97 | for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++) | ||
98 | rec->data.name.cmd[i] = t->comm[i]; | ||
99 | put_record(rec); | ||
100 | } | ||
101 | } | ||
102 | |||
103 | feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task) | ||
104 | { | ||
105 | struct task_struct *t = (struct task_struct*) _task; | ||
106 | struct st_event_record* rec = get_record(ST_PARAM, t); | ||
107 | if (rec) { | ||
108 | rec->data.param.wcet = get_exec_cost(t); | ||
109 | rec->data.param.period = get_rt_period(t); | ||
110 | rec->data.param.phase = get_rt_phase(t); | ||
111 | rec->data.param.partition = get_partition(t); | ||
112 | put_record(rec); | ||
113 | } | ||
114 | } | ||
115 | |||
116 | feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task) | ||
117 | { | ||
118 | struct task_struct *t = (struct task_struct*) _task; | ||
119 | struct st_event_record* rec = get_record(ST_RELEASE, t); | ||
120 | if (rec) { | ||
121 | rec->data.release.release = get_release(t); | ||
122 | rec->data.release.deadline = get_deadline(t); | ||
123 | put_record(rec); | ||
124 | } | ||
125 | } | ||
126 | |||
127 | /* skipped: st_assigned_data, we don't use it atm */ | ||
128 | |||
129 | feather_callback void do_sched_trace_task_switch_to(unsigned long id, | ||
130 | unsigned long _task) | ||
131 | { | ||
132 | struct task_struct *t = (struct task_struct*) _task; | ||
133 | struct st_event_record* rec; | ||
134 | if (is_realtime(t)) { | ||
135 | rec = get_record(ST_SWITCH_TO, t); | ||
136 | if (rec) { | ||
137 | rec->data.switch_to.when = now(); | ||
138 | rec->data.switch_to.exec_time = get_exec_time(t); | ||
139 | put_record(rec); | ||
140 | } | ||
141 | } | ||
142 | } | ||
143 | |||
144 | feather_callback void do_sched_trace_task_switch_away(unsigned long id, | ||
145 | unsigned long _task) | ||
146 | { | ||
147 | struct task_struct *t = (struct task_struct*) _task; | ||
148 | struct st_event_record* rec; | ||
149 | if (is_realtime(t)) { | ||
150 | rec = get_record(ST_SWITCH_AWAY, t); | ||
151 | if (rec) { | ||
152 | rec->data.switch_away.when = now(); | ||
153 | rec->data.switch_away.exec_time = get_exec_time(t); | ||
154 | put_record(rec); | ||
155 | } | ||
156 | } | ||
157 | } | ||
158 | |||
159 | feather_callback void do_sched_trace_task_completion(unsigned long id, | ||
160 | unsigned long _task, | ||
161 | unsigned long forced) | ||
162 | { | ||
163 | struct task_struct *t = (struct task_struct*) _task; | ||
164 | struct st_event_record* rec = get_record(ST_COMPLETION, t); | ||
165 | if (rec) { | ||
166 | rec->data.completion.when = now(); | ||
167 | rec->data.completion.forced = forced; | ||
168 | put_record(rec); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | feather_callback void do_sched_trace_task_block(unsigned long id, | ||
173 | unsigned long _task) | ||
174 | { | ||
175 | struct task_struct *t = (struct task_struct*) _task; | ||
176 | struct st_event_record* rec = get_record(ST_BLOCK, t); | ||
177 | if (rec) { | ||
178 | rec->data.block.when = now(); | ||
179 | put_record(rec); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | feather_callback void do_sched_trace_task_resume(unsigned long id, | ||
184 | unsigned long _task) | ||
185 | { | ||
186 | struct task_struct *t = (struct task_struct*) _task; | ||
187 | struct st_event_record* rec = get_record(ST_RESUME, t); | ||
188 | if (rec) { | ||
189 | rec->data.resume.when = now(); | ||
190 | put_record(rec); | ||
191 | } | ||
192 | } | ||
193 | |||
194 | feather_callback void do_sched_trace_sys_release(unsigned long id, | ||
195 | unsigned long _start) | ||
196 | { | ||
197 | lt_t *start = (lt_t*) _start; | ||
198 | struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL); | ||
199 | if (rec) { | ||
200 | rec->data.sys_release.when = now(); | ||
201 | rec->data.sys_release.release = *start; | ||
202 | put_record(rec); | ||
203 | } | ||
204 | } | ||
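
The per-CPU event buffers are exported through ftdev as one character-device minor per CPU under the name "sched_trace" (static major 253 above). A minimal user-space reader could look like the following sketch; the device path and the record size are assumptions here, and a real tool would use sizeof(struct st_event_record) from the LITMUS headers:

/* User-space reader sketch for one per-CPU sched_trace minor (not part of
 * the patch). ASSUMED_RECORD_SIZE and the device path are placeholders;
 * real tools use sizeof(struct st_event_record) from the LITMUS headers. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define ASSUMED_RECORD_SIZE 24   /* placeholder record size in bytes */

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/dev/litmus/sched_trace0";
	char buf[ASSUMED_RECORD_SIZE * 64];
	long records = 0;
	ssize_t n;
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}
	/* drain whatever records the kernel has buffered so far */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		records += n / ASSUMED_RECORD_SIZE;
	printf("read %ld records from %s\n", records, path);
	close(fd);
	return 0;
}
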
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c new file mode 100644 index 000000000000..ad0b138d4b01 --- /dev/null +++ b/litmus/sched_trace.c | |||
@@ -0,0 +1,378 @@ | |||
1 | /* | ||
2 | * sched_trace.c -- record scheduling events to a byte stream. | ||
3 | */ | ||
4 | #include <linux/spinlock.h> | ||
5 | #include <linux/semaphore.h> | ||
6 | |||
7 | #include <linux/fs.h> | ||
8 | #include <linux/miscdevice.h> | ||
9 | #include <asm/uaccess.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/sysrq.h> | ||
12 | |||
13 | #include <linux/kfifo.h> | ||
14 | |||
15 | #include <litmus/sched_trace.h> | ||
16 | #include <litmus/litmus.h> | ||
17 | |||
18 | #define SCHED_TRACE_NAME "litmus/log" | ||
19 | |||
20 | /* Allocate a buffer of about 32k per CPU */ | ||
21 | #define LITMUS_TRACE_BUF_PAGES 8 | ||
22 | #define LITMUS_TRACE_BUF_SIZE (PAGE_SIZE * LITMUS_TRACE_BUF_PAGES * NR_CPUS) | ||
23 | |||
24 | /* Max length of one read from the buffer */ | ||
25 | #define MAX_READ_LEN (64 * 1024) | ||
26 | |||
27 | /* Max length for one write --- from kernel --- to the buffer */ | ||
28 | #define MSG_SIZE 255 | ||
29 | |||
30 | /* Inner ring buffer structure */ | ||
31 | typedef struct { | ||
32 | rwlock_t del_lock; | ||
33 | |||
34 | /* the buffer */ | ||
35 | struct kfifo *kfifo; | ||
36 | } ring_buffer_t; | ||
37 | |||
38 | /* Main buffer structure */ | ||
39 | typedef struct { | ||
40 | ring_buffer_t buf; | ||
41 | atomic_t reader_cnt; | ||
42 | struct semaphore reader_mutex; | ||
43 | } trace_buffer_t; | ||
44 | |||
45 | |||
46 | /* | ||
47 | * Inner buffer management functions | ||
48 | */ | ||
49 | void rb_init(ring_buffer_t* buf) | ||
50 | { | ||
51 | rwlock_init(&buf->del_lock); | ||
52 | buf->kfifo = NULL; | ||
53 | } | ||
54 | |||
55 | int rb_alloc_buf(ring_buffer_t* buf, unsigned int size) | ||
56 | { | ||
57 | unsigned long flags; | ||
58 | |||
59 | write_lock_irqsave(&buf->del_lock, flags); | ||
60 | |||
61 | buf->kfifo = kfifo_alloc(size, GFP_ATOMIC, NULL); | ||
62 | |||
63 | write_unlock_irqrestore(&buf->del_lock, flags); | ||
64 | |||
65 | if(IS_ERR(buf->kfifo)) { | ||
66 | printk(KERN_ERR "kfifo_alloc failed\n"); | ||
67 | return PTR_ERR(buf->kfifo); | ||
68 | } | ||
69 | |||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | int rb_free_buf(ring_buffer_t* buf) | ||
74 | { | ||
75 | unsigned long flags; | ||
76 | |||
77 | write_lock_irqsave(&buf->del_lock, flags); | ||
78 | |||
79 | BUG_ON(!buf->kfifo); | ||
80 | kfifo_free(buf->kfifo); | ||
81 | |||
82 | buf->kfifo = NULL; | ||
83 | |||
84 | write_unlock_irqrestore(&buf->del_lock, flags); | ||
85 | |||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * Assumption: concurrent writes are serialized externally | ||
91 | * | ||
92 | * Will only succeed if there is enough space for all len bytes. | ||
93 | */ | ||
94 | int rb_put(ring_buffer_t* buf, char* mem, size_t len) | ||
95 | { | ||
96 | unsigned long flags; | ||
97 | int error = 0; | ||
98 | |||
99 | read_lock_irqsave(&buf->del_lock, flags); | ||
100 | |||
101 | if (!buf->kfifo) { | ||
102 | error = -ENODEV; | ||
103 | goto out; | ||
104 | } | ||
105 | |||
106 | if((__kfifo_put(buf->kfifo, mem, len)) < len) { | ||
107 | error = -ENOMEM; | ||
108 | goto out; | ||
109 | } | ||
110 | |||
111 | out: | ||
112 | read_unlock_irqrestore(&buf->del_lock, flags); | ||
113 | return error; | ||
114 | } | ||
115 | |||
116 | /* Assumption: concurrent reads are serialized externally */ | ||
117 | int rb_get(ring_buffer_t* buf, char* mem, size_t len) | ||
118 | { | ||
119 | unsigned long flags; | ||
120 | int error = 0; | ||
121 | |||
122 | read_lock_irqsave(&buf->del_lock, flags); | ||
123 | if (!buf->kfifo) { | ||
124 | error = -ENODEV; | ||
125 | goto out; | ||
126 | } | ||
127 | |||
128 | error = __kfifo_get(buf->kfifo, (unsigned char*)mem, len); | ||
129 | |||
130 | out: | ||
131 | read_unlock_irqrestore(&buf->del_lock, flags); | ||
132 | return error; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Device Driver management | ||
137 | */ | ||
138 | static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED; | ||
139 | static trace_buffer_t log_buffer; | ||
140 | |||
141 | static void init_log_buffer(void) | ||
142 | { | ||
143 | rb_init(&log_buffer.buf); | ||
144 | atomic_set(&log_buffer.reader_cnt,0); | ||
145 | init_MUTEX(&log_buffer.reader_mutex); | ||
146 | } | ||
147 | |||
148 | static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer); | ||
149 | |||
150 | /* | ||
151 | * sched_trace_log_message - Write to the trace buffer (log_buffer) | ||
152 | * | ||
153 | * This is the only function accessing the log_buffer from inside the | ||
154 | * kernel for writing. | ||
155 | * Concurrent access to sched_trace_log_message must be serialized using | ||
156 | * log_buffer_lock | ||
157 | * The maximum length of a formatted message is 255 | ||
158 | */ | ||
159 | void sched_trace_log_message(const char* fmt, ...) | ||
160 | { | ||
161 | unsigned long flags; | ||
162 | va_list args; | ||
163 | size_t len; | ||
164 | char* buf; | ||
165 | |||
166 | va_start(args, fmt); | ||
167 | local_irq_save(flags); | ||
168 | |||
169 | /* format message */ | ||
170 | buf = __get_cpu_var(fmt_buffer); | ||
171 | len = vscnprintf(buf, MSG_SIZE, fmt, args); | ||
172 | |||
173 | spin_lock(&log_buffer_lock); | ||
174 | /* Don't copy the trailing null byte; we don't want null bytes | ||
175 | * in a text file. | ||
176 | */ | ||
177 | rb_put(&log_buffer.buf, buf, len); | ||
178 | spin_unlock(&log_buffer_lock); | ||
179 | |||
180 | local_irq_restore(flags); | ||
181 | va_end(args); | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * log_read - Read the trace buffer | ||
186 | * | ||
187 | * This function is called as a file operation from userspace. | ||
188 | * Readers can sleep. Access is serialized through reader_mutex | ||
189 | */ | ||
190 | static ssize_t log_read(struct file *filp, char __user *to, size_t len, | ||
191 | loff_t *f_pos) | ||
192 | { | ||
193 | /* we ignore f_pos, this is strictly sequential */ | ||
194 | |||
195 | ssize_t error = -EINVAL; | ||
196 | char* mem; | ||
197 | trace_buffer_t *tbuf = filp->private_data; | ||
198 | |||
199 | if (down_interruptible(&tbuf->reader_mutex)) { | ||
200 | error = -ERESTARTSYS; | ||
201 | goto out; | ||
202 | } | ||
203 | |||
204 | if (len > MAX_READ_LEN) | ||
205 | len = MAX_READ_LEN; | ||
206 | |||
207 | mem = kmalloc(len, GFP_KERNEL); | ||
208 | if (!mem) { | ||
209 | error = -ENOMEM; | ||
210 | goto out_unlock; | ||
211 | } | ||
212 | |||
213 | error = rb_get(&tbuf->buf, mem, len); | ||
214 | while (!error) { | ||
215 | set_current_state(TASK_INTERRUPTIBLE); | ||
216 | schedule_timeout(110); | ||
217 | if (signal_pending(current)) | ||
218 | error = -ERESTARTSYS; | ||
219 | else | ||
220 | error = rb_get(&tbuf->buf, mem, len); | ||
221 | } | ||
222 | |||
223 | if (error > 0 && copy_to_user(to, mem, error)) | ||
224 | error = -EFAULT; | ||
225 | |||
226 | kfree(mem); | ||
227 | out_unlock: | ||
228 | up(&tbuf->reader_mutex); | ||
229 | out: | ||
230 | return error; | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * Enable redirection of printk() messages to the trace buffer. | ||
235 | * Defined in kernel/printk.c | ||
236 | */ | ||
237 | extern int trace_override; | ||
238 | extern int trace_recurse; | ||
239 | |||
240 | /* | ||
241 | * log_open - open the global log message ring buffer. | ||
242 | */ | ||
243 | static int log_open(struct inode *in, struct file *filp) | ||
244 | { | ||
245 | int error = -EINVAL; | ||
246 | trace_buffer_t* tbuf; | ||
247 | |||
248 | tbuf = &log_buffer; | ||
249 | |||
250 | if (down_interruptible(&tbuf->reader_mutex)) { | ||
251 | error = -ERESTARTSYS; | ||
252 | goto out; | ||
253 | } | ||
254 | |||
255 | /* first open must allocate buffers */ | ||
256 | if (atomic_inc_return(&tbuf->reader_cnt) == 1) { | ||
257 | if ((error = rb_alloc_buf(&tbuf->buf, LITMUS_TRACE_BUF_SIZE))) | ||
258 | { | ||
259 | atomic_dec(&tbuf->reader_cnt); | ||
260 | goto out_unlock; | ||
261 | } | ||
262 | } | ||
263 | |||
264 | error = 0; | ||
265 | filp->private_data = tbuf; | ||
266 | |||
267 | printk(KERN_DEBUG | ||
268 | "sched_trace kfifo at 0x%p with buffer starting at: 0x%p\n", | ||
269 | tbuf->buf.kfifo, &((tbuf->buf.kfifo)->buffer)); | ||
270 | |||
271 | /* override printk() */ | ||
272 | trace_override++; | ||
273 | |||
274 | out_unlock: | ||
275 | up(&tbuf->reader_mutex); | ||
276 | out: | ||
277 | return error; | ||
278 | } | ||
279 | |||
280 | static int log_release(struct inode *in, struct file *filp) | ||
281 | { | ||
282 | int error = -EINVAL; | ||
283 | trace_buffer_t* tbuf = filp->private_data; | ||
284 | |||
285 | BUG_ON(!filp->private_data); | ||
286 | |||
287 | if (down_interruptible(&tbuf->reader_mutex)) { | ||
288 | error = -ERESTARTSYS; | ||
289 | goto out; | ||
290 | } | ||
291 | |||
292 | /* last release must deallocate buffers */ | ||
293 | if (atomic_dec_return(&tbuf->reader_cnt) == 0) { | ||
294 | error = rb_free_buf(&tbuf->buf); | ||
295 | } | ||
296 | |||
297 | /* release printk() overriding */ | ||
298 | trace_override--; | ||
299 | |||
300 | printk(KERN_DEBUG "sched_trace kfifo released\n"); | ||
301 | |||
302 | up(&tbuf->reader_mutex); | ||
303 | out: | ||
304 | return error; | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * log_fops - The file operations for accessing the global LITMUS log message | ||
309 | * buffer. | ||
310 | * | ||
311 | * Except for opening the device file it uses the same operations as trace_fops. | ||
312 | */ | ||
313 | static struct file_operations log_fops = { | ||
314 | .owner = THIS_MODULE, | ||
315 | .open = log_open, | ||
316 | .release = log_release, | ||
317 | .read = log_read, | ||
318 | }; | ||
319 | |||
320 | static struct miscdevice litmus_log_dev = { | ||
321 | .name = SCHED_TRACE_NAME, | ||
322 | .minor = MISC_DYNAMIC_MINOR, | ||
323 | .fops = &log_fops, | ||
324 | }; | ||
325 | |||
326 | #ifdef CONFIG_MAGIC_SYSRQ | ||
327 | void dump_trace_buffer(int max) | ||
328 | { | ||
329 | char line[80]; | ||
330 | int len; | ||
331 | int count = 0; | ||
332 | |||
333 | /* potential, but very unlikely, race... */ | ||
334 | trace_recurse = 1; | ||
335 | while ((max == 0 || count++ < max) && | ||
336 | (len = rb_get(&log_buffer.buf, line, sizeof(line) - 1)) > 0) { | ||
337 | line[len] = '\0'; | ||
338 | printk("%s", line); | ||
339 | } | ||
340 | trace_recurse = 0; | ||
341 | } | ||
342 | |||
343 | static void sysrq_dump_trace_buffer(int key, struct tty_struct *tty) | ||
344 | { | ||
345 | dump_trace_buffer(100); | ||
346 | } | ||
347 | |||
348 | static struct sysrq_key_op sysrq_dump_trace_buffer_op = { | ||
349 | .handler = sysrq_dump_trace_buffer, | ||
350 | .help_msg = "dump-trace-buffer(Y)", | ||
351 | .action_msg = "writing content of TRACE() buffer", | ||
352 | }; | ||
353 | #endif | ||
354 | |||
355 | static int __init init_sched_trace(void) | ||
356 | { | ||
357 | printk("Initializing TRACE() device\n"); | ||
358 | init_log_buffer(); | ||
359 | |||
360 | #ifdef CONFIG_MAGIC_SYSRQ | ||
361 | /* offer some debugging help */ | ||
362 | if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op)) | ||
363 | printk("Registered dump-trace-buffer(Y) magic sysrq.\n"); | ||
364 | else | ||
365 | printk("Could not register dump-trace-buffer(Y) magic sysrq.\n"); | ||
366 | #endif | ||
367 | |||
368 | |||
369 | return misc_register(&litmus_log_dev); | ||
370 | } | ||
371 | |||
372 | static void __exit exit_sched_trace(void) | ||
373 | { | ||
374 | misc_deregister(&litmus_log_dev); | ||
375 | } | ||
376 | |||
377 | module_init(init_sched_trace); | ||
378 | module_exit(exit_sched_trace); | ||
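
The TRACE() log is a plain-text stream behind the "litmus/log" misc device: log_read() sleeps until messages arrive, the first open allocates the kfifo, and the last release frees it. A minimal consumer sketch, assuming udev exposes the node as /dev/litmus/log:

/* TRACE() log consumer sketch (not part of the patch): copies the text
 * stream to stdout. The /dev/litmus/log path assumes the usual udev naming
 * for the "litmus/log" misc device registered above. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];   /* well below MAX_READ_LEN (64 KiB) */
	ssize_t n;
	int fd = open("/dev/litmus/log", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/litmus/log");
		return EXIT_FAILURE;
	}
	/* log_read() sleeps until messages are available, so this simply loops */
	while ((n = read(fd, buf, sizeof(buf))) > 0) {
		if (write(STDOUT_FILENO, buf, (size_t) n) < 0)
			break;
	}
	close(fd);
	return 0;
}
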
diff --git a/litmus/srp.c b/litmus/srp.c new file mode 100644 index 000000000000..71639b991630 --- /dev/null +++ b/litmus/srp.c | |||
@@ -0,0 +1,318 @@ | |||
1 | /* ************************************************************************** */ | ||
2 | /* STACK RESOURCE POLICY */ | ||
3 | /* ************************************************************************** */ | ||
4 | |||
5 | #include <asm/atomic.h> | ||
6 | #include <linux/wait.h> | ||
7 | #include <litmus/litmus.h> | ||
8 | #include <litmus/sched_plugin.h> | ||
9 | |||
10 | #include <litmus/fdso.h> | ||
11 | |||
12 | #include <litmus/trace.h> | ||
13 | |||
14 | |||
15 | #ifdef CONFIG_SRP | ||
16 | |||
17 | struct srp_priority { | ||
18 | struct list_head list; | ||
19 | unsigned int period; | ||
20 | pid_t pid; | ||
21 | }; | ||
22 | |||
23 | #define list2prio(l) list_entry(l, struct srp_priority, list) | ||
24 | |||
25 | /* SRP task priority comparison function. Shorter periods have higher | ||
26 | * priority; ties are broken by PID. Special case: period == 0 <=> no priority. | ||
27 | */ | ||
28 | static int srp_higher_prio(struct srp_priority* first, | ||
29 | struct srp_priority* second) | ||
30 | { | ||
31 | if (!first->period) | ||
32 | return 0; | ||
33 | else | ||
34 | return !second->period || | ||
35 | first->period < second->period || ( | ||
36 | first->period == second->period && | ||
37 | first->pid < second->pid); | ||
38 | } | ||
39 | |||
40 | struct srp { | ||
41 | struct list_head ceiling; | ||
42 | wait_queue_head_t ceiling_blocked; | ||
43 | }; | ||
44 | |||
45 | |||
46 | atomic_t srp_objects_in_use = ATOMIC_INIT(0); | ||
47 | |||
48 | DEFINE_PER_CPU(struct srp, srp); | ||
49 | |||
50 | |||
51 | /* Initialize SRP per-CPU ceiling structures at boot time. */ | ||
52 | static int __init srp_init(void) | ||
53 | { | ||
54 | int i; | ||
55 | |||
56 | printk("Initializing SRP per-CPU ceilings..."); | ||
57 | for (i = 0; i < NR_CPUS; i++) { | ||
58 | init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked); | ||
59 | INIT_LIST_HEAD(&per_cpu(srp, i).ceiling); | ||
60 | } | ||
61 | printk(" done!\n"); | ||
62 | |||
63 | return 0; | ||
64 | } | ||
65 | module_init(srp_init); | ||
66 | |||
67 | |||
68 | #define system_ceiling(srp) list2prio(srp->ceiling.next) | ||
69 | |||
70 | |||
71 | #define UNDEF_SEM -2 | ||
72 | |||
73 | |||
74 | /* struct for uniprocessor SRP "semaphore" */ | ||
75 | struct srp_semaphore { | ||
76 | struct srp_priority ceiling; | ||
77 | struct task_struct* owner; | ||
78 | int cpu; /* cpu associated with this "semaphore" and resource */ | ||
79 | }; | ||
80 | |||
81 | #define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling) | ||
82 | |||
83 | static int srp_exceeds_ceiling(struct task_struct* first, | ||
84 | struct srp* srp) | ||
85 | { | ||
86 | return list_empty(&srp->ceiling) || | ||
87 | get_rt_period(first) < system_ceiling(srp)->period || | ||
88 | (get_rt_period(first) == system_ceiling(srp)->period && | ||
89 | first->pid < system_ceiling(srp)->pid) || | ||
90 | ceiling2sem(system_ceiling(srp))->owner == first; | ||
91 | } | ||
92 | |||
93 | static void srp_add_prio(struct srp* srp, struct srp_priority* prio) | ||
94 | { | ||
95 | struct list_head *pos; | ||
96 | if (in_list(&prio->list)) { | ||
97 | printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in " | ||
98 | "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio)); | ||
99 | return; | ||
100 | } | ||
101 | list_for_each(pos, &srp->ceiling) | ||
102 | if (unlikely(srp_higher_prio(prio, list2prio(pos)))) { | ||
103 | __list_add(&prio->list, pos->prev, pos); | ||
104 | return; | ||
105 | } | ||
106 | |||
107 | list_add_tail(&prio->list, &srp->ceiling); | ||
108 | } | ||
109 | |||
110 | |||
111 | static void* create_srp_semaphore(void) | ||
112 | { | ||
113 | struct srp_semaphore* sem; | ||
114 | |||
115 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
116 | if (!sem) | ||
117 | return NULL; | ||
118 | |||
119 | INIT_LIST_HEAD(&sem->ceiling.list); | ||
120 | sem->ceiling.period = 0; | ||
121 | sem->cpu = UNDEF_SEM; | ||
122 | sem->owner = NULL; | ||
123 | atomic_inc(&srp_objects_in_use); | ||
124 | return sem; | ||
125 | } | ||
126 | |||
127 | static noinline int open_srp_semaphore(struct od_table_entry* entry, void* __user arg) | ||
128 | { | ||
129 | struct srp_semaphore* sem = (struct srp_semaphore*) entry->obj->obj; | ||
130 | int ret = 0; | ||
131 | struct task_struct* t = current; | ||
132 | struct srp_priority t_prio; | ||
133 | |||
134 | TRACE("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu); | ||
135 | if (!srp_active()) | ||
136 | return -EBUSY; | ||
137 | |||
138 | if (sem->cpu == UNDEF_SEM) | ||
139 | sem->cpu = get_partition(t); | ||
140 | else if (sem->cpu != get_partition(t)) | ||
141 | ret = -EPERM; | ||
142 | |||
143 | if (ret == 0) { | ||
144 | t_prio.period = get_rt_period(t); | ||
145 | t_prio.pid = t->pid; | ||
146 | if (srp_higher_prio(&t_prio, &sem->ceiling)) { | ||
147 | sem->ceiling.period = t_prio.period; | ||
148 | sem->ceiling.pid = t_prio.pid; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | return ret; | ||
153 | } | ||
154 | |||
155 | static void destroy_srp_semaphore(void* sem) | ||
156 | { | ||
157 | /* XXX invariants */ | ||
158 | atomic_dec(&srp_objects_in_use); | ||
159 | kfree(sem); | ||
160 | } | ||
161 | |||
162 | struct fdso_ops srp_sem_ops = { | ||
163 | .create = create_srp_semaphore, | ||
164 | .open = open_srp_semaphore, | ||
165 | .destroy = destroy_srp_semaphore | ||
166 | }; | ||
167 | |||
168 | |||
169 | static void do_srp_down(struct srp_semaphore* sem) | ||
170 | { | ||
171 | /* Update ceiling. */ | ||
172 | srp_add_prio(&__get_cpu_var(srp), &sem->ceiling); | ||
173 | WARN_ON(sem->owner != NULL); | ||
174 | sem->owner = current; | ||
175 | TRACE_CUR("acquired srp 0x%p\n", sem); | ||
176 | } | ||
177 | |||
178 | static void do_srp_up(struct srp_semaphore* sem) | ||
179 | { | ||
180 | /* Determine new system priority ceiling for this CPU. */ | ||
181 | WARN_ON(!in_list(&sem->ceiling.list)); | ||
182 | if (in_list(&sem->ceiling.list)) | ||
183 | list_del(&sem->ceiling.list); | ||
184 | |||
185 | sem->owner = NULL; | ||
186 | |||
187 | /* Wake tasks on this CPU, if they exceed current ceiling. */ | ||
188 | TRACE_CUR("released srp 0x%p\n", sem); | ||
189 | wake_up_all(&__get_cpu_var(srp).ceiling_blocked); | ||
190 | } | ||
191 | |||
192 | /* Adjust the system-wide priority ceiling if resource is claimed. */ | ||
193 | asmlinkage long sys_srp_down(int sem_od) | ||
194 | { | ||
195 | int cpu; | ||
196 | int ret = -EINVAL; | ||
197 | struct srp_semaphore* sem; | ||
198 | |||
199 | /* disabling preemptions is sufficient protection since | ||
200 | * SRP is strictly per CPU and we don't interfere with any | ||
201 | * interrupt handlers | ||
202 | */ | ||
203 | preempt_disable(); | ||
204 | TS_SRP_DOWN_START; | ||
205 | |||
206 | cpu = smp_processor_id(); | ||
207 | sem = lookup_srp_sem(sem_od); | ||
208 | if (sem && sem->cpu == cpu) { | ||
209 | do_srp_down(sem); | ||
210 | ret = 0; | ||
211 | } | ||
212 | |||
213 | TS_SRP_DOWN_END; | ||
214 | preempt_enable(); | ||
215 | return ret; | ||
216 | } | ||
217 | |||
218 | /* Adjust the system-wide priority ceiling if resource is freed. */ | ||
219 | asmlinkage long sys_srp_up(int sem_od) | ||
220 | { | ||
221 | int cpu; | ||
222 | int ret = -EINVAL; | ||
223 | struct srp_semaphore* sem; | ||
224 | |||
225 | preempt_disable(); | ||
226 | TS_SRP_UP_START; | ||
227 | |||
228 | cpu = smp_processor_id(); | ||
229 | sem = lookup_srp_sem(sem_od); | ||
230 | |||
231 | if (sem && sem->cpu == cpu) { | ||
232 | do_srp_up(sem); | ||
233 | ret = 0; | ||
234 | } | ||
235 | |||
236 | TS_SRP_UP_END; | ||
237 | preempt_enable(); | ||
238 | return ret; | ||
239 | } | ||
240 | |||
241 | static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync, | ||
242 | void *key) | ||
243 | { | ||
244 | int cpu = smp_processor_id(); | ||
245 | struct task_struct *tsk = wait->private; | ||
246 | if (cpu != get_partition(tsk)) | ||
247 | TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\b", | ||
248 | get_partition(tsk)); | ||
249 | else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) | ||
250 | return default_wake_function(wait, mode, sync, key); | ||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | |||
255 | |||
256 | static void do_ceiling_block(struct task_struct *tsk) | ||
257 | { | ||
258 | wait_queue_t wait = { | ||
259 | .private = tsk, | ||
260 | .func = srp_wake_up, | ||
261 | .task_list = {NULL, NULL} | ||
262 | }; | ||
263 | |||
264 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
265 | add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); | ||
266 | tsk->rt_param.srp_non_recurse = 1; | ||
267 | preempt_enable_no_resched(); | ||
268 | schedule(); | ||
269 | preempt_disable(); | ||
270 | tsk->rt_param.srp_non_recurse = 0; | ||
271 | remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); | ||
272 | } | ||
273 | |||
274 | /* Wait for current task priority to exceed system-wide priority ceiling. | ||
275 | */ | ||
276 | void srp_ceiling_block(void) | ||
277 | { | ||
278 | struct task_struct *tsk = current; | ||
279 | |||
280 | /* Only real-time tasks can be ceiling-blocked; unlikely() optimizes for the RT case. */ | ||
281 | if (unlikely(!is_realtime(tsk))) | ||
282 | return; | ||
283 | |||
284 | /* Avoid recursive ceiling blocking. */ | ||
285 | if (unlikely(tsk->rt_param.srp_non_recurse)) | ||
286 | return; | ||
287 | |||
288 | /* Bail out early if there aren't any SRP resources around. */ | ||
289 | if (likely(!atomic_read(&srp_objects_in_use))) | ||
290 | return; | ||
291 | |||
292 | preempt_disable(); | ||
293 | if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) { | ||
294 | TRACE_CUR("is priority ceiling blocked.\n"); | ||
295 | while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) | ||
296 | do_ceiling_block(tsk); | ||
297 | TRACE_CUR("finally exceeds system ceiling.\n"); | ||
298 | } else | ||
299 | TRACE_CUR("is not priority ceiling blocked\n"); | ||
300 | preempt_enable(); | ||
301 | } | ||
302 | |||
303 | |||
304 | #else | ||
305 | |||
306 | asmlinkage long sys_srp_down(int sem_od) | ||
307 | { | ||
308 | return -ENOSYS; | ||
309 | } | ||
310 | |||
311 | asmlinkage long sys_srp_up(int sem_od) | ||
312 | { | ||
313 | return -ENOSYS; | ||
314 | } | ||
315 | |||
316 | struct fdso_ops srp_sem_ops = {}; | ||
317 | |||
318 | #endif | ||
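
The core of the implementation is srp_exceeds_ceiling(): a job may proceed only if its priority (shorter period wins, PID breaks ties) beats the entry at the top of the per-CPU ceiling list, or if it already owns the resource that defines the ceiling. A stand-alone, user-space model of that test:

/* Stand-alone model of the SRP ceiling test above (not kernel code).
 * Periods stand in for priorities: a shorter period means higher priority. */
#include <stdio.h>

struct ceiling_entry {
	unsigned int period;   /* ceiling recorded for a held resource */
	int pid;               /* PID that set the ceiling (tie-break) */
	int owner_pid;         /* current owner of that resource */
};

/* mirrors srp_exceeds_ceiling(): may the task proceed past the ceiling? */
static int exceeds_ceiling(unsigned int period, int pid,
                           const struct ceiling_entry *top)
{
	if (!top)   /* empty ceiling list: nothing blocks the task */
		return 1;
	return period < top->period ||
	       (period == top->period && pid < top->pid) ||
	       pid == top->owner_pid;   /* the owner itself always proceeds */
}

int main(void)
{
	struct ceiling_entry top = { .period = 100, .pid = 42, .owner_pid = 42 };

	printf("period 50, pid 7  -> %d (may run)\n", exceeds_ceiling(50, 7, &top));
	printf("period 200, pid 7 -> %d (blocked)\n", exceeds_ceiling(200, 7, &top));
	printf("owner, pid 42     -> %d (may run)\n", exceeds_ceiling(200, 42, &top));
	return 0;
}

With the ceiling set by a period-100 resource, only jobs with a shorter period (or the owner itself) get past the check, which is exactly the condition srp_ceiling_block() waits on.
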
diff --git a/litmus/sync.c b/litmus/sync.c new file mode 100644 index 000000000000..bf75fde5450b --- /dev/null +++ b/litmus/sync.c | |||
@@ -0,0 +1,104 @@ | |||
1 | /* litmus/sync.c - Support for synchronous and asynchronous task system releases. | ||
2 | * | ||
3 | * | ||
4 | */ | ||
5 | |||
6 | #include <asm/atomic.h> | ||
7 | #include <asm/uaccess.h> | ||
8 | #include <linux/spinlock.h> | ||
9 | #include <linux/list.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/completion.h> | ||
12 | |||
13 | #include <litmus/litmus.h> | ||
14 | #include <litmus/sched_plugin.h> | ||
15 | #include <litmus/jobs.h> | ||
16 | |||
17 | #include <litmus/sched_trace.h> | ||
18 | |||
19 | static DECLARE_COMPLETION(ts_release); | ||
20 | |||
21 | static long do_wait_for_ts_release(void) | ||
22 | { | ||
23 | long ret = 0; | ||
24 | |||
25 | /* If the interruption races with a release, the completion object | ||
26 | * may have a non-zero counter. To avoid this problem, this should | ||
27 | * be replaced by wait_for_completion(). | ||
28 | * | ||
29 | * For debugging purposes, this is interruptible for now. | ||
30 | */ | ||
31 | ret = wait_for_completion_interruptible(&ts_release); | ||
32 | |||
33 | return ret; | ||
34 | } | ||
35 | |||
36 | int count_tasks_waiting_for_release(void) | ||
37 | { | ||
38 | unsigned long flags; | ||
39 | int task_count = 0; | ||
40 | struct list_head *pos; | ||
41 | |||
42 | spin_lock_irqsave(&ts_release.wait.lock, flags); | ||
43 | list_for_each(pos, &ts_release.wait.task_list) { | ||
44 | task_count++; | ||
45 | } | ||
46 | spin_unlock_irqrestore(&ts_release.wait.lock, flags); | ||
47 | |||
48 | return task_count; | ||
49 | } | ||
50 | |||
51 | static long do_release_ts(lt_t start) | ||
52 | { | ||
53 | int task_count = 0; | ||
54 | unsigned long flags; | ||
55 | struct list_head *pos; | ||
56 | struct task_struct *t; | ||
57 | |||
58 | |||
59 | spin_lock_irqsave(&ts_release.wait.lock, flags); | ||
60 | TRACE("<<<<<< synchronous task system release >>>>>>\n"); | ||
61 | |||
62 | sched_trace_sys_release(&start); | ||
63 | list_for_each(pos, &ts_release.wait.task_list) { | ||
64 | t = (struct task_struct*) list_entry(pos, | ||
65 | struct __wait_queue, | ||
66 | task_list)->private; | ||
67 | task_count++; | ||
68 | litmus->release_at(t, start + t->rt_param.task_params.phase); | ||
69 | sched_trace_task_release(t); | ||
70 | } | ||
71 | |||
72 | spin_unlock_irqrestore(&ts_release.wait.lock, flags); | ||
73 | |||
74 | complete_n(&ts_release, task_count); | ||
75 | |||
76 | return task_count; | ||
77 | } | ||
78 | |||
79 | |||
80 | asmlinkage long sys_wait_for_ts_release(void) | ||
81 | { | ||
82 | long ret = -EPERM; | ||
83 | struct task_struct *t = current; | ||
84 | |||
85 | if (is_realtime(t)) | ||
86 | ret = do_wait_for_ts_release(); | ||
87 | |||
88 | return ret; | ||
89 | } | ||
90 | |||
91 | |||
92 | asmlinkage long sys_release_ts(lt_t __user *__delay) | ||
93 | { | ||
94 | long ret; | ||
95 | lt_t delay; | ||
96 | |||
97 | /* FIXME: check capabilities... */ | ||
98 | |||
99 | ret = copy_from_user(&delay, __delay, sizeof(delay)); | ||
100 | if (ret == 0) | ||
101 | ret = do_release_ts(litmus_clock() + delay); | ||
102 | |||
103 | return ret; | ||
104 | } | ||
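
do_release_ts() hands every waiting task the same reference time and offsets each first release by the task's phase, so a synchronous trigger can still produce a staggered task set. A small arithmetic sketch of the values passed to release_at() (all numbers invented):

/* Sketch of the release-time arithmetic in do_release_ts(): every task's
 * first job is released at start + phase. All values below are invented. */
#include <stdio.h>

typedef unsigned long long lt_t;   /* LITMUS time type: nanoseconds */

int main(void)
{
	lt_t now = 1000000000ULL;                         /* pretend litmus_clock() */
	lt_t delay = 500000000ULL;                        /* 0.5 s, from sys_release_ts() */
	lt_t start = now + delay;                         /* common reference release */
	lt_t phase[] = { 0ULL, 2000000ULL, 4000000ULL };  /* per-task phases */
	size_t i;

	for (i = 0; i < sizeof(phase) / sizeof(phase[0]); i++)
		printf("task %zu: first release at %llu ns\n", i, start + phase[i]);
	return 0;
}
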
diff --git a/litmus/trace.c b/litmus/trace.c new file mode 100644 index 000000000000..440376998dc9 --- /dev/null +++ b/litmus/trace.c | |||
@@ -0,0 +1,103 @@ | |||
1 | #include <linux/module.h> | ||
2 | |||
3 | #include <litmus/ftdev.h> | ||
4 | #include <litmus/litmus.h> | ||
5 | #include <litmus/trace.h> | ||
6 | |||
7 | /******************************************************************************/ | ||
8 | /* Allocation */ | ||
9 | /******************************************************************************/ | ||
10 | |||
11 | static struct ftdev overhead_dev; | ||
12 | |||
13 | #define trace_ts_buf overhead_dev.minor[0].buf | ||
14 | |||
15 | static unsigned int ts_seq_no = 0; | ||
16 | |||
17 | static inline void __save_timestamp_cpu(unsigned long event, | ||
18 | uint8_t type, uint8_t cpu) | ||
19 | { | ||
20 | unsigned int seq_no; | ||
21 | struct timestamp *ts; | ||
22 | seq_no = fetch_and_inc((int *) &ts_seq_no); | ||
23 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | ||
24 | ts->event = event; | ||
25 | ts->timestamp = ft_timestamp(); | ||
26 | ts->seq_no = seq_no; | ||
27 | ts->cpu = cpu; | ||
28 | ts->task_type = type; | ||
29 | ft_buffer_finish_write(trace_ts_buf, ts); | ||
30 | } | ||
31 | } | ||
32 | |||
33 | static inline void __save_timestamp(unsigned long event, | ||
34 | uint8_t type) | ||
35 | { | ||
36 | __save_timestamp_cpu(event, type, raw_smp_processor_id()); | ||
37 | } | ||
38 | |||
39 | feather_callback void save_timestamp(unsigned long event) | ||
40 | { | ||
41 | __save_timestamp(event, TSK_UNKNOWN); | ||
42 | } | ||
43 | |||
44 | feather_callback void save_timestamp_def(unsigned long event, | ||
45 | unsigned long type) | ||
46 | { | ||
47 | __save_timestamp(event, (uint8_t) type); | ||
48 | } | ||
49 | |||
50 | feather_callback void save_timestamp_task(unsigned long event, | ||
51 | unsigned long t_ptr) | ||
52 | { | ||
53 | int rt = is_realtime((struct task_struct *) t_ptr); | ||
54 | __save_timestamp(event, rt ? TSK_RT : TSK_BE); | ||
55 | } | ||
56 | |||
57 | feather_callback void save_timestamp_cpu(unsigned long event, | ||
58 | unsigned long cpu) | ||
59 | { | ||
60 | __save_timestamp_cpu(event, TSK_UNKNOWN, cpu); | ||
61 | } | ||
62 | |||
63 | /******************************************************************************/ | ||
64 | /* DEVICE FILE DRIVER */ | ||
65 | /******************************************************************************/ | ||
66 | |||
67 | /* | ||
68 | * should be 8M; it is the maximum we can ask of the buddy allocator (MAX_ORDER) | ||
69 | * and we might not get that much | ||
70 | */ | ||
71 | #define NO_TIMESTAMPS (2 << 11) | ||
72 | |||
73 | /* set MAJOR to 0 to have it dynamically assigned */ | ||
74 | #define FT_TRACE_MAJOR 252 | ||
75 | |||
76 | static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) | ||
77 | { | ||
78 | unsigned int count = NO_TIMESTAMPS; | ||
79 | while (count && !trace_ts_buf) { | ||
80 | printk("time stamp buffer: trying to allocate %u time stamps.\n", count); | ||
81 | ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp)); | ||
82 | count /= 2; | ||
83 | } | ||
84 | return ftdev->minor[idx].buf ? 0 : -ENOMEM; | ||
85 | } | ||
86 | |||
87 | static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) | ||
88 | { | ||
89 | free_ft_buffer(ftdev->minor[idx].buf); | ||
90 | ftdev->minor[idx].buf = NULL; | ||
91 | } | ||
92 | |||
93 | static int __init init_ft_overhead_trace(void) | ||
94 | { | ||
95 | printk("Initializing Feather-Trace overhead tracing device.\n"); | ||
96 | ftdev_init(&overhead_dev, THIS_MODULE); | ||
97 | overhead_dev.minor_cnt = 1; /* only one buffer */ | ||
98 | overhead_dev.alloc = alloc_timestamp_buffer; | ||
99 | overhead_dev.free = free_timestamp_buffer; | ||
100 | return register_ftdev(&overhead_dev, "ft_trace", FT_TRACE_MAJOR); | ||
101 | } | ||
102 | |||
103 | module_init(init_ft_overhead_trace); | ||
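
alloc_timestamp_buffer() uses a simple fallback strategy: request NO_TIMESTAMPS entries and halve the count until an allocation succeeds, since the full contiguous buffer may exceed what the page allocator will grant. A user-space sketch of the same retry pattern, with malloc() standing in for alloc_ft_buffer():

/* User-space sketch of the halving-retry allocation in
 * alloc_timestamp_buffer(); malloc() stands in for alloc_ft_buffer(). */
#include <stdio.h>
#include <stdlib.h>

struct timestamp_stub { unsigned long long event, timestamp; };

static void *alloc_with_fallback(size_t *granted, size_t elem_size)
{
	size_t count = 1u << 20;   /* ask big first; may be more than available */
	void *buf = NULL;

	while (count && !buf) {
		printf("trying to allocate %zu entries\n", count);
		buf = malloc(count * elem_size);
		if (!buf)
			count /= 2;        /* halve and retry, as the kernel code does */
	}
	*granted = buf ? count : 0;
	return buf;
}

int main(void)
{
	size_t got = 0;
	void *buf = alloc_with_fallback(&got, sizeof(struct timestamp_stub));

	printf("got %zu entries\n", got);
	free(buf);
	return 0;
}
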