diff options
100 files changed, 17213 insertions, 36 deletions
@@ -1,7 +1,7 @@ | |||
1 | VERSION = 2 | 1 | VERSION = 2 |
2 | PATCHLEVEL = 6 | 2 | PATCHLEVEL = 6 |
3 | SUBLEVEL = 36 | 3 | SUBLEVEL = 36 |
4 | EXTRAVERSION = | 4 | EXTRAVERSION =-litmus2010 |
5 | NAME = Flesh-Eating Bats with Fangs | 5 | NAME = Flesh-Eating Bats with Fangs |
6 | 6 | ||
7 | # *DOCUMENTATION* | 7 | # *DOCUMENTATION* |
@@ -659,7 +659,7 @@ export mod_strip_cmd | |||
659 | 659 | ||
660 | 660 | ||
661 | ifeq ($(KBUILD_EXTMOD),) | 661 | ifeq ($(KBUILD_EXTMOD),) |
662 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ | 662 | core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/ |
663 | 663 | ||
664 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ | 664 | vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ |
665 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ | 665 | $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ |
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9c26ba7244fb..babad6d7681a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig | |||
@@ -1808,3 +1808,11 @@ source "security/Kconfig" | |||
1808 | source "crypto/Kconfig" | 1808 | source "crypto/Kconfig" |
1809 | 1809 | ||
1810 | source "lib/Kconfig" | 1810 | source "lib/Kconfig" |
1811 | |||
1812 | config ARCH_HAS_SEND_PULL_TIMERS | ||
1813 | def_bool n | ||
1814 | |||
1815 | config ARCH_HAS_FEATHER_TRACE | ||
1816 | def_bool n | ||
1817 | |||
1818 | source "litmus/Kconfig" | ||
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 3be8de3adaba..8a102a383a36 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h | |||
@@ -16,9 +16,11 @@ | |||
16 | 16 | ||
17 | typedef unsigned long cycles_t; | 17 | typedef unsigned long cycles_t; |
18 | 18 | ||
19 | #ifndef get_cycles | ||
19 | static inline cycles_t get_cycles (void) | 20 | static inline cycles_t get_cycles (void) |
20 | { | 21 | { |
21 | return 0; | 22 | return 0; |
22 | } | 23 | } |
24 | #endif | ||
23 | 25 | ||
24 | #endif | 26 | #endif |
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index c891eb76c0e3..625b30490624 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h | |||
@@ -397,6 +397,9 @@ | |||
397 | #define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) | 397 | #define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) |
398 | #define __NR_prlimit64 (__NR_SYSCALL_BASE+369) | 398 | #define __NR_prlimit64 (__NR_SYSCALL_BASE+369) |
399 | 399 | ||
400 | #define __NR_LITMUS (__NR_SYSCALL_BASE+370) | ||
401 | #include <litmus/unistd_32.h> | ||
402 | |||
400 | /* | 403 | /* |
401 | * The following SWIs are ARM private. | 404 | * The following SWIs are ARM private. |
402 | */ | 405 | */ |
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 5c26eccef998..b99087ac85b9 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S | |||
@@ -379,6 +379,18 @@ | |||
379 | CALL(sys_fanotify_init) | 379 | CALL(sys_fanotify_init) |
380 | CALL(sys_fanotify_mark) | 380 | CALL(sys_fanotify_mark) |
381 | CALL(sys_prlimit64) | 381 | CALL(sys_prlimit64) |
382 | /* 370 */ CALL(sys_set_rt_task_param) | ||
383 | CALL(sys_get_rt_task_param) | ||
384 | CALL(sys_complete_job) | ||
385 | CALL(sys_od_open) | ||
386 | CALL(sys_od_close) | ||
387 | /* 375 */ CALL(sys_litmus_lock) | ||
388 | CALL(sys_litmus_unlock) | ||
389 | CALL(sys_query_job_no) | ||
390 | CALL(sys_wait_for_job_release) | ||
391 | CALL(sys_wait_for_ts_release) | ||
392 | /* 380 */ CALL(sys_release_ts) | ||
393 | CALL(sys_null_call) | ||
382 | #ifndef syscalls_counted | 394 | #ifndef syscalls_counted |
383 | .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls | 395 | .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls |
384 | #define syscalls_counted | 396 | #define syscalls_counted |
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 40dc74f2b27f..b72fbf3d043c 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c | |||
@@ -38,6 +38,8 @@ | |||
38 | #include <asm/localtimer.h> | 38 | #include <asm/localtimer.h> |
39 | #include <asm/smp_plat.h> | 39 | #include <asm/smp_plat.h> |
40 | 40 | ||
41 | #include <litmus/preempt.h> | ||
42 | |||
41 | /* | 43 | /* |
42 | * as from 2.5, kernels no longer have an init_tasks structure | 44 | * as from 2.5, kernels no longer have an init_tasks structure |
43 | * so we need some other way of telling a new secondary core | 45 | * so we need some other way of telling a new secondary core |
@@ -533,6 +535,8 @@ asmlinkage void __exception do_IPI(struct pt_regs *regs) | |||
533 | * nothing more to do - eveything is | 535 | * nothing more to do - eveything is |
534 | * done on the interrupt return path | 536 | * done on the interrupt return path |
535 | */ | 537 | */ |
538 | /* LITMUS^RT: take action based on scheduler state */ | ||
539 | sched_state_ipi(); | ||
536 | break; | 540 | break; |
537 | 541 | ||
538 | case IPI_CALL_FUNC: | 542 | case IPI_CALL_FUNC: |
diff --git a/arch/arm/mach-realview/include/mach/timex.h b/arch/arm/mach-realview/include/mach/timex.h index 4eeb069373c2..e8bcc40d1f08 100644 --- a/arch/arm/mach-realview/include/mach/timex.h +++ b/arch/arm/mach-realview/include/mach/timex.h | |||
@@ -21,3 +21,30 @@ | |||
21 | */ | 21 | */ |
22 | 22 | ||
23 | #define CLOCK_TICK_RATE (50000000 / 16) | 23 | #define CLOCK_TICK_RATE (50000000 / 16) |
24 | |||
25 | #if defined(CONFIG_MACH_REALVIEW_PB11MP) || defined(CONFIG_MACH_REALVIEW_PB1176) | ||
26 | |||
27 | static inline unsigned long realview_get_arm11_cp15_ccnt(void) | ||
28 | { | ||
29 | unsigned long cycles; | ||
30 | /* Read CP15 CCNT register. */ | ||
31 | asm volatile ("mrc p15, 0, %0, c15, c12, 1" : "=r" (cycles)); | ||
32 | return cycles; | ||
33 | } | ||
34 | |||
35 | #define get_cycles realview_get_arm11_cp15_ccnt | ||
36 | |||
37 | #elif defined(CONFIG_MACH_REALVIEW_PBA8) | ||
38 | |||
39 | |||
40 | static inline unsigned long realview_get_a8_cp15_ccnt(void) | ||
41 | { | ||
42 | unsigned long cycles; | ||
43 | /* Read CP15 CCNT register. */ | ||
44 | asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles)); | ||
45 | return cycles; | ||
46 | } | ||
47 | |||
48 | #define get_cycles realview_get_a8_cp15_ccnt | ||
49 | |||
50 | #endif | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cea0cd9a316f..5181ed3a211a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -2142,3 +2142,11 @@ source "crypto/Kconfig" | |||
2142 | source "arch/x86/kvm/Kconfig" | 2142 | source "arch/x86/kvm/Kconfig" |
2143 | 2143 | ||
2144 | source "lib/Kconfig" | 2144 | source "lib/Kconfig" |
2145 | |||
2146 | config ARCH_HAS_FEATHER_TRACE | ||
2147 | def_bool y | ||
2148 | |||
2149 | config ARCH_HAS_SEND_PULL_TIMERS | ||
2150 | def_bool y | ||
2151 | |||
2152 | source "litmus/Kconfig" | ||
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 8e8ec663a98f..5d07dea2ebb8 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -13,6 +13,7 @@ | |||
13 | BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) | 13 | BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) |
14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) | 14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) |
15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) | 15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) |
16 | BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR) | ||
16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) | 17 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) |
17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | 18 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) |
18 | 19 | ||
diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h new file mode 100644 index 000000000000..4fd31633405d --- /dev/null +++ b/arch/x86/include/asm/feather_trace.h | |||
@@ -0,0 +1,17 @@ | |||
1 | #ifndef _ARCH_FEATHER_TRACE_H | ||
2 | #define _ARCH_FEATHER_TRACE_H | ||
3 | |||
4 | #include <asm/msr.h> | ||
5 | |||
6 | static inline unsigned long long ft_timestamp(void) | ||
7 | { | ||
8 | return __native_read_tsc(); | ||
9 | } | ||
10 | |||
11 | #ifdef CONFIG_X86_32 | ||
12 | #include "feather_trace_32.h" | ||
13 | #else | ||
14 | #include "feather_trace_64.h" | ||
15 | #endif | ||
16 | |||
17 | #endif | ||
diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h new file mode 100644 index 000000000000..70202f90f169 --- /dev/null +++ b/arch/x86/include/asm/feather_trace_32.h | |||
@@ -0,0 +1,79 @@ | |||
1 | /* Do not directly include this file. Include feather_trace.h instead */ | ||
2 | |||
3 | #define feather_callback __attribute__((regparm(0))) | ||
4 | |||
5 | /* | ||
6 | * make the compiler reload any register that is not saved in | ||
7 | * a cdecl function call | ||
8 | */ | ||
9 | #define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx" | ||
10 | |||
11 | #define ft_event(id, callback) \ | ||
12 | __asm__ __volatile__( \ | ||
13 | "1: jmp 2f \n\t" \ | ||
14 | " call " #callback " \n\t" \ | ||
15 | ".section __event_table, \"aw\" \n\t" \ | ||
16 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
17 | ".previous \n\t" \ | ||
18 | "2: \n\t" \ | ||
19 | : : : CLOBBER_LIST) | ||
20 | |||
21 | #define ft_event0(id, callback) \ | ||
22 | __asm__ __volatile__( \ | ||
23 | "1: jmp 2f \n\t" \ | ||
24 | " subl $4, %%esp \n\t" \ | ||
25 | " movl $" #id ", (%%esp) \n\t" \ | ||
26 | " call " #callback " \n\t" \ | ||
27 | " addl $4, %%esp \n\t" \ | ||
28 | ".section __event_table, \"aw\" \n\t" \ | ||
29 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
30 | ".previous \n\t" \ | ||
31 | "2: \n\t" \ | ||
32 | : : : CLOBBER_LIST) | ||
33 | |||
34 | #define ft_event1(id, callback, param) \ | ||
35 | __asm__ __volatile__( \ | ||
36 | "1: jmp 2f \n\t" \ | ||
37 | " subl $8, %%esp \n\t" \ | ||
38 | " movl %0, 4(%%esp) \n\t" \ | ||
39 | " movl $" #id ", (%%esp) \n\t" \ | ||
40 | " call " #callback " \n\t" \ | ||
41 | " addl $8, %%esp \n\t" \ | ||
42 | ".section __event_table, \"aw\" \n\t" \ | ||
43 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
44 | ".previous \n\t" \ | ||
45 | "2: \n\t" \ | ||
46 | : : "r" (param) : CLOBBER_LIST) | ||
47 | |||
48 | #define ft_event2(id, callback, param, param2) \ | ||
49 | __asm__ __volatile__( \ | ||
50 | "1: jmp 2f \n\t" \ | ||
51 | " subl $12, %%esp \n\t" \ | ||
52 | " movl %1, 8(%%esp) \n\t" \ | ||
53 | " movl %0, 4(%%esp) \n\t" \ | ||
54 | " movl $" #id ", (%%esp) \n\t" \ | ||
55 | " call " #callback " \n\t" \ | ||
56 | " addl $12, %%esp \n\t" \ | ||
57 | ".section __event_table, \"aw\" \n\t" \ | ||
58 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
59 | ".previous \n\t" \ | ||
60 | "2: \n\t" \ | ||
61 | : : "r" (param), "r" (param2) : CLOBBER_LIST) | ||
62 | |||
63 | |||
64 | #define ft_event3(id, callback, p, p2, p3) \ | ||
65 | __asm__ __volatile__( \ | ||
66 | "1: jmp 2f \n\t" \ | ||
67 | " subl $16, %%esp \n\t" \ | ||
68 | " movl %2, 12(%%esp) \n\t" \ | ||
69 | " movl %1, 8(%%esp) \n\t" \ | ||
70 | " movl %0, 4(%%esp) \n\t" \ | ||
71 | " movl $" #id ", (%%esp) \n\t" \ | ||
72 | " call " #callback " \n\t" \ | ||
73 | " addl $16, %%esp \n\t" \ | ||
74 | ".section __event_table, \"aw\" \n\t" \ | ||
75 | ".long " #id ", 0, 1b, 2f \n\t" \ | ||
76 | ".previous \n\t" \ | ||
77 | "2: \n\t" \ | ||
78 | : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST) | ||
79 | |||
diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h new file mode 100644 index 000000000000..54ac2aeb3a28 --- /dev/null +++ b/arch/x86/include/asm/feather_trace_64.h | |||
@@ -0,0 +1,67 @@ | |||
1 | /* Do not directly include this file. Include feather_trace.h instead */ | ||
2 | |||
3 | /* regparm is the default on x86_64 */ | ||
4 | #define feather_callback | ||
5 | |||
6 | # define _EVENT_TABLE(id,from,to) \ | ||
7 | ".section __event_table, \"aw\"\n\t" \ | ||
8 | ".balign 8\n\t" \ | ||
9 | ".quad " #id ", 0, " #from ", " #to " \n\t" \ | ||
10 | ".previous \n\t" | ||
11 | |||
12 | /* | ||
13 | * x86_64 callee only owns rbp, rbx, r12 -> r15 | ||
14 | * the called can freely modify the others | ||
15 | */ | ||
16 | #define CLOBBER_LIST "memory", "cc", "rdi", "rsi", "rdx", "rcx", \ | ||
17 | "r8", "r9", "r10", "r11", "rax" | ||
18 | |||
19 | #define ft_event(id, callback) \ | ||
20 | __asm__ __volatile__( \ | ||
21 | "1: jmp 2f \n\t" \ | ||
22 | " call " #callback " \n\t" \ | ||
23 | _EVENT_TABLE(id,1b,2f) \ | ||
24 | "2: \n\t" \ | ||
25 | : : : CLOBBER_LIST) | ||
26 | |||
27 | #define ft_event0(id, callback) \ | ||
28 | __asm__ __volatile__( \ | ||
29 | "1: jmp 2f \n\t" \ | ||
30 | " movq $" #id ", %%rdi \n\t" \ | ||
31 | " call " #callback " \n\t" \ | ||
32 | _EVENT_TABLE(id,1b,2f) \ | ||
33 | "2: \n\t" \ | ||
34 | : : : CLOBBER_LIST) | ||
35 | |||
36 | #define ft_event1(id, callback, param) \ | ||
37 | __asm__ __volatile__( \ | ||
38 | "1: jmp 2f \n\t" \ | ||
39 | " movq %0, %%rsi \n\t" \ | ||
40 | " movq $" #id ", %%rdi \n\t" \ | ||
41 | " call " #callback " \n\t" \ | ||
42 | _EVENT_TABLE(id,1b,2f) \ | ||
43 | "2: \n\t" \ | ||
44 | : : "r" (param) : CLOBBER_LIST) | ||
45 | |||
46 | #define ft_event2(id, callback, param, param2) \ | ||
47 | __asm__ __volatile__( \ | ||
48 | "1: jmp 2f \n\t" \ | ||
49 | " movq %1, %%rdx \n\t" \ | ||
50 | " movq %0, %%rsi \n\t" \ | ||
51 | " movq $" #id ", %%rdi \n\t" \ | ||
52 | " call " #callback " \n\t" \ | ||
53 | _EVENT_TABLE(id,1b,2f) \ | ||
54 | "2: \n\t" \ | ||
55 | : : "r" (param), "r" (param2) : CLOBBER_LIST) | ||
56 | |||
57 | #define ft_event3(id, callback, p, p2, p3) \ | ||
58 | __asm__ __volatile__( \ | ||
59 | "1: jmp 2f \n\t" \ | ||
60 | " movq %2, %%rcx \n\t" \ | ||
61 | " movq %1, %%rdx \n\t" \ | ||
62 | " movq %0, %%rsi \n\t" \ | ||
63 | " movq $" #id ", %%rdi \n\t" \ | ||
64 | " call " #callback " \n\t" \ | ||
65 | _EVENT_TABLE(id,1b,2f) \ | ||
66 | "2: \n\t" \ | ||
67 | : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST) | ||
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 46c0fe05f230..c17411503f28 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -53,6 +53,8 @@ extern void threshold_interrupt(void); | |||
53 | extern void call_function_interrupt(void); | 53 | extern void call_function_interrupt(void); |
54 | extern void call_function_single_interrupt(void); | 54 | extern void call_function_single_interrupt(void); |
55 | 55 | ||
56 | extern void pull_timers_interrupt(void); | ||
57 | |||
56 | /* IOAPIC */ | 58 | /* IOAPIC */ |
57 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) | 59 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) |
58 | extern unsigned long io_apic_irqs; | 60 | extern unsigned long io_apic_irqs; |
@@ -122,6 +124,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void); | |||
122 | extern void smp_reschedule_interrupt(struct pt_regs *); | 124 | extern void smp_reschedule_interrupt(struct pt_regs *); |
123 | extern void smp_call_function_interrupt(struct pt_regs *); | 125 | extern void smp_call_function_interrupt(struct pt_regs *); |
124 | extern void smp_call_function_single_interrupt(struct pt_regs *); | 126 | extern void smp_call_function_single_interrupt(struct pt_regs *); |
127 | extern void smp_pull_timers_interrupt(struct pt_regs *); | ||
125 | #ifdef CONFIG_X86_32 | 128 | #ifdef CONFIG_X86_32 |
126 | extern void smp_invalidate_interrupt(struct pt_regs *); | 129 | extern void smp_invalidate_interrupt(struct pt_regs *); |
127 | #else | 130 | #else |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index e2ca30092557..6143ebeeebfa 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -109,6 +109,11 @@ | |||
109 | #define LOCAL_TIMER_VECTOR 0xef | 109 | #define LOCAL_TIMER_VECTOR 0xef |
110 | 110 | ||
111 | /* | 111 | /* |
112 | * LITMUS^RT pull timers IRQ vector | ||
113 | */ | ||
114 | #define PULL_TIMERS_VECTOR 0xee | ||
115 | |||
116 | /* | ||
112 | * Generic system vector for platform specific use | 117 | * Generic system vector for platform specific use |
113 | */ | 118 | */ |
114 | #define X86_PLATFORM_IPI_VECTOR 0xed | 119 | #define X86_PLATFORM_IPI_VECTOR 0xed |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 325b7bdbebaa..ebaa04a8d3af 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -169,6 +169,10 @@ extern void print_cpu_info(struct cpuinfo_x86 *); | |||
169 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | 169 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); |
170 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | 170 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); |
171 | extern unsigned short num_cache_leaves; | 171 | extern unsigned short num_cache_leaves; |
172 | #ifdef CONFIG_SYSFS | ||
173 | extern int get_shared_cpu_map(cpumask_var_t mask, | ||
174 | unsigned int cpu, int index); | ||
175 | #endif | ||
172 | 176 | ||
173 | extern void detect_extended_topology(struct cpuinfo_x86 *c); | 177 | extern void detect_extended_topology(struct cpuinfo_x86 *c); |
174 | extern void detect_ht(struct cpuinfo_x86 *c); | 178 | extern void detect_ht(struct cpuinfo_x86 *c); |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index b766a5e8ba0e..b7ba19acd3f8 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -347,9 +347,13 @@ | |||
347 | #define __NR_fanotify_mark 339 | 347 | #define __NR_fanotify_mark 339 |
348 | #define __NR_prlimit64 340 | 348 | #define __NR_prlimit64 340 |
349 | 349 | ||
350 | #define __NR_LITMUS 341 | ||
351 | |||
352 | #include "litmus/unistd_32.h" | ||
353 | |||
350 | #ifdef __KERNEL__ | 354 | #ifdef __KERNEL__ |
351 | 355 | ||
352 | #define NR_syscalls 341 | 356 | #define NR_syscalls 341 + NR_litmus_syscalls |
353 | 357 | ||
354 | #define __ARCH_WANT_IPC_PARSE_VERSION | 358 | #define __ARCH_WANT_IPC_PARSE_VERSION |
355 | #define __ARCH_WANT_OLD_READDIR | 359 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 363e9b8a715b..332bf3c9c84c 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -670,6 +670,10 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) | |||
670 | #define __NR_prlimit64 302 | 670 | #define __NR_prlimit64 302 |
671 | __SYSCALL(__NR_prlimit64, sys_prlimit64) | 671 | __SYSCALL(__NR_prlimit64, sys_prlimit64) |
672 | 672 | ||
673 | #define __NR_LITMUS 303 | ||
674 | |||
675 | #include "litmus/unistd_64.h" | ||
676 | |||
673 | #ifndef __NO_STUBS | 677 | #ifndef __NO_STUBS |
674 | #define __ARCH_WANT_OLD_READDIR | 678 | #define __ARCH_WANT_OLD_READDIR |
675 | #define __ARCH_WANT_OLD_STAT | 679 | #define __ARCH_WANT_OLD_STAT |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fedf32a8c3ec..6890dbb9ac15 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -118,6 +118,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | |||
118 | 118 | ||
119 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 119 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
120 | 120 | ||
121 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o | ||
122 | |||
121 | ### | 123 | ### |
122 | # 64 bit specific files | 124 | # 64 bit specific files |
123 | ifeq ($(CONFIG_X86_64),y) | 125 | ifeq ($(CONFIG_X86_64),y) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 898c2f4eab88..3fec7d9bfd62 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -758,6 +758,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
758 | static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); | 758 | static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); |
759 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) | 759 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) |
760 | 760 | ||
761 | /* returns CPUs that share the index cache with cpu */ | ||
762 | int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) | ||
763 | { | ||
764 | int ret = 0; | ||
765 | struct _cpuid4_info *this_leaf; | ||
766 | |||
767 | if (index >= num_cache_leaves) { | ||
768 | index = num_cache_leaves - 1; | ||
769 | ret = index; | ||
770 | } | ||
771 | |||
772 | this_leaf = CPUID4_INFO_IDX(cpu,index); | ||
773 | cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map)); | ||
774 | |||
775 | return ret; | ||
776 | } | ||
777 | |||
761 | #ifdef CONFIG_SMP | 778 | #ifdef CONFIG_SMP |
762 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | 779 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) |
763 | { | 780 | { |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 17be5ec7cbba..115e8951e8c8 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1016,6 +1016,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \ | |||
1016 | call_function_interrupt smp_call_function_interrupt | 1016 | call_function_interrupt smp_call_function_interrupt |
1017 | apicinterrupt RESCHEDULE_VECTOR \ | 1017 | apicinterrupt RESCHEDULE_VECTOR \ |
1018 | reschedule_interrupt smp_reschedule_interrupt | 1018 | reschedule_interrupt smp_reschedule_interrupt |
1019 | apicinterrupt PULL_TIMERS_VECTOR \ | ||
1020 | pull_timers_interrupt smp_pull_timers_interrupt | ||
1019 | #endif | 1021 | #endif |
1020 | 1022 | ||
1021 | apicinterrupt ERROR_APIC_VECTOR \ | 1023 | apicinterrupt ERROR_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c new file mode 100644 index 000000000000..37cc33252713 --- /dev/null +++ b/arch/x86/kernel/ft_event.c | |||
@@ -0,0 +1,118 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include <litmus/feather_trace.h> | ||
4 | |||
5 | /* the feather trace management functions assume | ||
6 | * exclusive access to the event table | ||
7 | */ | ||
8 | |||
9 | #ifndef CONFIG_DEBUG_RODATA | ||
10 | |||
11 | #define BYTE_JUMP 0xeb | ||
12 | #define BYTE_JUMP_LEN 0x02 | ||
13 | |||
14 | /* for each event, there is an entry in the event table */ | ||
15 | struct trace_event { | ||
16 | long id; | ||
17 | long count; | ||
18 | long start_addr; | ||
19 | long end_addr; | ||
20 | }; | ||
21 | |||
22 | extern struct trace_event __start___event_table[]; | ||
23 | extern struct trace_event __stop___event_table[]; | ||
24 | |||
25 | /* Workaround: if no events are defined, then the event_table section does not | ||
26 | * exist and the above references cause linker errors. This could probably be | ||
27 | * fixed by adjusting the linker script, but it is easier to maintain for us if | ||
28 | * we simply create a dummy symbol in the event table section. | ||
29 | */ | ||
30 | int __event_table_dummy[0] __attribute__ ((section("__event_table"))); | ||
31 | |||
32 | int ft_enable_event(unsigned long id) | ||
33 | { | ||
34 | struct trace_event* te = __start___event_table; | ||
35 | int count = 0; | ||
36 | char* delta; | ||
37 | unsigned char* instr; | ||
38 | |||
39 | while (te < __stop___event_table) { | ||
40 | if (te->id == id && ++te->count == 1) { | ||
41 | instr = (unsigned char*) te->start_addr; | ||
42 | /* make sure we don't clobber something wrong */ | ||
43 | if (*instr == BYTE_JUMP) { | ||
44 | delta = (((unsigned char*) te->start_addr) + 1); | ||
45 | *delta = 0; | ||
46 | } | ||
47 | } | ||
48 | if (te->id == id) | ||
49 | count++; | ||
50 | te++; | ||
51 | } | ||
52 | |||
53 | printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count); | ||
54 | return count; | ||
55 | } | ||
56 | |||
57 | int ft_disable_event(unsigned long id) | ||
58 | { | ||
59 | struct trace_event* te = __start___event_table; | ||
60 | int count = 0; | ||
61 | char* delta; | ||
62 | unsigned char* instr; | ||
63 | |||
64 | while (te < __stop___event_table) { | ||
65 | if (te->id == id && --te->count == 0) { | ||
66 | instr = (unsigned char*) te->start_addr; | ||
67 | if (*instr == BYTE_JUMP) { | ||
68 | delta = (((unsigned char*) te->start_addr) + 1); | ||
69 | *delta = te->end_addr - te->start_addr - | ||
70 | BYTE_JUMP_LEN; | ||
71 | } | ||
72 | } | ||
73 | if (te->id == id) | ||
74 | count++; | ||
75 | te++; | ||
76 | } | ||
77 | |||
78 | printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count); | ||
79 | return count; | ||
80 | } | ||
81 | |||
82 | int ft_disable_all_events(void) | ||
83 | { | ||
84 | struct trace_event* te = __start___event_table; | ||
85 | int count = 0; | ||
86 | char* delta; | ||
87 | unsigned char* instr; | ||
88 | |||
89 | while (te < __stop___event_table) { | ||
90 | if (te->count) { | ||
91 | instr = (unsigned char*) te->start_addr; | ||
92 | if (*instr == BYTE_JUMP) { | ||
93 | delta = (((unsigned char*) te->start_addr) | ||
94 | + 1); | ||
95 | *delta = te->end_addr - te->start_addr - | ||
96 | BYTE_JUMP_LEN; | ||
97 | te->count = 0; | ||
98 | count++; | ||
99 | } | ||
100 | } | ||
101 | te++; | ||
102 | } | ||
103 | return count; | ||
104 | } | ||
105 | |||
106 | int ft_is_event_enabled(unsigned long id) | ||
107 | { | ||
108 | struct trace_event* te = __start___event_table; | ||
109 | |||
110 | while (te < __stop___event_table) { | ||
111 | if (te->id == id) | ||
112 | return te->count; | ||
113 | te++; | ||
114 | } | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | #endif | ||
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 990ae7cfc578..9772b1a0f9a4 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -189,6 +189,9 @@ static void __init smp_intr_init(void) | |||
189 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, | 189 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, |
190 | call_function_single_interrupt); | 190 | call_function_single_interrupt); |
191 | 191 | ||
192 | /* IPI for hrtimer pulling on remote cpus */ | ||
193 | alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt); | ||
194 | |||
192 | /* Low priority IPI to cleanup after moving an irq */ | 195 | /* Low priority IPI to cleanup after moving an irq */ |
193 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 196 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
194 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | 197 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index d801210945d6..74cca6014c0e 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -23,6 +23,10 @@ | |||
23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
24 | #include <linux/gfp.h> | 24 | #include <linux/gfp.h> |
25 | 25 | ||
26 | #include <litmus/preempt.h> | ||
27 | #include <litmus/debug_trace.h> | ||
28 | #include <litmus/trace.h> | ||
29 | |||
26 | #include <asm/mtrr.h> | 30 | #include <asm/mtrr.h> |
27 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
28 | #include <asm/mmu_context.h> | 32 | #include <asm/mmu_context.h> |
@@ -118,6 +122,7 @@ static void native_smp_send_reschedule(int cpu) | |||
118 | WARN_ON(1); | 122 | WARN_ON(1); |
119 | return; | 123 | return; |
120 | } | 124 | } |
125 | TS_SEND_RESCHED_START(cpu); | ||
121 | apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); | 126 | apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); |
122 | } | 127 | } |
123 | 128 | ||
@@ -147,6 +152,16 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
147 | free_cpumask_var(allbutself); | 152 | free_cpumask_var(allbutself); |
148 | } | 153 | } |
149 | 154 | ||
155 | /* trigger timers on remote cpu */ | ||
156 | void smp_send_pull_timers(int cpu) | ||
157 | { | ||
158 | if (unlikely(cpu_is_offline(cpu))) { | ||
159 | WARN_ON(1); | ||
160 | return; | ||
161 | } | ||
162 | apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR); | ||
163 | } | ||
164 | |||
150 | /* | 165 | /* |
151 | * this function calls the 'stop' function on all other CPUs in the system. | 166 | * this function calls the 'stop' function on all other CPUs in the system. |
152 | */ | 167 | */ |
@@ -198,7 +213,10 @@ static void native_smp_send_stop(void) | |||
198 | void smp_reschedule_interrupt(struct pt_regs *regs) | 213 | void smp_reschedule_interrupt(struct pt_regs *regs) |
199 | { | 214 | { |
200 | ack_APIC_irq(); | 215 | ack_APIC_irq(); |
216 | /* LITMUS^RT: this IPI might need to trigger the sched state machine. */ | ||
217 | sched_state_ipi(); | ||
201 | inc_irq_stat(irq_resched_count); | 218 | inc_irq_stat(irq_resched_count); |
219 | TS_SEND_RESCHED_END; | ||
202 | /* | 220 | /* |
203 | * KVM uses this interrupt to force a cpu out of guest mode | 221 | * KVM uses this interrupt to force a cpu out of guest mode |
204 | */ | 222 | */ |
@@ -222,6 +240,15 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
222 | irq_exit(); | 240 | irq_exit(); |
223 | } | 241 | } |
224 | 242 | ||
243 | extern void hrtimer_pull(void); | ||
244 | |||
245 | void smp_pull_timers_interrupt(struct pt_regs *regs) | ||
246 | { | ||
247 | ack_APIC_irq(); | ||
248 | TRACE("pull timer interrupt\n"); | ||
249 | hrtimer_pull(); | ||
250 | } | ||
251 | |||
225 | struct smp_ops smp_ops = { | 252 | struct smp_ops smp_ops = { |
226 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 253 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
227 | .smp_prepare_cpus = native_smp_prepare_cpus, | 254 | .smp_prepare_cpus = native_smp_prepare_cpus, |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index b35786dc9b8f..37702905f658 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -340,3 +340,15 @@ ENTRY(sys_call_table) | |||
340 | .long sys_fanotify_init | 340 | .long sys_fanotify_init |
341 | .long sys_fanotify_mark | 341 | .long sys_fanotify_mark |
342 | .long sys_prlimit64 /* 340 */ | 342 | .long sys_prlimit64 /* 340 */ |
343 | .long sys_set_rt_task_param /* LITMUS^RT 341 */ | ||
344 | .long sys_get_rt_task_param | ||
345 | .long sys_complete_job | ||
346 | .long sys_od_open | ||
347 | .long sys_od_close | ||
348 | .long sys_litmus_lock | ||
349 | .long sys_litmus_unlock | ||
350 | .long sys_query_job_no | ||
351 | .long sys_wait_for_job_release | ||
352 | .long sys_wait_for_ts_release | ||
353 | .long sys_release_ts | ||
354 | .long sys_null_call | ||
diff --git a/drivers/tty/vt/consolemap_deftbl.c b/drivers/tty/vt/consolemap_deftbl.c new file mode 100644 index 000000000000..5f141383566b --- /dev/null +++ b/drivers/tty/vt/consolemap_deftbl.c | |||
@@ -0,0 +1,86 @@ | |||
1 | /* | ||
2 | * Do not edit this file; it was automatically generated by | ||
3 | * | ||
4 | * conmakehash drivers/tty/vt/cp437.uni > [this file] | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | #include <linux/types.h> | ||
9 | |||
10 | u8 dfont_unicount[256] = | ||
11 | { | ||
12 | 1, 1, 1, 1, 2, 1, 1, 1, | ||
13 | 1, 1, 1, 1, 1, 1, 1, 2, | ||
14 | 2, 2, 1, 1, 1, 1, 1, 1, | ||
15 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
16 | 1, 1, 2, 1, 1, 1, 1, 2, | ||
17 | 1, 1, 1, 1, 2, 2, 1, 1, | ||
18 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
19 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
20 | 1, 5, 1, 2, 2, 4, 1, 1, | ||
21 | 1, 5, 1, 2, 1, 1, 1, 5, | ||
22 | 1, 1, 2, 1, 1, 4, 1, 1, | ||
23 | 1, 2, 1, 1, 1, 1, 1, 3, | ||
24 | 1, 2, 1, 1, 1, 1, 1, 1, | ||
25 | 1, 1, 1, 1, 1, 1, 1, 2, | ||
26 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
27 | 2, 2, 1, 1, 2, 1, 1, 1, | ||
28 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
29 | 1, 1, 1, 1, 1, 1, 1, 2, | ||
30 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
31 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
32 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
33 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
34 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
35 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
36 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
37 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
38 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
39 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
40 | 1, 2, 1, 1, 1, 1, 2, 1, | ||
41 | 2, 1, 2, 2, 1, 2, 2, 1, | ||
42 | 1, 1, 1, 1, 1, 1, 1, 1, | ||
43 | 1, 1, 1, 1, 1, 1, 2, 1 | ||
44 | }; | ||
45 | |||
46 | u16 dfont_unitable[303] = | ||
47 | { | ||
48 | 0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x25c6, 0x2663, 0x2660, | ||
49 | 0x2022, 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, | ||
50 | 0x263c, 0x00a4, 0x25b6, 0x25ba, 0x25c0, 0x25c4, 0x2195, 0x203c, | ||
51 | 0x00b6, 0x00a7, 0x25ac, 0x21a8, 0x2191, 0x2193, 0x2192, 0x2190, | ||
52 | 0x221f, 0x2194, 0x25b2, 0x25bc, 0x0020, 0x0021, 0x0022, 0x00a8, | ||
53 | 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x00b4, 0x0028, 0x0029, | ||
54 | 0x002a, 0x002b, 0x002c, 0x00b8, 0x002d, 0x00ad, 0x002e, 0x002f, | ||
55 | 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, | ||
56 | 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, | ||
57 | 0x0040, 0x0041, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x0042, 0x0043, | ||
58 | 0x00a9, 0x0044, 0x00d0, 0x0045, 0x00c8, 0x00ca, 0x00cb, 0x0046, | ||
59 | 0x0047, 0x0048, 0x0049, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x004a, | ||
60 | 0x004b, 0x212a, 0x004c, 0x004d, 0x004e, 0x004f, 0x00d2, 0x00d3, | ||
61 | 0x00d4, 0x00d5, 0x0050, 0x0051, 0x0052, 0x00ae, 0x0053, 0x0054, | ||
62 | 0x0055, 0x00d9, 0x00da, 0x00db, 0x0056, 0x0057, 0x0058, 0x0059, | ||
63 | 0x00dd, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x23bd, | ||
64 | 0xf804, 0x0060, 0x0061, 0x00e3, 0x0062, 0x0063, 0x0064, 0x0065, | ||
65 | 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, | ||
66 | 0x006e, 0x006f, 0x00f5, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, | ||
67 | 0x0075, 0x0076, 0x0077, 0x0078, 0x00d7, 0x0079, 0x00fd, 0x007a, | ||
68 | 0x007b, 0x007c, 0x00a6, 0x007d, 0x007e, 0x2302, 0x00c7, 0x00fc, | ||
69 | 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00eb, | ||
70 | 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x212b, 0x00c9, | ||
71 | 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 0x00ff, | ||
72 | 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, 0x00e1, | ||
73 | 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf, | ||
74 | 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 0x2591, | ||
75 | 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, | ||
76 | 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, 0x2514, | ||
77 | 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a, | ||
78 | 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, 0x2568, | ||
79 | 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a, | ||
80 | 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 0x03b1, | ||
81 | 0x03b2, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03bc, | ||
82 | 0x03c4, 0x03a6, 0x00d8, 0x0398, 0x03a9, 0x2126, 0x03b4, 0x00f0, | ||
83 | 0x221e, 0x03c6, 0x00f8, 0x03b5, 0x2208, 0x2229, 0x2261, 0x00b1, | ||
84 | 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219, | ||
85 | 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0xfffd, 0x00a0 | ||
86 | }; | ||
diff --git a/drivers/tty/vt/defkeymap.c b/drivers/tty/vt/defkeymap.c new file mode 100644 index 000000000000..d2208dfe3f67 --- /dev/null +++ b/drivers/tty/vt/defkeymap.c | |||
@@ -0,0 +1,262 @@ | |||
1 | /* Do not edit this file! It was automatically generated by */ | ||
2 | /* loadkeys --mktable defkeymap.map > defkeymap.c */ | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include <linux/keyboard.h> | ||
6 | #include <linux/kd.h> | ||
7 | |||
8 | u_short plain_map[NR_KEYS] = { | ||
9 | 0xf200, 0xf01b, 0xf031, 0xf032, 0xf033, 0xf034, 0xf035, 0xf036, | ||
10 | 0xf037, 0xf038, 0xf039, 0xf030, 0xf02d, 0xf03d, 0xf07f, 0xf009, | ||
11 | 0xfb71, 0xfb77, 0xfb65, 0xfb72, 0xfb74, 0xfb79, 0xfb75, 0xfb69, | ||
12 | 0xfb6f, 0xfb70, 0xf05b, 0xf05d, 0xf201, 0xf702, 0xfb61, 0xfb73, | ||
13 | 0xfb64, 0xfb66, 0xfb67, 0xfb68, 0xfb6a, 0xfb6b, 0xfb6c, 0xf03b, | ||
14 | 0xf027, 0xf060, 0xf700, 0xf05c, 0xfb7a, 0xfb78, 0xfb63, 0xfb76, | ||
15 | 0xfb62, 0xfb6e, 0xfb6d, 0xf02c, 0xf02e, 0xf02f, 0xf700, 0xf30c, | ||
16 | 0xf703, 0xf020, 0xf207, 0xf100, 0xf101, 0xf102, 0xf103, 0xf104, | ||
17 | 0xf105, 0xf106, 0xf107, 0xf108, 0xf109, 0xf208, 0xf209, 0xf307, | ||
18 | 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301, | ||
19 | 0xf302, 0xf303, 0xf300, 0xf310, 0xf206, 0xf200, 0xf03c, 0xf10a, | ||
20 | 0xf10b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
21 | 0xf30e, 0xf702, 0xf30d, 0xf01c, 0xf701, 0xf205, 0xf114, 0xf603, | ||
22 | 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116, | ||
23 | 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d, | ||
24 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
25 | }; | ||
26 | |||
27 | u_short shift_map[NR_KEYS] = { | ||
28 | 0xf200, 0xf01b, 0xf021, 0xf040, 0xf023, 0xf024, 0xf025, 0xf05e, | ||
29 | 0xf026, 0xf02a, 0xf028, 0xf029, 0xf05f, 0xf02b, 0xf07f, 0xf009, | ||
30 | 0xfb51, 0xfb57, 0xfb45, 0xfb52, 0xfb54, 0xfb59, 0xfb55, 0xfb49, | ||
31 | 0xfb4f, 0xfb50, 0xf07b, 0xf07d, 0xf201, 0xf702, 0xfb41, 0xfb53, | ||
32 | 0xfb44, 0xfb46, 0xfb47, 0xfb48, 0xfb4a, 0xfb4b, 0xfb4c, 0xf03a, | ||
33 | 0xf022, 0xf07e, 0xf700, 0xf07c, 0xfb5a, 0xfb58, 0xfb43, 0xfb56, | ||
34 | 0xfb42, 0xfb4e, 0xfb4d, 0xf03c, 0xf03e, 0xf03f, 0xf700, 0xf30c, | ||
35 | 0xf703, 0xf020, 0xf207, 0xf10a, 0xf10b, 0xf10c, 0xf10d, 0xf10e, | ||
36 | 0xf10f, 0xf110, 0xf111, 0xf112, 0xf113, 0xf213, 0xf203, 0xf307, | ||
37 | 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301, | ||
38 | 0xf302, 0xf303, 0xf300, 0xf310, 0xf206, 0xf200, 0xf03e, 0xf10a, | ||
39 | 0xf10b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
40 | 0xf30e, 0xf702, 0xf30d, 0xf200, 0xf701, 0xf205, 0xf114, 0xf603, | ||
41 | 0xf20b, 0xf601, 0xf602, 0xf117, 0xf600, 0xf20a, 0xf115, 0xf116, | ||
42 | 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d, | ||
43 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
44 | }; | ||
45 | |||
46 | u_short altgr_map[NR_KEYS] = { | ||
47 | 0xf200, 0xf200, 0xf200, 0xf040, 0xf200, 0xf024, 0xf200, 0xf200, | ||
48 | 0xf07b, 0xf05b, 0xf05d, 0xf07d, 0xf05c, 0xf200, 0xf200, 0xf200, | ||
49 | 0xfb71, 0xfb77, 0xf918, 0xfb72, 0xfb74, 0xfb79, 0xfb75, 0xfb69, | ||
50 | 0xfb6f, 0xfb70, 0xf200, 0xf07e, 0xf201, 0xf702, 0xf914, 0xfb73, | ||
51 | 0xf917, 0xf919, 0xfb67, 0xfb68, 0xfb6a, 0xfb6b, 0xfb6c, 0xf200, | ||
52 | 0xf200, 0xf200, 0xf700, 0xf200, 0xfb7a, 0xfb78, 0xf916, 0xfb76, | ||
53 | 0xf915, 0xfb6e, 0xfb6d, 0xf200, 0xf200, 0xf200, 0xf700, 0xf30c, | ||
54 | 0xf703, 0xf200, 0xf207, 0xf50c, 0xf50d, 0xf50e, 0xf50f, 0xf510, | ||
55 | 0xf511, 0xf512, 0xf513, 0xf514, 0xf515, 0xf208, 0xf202, 0xf911, | ||
56 | 0xf912, 0xf913, 0xf30b, 0xf90e, 0xf90f, 0xf910, 0xf30a, 0xf90b, | ||
57 | 0xf90c, 0xf90d, 0xf90a, 0xf310, 0xf206, 0xf200, 0xf07c, 0xf516, | ||
58 | 0xf517, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
59 | 0xf30e, 0xf702, 0xf30d, 0xf200, 0xf701, 0xf205, 0xf114, 0xf603, | ||
60 | 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116, | ||
61 | 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d, | ||
62 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
63 | }; | ||
64 | |||
65 | u_short ctrl_map[NR_KEYS] = { | ||
66 | 0xf200, 0xf200, 0xf200, 0xf000, 0xf01b, 0xf01c, 0xf01d, 0xf01e, | ||
67 | 0xf01f, 0xf07f, 0xf200, 0xf200, 0xf01f, 0xf200, 0xf008, 0xf200, | ||
68 | 0xf011, 0xf017, 0xf005, 0xf012, 0xf014, 0xf019, 0xf015, 0xf009, | ||
69 | 0xf00f, 0xf010, 0xf01b, 0xf01d, 0xf201, 0xf702, 0xf001, 0xf013, | ||
70 | 0xf004, 0xf006, 0xf007, 0xf008, 0xf00a, 0xf00b, 0xf00c, 0xf200, | ||
71 | 0xf007, 0xf000, 0xf700, 0xf01c, 0xf01a, 0xf018, 0xf003, 0xf016, | ||
72 | 0xf002, 0xf00e, 0xf00d, 0xf200, 0xf20e, 0xf07f, 0xf700, 0xf30c, | ||
73 | 0xf703, 0xf000, 0xf207, 0xf100, 0xf101, 0xf102, 0xf103, 0xf104, | ||
74 | 0xf105, 0xf106, 0xf107, 0xf108, 0xf109, 0xf208, 0xf204, 0xf307, | ||
75 | 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301, | ||
76 | 0xf302, 0xf303, 0xf300, 0xf310, 0xf206, 0xf200, 0xf200, 0xf10a, | ||
77 | 0xf10b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
78 | 0xf30e, 0xf702, 0xf30d, 0xf01c, 0xf701, 0xf205, 0xf114, 0xf603, | ||
79 | 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116, | ||
80 | 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d, | ||
81 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
82 | }; | ||
83 | |||
84 | u_short shift_ctrl_map[NR_KEYS] = { | ||
85 | 0xf200, 0xf200, 0xf200, 0xf000, 0xf200, 0xf200, 0xf200, 0xf200, | ||
86 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf01f, 0xf200, 0xf200, 0xf200, | ||
87 | 0xf011, 0xf017, 0xf005, 0xf012, 0xf014, 0xf019, 0xf015, 0xf009, | ||
88 | 0xf00f, 0xf010, 0xf200, 0xf200, 0xf201, 0xf702, 0xf001, 0xf013, | ||
89 | 0xf004, 0xf006, 0xf007, 0xf008, 0xf00a, 0xf00b, 0xf00c, 0xf200, | ||
90 | 0xf200, 0xf200, 0xf700, 0xf200, 0xf01a, 0xf018, 0xf003, 0xf016, | ||
91 | 0xf002, 0xf00e, 0xf00d, 0xf200, 0xf200, 0xf200, 0xf700, 0xf30c, | ||
92 | 0xf703, 0xf200, 0xf207, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
93 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf208, 0xf200, 0xf307, | ||
94 | 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301, | ||
95 | 0xf302, 0xf303, 0xf300, 0xf310, 0xf206, 0xf200, 0xf200, 0xf200, | ||
96 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
97 | 0xf30e, 0xf702, 0xf30d, 0xf200, 0xf701, 0xf205, 0xf114, 0xf603, | ||
98 | 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116, | ||
99 | 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d, | ||
100 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
101 | }; | ||
102 | |||
103 | u_short alt_map[NR_KEYS] = { | ||
104 | 0xf200, 0xf81b, 0xf831, 0xf832, 0xf833, 0xf834, 0xf835, 0xf836, | ||
105 | 0xf837, 0xf838, 0xf839, 0xf830, 0xf82d, 0xf83d, 0xf87f, 0xf809, | ||
106 | 0xf871, 0xf877, 0xf865, 0xf872, 0xf874, 0xf879, 0xf875, 0xf869, | ||
107 | 0xf86f, 0xf870, 0xf85b, 0xf85d, 0xf80d, 0xf702, 0xf861, 0xf873, | ||
108 | 0xf864, 0xf866, 0xf867, 0xf868, 0xf86a, 0xf86b, 0xf86c, 0xf83b, | ||
109 | 0xf827, 0xf860, 0xf700, 0xf85c, 0xf87a, 0xf878, 0xf863, 0xf876, | ||
110 | 0xf862, 0xf86e, 0xf86d, 0xf82c, 0xf82e, 0xf82f, 0xf700, 0xf30c, | ||
111 | 0xf703, 0xf820, 0xf207, 0xf500, 0xf501, 0xf502, 0xf503, 0xf504, | ||
112 | 0xf505, 0xf506, 0xf507, 0xf508, 0xf509, 0xf208, 0xf209, 0xf907, | ||
113 | 0xf908, 0xf909, 0xf30b, 0xf904, 0xf905, 0xf906, 0xf30a, 0xf901, | ||
114 | 0xf902, 0xf903, 0xf900, 0xf310, 0xf206, 0xf200, 0xf83c, 0xf50a, | ||
115 | 0xf50b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
116 | 0xf30e, 0xf702, 0xf30d, 0xf01c, 0xf701, 0xf205, 0xf114, 0xf603, | ||
117 | 0xf118, 0xf210, 0xf211, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116, | ||
118 | 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d, | ||
119 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
120 | }; | ||
121 | |||
122 | u_short ctrl_alt_map[NR_KEYS] = { | ||
123 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
124 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
125 | 0xf811, 0xf817, 0xf805, 0xf812, 0xf814, 0xf819, 0xf815, 0xf809, | ||
126 | 0xf80f, 0xf810, 0xf200, 0xf200, 0xf201, 0xf702, 0xf801, 0xf813, | ||
127 | 0xf804, 0xf806, 0xf807, 0xf808, 0xf80a, 0xf80b, 0xf80c, 0xf200, | ||
128 | 0xf200, 0xf200, 0xf700, 0xf200, 0xf81a, 0xf818, 0xf803, 0xf816, | ||
129 | 0xf802, 0xf80e, 0xf80d, 0xf200, 0xf200, 0xf200, 0xf700, 0xf30c, | ||
130 | 0xf703, 0xf200, 0xf207, 0xf500, 0xf501, 0xf502, 0xf503, 0xf504, | ||
131 | 0xf505, 0xf506, 0xf507, 0xf508, 0xf509, 0xf208, 0xf200, 0xf307, | ||
132 | 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301, | ||
133 | 0xf302, 0xf303, 0xf300, 0xf20c, 0xf206, 0xf200, 0xf200, 0xf50a, | ||
134 | 0xf50b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
135 | 0xf30e, 0xf702, 0xf30d, 0xf200, 0xf701, 0xf205, 0xf114, 0xf603, | ||
136 | 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf20c, | ||
137 | 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d, | ||
138 | 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, | ||
139 | }; | ||
140 | |||
141 | ushort *key_maps[MAX_NR_KEYMAPS] = { | ||
142 | plain_map, shift_map, altgr_map, NULL, | ||
143 | ctrl_map, shift_ctrl_map, NULL, NULL, | ||
144 | alt_map, NULL, NULL, NULL, | ||
145 | ctrl_alt_map, NULL | ||
146 | }; | ||
147 | |||
148 | unsigned int keymap_count = 7; | ||
149 | |||
150 | /* | ||
151 | * Philosophy: most people do not define more strings, but they who do | ||
152 | * often want quite a lot of string space. So, we statically allocate | ||
153 | * the default and allocate dynamically in chunks of 512 bytes. | ||
154 | */ | ||
155 | |||
156 | char func_buf[] = { | ||
157 | '\033', '[', '[', 'A', 0, | ||
158 | '\033', '[', '[', 'B', 0, | ||
159 | '\033', '[', '[', 'C', 0, | ||
160 | '\033', '[', '[', 'D', 0, | ||
161 | '\033', '[', '[', 'E', 0, | ||
162 | '\033', '[', '1', '7', '~', 0, | ||
163 | '\033', '[', '1', '8', '~', 0, | ||
164 | '\033', '[', '1', '9', '~', 0, | ||
165 | '\033', '[', '2', '0', '~', 0, | ||
166 | '\033', '[', '2', '1', '~', 0, | ||
167 | '\033', '[', '2', '3', '~', 0, | ||
168 | '\033', '[', '2', '4', '~', 0, | ||
169 | '\033', '[', '2', '5', '~', 0, | ||
170 | '\033', '[', '2', '6', '~', 0, | ||
171 | '\033', '[', '2', '8', '~', 0, | ||
172 | '\033', '[', '2', '9', '~', 0, | ||
173 | '\033', '[', '3', '1', '~', 0, | ||
174 | '\033', '[', '3', '2', '~', 0, | ||
175 | '\033', '[', '3', '3', '~', 0, | ||
176 | '\033', '[', '3', '4', '~', 0, | ||
177 | '\033', '[', '1', '~', 0, | ||
178 | '\033', '[', '2', '~', 0, | ||
179 | '\033', '[', '3', '~', 0, | ||
180 | '\033', '[', '4', '~', 0, | ||
181 | '\033', '[', '5', '~', 0, | ||
182 | '\033', '[', '6', '~', 0, | ||
183 | '\033', '[', 'M', 0, | ||
184 | '\033', '[', 'P', 0, | ||
185 | }; | ||
186 | |||
187 | char *funcbufptr = func_buf; | ||
188 | int funcbufsize = sizeof(func_buf); | ||
189 | int funcbufleft = 0; /* space left */ | ||
190 | |||
191 | char *func_table[MAX_NR_FUNC] = { | ||
192 | func_buf + 0, | ||
193 | func_buf + 5, | ||
194 | func_buf + 10, | ||
195 | func_buf + 15, | ||
196 | func_buf + 20, | ||
197 | func_buf + 25, | ||
198 | func_buf + 31, | ||
199 | func_buf + 37, | ||
200 | func_buf + 43, | ||
201 | func_buf + 49, | ||
202 | func_buf + 55, | ||
203 | func_buf + 61, | ||
204 | func_buf + 67, | ||
205 | func_buf + 73, | ||
206 | func_buf + 79, | ||
207 | func_buf + 85, | ||
208 | func_buf + 91, | ||
209 | func_buf + 97, | ||
210 | func_buf + 103, | ||
211 | func_buf + 109, | ||
212 | func_buf + 115, | ||
213 | func_buf + 120, | ||
214 | func_buf + 125, | ||
215 | func_buf + 130, | ||
216 | func_buf + 135, | ||
217 | func_buf + 140, | ||
218 | func_buf + 145, | ||
219 | NULL, | ||
220 | NULL, | ||
221 | func_buf + 149, | ||
222 | NULL, | ||
223 | }; | ||
224 | |||
225 | struct kbdiacruc accent_table[MAX_DIACR] = { | ||
226 | {'`', 'A', 0300}, {'`', 'a', 0340}, | ||
227 | {'\'', 'A', 0301}, {'\'', 'a', 0341}, | ||
228 | {'^', 'A', 0302}, {'^', 'a', 0342}, | ||
229 | {'~', 'A', 0303}, {'~', 'a', 0343}, | ||
230 | {'"', 'A', 0304}, {'"', 'a', 0344}, | ||
231 | {'O', 'A', 0305}, {'o', 'a', 0345}, | ||
232 | {'0', 'A', 0305}, {'0', 'a', 0345}, | ||
233 | {'A', 'A', 0305}, {'a', 'a', 0345}, | ||
234 | {'A', 'E', 0306}, {'a', 'e', 0346}, | ||
235 | {',', 'C', 0307}, {',', 'c', 0347}, | ||
236 | {'`', 'E', 0310}, {'`', 'e', 0350}, | ||
237 | {'\'', 'E', 0311}, {'\'', 'e', 0351}, | ||
238 | {'^', 'E', 0312}, {'^', 'e', 0352}, | ||
239 | {'"', 'E', 0313}, {'"', 'e', 0353}, | ||
240 | {'`', 'I', 0314}, {'`', 'i', 0354}, | ||
241 | {'\'', 'I', 0315}, {'\'', 'i', 0355}, | ||
242 | {'^', 'I', 0316}, {'^', 'i', 0356}, | ||
243 | {'"', 'I', 0317}, {'"', 'i', 0357}, | ||
244 | {'-', 'D', 0320}, {'-', 'd', 0360}, | ||
245 | {'~', 'N', 0321}, {'~', 'n', 0361}, | ||
246 | {'`', 'O', 0322}, {'`', 'o', 0362}, | ||
247 | {'\'', 'O', 0323}, {'\'', 'o', 0363}, | ||
248 | {'^', 'O', 0324}, {'^', 'o', 0364}, | ||
249 | {'~', 'O', 0325}, {'~', 'o', 0365}, | ||
250 | {'"', 'O', 0326}, {'"', 'o', 0366}, | ||
251 | {'/', 'O', 0330}, {'/', 'o', 0370}, | ||
252 | {'`', 'U', 0331}, {'`', 'u', 0371}, | ||
253 | {'\'', 'U', 0332}, {'\'', 'u', 0372}, | ||
254 | {'^', 'U', 0333}, {'^', 'u', 0373}, | ||
255 | {'"', 'U', 0334}, {'"', 'u', 0374}, | ||
256 | {'\'', 'Y', 0335}, {'\'', 'y', 0375}, | ||
257 | {'T', 'H', 0336}, {'t', 'h', 0376}, | ||
258 | {'s', 's', 0337}, {'"', 'y', 0377}, | ||
259 | {'s', 'z', 0337}, {'i', 'j', 0377}, | ||
260 | }; | ||
261 | |||
262 | unsigned int accent_table_size = 68; | ||
@@ -19,7 +19,7 @@ | |||
19 | * current->executable is only used by the procfs. This allows a dispatch | 19 | * current->executable is only used by the procfs. This allows a dispatch |
20 | * table to check for several different types of binary formats. We keep | 20 | * table to check for several different types of binary formats. We keep |
21 | * trying until we recognize the file or we run out of supported binary | 21 | * trying until we recognize the file or we run out of supported binary |
22 | * formats. | 22 | * formats. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
@@ -55,6 +55,8 @@ | |||
55 | #include <linux/fs_struct.h> | 55 | #include <linux/fs_struct.h> |
56 | #include <linux/pipe_fs_i.h> | 56 | #include <linux/pipe_fs_i.h> |
57 | 57 | ||
58 | #include <litmus/litmus.h> | ||
59 | |||
58 | #include <asm/uaccess.h> | 60 | #include <asm/uaccess.h> |
59 | #include <asm/mmu_context.h> | 61 | #include <asm/mmu_context.h> |
60 | #include <asm/tlb.h> | 62 | #include <asm/tlb.h> |
@@ -78,7 +80,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert) | |||
78 | insert ? list_add(&fmt->lh, &formats) : | 80 | insert ? list_add(&fmt->lh, &formats) : |
79 | list_add_tail(&fmt->lh, &formats); | 81 | list_add_tail(&fmt->lh, &formats); |
80 | write_unlock(&binfmt_lock); | 82 | write_unlock(&binfmt_lock); |
81 | return 0; | 83 | return 0; |
82 | } | 84 | } |
83 | 85 | ||
84 | EXPORT_SYMBOL(__register_binfmt); | 86 | EXPORT_SYMBOL(__register_binfmt); |
@@ -1064,7 +1066,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
1064 | group */ | 1066 | group */ |
1065 | 1067 | ||
1066 | current->self_exec_id++; | 1068 | current->self_exec_id++; |
1067 | 1069 | ||
1068 | flush_signal_handlers(current, 0); | 1070 | flush_signal_handlers(current, 0); |
1069 | flush_old_files(current->files); | 1071 | flush_old_files(current->files); |
1070 | } | 1072 | } |
@@ -1154,8 +1156,8 @@ int check_unsafe_exec(struct linux_binprm *bprm) | |||
1154 | return res; | 1156 | return res; |
1155 | } | 1157 | } |
1156 | 1158 | ||
1157 | /* | 1159 | /* |
1158 | * Fill the binprm structure from the inode. | 1160 | * Fill the binprm structure from the inode. |
1159 | * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes | 1161 | * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes |
1160 | * | 1162 | * |
1161 | * This may be called multiple times for binary chains (scripts for example). | 1163 | * This may be called multiple times for binary chains (scripts for example). |
@@ -1367,6 +1369,7 @@ int do_execve(const char * filename, | |||
1367 | goto out_unmark; | 1369 | goto out_unmark; |
1368 | 1370 | ||
1369 | sched_exec(); | 1371 | sched_exec(); |
1372 | litmus_exec(); | ||
1370 | 1373 | ||
1371 | bprm->file = file; | 1374 | bprm->file = file; |
1372 | bprm->filename = filename; | 1375 | bprm->filename = filename; |
diff --git a/fs/inode.c b/fs/inode.c index 86464332e590..d4fe9c031864 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -266,6 +266,8 @@ void inode_init_once(struct inode *inode) | |||
266 | #ifdef CONFIG_FSNOTIFY | 266 | #ifdef CONFIG_FSNOTIFY |
267 | INIT_HLIST_HEAD(&inode->i_fsnotify_marks); | 267 | INIT_HLIST_HEAD(&inode->i_fsnotify_marks); |
268 | #endif | 268 | #endif |
269 | INIT_LIST_HEAD(&inode->i_obj_list); | ||
270 | mutex_init(&inode->i_obj_mutex); | ||
269 | } | 271 | } |
270 | EXPORT_SYMBOL(inode_init_once); | 272 | EXPORT_SYMBOL(inode_init_once); |
271 | 273 | ||
diff --git a/include/linux/completion.h b/include/linux/completion.h index 51e3145196f6..c63950e8a863 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h | |||
@@ -90,6 +90,7 @@ extern bool completion_done(struct completion *x); | |||
90 | 90 | ||
91 | extern void complete(struct completion *); | 91 | extern void complete(struct completion *); |
92 | extern void complete_all(struct completion *); | 92 | extern void complete_all(struct completion *); |
93 | extern void complete_n(struct completion *, int n); | ||
93 | 94 | ||
94 | /** | 95 | /** |
95 | * INIT_COMPLETION: - reinitialize a completion structure | 96 | * INIT_COMPLETION: - reinitialize a completion structure |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 63d069bd80b7..29a672458d27 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -16,8 +16,8 @@ | |||
16 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute | 16 | * nr_file rlimit, so it's safe to set up a ridiculously high absolute |
17 | * upper limit on files-per-process. | 17 | * upper limit on files-per-process. |
18 | * | 18 | * |
19 | * Some programs (notably those using select()) may have to be | 19 | * Some programs (notably those using select()) may have to be |
20 | * recompiled to take full advantage of the new limits.. | 20 | * recompiled to take full advantage of the new limits.. |
21 | */ | 21 | */ |
22 | 22 | ||
23 | /* Fixed constants first: */ | 23 | /* Fixed constants first: */ |
@@ -172,7 +172,7 @@ struct inodes_stat_t { | |||
172 | #define SEL_EX 4 | 172 | #define SEL_EX 4 |
173 | 173 | ||
174 | /* public flags for file_system_type */ | 174 | /* public flags for file_system_type */ |
175 | #define FS_REQUIRES_DEV 1 | 175 | #define FS_REQUIRES_DEV 1 |
176 | #define FS_BINARY_MOUNTDATA 2 | 176 | #define FS_BINARY_MOUNTDATA 2 |
177 | #define FS_HAS_SUBTYPE 4 | 177 | #define FS_HAS_SUBTYPE 4 |
178 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | 178 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ |
@@ -470,7 +470,7 @@ struct iattr { | |||
470 | */ | 470 | */ |
471 | #include <linux/quota.h> | 471 | #include <linux/quota.h> |
472 | 472 | ||
473 | /** | 473 | /** |
474 | * enum positive_aop_returns - aop return codes with specific semantics | 474 | * enum positive_aop_returns - aop return codes with specific semantics |
475 | * | 475 | * |
476 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has | 476 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has |
@@ -480,7 +480,7 @@ struct iattr { | |||
480 | * be a candidate for writeback again in the near | 480 | * be a candidate for writeback again in the near |
481 | * future. Other callers must be careful to unlock | 481 | * future. Other callers must be careful to unlock |
482 | * the page if they get this return. Returned by | 482 | * the page if they get this return. Returned by |
483 | * writepage(); | 483 | * writepage(); |
484 | * | 484 | * |
485 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has | 485 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has |
486 | * unlocked it and the page might have been truncated. | 486 | * unlocked it and the page might have been truncated. |
@@ -721,6 +721,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping) | |||
721 | 721 | ||
722 | struct posix_acl; | 722 | struct posix_acl; |
723 | #define ACL_NOT_CACHED ((void *)(-1)) | 723 | #define ACL_NOT_CACHED ((void *)(-1)) |
724 | struct inode_obj_id_table; | ||
724 | 725 | ||
725 | struct inode { | 726 | struct inode { |
726 | struct hlist_node i_hash; | 727 | struct hlist_node i_hash; |
@@ -784,6 +785,8 @@ struct inode { | |||
784 | struct posix_acl *i_acl; | 785 | struct posix_acl *i_acl; |
785 | struct posix_acl *i_default_acl; | 786 | struct posix_acl *i_default_acl; |
786 | #endif | 787 | #endif |
788 | struct list_head i_obj_list; | ||
789 | struct mutex i_obj_mutex; | ||
787 | void *i_private; /* fs or device private pointer */ | 790 | void *i_private; /* fs or device private pointer */ |
788 | }; | 791 | }; |
789 | 792 | ||
@@ -997,10 +1000,10 @@ static inline int file_check_writeable(struct file *filp) | |||
997 | 1000 | ||
998 | #define MAX_NON_LFS ((1UL<<31) - 1) | 1001 | #define MAX_NON_LFS ((1UL<<31) - 1) |
999 | 1002 | ||
1000 | /* Page cache limit. The filesystems should put that into their s_maxbytes | 1003 | /* Page cache limit. The filesystems should put that into their s_maxbytes |
1001 | limits, otherwise bad things can happen in VM. */ | 1004 | limits, otherwise bad things can happen in VM. */ |
1002 | #if BITS_PER_LONG==32 | 1005 | #if BITS_PER_LONG==32 |
1003 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) | 1006 | #define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) |
1004 | #elif BITS_PER_LONG==64 | 1007 | #elif BITS_PER_LONG==64 |
1005 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL | 1008 | #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL |
1006 | #endif | 1009 | #endif |
@@ -2145,7 +2148,7 @@ extern int may_open(struct path *, int, int); | |||
2145 | 2148 | ||
2146 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); | 2149 | extern int kernel_read(struct file *, loff_t, char *, unsigned long); |
2147 | extern struct file * open_exec(const char *); | 2150 | extern struct file * open_exec(const char *); |
2148 | 2151 | ||
2149 | /* fs/dcache.c -- generic fs support functions */ | 2152 | /* fs/dcache.c -- generic fs support functions */ |
2150 | extern int is_subdir(struct dentry *, struct dentry *); | 2153 | extern int is_subdir(struct dentry *, struct dentry *); |
2151 | extern int path_is_under(struct path *, struct path *); | 2154 | extern int path_is_under(struct path *, struct path *); |
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0c1b857d3d..76da541c1f66 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h | |||
@@ -167,6 +167,7 @@ struct hrtimer_clock_base { | |||
167 | * @nr_retries: Total number of hrtimer interrupt retries | 167 | * @nr_retries: Total number of hrtimer interrupt retries |
168 | * @nr_hangs: Total number of hrtimer interrupt hangs | 168 | * @nr_hangs: Total number of hrtimer interrupt hangs |
169 | * @max_hang_time: Maximum time spent in hrtimer_interrupt | 169 | * @max_hang_time: Maximum time spent in hrtimer_interrupt |
170 | * @to_pull: LITMUS^RT list of timers to be pulled on this cpu | ||
170 | */ | 171 | */ |
171 | struct hrtimer_cpu_base { | 172 | struct hrtimer_cpu_base { |
172 | raw_spinlock_t lock; | 173 | raw_spinlock_t lock; |
@@ -180,8 +181,32 @@ struct hrtimer_cpu_base { | |||
180 | unsigned long nr_hangs; | 181 | unsigned long nr_hangs; |
181 | ktime_t max_hang_time; | 182 | ktime_t max_hang_time; |
182 | #endif | 183 | #endif |
184 | struct list_head to_pull; | ||
183 | }; | 185 | }; |
184 | 186 | ||
187 | #ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS | ||
188 | |||
189 | #define HRTIMER_START_ON_INACTIVE 0 | ||
190 | #define HRTIMER_START_ON_QUEUED 1 | ||
191 | |||
192 | /* | ||
193 | * struct hrtimer_start_on_info - save timer info on remote cpu | ||
194 | * @list: list of hrtimer_start_on_info on remote cpu (to_pull) | ||
195 | * @timer: timer to be triggered on remote cpu | ||
196 | * @time: time event | ||
197 | * @mode: timer mode | ||
198 | * @state: activity flag | ||
199 | */ | ||
200 | struct hrtimer_start_on_info { | ||
201 | struct list_head list; | ||
202 | struct hrtimer *timer; | ||
203 | ktime_t time; | ||
204 | enum hrtimer_mode mode; | ||
205 | atomic_t state; | ||
206 | }; | ||
207 | |||
208 | #endif | ||
209 | |||
185 | static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) | 210 | static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) |
186 | { | 211 | { |
187 | timer->_expires = time; | 212 | timer->_expires = time; |
@@ -348,6 +373,13 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
348 | unsigned long delta_ns, | 373 | unsigned long delta_ns, |
349 | const enum hrtimer_mode mode, int wakeup); | 374 | const enum hrtimer_mode mode, int wakeup); |
350 | 375 | ||
376 | #ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS | ||
377 | extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info); | ||
378 | extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info, | ||
379 | struct hrtimer *timer, ktime_t time, | ||
380 | const enum hrtimer_mode mode); | ||
381 | #endif | ||
382 | |||
351 | extern int hrtimer_cancel(struct hrtimer *timer); | 383 | extern int hrtimer_cancel(struct hrtimer *timer); |
352 | extern int hrtimer_try_to_cancel(struct hrtimer *timer); | 384 | extern int hrtimer_try_to_cancel(struct hrtimer *timer); |
353 | 385 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db2d7dd..c9ac4fc837ba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -38,6 +38,7 @@ | |||
38 | #define SCHED_BATCH 3 | 38 | #define SCHED_BATCH 3 |
39 | /* SCHED_ISO: reserved but not implemented yet */ | 39 | /* SCHED_ISO: reserved but not implemented yet */ |
40 | #define SCHED_IDLE 5 | 40 | #define SCHED_IDLE 5 |
41 | #define SCHED_LITMUS 6 | ||
41 | /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ | 42 | /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ |
42 | #define SCHED_RESET_ON_FORK 0x40000000 | 43 | #define SCHED_RESET_ON_FORK 0x40000000 |
43 | 44 | ||
@@ -94,6 +95,9 @@ struct sched_param { | |||
94 | 95 | ||
95 | #include <asm/processor.h> | 96 | #include <asm/processor.h> |
96 | 97 | ||
98 | #include <litmus/rt_param.h> | ||
99 | #include <litmus/preempt.h> | ||
100 | |||
97 | struct exec_domain; | 101 | struct exec_domain; |
98 | struct futex_pi_state; | 102 | struct futex_pi_state; |
99 | struct robust_list_head; | 103 | struct robust_list_head; |
@@ -1159,6 +1163,7 @@ struct sched_rt_entity { | |||
1159 | }; | 1163 | }; |
1160 | 1164 | ||
1161 | struct rcu_node; | 1165 | struct rcu_node; |
1166 | struct od_table_entry; | ||
1162 | 1167 | ||
1163 | struct task_struct { | 1168 | struct task_struct { |
1164 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | 1169 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
@@ -1243,9 +1248,9 @@ struct task_struct { | |||
1243 | unsigned long stack_canary; | 1248 | unsigned long stack_canary; |
1244 | #endif | 1249 | #endif |
1245 | 1250 | ||
1246 | /* | 1251 | /* |
1247 | * pointers to (original) parent process, youngest child, younger sibling, | 1252 | * pointers to (original) parent process, youngest child, younger sibling, |
1248 | * older sibling, respectively. (p->father can be replaced with | 1253 | * older sibling, respectively. (p->father can be replaced with |
1249 | * p->real_parent->pid) | 1254 | * p->real_parent->pid) |
1250 | */ | 1255 | */ |
1251 | struct task_struct *real_parent; /* real parent process */ | 1256 | struct task_struct *real_parent; /* real parent process */ |
@@ -1453,6 +1458,13 @@ struct task_struct { | |||
1453 | int make_it_fail; | 1458 | int make_it_fail; |
1454 | #endif | 1459 | #endif |
1455 | struct prop_local_single dirties; | 1460 | struct prop_local_single dirties; |
1461 | |||
1462 | /* LITMUS RT parameters and state */ | ||
1463 | struct rt_param rt_param; | ||
1464 | |||
1465 | /* references to PI semaphores, etc. */ | ||
1466 | struct od_table_entry *od_table; | ||
1467 | |||
1456 | #ifdef CONFIG_LATENCYTOP | 1468 | #ifdef CONFIG_LATENCYTOP |
1457 | int latency_record_count; | 1469 | int latency_record_count; |
1458 | struct latency_record latency_record[LT_SAVECOUNT]; | 1470 | struct latency_record latency_record[LT_SAVECOUNT]; |
@@ -2014,7 +2026,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s | |||
2014 | spin_unlock_irqrestore(&tsk->sighand->siglock, flags); | 2026 | spin_unlock_irqrestore(&tsk->sighand->siglock, flags); |
2015 | 2027 | ||
2016 | return ret; | 2028 | return ret; |
2017 | } | 2029 | } |
2018 | 2030 | ||
2019 | extern void block_all_signals(int (*notifier)(void *priv), void *priv, | 2031 | extern void block_all_signals(int (*notifier)(void *priv), void *priv, |
2020 | sigset_t *mask); | 2032 | sigset_t *mask); |
@@ -2290,6 +2302,7 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) | |||
2290 | static inline void set_tsk_need_resched(struct task_struct *tsk) | 2302 | static inline void set_tsk_need_resched(struct task_struct *tsk) |
2291 | { | 2303 | { |
2292 | set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); | 2304 | set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); |
2305 | sched_state_will_schedule(tsk); | ||
2293 | } | 2306 | } |
2294 | 2307 | ||
2295 | static inline void clear_tsk_need_resched(struct task_struct *tsk) | 2308 | static inline void clear_tsk_need_resched(struct task_struct *tsk) |
diff --git a/include/linux/smp.h b/include/linux/smp.h index cfa2d20e35f1..f86d40768e7f 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
@@ -80,6 +80,11 @@ int smp_call_function_any(const struct cpumask *mask, | |||
80 | void (*func)(void *info), void *info, int wait); | 80 | void (*func)(void *info), void *info, int wait); |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * sends a 'pull timer' event to a remote CPU | ||
84 | */ | ||
85 | extern void smp_send_pull_timers(int cpu); | ||
86 | |||
87 | /* | ||
83 | * Generic and arch helpers | 88 | * Generic and arch helpers |
84 | */ | 89 | */ |
85 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS | 90 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS |
diff --git a/include/linux/tick.h b/include/linux/tick.h index b232ccc0ee29..1e29bd5b18af 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h | |||
@@ -74,6 +74,11 @@ extern int tick_is_oneshot_available(void); | |||
74 | extern struct tick_device *tick_get_device(int cpu); | 74 | extern struct tick_device *tick_get_device(int cpu); |
75 | 75 | ||
76 | # ifdef CONFIG_HIGH_RES_TIMERS | 76 | # ifdef CONFIG_HIGH_RES_TIMERS |
77 | /* LITMUS^RT tick alignment */ | ||
78 | #define LINUX_DEFAULT_TICKS 0 | ||
79 | #define LITMUS_ALIGNED_TICKS 1 | ||
80 | #define LITMUS_STAGGERED_TICKS 2 | ||
81 | |||
77 | extern int tick_init_highres(void); | 82 | extern int tick_init_highres(void); |
78 | extern int tick_program_event(ktime_t expires, int force); | 83 | extern int tick_program_event(ktime_t expires, int force); |
79 | extern void tick_setup_sched_timer(void); | 84 | extern void tick_setup_sched_timer(void); |
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h new file mode 100644 index 000000000000..cf4864a498d8 --- /dev/null +++ b/include/litmus/bheap.h | |||
@@ -0,0 +1,77 @@ | |||
1 | /* bheap.h -- Binomial Heaps | ||
2 | * | ||
3 | * (c) 2008, 2009 Bjoern Brandenburg | ||
4 | */ | ||
5 | |||
6 | #ifndef BHEAP_H | ||
7 | #define BHEAP_H | ||
8 | |||
9 | #define NOT_IN_HEAP UINT_MAX | ||
10 | |||
11 | struct bheap_node { | ||
12 | struct bheap_node* parent; | ||
13 | struct bheap_node* next; | ||
14 | struct bheap_node* child; | ||
15 | |||
16 | unsigned int degree; | ||
17 | void* value; | ||
18 | struct bheap_node** ref; | ||
19 | }; | ||
20 | |||
21 | struct bheap { | ||
22 | struct bheap_node* head; | ||
23 | /* We cache the minimum of the heap. | ||
24 | * This speeds up repeated peek operations. | ||
25 | */ | ||
26 | struct bheap_node* min; | ||
27 | }; | ||
28 | |||
29 | typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b); | ||
30 | |||
31 | void bheap_init(struct bheap* heap); | ||
32 | void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value); | ||
33 | |||
34 | static inline int bheap_node_in_heap(struct bheap_node* h) | ||
35 | { | ||
36 | return h->degree != NOT_IN_HEAP; | ||
37 | } | ||
38 | |||
39 | static inline int bheap_empty(struct bheap* heap) | ||
40 | { | ||
41 | return heap->head == NULL && heap->min == NULL; | ||
42 | } | ||
43 | |||
44 | /* insert (and reinitialize) a node into the heap */ | ||
45 | void bheap_insert(bheap_prio_t higher_prio, | ||
46 | struct bheap* heap, | ||
47 | struct bheap_node* node); | ||
48 | |||
49 | /* merge addition into target */ | ||
50 | void bheap_union(bheap_prio_t higher_prio, | ||
51 | struct bheap* target, | ||
52 | struct bheap* addition); | ||
53 | |||
54 | struct bheap_node* bheap_peek(bheap_prio_t higher_prio, | ||
55 | struct bheap* heap); | ||
56 | |||
57 | struct bheap_node* bheap_take(bheap_prio_t higher_prio, | ||
58 | struct bheap* heap); | ||
59 | |||
60 | void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap); | ||
61 | int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node); | ||
62 | |||
63 | void bheap_delete(bheap_prio_t higher_prio, | ||
64 | struct bheap* heap, | ||
65 | struct bheap_node* node); | ||
66 | |||
67 | /* allocate from memcache */ | ||
68 | struct bheap_node* bheap_node_alloc(int gfp_flags); | ||
69 | void bheap_node_free(struct bheap_node* hn); | ||
70 | |||
71 | /* allocate a heap node for value and insert into the heap */ | ||
72 | int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, | ||
73 | void* value, int gfp_flags); | ||
74 | |||
75 | void* bheap_take_del(bheap_prio_t higher_prio, | ||
76 | struct bheap* heap); | ||
77 | #endif | ||
diff --git a/include/litmus/budget.h b/include/litmus/budget.h new file mode 100644 index 000000000000..732530e63491 --- /dev/null +++ b/include/litmus/budget.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef _LITMUS_BUDGET_H_ | ||
2 | #define _LITMUS_BUDGET_H_ | ||
3 | |||
4 | /* Update the per-processor enforcement timer (arm/reprogram/cancel) for | ||
5 | * the next task. */ | ||
6 | void update_enforcement_timer(struct task_struct* t); | ||
7 | |||
8 | #endif | ||
diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h new file mode 100644 index 000000000000..0c18dcb15e6c --- /dev/null +++ b/include/litmus/clustered.h | |||
@@ -0,0 +1,44 @@ | |||
1 | #ifndef CLUSTERED_H | ||
2 | #define CLUSTERED_H | ||
3 | |||
4 | /* Which cache level should be used to group CPUs into clusters? | ||
5 | * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under | ||
6 | * global scheduling). | ||
7 | */ | ||
8 | enum cache_level { | ||
9 | GLOBAL_CLUSTER = 0, | ||
10 | L1_CLUSTER = 1, | ||
11 | L2_CLUSTER = 2, | ||
12 | L3_CLUSTER = 3 | ||
13 | }; | ||
14 | |||
15 | int parse_cache_level(const char *str, enum cache_level *level); | ||
16 | const char* cache_level_name(enum cache_level level); | ||
17 | |||
18 | /* expose a cache level in a /proc dir */ | ||
19 | struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent, | ||
20 | enum cache_level* level); | ||
21 | |||
22 | |||
23 | |||
24 | struct scheduling_cluster { | ||
25 | unsigned int id; | ||
26 | /* list of CPUs that are part of this cluster */ | ||
27 | struct list_head cpus; | ||
28 | }; | ||
29 | |||
30 | struct cluster_cpu { | ||
31 | unsigned int id; /* which CPU is this? */ | ||
32 | struct list_head cluster_list; /* List of the CPUs in this cluster. */ | ||
33 | struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */ | ||
34 | }; | ||
35 | |||
36 | int get_cluster_size(enum cache_level level); | ||
37 | |||
38 | int assign_cpus_to_clusters(enum cache_level level, | ||
39 | struct scheduling_cluster* clusters[], | ||
40 | unsigned int num_clusters, | ||
41 | struct cluster_cpu* cpus[], | ||
42 | unsigned int num_cpus); | ||
43 | |||
44 | #endif | ||
diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h new file mode 100644 index 000000000000..48d086d5a44c --- /dev/null +++ b/include/litmus/debug_trace.h | |||
@@ -0,0 +1,37 @@ | |||
1 | #ifndef LITMUS_DEBUG_TRACE_H | ||
2 | #define LITMUS_DEBUG_TRACE_H | ||
3 | |||
4 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
5 | void sched_trace_log_message(const char* fmt, ...); | ||
6 | void dump_trace_buffer(int max); | ||
7 | #else | ||
8 | |||
9 | #define sched_trace_log_message(fmt, ...) | ||
10 | |||
11 | #endif | ||
12 | |||
13 | extern atomic_t __log_seq_no; | ||
14 | |||
15 | #ifdef CONFIG_SCHED_DEBUG_TRACE_CALLER | ||
16 | #define TRACE_PREFIX "%d P%d [%s@%s:%d]: " | ||
17 | #define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \ | ||
18 | raw_smp_processor_id(), \ | ||
19 | __FUNCTION__, __FILE__, __LINE__ | ||
20 | #else | ||
21 | #define TRACE_PREFIX "%d P%d: " | ||
22 | #define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \ | ||
23 | raw_smp_processor_id() | ||
24 | #endif | ||
25 | |||
26 | #define TRACE(fmt, args...) \ | ||
27 | sched_trace_log_message(TRACE_PREFIX fmt, \ | ||
28 | TRACE_ARGS, ## args) | ||
29 | |||
30 | #define TRACE_TASK(t, fmt, args...) \ | ||
31 | TRACE("(%s/%d:%d) " fmt, (t)->comm, (t)->pid, \ | ||
32 | (t)->rt_param.job_params.job_no, ##args) | ||
33 | |||
34 | #define TRACE_CUR(fmt, args...) \ | ||
35 | TRACE_TASK(current, fmt, ## args) | ||
36 | |||
37 | #endif | ||
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h new file mode 100644 index 000000000000..2c4266f77c03 --- /dev/null +++ b/include/litmus/edf_common.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * EDF common data structures and utility functions shared by all EDF | ||
3 | * based scheduler plugins | ||
4 | */ | ||
5 | |||
6 | /* CLEANUP: Add comments and make it less messy. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #ifndef __UNC_EDF_COMMON_H__ | ||
11 | #define __UNC_EDF_COMMON_H__ | ||
12 | |||
13 | #include <litmus/rt_domain.h> | ||
14 | |||
15 | void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
16 | release_jobs_t release); | ||
17 | |||
18 | int edf_higher_prio(struct task_struct* first, | ||
19 | struct task_struct* second); | ||
20 | |||
21 | #ifdef CONFIG_LITMUS_LOCKING | ||
22 | /* priority comparison without priority inheritance */ | ||
23 | int edf_higher_base_prio(struct task_struct* first, | ||
24 | struct task_struct* second); | ||
25 | |||
26 | int edf_pending_order(struct bheap_node* a, struct bheap_node* b); | ||
27 | #endif | ||
28 | |||
29 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b); | ||
30 | |||
31 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t); | ||
32 | |||
33 | #endif | ||
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h new file mode 100644 index 000000000000..d1ee0d1142d8 --- /dev/null +++ b/include/litmus/fdso.h | |||
@@ -0,0 +1,77 @@ | |||
1 | /* fdso.h - file descriptor attached shared objects | ||
2 | * | ||
3 | * (c) 2007--2011 B. Brandenburg, LITMUS^RT project | ||
4 | */ | ||
5 | |||
6 | #ifndef _LINUX_FDSO_H_ | ||
7 | #define _LINUX_FDSO_H_ | ||
8 | |||
9 | #include <linux/list.h> | ||
10 | #include <asm/atomic.h> | ||
11 | |||
12 | #include <linux/fs.h> | ||
13 | #include <linux/slab.h> | ||
14 | |||
15 | #define MAX_OBJECT_DESCRIPTORS 85 | ||
16 | |||
17 | typedef enum { | ||
18 | MIN_OBJ_TYPE = 0, | ||
19 | |||
20 | FMLP_SEM = 0, | ||
21 | SRP_SEM = 1, | ||
22 | |||
23 | MPCP_SEM = 2, | ||
24 | MPCP_VS_SEM = 3, | ||
25 | DPCP_SEM = 4, | ||
26 | |||
27 | OMLP_SEM = 5, | ||
28 | |||
29 | MAX_OBJ_TYPE = 5 | ||
30 | } obj_type_t; | ||
31 | |||
32 | struct inode_obj_id { | ||
33 | struct list_head list; | ||
34 | atomic_t count; | ||
35 | struct inode* inode; | ||
36 | |||
37 | obj_type_t type; | ||
38 | void* obj; | ||
39 | unsigned int id; | ||
40 | }; | ||
41 | |||
42 | struct fdso_ops; | ||
43 | |||
44 | struct od_table_entry { | ||
45 | unsigned int used; | ||
46 | |||
47 | struct inode_obj_id* obj; | ||
48 | const struct fdso_ops* class; | ||
49 | }; | ||
50 | |||
51 | struct fdso_ops { | ||
52 | int (*create)(void** obj_ref, obj_type_t type, void* __user); | ||
53 | void (*destroy)(obj_type_t type, void*); | ||
54 | int (*open) (struct od_table_entry*, void* __user); | ||
55 | int (*close) (struct od_table_entry*); | ||
56 | }; | ||
57 | |||
58 | /* translate a userspace supplied od into the raw table entry | ||
59 | * returns NULL if od is invalid | ||
60 | */ | ||
61 | struct od_table_entry* get_entry_for_od(int od); | ||
62 | |||
63 | /* translate a userspace supplied od into the associated object | ||
64 | * returns NULL if od is invalid | ||
65 | */ | ||
66 | static inline void* od_lookup(int od, obj_type_t type) | ||
67 | { | ||
68 | struct od_table_entry* e = get_entry_for_od(od); | ||
69 | return e && e->obj->type == type ? e->obj->obj : NULL; | ||
70 | } | ||
71 | |||
72 | #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) | ||
73 | #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) | ||
74 | #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) | ||
75 | |||
76 | |||
77 | #endif | ||
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h new file mode 100644 index 000000000000..6c18277fdfc9 --- /dev/null +++ b/include/litmus/feather_buffer.h | |||
@@ -0,0 +1,94 @@ | |||
1 | #ifndef _FEATHER_BUFFER_H_ | ||
2 | #define _FEATHER_BUFFER_H_ | ||
3 | |||
4 | /* requires UINT_MAX and memcpy */ | ||
5 | |||
6 | #define SLOT_FREE 0 | ||
7 | #define SLOT_BUSY 1 | ||
8 | #define SLOT_READY 2 | ||
9 | |||
10 | struct ft_buffer { | ||
11 | unsigned int slot_count; | ||
12 | unsigned int slot_size; | ||
13 | |||
14 | int free_count; | ||
15 | unsigned int write_idx; | ||
16 | unsigned int read_idx; | ||
17 | |||
18 | char* slots; | ||
19 | void* buffer_mem; | ||
20 | unsigned int failed_writes; | ||
21 | }; | ||
22 | |||
23 | static inline int init_ft_buffer(struct ft_buffer* buf, | ||
24 | unsigned int slot_count, | ||
25 | unsigned int slot_size, | ||
26 | char* slots, | ||
27 | void* buffer_mem) | ||
28 | { | ||
29 | int i = 0; | ||
30 | if (!slot_count || UINT_MAX % slot_count != slot_count - 1) { | ||
31 | /* The slot count must divide UINT_MAX + 1 so that when it | ||
32 | * wraps around the index correctly points to 0. | ||
33 | */ | ||
34 | return 0; | ||
35 | } else { | ||
36 | buf->slot_count = slot_count; | ||
37 | buf->slot_size = slot_size; | ||
38 | buf->slots = slots; | ||
39 | buf->buffer_mem = buffer_mem; | ||
40 | buf->free_count = slot_count; | ||
41 | buf->write_idx = 0; | ||
42 | buf->read_idx = 0; | ||
43 | buf->failed_writes = 0; | ||
44 | for (i = 0; i < slot_count; i++) | ||
45 | buf->slots[i] = SLOT_FREE; | ||
46 | return 1; | ||
47 | } | ||
48 | } | ||
49 | |||
50 | static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr) | ||
51 | { | ||
52 | int free = fetch_and_dec(&buf->free_count); | ||
53 | unsigned int idx; | ||
54 | if (free <= 0) { | ||
55 | fetch_and_inc(&buf->free_count); | ||
56 | *ptr = 0; | ||
57 | fetch_and_inc(&buf->failed_writes); | ||
58 | return 0; | ||
59 | } else { | ||
60 | idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count; | ||
61 | buf->slots[idx] = SLOT_BUSY; | ||
62 | *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size; | ||
63 | return 1; | ||
64 | } | ||
65 | } | ||
66 | |||
67 | static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr) | ||
68 | { | ||
69 | unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size; | ||
70 | buf->slots[idx] = SLOT_READY; | ||
71 | } | ||
72 | |||
73 | |||
74 | /* exclusive reader access is assumed */ | ||
75 | static inline int ft_buffer_read(struct ft_buffer* buf, void* dest) | ||
76 | { | ||
77 | unsigned int idx; | ||
78 | if (buf->free_count == buf->slot_count) | ||
79 | /* nothing available */ | ||
80 | return 0; | ||
81 | idx = buf->read_idx % buf->slot_count; | ||
82 | if (buf->slots[idx] == SLOT_READY) { | ||
83 | memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size, | ||
84 | buf->slot_size); | ||
85 | buf->slots[idx] = SLOT_FREE; | ||
86 | buf->read_idx++; | ||
87 | fetch_and_inc(&buf->free_count); | ||
88 | return 1; | ||
89 | } else | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | |||
94 | #endif | ||
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h new file mode 100644 index 000000000000..028dfb206fb0 --- /dev/null +++ b/include/litmus/feather_trace.h | |||
@@ -0,0 +1,65 @@ | |||
1 | #ifndef _FEATHER_TRACE_H_ | ||
2 | #define _FEATHER_TRACE_H_ | ||
3 | |||
4 | #include <asm/atomic.h> | ||
5 | |||
6 | int ft_enable_event(unsigned long id); | ||
7 | int ft_disable_event(unsigned long id); | ||
8 | int ft_is_event_enabled(unsigned long id); | ||
9 | int ft_disable_all_events(void); | ||
10 | |||
11 | /* atomic_* functions are inline anyway */ | ||
12 | static inline int fetch_and_inc(int *val) | ||
13 | { | ||
14 | return atomic_add_return(1, (atomic_t*) val) - 1; | ||
15 | } | ||
16 | |||
17 | static inline int fetch_and_dec(int *val) | ||
18 | { | ||
19 | return atomic_sub_return(1, (atomic_t*) val) + 1; | ||
20 | } | ||
21 | |||
22 | /* Don't use rewriting implementation if kernel text pages are read-only. | ||
23 | * Ftrace gets around this by using the identity mapping, but that's more | ||
24 | * effort than is warranted right now for Feather-Trace. | ||
25 | * Eventually, it may make sense to replace Feather-Trace with ftrace. | ||
26 | */ | ||
27 | #if defined(CONFIG_ARCH_HAS_FEATHER_TRACE) && !defined(CONFIG_DEBUG_RODATA) | ||
28 | |||
29 | #include <asm/feather_trace.h> | ||
30 | |||
31 | #else /* !CONFIG_ARCH_HAS_FEATHER_TRACE */ | ||
32 | |||
33 | /* provide default implementation */ | ||
34 | |||
35 | #include <asm/timex.h> /* for get_cycles() */ | ||
36 | |||
37 | static inline unsigned long long ft_timestamp(void) | ||
38 | { | ||
39 | return get_cycles(); | ||
40 | } | ||
41 | |||
42 | #define feather_callback | ||
43 | |||
44 | #define MAX_EVENTS 1024 | ||
45 | |||
46 | extern int ft_events[MAX_EVENTS]; | ||
47 | |||
48 | #define ft_event(id, callback) \ | ||
49 | if (ft_events[id]) callback(); | ||
50 | |||
51 | #define ft_event0(id, callback) \ | ||
52 | if (ft_events[id]) callback(id); | ||
53 | |||
54 | #define ft_event1(id, callback, param) \ | ||
55 | if (ft_events[id]) callback(id, param); | ||
56 | |||
57 | #define ft_event2(id, callback, param, param2) \ | ||
58 | if (ft_events[id]) callback(id, param, param2); | ||
59 | |||
60 | #define ft_event3(id, callback, p, p2, p3) \ | ||
61 | if (ft_events[id]) callback(id, p, p2, p3); | ||
62 | |||
63 | #endif /* CONFIG_ARCH_HAS_FEATHER_TRACE */ | ||
64 | |||
65 | #endif | ||
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h new file mode 100644 index 000000000000..dd1f7bf1e347 --- /dev/null +++ b/include/litmus/fp_common.h | |||
@@ -0,0 +1,105 @@ | |||
1 | /* Fixed-priority scheduler support. | ||
2 | */ | ||
3 | |||
4 | #ifndef __FP_COMMON_H__ | ||
5 | #define __FP_COMMON_H__ | ||
6 | |||
7 | #include <litmus/rt_domain.h> | ||
8 | |||
9 | #include <asm/bitops.h> | ||
10 | |||
11 | |||
12 | void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
13 | release_jobs_t release); | ||
14 | |||
15 | int fp_higher_prio(struct task_struct* first, | ||
16 | struct task_struct* second); | ||
17 | |||
18 | int fp_ready_order(struct bheap_node* a, struct bheap_node* b); | ||
19 | |||
20 | #define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG) | ||
21 | |||
22 | #if (LITMUS_MAX_PRIORITY % BITS_PER_LONG) | ||
23 | #error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG | ||
24 | #endif | ||
25 | |||
26 | /* bitmask-indexed priority queue */ | ||
27 | struct fp_prio_queue { | ||
28 | unsigned long bitmask[FP_PRIO_BIT_WORDS]; | ||
29 | struct bheap queue[LITMUS_MAX_PRIORITY]; | ||
30 | }; | ||
31 | |||
32 | void fp_prio_queue_init(struct fp_prio_queue* q); | ||
33 | |||
34 | static inline void fpq_set(struct fp_prio_queue* q, unsigned int index) | ||
35 | { | ||
36 | unsigned long *word = q->bitmask + (index / BITS_PER_LONG); | ||
37 | __set_bit(index % BITS_PER_LONG, word); | ||
38 | } | ||
39 | |||
40 | static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index) | ||
41 | { | ||
42 | unsigned long *word = q->bitmask + (index / BITS_PER_LONG); | ||
43 | __clear_bit(index % BITS_PER_LONG, word); | ||
44 | } | ||
45 | |||
46 | static inline unsigned int fpq_find(struct fp_prio_queue* q) | ||
47 | { | ||
48 | int i; | ||
49 | |||
50 | /* loop optimizer should unroll this */ | ||
51 | for (i = 0; i < FP_PRIO_BIT_WORDS; i++) | ||
52 | if (q->bitmask[i]) | ||
53 | return __ffs(q->bitmask[i]) + i * BITS_PER_LONG; | ||
54 | |||
55 | return LITMUS_MAX_PRIORITY; /* nothing found */ | ||
56 | } | ||
57 | |||
58 | static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index) | ||
59 | { | ||
60 | |||
61 | BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node)); | ||
62 | |||
63 | fpq_set(q, index); | ||
64 | bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node); | ||
65 | } | ||
66 | |||
67 | static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index) | ||
68 | { | ||
69 | BUG_ON(!is_queued(t)); | ||
70 | |||
71 | bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node); | ||
72 | if (likely(bheap_empty(&q->queue[index]))) | ||
73 | fpq_clear(q, index); | ||
74 | } | ||
75 | |||
76 | static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q) | ||
77 | { | ||
78 | unsigned int idx = fpq_find(q); | ||
79 | struct bheap_node* hn; | ||
80 | |||
81 | if (idx < LITMUS_MAX_PRIORITY) { | ||
82 | hn = bheap_peek(fp_ready_order, &q->queue[idx]); | ||
83 | return bheap2task(hn); | ||
84 | } else | ||
85 | return NULL; | ||
86 | } | ||
87 | |||
88 | static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q) | ||
89 | { | ||
90 | unsigned int idx = fpq_find(q); | ||
91 | struct bheap_node* hn; | ||
92 | |||
93 | if (idx < LITMUS_MAX_PRIORITY) { | ||
94 | hn = bheap_take(fp_ready_order, &q->queue[idx]); | ||
95 | if (likely(bheap_empty(&q->queue[idx]))) | ||
96 | fpq_clear(q, idx); | ||
97 | return bheap2task(hn); | ||
98 | } else | ||
99 | return NULL; | ||
100 | } | ||
101 | |||
102 | int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t); | ||
103 | |||
104 | |||
105 | #endif | ||
diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h new file mode 100644 index 000000000000..0b959874dd70 --- /dev/null +++ b/include/litmus/ftdev.h | |||
@@ -0,0 +1,55 @@ | |||
1 | #ifndef _LITMUS_FTDEV_H_ | ||
2 | #define _LITMUS_FTDEV_H_ | ||
3 | |||
4 | #include <litmus/feather_trace.h> | ||
5 | #include <litmus/feather_buffer.h> | ||
6 | #include <linux/mutex.h> | ||
7 | #include <linux/cdev.h> | ||
8 | |||
9 | #define FTDEV_ENABLE_CMD 0 | ||
10 | #define FTDEV_DISABLE_CMD 1 | ||
11 | |||
12 | struct ftdev; | ||
13 | |||
14 | /* return 0 if buffer can be opened, otherwise -$REASON */ | ||
15 | typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no); | ||
16 | /* return 0 on success, otherwise -$REASON */ | ||
17 | typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no); | ||
18 | typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no); | ||
19 | /* Let devices handle writes from userspace. No synchronization provided. */ | ||
20 | typedef ssize_t (*ftdev_write_t)(struct ft_buffer* buf, size_t len, const char __user *from); | ||
21 | |||
22 | struct ftdev_event; | ||
23 | |||
24 | struct ftdev_minor { | ||
25 | struct ft_buffer* buf; | ||
26 | unsigned int readers; | ||
27 | struct mutex lock; | ||
28 | /* FIXME: filter for authorized events */ | ||
29 | struct ftdev_event* events; | ||
30 | struct device* device; | ||
31 | struct ftdev* ftdev; | ||
32 | }; | ||
33 | |||
34 | struct ftdev { | ||
35 | dev_t major; | ||
36 | struct cdev cdev; | ||
37 | struct class* class; | ||
38 | const char* name; | ||
39 | struct ftdev_minor* minor; | ||
40 | unsigned int minor_cnt; | ||
41 | ftdev_alloc_t alloc; | ||
42 | ftdev_free_t free; | ||
43 | ftdev_can_open_t can_open; | ||
44 | ftdev_write_t write; | ||
45 | }; | ||
46 | |||
47 | struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size); | ||
48 | void free_ft_buffer(struct ft_buffer* buf); | ||
49 | |||
50 | int ftdev_init( struct ftdev* ftdev, struct module* owner, | ||
51 | const int minor_cnt, const char* name); | ||
52 | void ftdev_exit(struct ftdev* ftdev); | ||
53 | int register_ftdev(struct ftdev* ftdev); | ||
54 | |||
55 | #endif | ||
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h new file mode 100644 index 000000000000..9bd361ef3943 --- /dev/null +++ b/include/litmus/jobs.h | |||
@@ -0,0 +1,9 @@ | |||
1 | #ifndef __LITMUS_JOBS_H__ | ||
2 | #define __LITMUS_JOBS_H__ | ||
3 | |||
4 | void prepare_for_next_period(struct task_struct *t); | ||
5 | void release_at(struct task_struct *t, lt_t start); | ||
6 | long complete_job(void); | ||
7 | |||
8 | #endif | ||
9 | |||
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h new file mode 100644 index 000000000000..31ac72eddef7 --- /dev/null +++ b/include/litmus/litmus.h | |||
@@ -0,0 +1,292 @@ | |||
1 | /* | ||
2 | * Constant definitions related to | ||
3 | * scheduling policy. | ||
4 | */ | ||
5 | |||
6 | #ifndef _LINUX_LITMUS_H_ | ||
7 | #define _LINUX_LITMUS_H_ | ||
8 | |||
9 | #include <litmus/debug_trace.h> | ||
10 | |||
11 | #ifdef CONFIG_RELEASE_MASTER | ||
12 | extern atomic_t release_master_cpu; | ||
13 | #endif | ||
14 | |||
15 | /* in_list - is a given list_head queued on some list? | ||
16 | */ | ||
17 | static inline int in_list(struct list_head* list) | ||
18 | { | ||
19 | return !( /* case 1: deleted */ | ||
20 | (list->next == LIST_POISON1 && | ||
21 | list->prev == LIST_POISON2) | ||
22 | || | ||
23 | /* case 2: initialized */ | ||
24 | (list->next == list && | ||
25 | list->prev == list) | ||
26 | ); | ||
27 | } | ||
28 | |||
29 | #define NO_CPU 0xffffffff | ||
30 | |||
31 | void litmus_fork(struct task_struct *tsk); | ||
32 | void litmus_exec(void); | ||
33 | /* clean up real-time state of a task */ | ||
34 | void exit_litmus(struct task_struct *dead_tsk); | ||
35 | |||
36 | long litmus_admit_task(struct task_struct *tsk); | ||
37 | void litmus_exit_task(struct task_struct *tsk); | ||
38 | |||
39 | #define is_realtime(t) ((t)->policy == SCHED_LITMUS) | ||
40 | #define rt_transition_pending(t) \ | ||
41 | ((t)->rt_param.transition_pending) | ||
42 | |||
43 | #define tsk_rt(t) (&(t)->rt_param) | ||
44 | |||
45 | /* Realtime utility macros */ | ||
46 | #define get_rt_flags(t) (tsk_rt(t)->flags) | ||
47 | #define set_rt_flags(t,f) (tsk_rt(t)->flags=(f)) | ||
48 | #define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost) | ||
49 | #define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) | ||
50 | #define get_rt_period(t) (tsk_rt(t)->task_params.period) | ||
51 | #define get_rt_phase(t) (tsk_rt(t)->task_params.phase) | ||
52 | #define get_partition(t) (tsk_rt(t)->task_params.cpu) | ||
53 | #define get_priority(t) (tsk_rt(t)->task_params.priority) | ||
54 | #define get_deadline(t) (tsk_rt(t)->job_params.deadline) | ||
55 | #define get_release(t) (tsk_rt(t)->job_params.release) | ||
56 | #define get_class(t) (tsk_rt(t)->task_params.cls) | ||
57 | |||
58 | #define is_priority_boosted(t) (tsk_rt(t)->priority_boosted) | ||
59 | #define get_boost_start(t) (tsk_rt(t)->boost_start_time) | ||
60 | |||
61 | inline static int budget_exhausted(struct task_struct* t) | ||
62 | { | ||
63 | return get_exec_time(t) >= get_exec_cost(t); | ||
64 | } | ||
65 | |||
66 | inline static lt_t budget_remaining(struct task_struct* t) | ||
67 | { | ||
68 | if (!budget_exhausted(t)) | ||
69 | return get_exec_cost(t) - get_exec_time(t); | ||
70 | else | ||
71 | /* avoid overflow */ | ||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | #define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) | ||
76 | |||
77 | #define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \ | ||
78 | == PRECISE_ENFORCEMENT) | ||
79 | |||
80 | #define is_hrt(t) \ | ||
81 | (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) | ||
82 | #define is_srt(t) \ | ||
83 | (tsk_rt(t)->task_params.cls == RT_CLASS_SOFT) | ||
84 | #define is_be(t) \ | ||
85 | (tsk_rt(t)->task_params.cls == RT_CLASS_BEST_EFFORT) | ||
86 | |||
87 | /* Our notion of time within LITMUS: kernel monotonic time. */ | ||
88 | static inline lt_t litmus_clock(void) | ||
89 | { | ||
90 | return ktime_to_ns(ktime_get()); | ||
91 | } | ||
92 | |||
93 | /* A macro to convert from nanoseconds to ktime_t. */ | ||
94 | #define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t) | ||
95 | |||
96 | #define get_domain(t) (tsk_rt(t)->domain) | ||
97 | |||
98 | /* Honor the flag in the preempt_count variable that is set | ||
99 | * when scheduling is in progress. | ||
100 | */ | ||
101 | #define is_running(t) \ | ||
102 | ((t)->state == TASK_RUNNING || \ | ||
103 | task_thread_info(t)->preempt_count & PREEMPT_ACTIVE) | ||
104 | |||
105 | #define is_blocked(t) \ | ||
106 | (!is_running(t)) | ||
107 | #define is_released(t, now) \ | ||
108 | (lt_before_eq(get_release(t), now)) | ||
109 | #define is_tardy(t, now) \ | ||
110 | (lt_before_eq(tsk_rt(t)->job_params.deadline, now)) | ||
111 | |||
112 | /* real-time comparison macros */ | ||
113 | #define earlier_deadline(a, b) (lt_before(\ | ||
114 | (a)->rt_param.job_params.deadline,\ | ||
115 | (b)->rt_param.job_params.deadline)) | ||
116 | #define earlier_release(a, b) (lt_before(\ | ||
117 | (a)->rt_param.job_params.release,\ | ||
118 | (b)->rt_param.job_params.release)) | ||
119 | |||
120 | void preempt_if_preemptable(struct task_struct* t, int on_cpu); | ||
121 | |||
122 | #ifdef CONFIG_LITMUS_LOCKING | ||
123 | void srp_ceiling_block(void); | ||
124 | #else | ||
125 | #define srp_ceiling_block() /* nothing */ | ||
126 | #endif | ||
127 | |||
128 | #define bheap2task(hn) ((struct task_struct*) hn->value) | ||
129 | |||
130 | static inline struct control_page* get_control_page(struct task_struct *t) | ||
131 | { | ||
132 | return tsk_rt(t)->ctrl_page; | ||
133 | } | ||
134 | |||
135 | static inline int has_control_page(struct task_struct* t) | ||
136 | { | ||
137 | return tsk_rt(t)->ctrl_page != NULL; | ||
138 | } | ||
139 | |||
140 | #ifdef CONFIG_NP_SECTION | ||
141 | |||
142 | static inline int is_kernel_np(struct task_struct *t) | ||
143 | { | ||
144 | return tsk_rt(t)->kernel_np; | ||
145 | } | ||
146 | |||
147 | static inline int is_user_np(struct task_struct *t) | ||
148 | { | ||
149 | return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0; | ||
150 | } | ||
151 | |||
152 | static inline void request_exit_np(struct task_struct *t) | ||
153 | { | ||
154 | if (is_user_np(t)) { | ||
155 | /* Set the flag that tells user space to call | ||
156 | * into the kernel at the end of a critical section. */ | ||
157 | if (likely(tsk_rt(t)->ctrl_page)) { | ||
158 | TRACE_TASK(t, "setting delayed_preemption flag\n"); | ||
159 | tsk_rt(t)->ctrl_page->sched.np.preempt = 1; | ||
160 | } | ||
161 | } | ||
162 | } | ||
163 | |||
164 | static inline void make_np(struct task_struct *t) | ||
165 | { | ||
166 | tsk_rt(t)->kernel_np++; | ||
167 | } | ||
168 | |||
169 | /* Caller should check if preemption is necessary when | ||
170 | * the function return 0. | ||
171 | */ | ||
172 | static inline int take_np(struct task_struct *t) | ||
173 | { | ||
174 | return --tsk_rt(t)->kernel_np; | ||
175 | } | ||
176 | |||
177 | /* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */ | ||
178 | static inline int request_exit_np_atomic(struct task_struct *t) | ||
179 | { | ||
180 | union np_flag old, new; | ||
181 | int ok; | ||
182 | |||
183 | if (tsk_rt(t)->ctrl_page) { | ||
184 | old.raw = tsk_rt(t)->ctrl_page->sched.raw; | ||
185 | if (old.np.flag == 0) { | ||
186 | /* no longer non-preemptive */ | ||
187 | return 0; | ||
188 | } else if (old.np.preempt) { | ||
189 | /* already set, nothing for us to do */ | ||
190 | TRACE_TASK(t, "not setting np.preempt flag again\n"); | ||
191 | return 1; | ||
192 | } else { | ||
193 | /* non preemptive and flag not set */ | ||
194 | new.raw = old.raw; | ||
195 | new.np.preempt = 1; | ||
196 | /* if we get old back, then we atomically set the flag */ | ||
197 | ok = cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw; | ||
198 | /* If we raced with a concurrent change, then so be | ||
199 | * it. Deliver it by IPI. We don't want an unbounded | ||
200 | * retry loop here since tasks might exploit that to | ||
201 | * keep the kernel busy indefinitely. */ | ||
202 | TRACE_TASK(t, "request_exit_np => %d\n", ok); | ||
203 | return ok; | ||
204 | } | ||
205 | } else | ||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | #else | ||
210 | |||
211 | static inline int is_kernel_np(struct task_struct* t) | ||
212 | { | ||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static inline int is_user_np(struct task_struct* t) | ||
217 | { | ||
218 | return 0; | ||
219 | } | ||
220 | |||
221 | static inline void request_exit_np(struct task_struct *t) | ||
222 | { | ||
223 | /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */ | ||
224 | BUG(); | ||
225 | } | ||
226 | |||
227 | static inline int request_exist_np_atomic(struct task_struct *t) | ||
228 | { | ||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | #endif | ||
233 | |||
234 | static inline void clear_exit_np(struct task_struct *t) | ||
235 | { | ||
236 | if (likely(tsk_rt(t)->ctrl_page)) | ||
237 | tsk_rt(t)->ctrl_page->sched.np.preempt = 0; | ||
238 | } | ||
239 | |||
240 | static inline int is_np(struct task_struct *t) | ||
241 | { | ||
242 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
243 | int kernel, user; | ||
244 | kernel = is_kernel_np(t); | ||
245 | user = is_user_np(t); | ||
246 | if (kernel || user) | ||
247 | TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n", | ||
248 | |||
249 | kernel, user); | ||
250 | return kernel || user; | ||
251 | #else | ||
252 | return unlikely(is_kernel_np(t) || is_user_np(t)); | ||
253 | #endif | ||
254 | } | ||
255 | |||
256 | static inline int is_present(struct task_struct* t) | ||
257 | { | ||
258 | return t && tsk_rt(t)->present; | ||
259 | } | ||
260 | |||
261 | |||
262 | /* make the unit explicit */ | ||
263 | typedef unsigned long quanta_t; | ||
264 | |||
265 | enum round { | ||
266 | FLOOR, | ||
267 | CEIL | ||
268 | }; | ||
269 | |||
270 | |||
271 | /* Tick period is used to convert ns-specified execution | ||
272 | * costs and periods into tick-based equivalents. | ||
273 | */ | ||
274 | extern ktime_t tick_period; | ||
275 | |||
276 | static inline quanta_t time2quanta(lt_t time, enum round round) | ||
277 | { | ||
278 | s64 quantum_length = ktime_to_ns(tick_period); | ||
279 | |||
280 | if (do_div(time, quantum_length) && round == CEIL) | ||
281 | time++; | ||
282 | return (quanta_t) time; | ||
283 | } | ||
284 | |||
285 | /* By how much is cpu staggered behind CPU 0? */ | ||
286 | u64 cpu_stagger_offset(int cpu); | ||
287 | |||
288 | #define TS_SYSCALL_IN_START \ | ||
289 | if (has_control_page(current)) \ | ||
290 | __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start); | ||
291 | |||
292 | #endif | ||
diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h new file mode 100644 index 000000000000..6800e725d48c --- /dev/null +++ b/include/litmus/litmus_proc.h | |||
@@ -0,0 +1,25 @@ | |||
1 | #include <litmus/sched_plugin.h> | ||
2 | #include <linux/proc_fs.h> | ||
3 | |||
4 | int __init init_litmus_proc(void); | ||
5 | void exit_litmus_proc(void); | ||
6 | |||
7 | /* | ||
8 | * On success, returns 0 and sets the pointer to the location of the new | ||
9 | * proc dir entry, otherwise returns an error code and sets pde to NULL. | ||
10 | */ | ||
11 | long make_plugin_proc_dir(struct sched_plugin* plugin, | ||
12 | struct proc_dir_entry** pde); | ||
13 | |||
14 | /* | ||
15 | * Plugins should deallocate all child proc directory entries before | ||
16 | * calling this, to avoid memory leaks. | ||
17 | */ | ||
18 | void remove_plugin_proc_dir(struct sched_plugin* plugin); | ||
19 | |||
20 | |||
21 | /* Copy at most size-1 bytes from ubuf into kbuf, null-terminate buf, and | ||
22 | * remove a '\n' if present. Returns the number of bytes that were read or | ||
23 | * -EFAULT. */ | ||
24 | int copy_and_chomp(char *kbuf, unsigned long ksize, | ||
25 | __user const char* ubuf, unsigned long ulength); | ||
diff --git a/include/litmus/locking.h b/include/litmus/locking.h new file mode 100644 index 000000000000..4d7b870cb443 --- /dev/null +++ b/include/litmus/locking.h | |||
@@ -0,0 +1,28 @@ | |||
1 | #ifndef LITMUS_LOCKING_H | ||
2 | #define LITMUS_LOCKING_H | ||
3 | |||
4 | struct litmus_lock_ops; | ||
5 | |||
6 | /* Generic base struct for LITMUS^RT userspace semaphores. | ||
7 | * This structure should be embedded in protocol-specific semaphores. | ||
8 | */ | ||
9 | struct litmus_lock { | ||
10 | struct litmus_lock_ops *ops; | ||
11 | int type; | ||
12 | }; | ||
13 | |||
14 | struct litmus_lock_ops { | ||
15 | /* Current task tries to obtain / drop a reference to a lock. | ||
16 | * Optional methods, allowed by default. */ | ||
17 | int (*open)(struct litmus_lock*, void* __user); | ||
18 | int (*close)(struct litmus_lock*); | ||
19 | |||
20 | /* Current tries to lock/unlock this lock (mandatory methods). */ | ||
21 | int (*lock)(struct litmus_lock*); | ||
22 | int (*unlock)(struct litmus_lock*); | ||
23 | |||
24 | /* The lock is no longer being referenced (mandatory method). */ | ||
25 | void (*deallocate)(struct litmus_lock*); | ||
26 | }; | ||
27 | |||
28 | #endif | ||
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h new file mode 100644 index 000000000000..f3cf29ad87ee --- /dev/null +++ b/include/litmus/preempt.h | |||
@@ -0,0 +1,165 @@ | |||
1 | #ifndef LITMUS_PREEMPT_H | ||
2 | #define LITMUS_PREEMPT_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include <linux/cache.h> | ||
6 | #include <linux/percpu.h> | ||
7 | #include <asm/atomic.h> | ||
8 | |||
9 | #include <litmus/debug_trace.h> | ||
10 | |||
11 | extern DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); | ||
12 | |||
13 | //#ifdef CONFIG_DEBUG_KERNEL | ||
14 | #if 0 | ||
15 | const char* sched_state_name(int s); | ||
16 | #define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args) | ||
17 | #else | ||
18 | #define TRACE_STATE(fmt, args...) /* ignore */ | ||
19 | #endif | ||
20 | |||
21 | #define VERIFY_SCHED_STATE(x) \ | ||
22 | do { int __s = get_sched_state(); \ | ||
23 | if ((__s & (x)) == 0) \ | ||
24 | TRACE_STATE("INVALID s=0x%x (%s) not " \ | ||
25 | "in 0x%x (%s) [%s]\n", \ | ||
26 | __s, sched_state_name(__s), \ | ||
27 | (x), #x, __FUNCTION__); \ | ||
28 | } while (0); | ||
29 | |||
30 | #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \ | ||
31 | TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \ | ||
32 | cpu, (x), sched_state_name(x), \ | ||
33 | (y), sched_state_name(y)) | ||
34 | |||
35 | |||
36 | typedef enum scheduling_state { | ||
37 | TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that | ||
38 | * should be scheduled, and the processor does not | ||
39 | * plan to invoke schedule(). */ | ||
40 | SHOULD_SCHEDULE = (1 << 1), /* A remote processor has determined that the | ||
41 | * processor should reschedule, but this has not | ||
42 | * been communicated yet (IPI still pending). */ | ||
43 | WILL_SCHEDULE = (1 << 2), /* The processor has noticed that it has to | ||
44 | * reschedule and will do so shortly. */ | ||
45 | TASK_PICKED = (1 << 3), /* The processor is currently executing schedule(), | ||
46 | * has selected a new task to schedule, but has not | ||
47 | * yet performed the actual context switch. */ | ||
48 | PICKED_WRONG_TASK = (1 << 4), /* The processor has not yet performed the context | ||
49 | * switch, but a remote processor has already | ||
50 | * determined that a higher-priority task became | ||
51 | * eligible after the task was picked. */ | ||
52 | } sched_state_t; | ||
53 | |||
54 | static inline sched_state_t get_sched_state_on(int cpu) | ||
55 | { | ||
56 | return atomic_read(&per_cpu(resched_state, cpu)); | ||
57 | } | ||
58 | |||
59 | static inline sched_state_t get_sched_state(void) | ||
60 | { | ||
61 | return atomic_read(&__get_cpu_var(resched_state)); | ||
62 | } | ||
63 | |||
64 | static inline int is_in_sched_state(int possible_states) | ||
65 | { | ||
66 | return get_sched_state() & possible_states; | ||
67 | } | ||
68 | |||
69 | static inline int cpu_is_in_sched_state(int cpu, int possible_states) | ||
70 | { | ||
71 | return get_sched_state_on(cpu) & possible_states; | ||
72 | } | ||
73 | |||
74 | static inline void set_sched_state(sched_state_t s) | ||
75 | { | ||
76 | TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id()); | ||
77 | atomic_set(&__get_cpu_var(resched_state), s); | ||
78 | } | ||
79 | |||
80 | static inline int sched_state_transition(sched_state_t from, sched_state_t to) | ||
81 | { | ||
82 | sched_state_t old_state; | ||
83 | |||
84 | old_state = atomic_cmpxchg(&__get_cpu_var(resched_state), from, to); | ||
85 | if (old_state == from) { | ||
86 | TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id()); | ||
87 | return 1; | ||
88 | } else | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | static inline int sched_state_transition_on(int cpu, | ||
93 | sched_state_t from, | ||
94 | sched_state_t to) | ||
95 | { | ||
96 | sched_state_t old_state; | ||
97 | |||
98 | old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to); | ||
99 | if (old_state == from) { | ||
100 | TRACE_SCHED_STATE_CHANGE(from, to, cpu); | ||
101 | return 1; | ||
102 | } else | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | /* Plugins must call this function after they have decided which job to | ||
107 | * schedule next. IMPORTANT: this function must be called while still holding | ||
108 | * the lock that is used to serialize scheduling decisions. | ||
109 | * | ||
110 | * (Ideally, we would like to use runqueue locks for this purpose, but that | ||
111 | * would lead to deadlocks with the migration code.) | ||
112 | */ | ||
113 | static inline void sched_state_task_picked(void) | ||
114 | { | ||
115 | VERIFY_SCHED_STATE(WILL_SCHEDULE); | ||
116 | |||
117 | /* WILL_SCHEDULE has only a local tansition => simple store is ok */ | ||
118 | set_sched_state(TASK_PICKED); | ||
119 | } | ||
120 | |||
121 | static inline void sched_state_entered_schedule(void) | ||
122 | { | ||
123 | /* Update state for the case that we entered schedule() not due to | ||
124 | * set_tsk_need_resched() */ | ||
125 | set_sched_state(WILL_SCHEDULE); | ||
126 | } | ||
127 | |||
128 | /* Called by schedule() to check if the scheduling decision is still valid | ||
129 | * after a context switch. Returns 1 if the CPU needs to reschdule. */ | ||
130 | static inline int sched_state_validate_switch(void) | ||
131 | { | ||
132 | int left_state_ok = 0; | ||
133 | |||
134 | VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED); | ||
135 | |||
136 | if (is_in_sched_state(TASK_PICKED)) { | ||
137 | /* Might be good; let's try to transition out of this | ||
138 | * state. This must be done atomically since remote processors | ||
139 | * may try to change the state, too. */ | ||
140 | left_state_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED); | ||
141 | } | ||
142 | |||
143 | if (!left_state_ok) { | ||
144 | /* We raced with a higher-priority task arrival => not | ||
145 | * valid. The CPU needs to reschedule. */ | ||
146 | set_sched_state(WILL_SCHEDULE); | ||
147 | return 1; | ||
148 | } else | ||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | /* State transition events. See litmus/preempt.c for details. */ | ||
153 | void sched_state_will_schedule(struct task_struct* tsk); | ||
154 | void sched_state_ipi(void); | ||
155 | /* Cause a CPU (remote or local) to reschedule. */ | ||
156 | void litmus_reschedule(int cpu); | ||
157 | void litmus_reschedule_local(void); | ||
158 | |||
159 | #ifdef CONFIG_DEBUG_KERNEL | ||
160 | void sched_state_plugin_check(void); | ||
161 | #else | ||
162 | #define sched_state_plugin_check() /* no check */ | ||
163 | #endif | ||
164 | |||
165 | #endif | ||
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h new file mode 100644 index 000000000000..ac249292e866 --- /dev/null +++ b/include/litmus/rt_domain.h | |||
@@ -0,0 +1,182 @@ | |||
1 | /* CLEANUP: Add comments and make it less messy. | ||
2 | * | ||
3 | */ | ||
4 | |||
5 | #ifndef __UNC_RT_DOMAIN_H__ | ||
6 | #define __UNC_RT_DOMAIN_H__ | ||
7 | |||
8 | #include <litmus/bheap.h> | ||
9 | |||
10 | #define RELEASE_QUEUE_SLOTS 127 /* prime */ | ||
11 | |||
12 | struct _rt_domain; | ||
13 | |||
14 | typedef int (*check_resched_needed_t)(struct _rt_domain *rt); | ||
15 | typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks); | ||
16 | |||
17 | struct release_queue { | ||
18 | /* each slot maintains a list of release heaps sorted | ||
19 | * by release time */ | ||
20 | struct list_head slot[RELEASE_QUEUE_SLOTS]; | ||
21 | }; | ||
22 | |||
23 | typedef struct _rt_domain { | ||
24 | /* runnable rt tasks are in here */ | ||
25 | raw_spinlock_t ready_lock; | ||
26 | struct bheap ready_queue; | ||
27 | |||
28 | /* real-time tasks waiting for release are in here */ | ||
29 | raw_spinlock_t release_lock; | ||
30 | struct release_queue release_queue; | ||
31 | |||
32 | #ifdef CONFIG_RELEASE_MASTER | ||
33 | int release_master; | ||
34 | #endif | ||
35 | |||
36 | /* for moving tasks to the release queue */ | ||
37 | raw_spinlock_t tobe_lock; | ||
38 | struct list_head tobe_released; | ||
39 | |||
40 | /* how do we check if we need to kick another CPU? */ | ||
41 | check_resched_needed_t check_resched; | ||
42 | |||
43 | /* how do we release jobs? */ | ||
44 | release_jobs_t release_jobs; | ||
45 | |||
46 | /* how are tasks ordered in the ready queue? */ | ||
47 | bheap_prio_t order; | ||
48 | } rt_domain_t; | ||
49 | |||
50 | struct release_heap { | ||
51 | /* list_head for per-time-slot list */ | ||
52 | struct list_head list; | ||
53 | lt_t release_time; | ||
54 | /* all tasks to be released at release_time */ | ||
55 | struct bheap heap; | ||
56 | /* used to trigger the release */ | ||
57 | struct hrtimer timer; | ||
58 | |||
59 | #ifdef CONFIG_RELEASE_MASTER | ||
60 | /* used to delegate releases */ | ||
61 | struct hrtimer_start_on_info info; | ||
62 | #endif | ||
63 | /* required for the timer callback */ | ||
64 | rt_domain_t* dom; | ||
65 | }; | ||
66 | |||
67 | |||
68 | static inline struct task_struct* __next_ready(rt_domain_t* rt) | ||
69 | { | ||
70 | struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue); | ||
71 | if (hn) | ||
72 | return bheap2task(hn); | ||
73 | else | ||
74 | return NULL; | ||
75 | } | ||
76 | |||
77 | void rt_domain_init(rt_domain_t *rt, bheap_prio_t order, | ||
78 | check_resched_needed_t check, | ||
79 | release_jobs_t relase); | ||
80 | |||
81 | void __add_ready(rt_domain_t* rt, struct task_struct *new); | ||
82 | void __merge_ready(rt_domain_t* rt, struct bheap *tasks); | ||
83 | void __add_release(rt_domain_t* rt, struct task_struct *task); | ||
84 | |||
85 | static inline struct task_struct* __take_ready(rt_domain_t* rt) | ||
86 | { | ||
87 | struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue); | ||
88 | if (hn) | ||
89 | return bheap2task(hn); | ||
90 | else | ||
91 | return NULL; | ||
92 | } | ||
93 | |||
94 | static inline struct task_struct* __peek_ready(rt_domain_t* rt) | ||
95 | { | ||
96 | struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue); | ||
97 | if (hn) | ||
98 | return bheap2task(hn); | ||
99 | else | ||
100 | return NULL; | ||
101 | } | ||
102 | |||
103 | static inline int is_queued(struct task_struct *t) | ||
104 | { | ||
105 | BUG_ON(!tsk_rt(t)->heap_node); | ||
106 | return bheap_node_in_heap(tsk_rt(t)->heap_node); | ||
107 | } | ||
108 | |||
109 | static inline void remove(rt_domain_t* rt, struct task_struct *t) | ||
110 | { | ||
111 | bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node); | ||
112 | } | ||
113 | |||
114 | static inline void add_ready(rt_domain_t* rt, struct task_struct *new) | ||
115 | { | ||
116 | unsigned long flags; | ||
117 | /* first we need the write lock for rt_ready_queue */ | ||
118 | raw_spin_lock_irqsave(&rt->ready_lock, flags); | ||
119 | __add_ready(rt, new); | ||
120 | raw_spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
121 | } | ||
122 | |||
123 | static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks) | ||
124 | { | ||
125 | unsigned long flags; | ||
126 | raw_spin_lock_irqsave(&rt->ready_lock, flags); | ||
127 | __merge_ready(rt, tasks); | ||
128 | raw_spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
129 | } | ||
130 | |||
131 | static inline struct task_struct* take_ready(rt_domain_t* rt) | ||
132 | { | ||
133 | unsigned long flags; | ||
134 | struct task_struct* ret; | ||
135 | /* first we need the write lock for rt_ready_queue */ | ||
136 | raw_spin_lock_irqsave(&rt->ready_lock, flags); | ||
137 | ret = __take_ready(rt); | ||
138 | raw_spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
139 | return ret; | ||
140 | } | ||
141 | |||
142 | |||
143 | static inline void add_release(rt_domain_t* rt, struct task_struct *task) | ||
144 | { | ||
145 | unsigned long flags; | ||
146 | raw_spin_lock_irqsave(&rt->tobe_lock, flags); | ||
147 | __add_release(rt, task); | ||
148 | raw_spin_unlock_irqrestore(&rt->tobe_lock, flags); | ||
149 | } | ||
150 | |||
151 | #ifdef CONFIG_RELEASE_MASTER | ||
152 | void __add_release_on(rt_domain_t* rt, struct task_struct *task, | ||
153 | int target_cpu); | ||
154 | |||
155 | static inline void add_release_on(rt_domain_t* rt, | ||
156 | struct task_struct *task, | ||
157 | int target_cpu) | ||
158 | { | ||
159 | unsigned long flags; | ||
160 | raw_spin_lock_irqsave(&rt->tobe_lock, flags); | ||
161 | __add_release_on(rt, task, target_cpu); | ||
162 | raw_spin_unlock_irqrestore(&rt->tobe_lock, flags); | ||
163 | } | ||
164 | #endif | ||
165 | |||
166 | static inline int __jobs_pending(rt_domain_t* rt) | ||
167 | { | ||
168 | return !bheap_empty(&rt->ready_queue); | ||
169 | } | ||
170 | |||
171 | static inline int jobs_pending(rt_domain_t* rt) | ||
172 | { | ||
173 | unsigned long flags; | ||
174 | int ret; | ||
175 | /* first we need the write lock for rt_ready_queue */ | ||
176 | raw_spin_lock_irqsave(&rt->ready_lock, flags); | ||
177 | ret = !bheap_empty(&rt->ready_queue); | ||
178 | raw_spin_unlock_irqrestore(&rt->ready_lock, flags); | ||
179 | return ret; | ||
180 | } | ||
181 | |||
182 | #endif | ||
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h new file mode 100644 index 000000000000..a23ce1524051 --- /dev/null +++ b/include/litmus/rt_param.h | |||
@@ -0,0 +1,228 @@ | |||
1 | /* | ||
2 | * Definition of the scheduler plugin interface. | ||
3 | * | ||
4 | */ | ||
5 | #ifndef _LINUX_RT_PARAM_H_ | ||
6 | #define _LINUX_RT_PARAM_H_ | ||
7 | |||
8 | /* Litmus time type. */ | ||
9 | typedef unsigned long long lt_t; | ||
10 | |||
11 | static inline int lt_after(lt_t a, lt_t b) | ||
12 | { | ||
13 | return ((long long) b) - ((long long) a) < 0; | ||
14 | } | ||
15 | #define lt_before(a, b) lt_after(b, a) | ||
16 | |||
17 | static inline int lt_after_eq(lt_t a, lt_t b) | ||
18 | { | ||
19 | return ((long long) a) - ((long long) b) >= 0; | ||
20 | } | ||
21 | #define lt_before_eq(a, b) lt_after_eq(b, a) | ||
22 | |||
23 | /* different types of clients */ | ||
24 | typedef enum { | ||
25 | RT_CLASS_HARD, | ||
26 | RT_CLASS_SOFT, | ||
27 | RT_CLASS_BEST_EFFORT | ||
28 | } task_class_t; | ||
29 | |||
30 | typedef enum { | ||
31 | NO_ENFORCEMENT, /* job may overrun unhindered */ | ||
32 | QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */ | ||
33 | PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */ | ||
34 | } budget_policy_t; | ||
35 | |||
36 | #define LITMUS_MAX_PRIORITY 512 | ||
37 | |||
38 | struct rt_task { | ||
39 | lt_t exec_cost; | ||
40 | lt_t period; | ||
41 | lt_t phase; | ||
42 | unsigned int cpu; | ||
43 | unsigned int priority; | ||
44 | task_class_t cls; | ||
45 | budget_policy_t budget_policy; /* ignored by pfair */ | ||
46 | }; | ||
47 | |||
48 | union np_flag { | ||
49 | uint32_t raw; | ||
50 | struct { | ||
51 | /* Is the task currently in a non-preemptive section? */ | ||
52 | uint32_t flag:31; | ||
53 | /* Should the task call into the scheduler? */ | ||
54 | uint32_t preempt:1; | ||
55 | } np; | ||
56 | }; | ||
57 | |||
58 | /* The definition of the data that is shared between the kernel and real-time | ||
59 | * tasks via a shared page (see litmus/ctrldev.c). | ||
60 | * | ||
61 | * WARNING: User space can write to this, so don't trust | ||
62 | * the correctness of the fields! | ||
63 | * | ||
64 | * This servees two purposes: to enable efficient signaling | ||
65 | * of non-preemptive sections (user->kernel) and | ||
66 | * delayed preemptions (kernel->user), and to export | ||
67 | * some real-time relevant statistics such as preemption and | ||
68 | * migration data to user space. We can't use a device to export | ||
69 | * statistics because we want to avoid system call overhead when | ||
70 | * determining preemption/migration overheads). | ||
71 | */ | ||
72 | struct control_page { | ||
73 | volatile union np_flag sched; | ||
74 | |||
75 | /* locking overhead tracing: time stamp prior to system call */ | ||
76 | uint64_t ts_syscall_start; /* Feather-Trace cycles */ | ||
77 | |||
78 | /* to be extended */ | ||
79 | }; | ||
80 | |||
81 | /* don't export internal data structures to user space (liblitmus) */ | ||
82 | #ifdef __KERNEL__ | ||
83 | |||
84 | struct _rt_domain; | ||
85 | struct bheap_node; | ||
86 | struct release_heap; | ||
87 | |||
88 | struct rt_job { | ||
89 | /* Time instant the the job was or will be released. */ | ||
90 | lt_t release; | ||
91 | /* What is the current deadline? */ | ||
92 | lt_t deadline; | ||
93 | |||
94 | /* How much service has this job received so far? */ | ||
95 | lt_t exec_time; | ||
96 | |||
97 | /* Which job is this. This is used to let user space | ||
98 | * specify which job to wait for, which is important if jobs | ||
99 | * overrun. If we just call sys_sleep_next_period() then we | ||
100 | * will unintentionally miss jobs after an overrun. | ||
101 | * | ||
102 | * Increase this sequence number when a job is released. | ||
103 | */ | ||
104 | unsigned int job_no; | ||
105 | }; | ||
106 | |||
107 | struct pfair_param; | ||
108 | |||
109 | /* RT task parameters for scheduling extensions | ||
110 | * These parameters are inherited during clone and therefore must | ||
111 | * be explicitly set up before the task set is launched. | ||
112 | */ | ||
113 | struct rt_param { | ||
114 | /* is the task sleeping? */ | ||
115 | unsigned int flags:8; | ||
116 | |||
117 | /* do we need to check for srp blocking? */ | ||
118 | unsigned int srp_non_recurse:1; | ||
119 | |||
120 | /* is the task present? (true if it can be scheduled) */ | ||
121 | unsigned int present:1; | ||
122 | |||
123 | #ifdef CONFIG_LITMUS_LOCKING | ||
124 | /* Is the task being priority-boosted by a locking protocol? */ | ||
125 | unsigned int priority_boosted:1; | ||
126 | /* If so, when did this start? */ | ||
127 | lt_t boost_start_time; | ||
128 | #endif | ||
129 | |||
130 | /* user controlled parameters */ | ||
131 | struct rt_task task_params; | ||
132 | |||
133 | /* timing parameters */ | ||
134 | struct rt_job job_params; | ||
135 | |||
136 | /* task representing the current "inherited" task | ||
137 | * priority, assigned by inherit_priority and | ||
138 | * return priority in the scheduler plugins. | ||
139 | * could point to self if PI does not result in | ||
140 | * an increased task priority. | ||
141 | */ | ||
142 | struct task_struct* inh_task; | ||
143 | |||
144 | #ifdef CONFIG_NP_SECTION | ||
145 | /* For the FMLP under PSN-EDF, it is required to make the task | ||
146 | * non-preemptive from kernel space. In order not to interfere with | ||
147 | * user space, this counter indicates the kernel space np setting. | ||
148 | * kernel_np > 0 => task is non-preemptive | ||
149 | */ | ||
150 | unsigned int kernel_np; | ||
151 | #endif | ||
152 | |||
153 | /* This field can be used by plugins to store where the task | ||
154 | * is currently scheduled. It is the responsibility of the | ||
155 | * plugin to avoid race conditions. | ||
156 | * | ||
157 | * This used by GSN-EDF and PFAIR. | ||
158 | */ | ||
159 | volatile int scheduled_on; | ||
160 | |||
161 | /* Is the stack of the task currently in use? This is updated by | ||
162 | * the LITMUS core. | ||
163 | * | ||
164 | * Be careful to avoid deadlocks! | ||
165 | */ | ||
166 | volatile int stack_in_use; | ||
167 | |||
168 | /* This field can be used by plugins to store where the task | ||
169 | * is currently linked. It is the responsibility of the plugin | ||
170 | * to avoid race conditions. | ||
171 | * | ||
172 | * Used by GSN-EDF. | ||
173 | */ | ||
174 | volatile int linked_on; | ||
175 | |||
176 | /* PFAIR/PD^2 state. Allocated on demand. */ | ||
177 | struct pfair_param* pfair; | ||
178 | |||
179 | /* Fields saved before BE->RT transition. | ||
180 | */ | ||
181 | int old_policy; | ||
182 | int old_prio; | ||
183 | |||
184 | /* ready queue for this task */ | ||
185 | struct _rt_domain* domain; | ||
186 | |||
187 | /* heap element for this task | ||
188 | * | ||
189 | * Warning: Don't statically allocate this node. The heap | ||
190 | * implementation swaps these between tasks, thus after | ||
191 | * dequeuing from a heap you may end up with a different node | ||
192 | * then the one you had when enqueuing the task. For the same | ||
193 | * reason, don't obtain and store references to this node | ||
194 | * other than this pointer (which is updated by the heap | ||
195 | * implementation). | ||
196 | */ | ||
197 | struct bheap_node* heap_node; | ||
198 | struct release_heap* rel_heap; | ||
199 | |||
200 | #ifdef CONFIG_LITMUS_LOCKING | ||
201 | /* task in heap of pending jobs -- used by C-EDF for priority donation */ | ||
202 | struct bheap_node* pending_node; | ||
203 | /* is the job in a critical section or a wait queue?*/ | ||
204 | unsigned int request_incomplete; | ||
205 | /* is the job currently a donor? */ | ||
206 | unsigned int is_donor; | ||
207 | /* is this job suspended, waiting to become eligible? */ | ||
208 | unsigned int waiting_eligible; | ||
209 | |||
210 | int pending_on; | ||
211 | #endif | ||
212 | |||
213 | /* Used by rt_domain to queue task in release list. | ||
214 | */ | ||
215 | struct list_head list; | ||
216 | |||
217 | /* Pointer to the page shared between userspace and kernel. */ | ||
218 | struct control_page * ctrl_page; | ||
219 | }; | ||
220 | |||
221 | /* Possible RT flags */ | ||
222 | #define RT_F_RUNNING 0x00000000 | ||
223 | #define RT_F_SLEEP 0x00000001 | ||
224 | #define RT_F_EXIT_SEM 0x00000008 | ||
225 | |||
226 | #endif | ||
227 | |||
228 | #endif | ||
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h new file mode 100644 index 000000000000..b5d1ae7bc3b6 --- /dev/null +++ b/include/litmus/sched_plugin.h | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * Definition of the scheduler plugin interface. | ||
3 | * | ||
4 | */ | ||
5 | #ifndef _LINUX_SCHED_PLUGIN_H_ | ||
6 | #define _LINUX_SCHED_PLUGIN_H_ | ||
7 | |||
8 | #include <linux/sched.h> | ||
9 | |||
10 | #ifdef CONFIG_LITMUS_LOCKING | ||
11 | #include <litmus/locking.h> | ||
12 | #endif | ||
13 | |||
14 | /************************ setup/tear down ********************/ | ||
15 | |||
16 | typedef long (*activate_plugin_t) (void); | ||
17 | typedef long (*deactivate_plugin_t) (void); | ||
18 | |||
19 | |||
20 | |||
21 | /********************* scheduler invocation ******************/ | ||
22 | |||
23 | /* Plugin-specific realtime tick handler */ | ||
24 | typedef void (*scheduler_tick_t) (struct task_struct *cur); | ||
25 | /* Novell make sched decision function */ | ||
26 | typedef struct task_struct* (*schedule_t)(struct task_struct * prev); | ||
27 | /* Clean up after the task switch has occured. | ||
28 | * This function is called after every (even non-rt) task switch. | ||
29 | */ | ||
30 | typedef void (*finish_switch_t)(struct task_struct *prev); | ||
31 | |||
32 | |||
33 | /********************* task state changes ********************/ | ||
34 | |||
35 | /* Called to setup a new real-time task. | ||
36 | * Release the first job, enqueue, etc. | ||
37 | * Task may already be running. | ||
38 | */ | ||
39 | typedef void (*task_new_t) (struct task_struct *task, | ||
40 | int on_rq, | ||
41 | int running); | ||
42 | |||
43 | /* Called to re-introduce a task after blocking. | ||
44 | * Can potentially be called multiple times. | ||
45 | */ | ||
46 | typedef void (*task_wake_up_t) (struct task_struct *task); | ||
47 | /* called to notify the plugin of a blocking real-time task | ||
48 | * it will only be called for real-time tasks and before schedule is called */ | ||
49 | typedef void (*task_block_t) (struct task_struct *task); | ||
50 | /* Called when a real-time task exits or changes to a different scheduling | ||
51 | * class. | ||
52 | * Free any allocated resources | ||
53 | */ | ||
54 | typedef void (*task_exit_t) (struct task_struct *); | ||
55 | |||
56 | /* called early before the caller holds the runqueue lock */ | ||
57 | typedef void (*pre_setsched_t) (struct task_struct *, int policy); | ||
58 | |||
59 | |||
60 | /* Called when the current task attempts to create a new lock of a given | ||
61 | * protocol type. */ | ||
62 | typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, | ||
63 | void* __user config); | ||
64 | |||
65 | |||
66 | /********************* sys call backends ********************/ | ||
67 | /* This function causes the caller to sleep until the next release */ | ||
68 | typedef long (*complete_job_t) (void); | ||
69 | |||
70 | typedef long (*admit_task_t)(struct task_struct* tsk); | ||
71 | |||
72 | typedef void (*release_at_t)(struct task_struct *t, lt_t start); | ||
73 | |||
74 | struct sched_plugin { | ||
75 | struct list_head list; | ||
76 | /* basic info */ | ||
77 | char *plugin_name; | ||
78 | |||
79 | /* setup */ | ||
80 | activate_plugin_t activate_plugin; | ||
81 | deactivate_plugin_t deactivate_plugin; | ||
82 | |||
83 | /* scheduler invocation */ | ||
84 | scheduler_tick_t tick; | ||
85 | schedule_t schedule; | ||
86 | finish_switch_t finish_switch; | ||
87 | |||
88 | /* syscall backend */ | ||
89 | complete_job_t complete_job; | ||
90 | release_at_t release_at; | ||
91 | |||
92 | /* task state changes */ | ||
93 | admit_task_t admit_task; | ||
94 | |||
95 | task_new_t task_new; | ||
96 | task_wake_up_t task_wake_up; | ||
97 | task_block_t task_block; | ||
98 | task_exit_t task_exit; | ||
99 | |||
100 | pre_setsched_t pre_setsched; | ||
101 | |||
102 | #ifdef CONFIG_LITMUS_LOCKING | ||
103 | /* locking protocols */ | ||
104 | allocate_lock_t allocate_lock; | ||
105 | #endif | ||
106 | } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); | ||
107 | |||
108 | |||
109 | extern struct sched_plugin *litmus; | ||
110 | |||
111 | int register_sched_plugin(struct sched_plugin* plugin); | ||
112 | struct sched_plugin* find_sched_plugin(const char* name); | ||
113 | int print_sched_plugins(char* buf, int max); | ||
114 | |||
115 | extern struct sched_plugin linux_sched_plugin; | ||
116 | |||
117 | #endif | ||
diff --git a/include/litmus/sched_plugin.h.rej b/include/litmus/sched_plugin.h.rej new file mode 100644 index 000000000000..47e0c27c5061 --- /dev/null +++ b/include/litmus/sched_plugin.h.rej | |||
@@ -0,0 +1,22 @@ | |||
1 | --- include/litmus/sched_plugin.h | ||
2 | +++ include/litmus/sched_plugin.h | ||
3 | @@ -53,6 +53,10 @@ | ||
4 | */ | ||
5 | typedef void (*task_exit_t) (struct task_struct *); | ||
6 | |||
7 | +/* called early before the caller holds the runqueue lock */ | ||
8 | +typedef void (*pre_setsched_t) (struct task_struct *, int policy); | ||
9 | + | ||
10 | + | ||
11 | /* Called when the current task attempts to create a new lock of a given | ||
12 | * protocol type. */ | ||
13 | typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, | ||
14 | @@ -93,6 +97,8 @@ | ||
15 | task_block_t task_block; | ||
16 | task_exit_t task_exit; | ||
17 | |||
18 | + pre_setsched_t pre_setsched; | ||
19 | + | ||
20 | #ifdef CONFIG_LITMUS_LOCKING | ||
21 | /* locking protocols */ | ||
22 | allocate_lock_t allocate_lock; | ||
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h new file mode 100644 index 000000000000..7ca34cb13881 --- /dev/null +++ b/include/litmus/sched_trace.h | |||
@@ -0,0 +1,200 @@ | |||
1 | /* | ||
2 | * sched_trace.h -- record scheduler events to a byte stream for offline analysis. | ||
3 | */ | ||
4 | #ifndef _LINUX_SCHED_TRACE_H_ | ||
5 | #define _LINUX_SCHED_TRACE_H_ | ||
6 | |||
7 | /* all times in nanoseconds */ | ||
8 | |||
9 | struct st_trace_header { | ||
10 | u8 type; /* Of what type is this record? */ | ||
11 | u8 cpu; /* On which CPU was it recorded? */ | ||
12 | u16 pid; /* PID of the task. */ | ||
13 | u32 job; /* The job sequence number. */ | ||
14 | }; | ||
15 | |||
16 | #define ST_NAME_LEN 16 | ||
17 | struct st_name_data { | ||
18 | char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ | ||
19 | }; | ||
20 | |||
21 | struct st_param_data { /* regular params */ | ||
22 | u32 wcet; | ||
23 | u32 period; | ||
24 | u32 phase; | ||
25 | u8 partition; | ||
26 | u8 class; | ||
27 | u8 __unused[2]; | ||
28 | }; | ||
29 | |||
30 | struct st_release_data { /* A job is was/is going to be released. */ | ||
31 | u64 release; /* What's the release time? */ | ||
32 | u64 deadline; /* By when must it finish? */ | ||
33 | }; | ||
34 | |||
35 | struct st_assigned_data { /* A job was asigned to a CPU. */ | ||
36 | u64 when; | ||
37 | u8 target; /* Where should it execute? */ | ||
38 | u8 __unused[7]; | ||
39 | }; | ||
40 | |||
41 | struct st_switch_to_data { /* A process was switched to on a given CPU. */ | ||
42 | u64 when; /* When did this occur? */ | ||
43 | u32 exec_time; /* Time the current job has executed. */ | ||
44 | u8 __unused[4]; | ||
45 | |||
46 | }; | ||
47 | |||
48 | struct st_switch_away_data { /* A process was switched away from on a given CPU. */ | ||
49 | u64 when; | ||
50 | u64 exec_time; | ||
51 | }; | ||
52 | |||
53 | struct st_completion_data { /* A job completed. */ | ||
54 | u64 when; | ||
55 | u8 forced:1; /* Set to 1 if job overran and kernel advanced to the | ||
56 | * next task automatically; set to 0 otherwise. | ||
57 | */ | ||
58 | u8 __uflags:7; | ||
59 | u8 __unused[7]; | ||
60 | }; | ||
61 | |||
62 | struct st_block_data { /* A task blocks. */ | ||
63 | u64 when; | ||
64 | u64 __unused; | ||
65 | }; | ||
66 | |||
67 | struct st_resume_data { /* A task resumes. */ | ||
68 | u64 when; | ||
69 | u64 __unused; | ||
70 | }; | ||
71 | |||
72 | struct st_action_data { | ||
73 | u64 when; | ||
74 | u8 action; | ||
75 | u8 __unused[7]; | ||
76 | }; | ||
77 | |||
78 | struct st_sys_release_data { | ||
79 | u64 when; | ||
80 | u64 release; | ||
81 | }; | ||
82 | |||
83 | #define DATA(x) struct st_ ## x ## _data x; | ||
84 | |||
85 | typedef enum { | ||
86 | ST_NAME = 1, /* Start at one, so that we can spot | ||
87 | * uninitialized records. */ | ||
88 | ST_PARAM, | ||
89 | ST_RELEASE, | ||
90 | ST_ASSIGNED, | ||
91 | ST_SWITCH_TO, | ||
92 | ST_SWITCH_AWAY, | ||
93 | ST_COMPLETION, | ||
94 | ST_BLOCK, | ||
95 | ST_RESUME, | ||
96 | ST_ACTION, | ||
97 | ST_SYS_RELEASE | ||
98 | } st_event_record_type_t; | ||
99 | |||
100 | struct st_event_record { | ||
101 | struct st_trace_header hdr; | ||
102 | union { | ||
103 | u64 raw[2]; | ||
104 | |||
105 | DATA(name); | ||
106 | DATA(param); | ||
107 | DATA(release); | ||
108 | DATA(assigned); | ||
109 | DATA(switch_to); | ||
110 | DATA(switch_away); | ||
111 | DATA(completion); | ||
112 | DATA(block); | ||
113 | DATA(resume); | ||
114 | DATA(action); | ||
115 | DATA(sys_release); | ||
116 | } data; | ||
117 | }; | ||
118 | |||
119 | #undef DATA | ||
120 | |||
121 | #ifdef __KERNEL__ | ||
122 | |||
123 | #include <linux/sched.h> | ||
124 | #include <litmus/feather_trace.h> | ||
125 | |||
126 | #ifdef CONFIG_SCHED_TASK_TRACE | ||
127 | |||
128 | #define SCHED_TRACE(id, callback, task) \ | ||
129 | ft_event1(id, callback, task) | ||
130 | #define SCHED_TRACE2(id, callback, task, xtra) \ | ||
131 | ft_event2(id, callback, task, xtra) | ||
132 | |||
133 | /* provide prototypes; needed on sparc64 */ | ||
134 | #ifndef NO_TASK_TRACE_DECLS | ||
135 | feather_callback void do_sched_trace_task_name(unsigned long id, | ||
136 | struct task_struct* task); | ||
137 | feather_callback void do_sched_trace_task_param(unsigned long id, | ||
138 | struct task_struct* task); | ||
139 | feather_callback void do_sched_trace_task_release(unsigned long id, | ||
140 | struct task_struct* task); | ||
141 | feather_callback void do_sched_trace_task_switch_to(unsigned long id, | ||
142 | struct task_struct* task); | ||
143 | feather_callback void do_sched_trace_task_switch_away(unsigned long id, | ||
144 | struct task_struct* task); | ||
145 | feather_callback void do_sched_trace_task_completion(unsigned long id, | ||
146 | struct task_struct* task, | ||
147 | unsigned long forced); | ||
148 | feather_callback void do_sched_trace_task_block(unsigned long id, | ||
149 | struct task_struct* task); | ||
150 | feather_callback void do_sched_trace_task_resume(unsigned long id, | ||
151 | struct task_struct* task); | ||
152 | feather_callback void do_sched_trace_action(unsigned long id, | ||
153 | struct task_struct* task, | ||
154 | unsigned long action); | ||
155 | feather_callback void do_sched_trace_sys_release(unsigned long id, | ||
156 | lt_t* start); | ||
157 | |||
158 | #endif | ||
159 | |||
160 | #else | ||
161 | |||
162 | #define SCHED_TRACE(id, callback, task) /* no tracing */ | ||
163 | #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ | ||
164 | |||
165 | #endif | ||
166 | |||
167 | |||
168 | #define SCHED_TRACE_BASE_ID 500 | ||
169 | |||
170 | |||
171 | #define sched_trace_task_name(t) \ | ||
172 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t) | ||
173 | #define sched_trace_task_param(t) \ | ||
174 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t) | ||
175 | #define sched_trace_task_release(t) \ | ||
176 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t) | ||
177 | #define sched_trace_task_switch_to(t) \ | ||
178 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t) | ||
179 | #define sched_trace_task_switch_away(t) \ | ||
180 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t) | ||
181 | #define sched_trace_task_completion(t, forced) \ | ||
182 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \ | ||
183 | (unsigned long) forced) | ||
184 | #define sched_trace_task_block(t) \ | ||
185 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, do_sched_trace_task_block, t) | ||
186 | #define sched_trace_task_resume(t) \ | ||
187 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t) | ||
188 | #define sched_trace_action(t, action) \ | ||
189 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9, do_sched_trace_action, t, \ | ||
190 | (unsigned long) action); | ||
191 | /* when is a pointer, it does not need an explicit cast to unsigned long */ | ||
192 | #define sched_trace_sys_release(when) \ | ||
193 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when) | ||
194 | |||
195 | |||
196 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ | ||
197 | |||
198 | #endif /* __KERNEL__ */ | ||
199 | |||
200 | #endif | ||
diff --git a/include/litmus/srp.h b/include/litmus/srp.h new file mode 100644 index 000000000000..c9a4552b2bf3 --- /dev/null +++ b/include/litmus/srp.h | |||
@@ -0,0 +1,28 @@ | |||
1 | #ifndef LITMUS_SRP_H | ||
2 | #define LITMUS_SRP_H | ||
3 | |||
4 | struct srp_semaphore; | ||
5 | |||
6 | struct srp_priority { | ||
7 | struct list_head list; | ||
8 | unsigned int priority; | ||
9 | pid_t pid; | ||
10 | }; | ||
11 | #define list2prio(l) list_entry(l, struct srp_priority, list) | ||
12 | |||
13 | /* struct for uniprocessor SRP "semaphore" */ | ||
14 | struct srp_semaphore { | ||
15 | struct litmus_lock litmus_lock; | ||
16 | struct srp_priority ceiling; | ||
17 | struct task_struct* owner; | ||
18 | int cpu; /* cpu associated with this "semaphore" and resource */ | ||
19 | }; | ||
20 | |||
21 | /* map a task to its SRP preemption level priority */ | ||
22 | typedef unsigned int (*srp_prioritization_t)(struct task_struct* t); | ||
23 | /* Must be updated by each plugin that uses SRP.*/ | ||
24 | extern srp_prioritization_t get_srp_prio; | ||
25 | |||
26 | struct srp_semaphore* allocate_srp_semaphore(void); | ||
27 | |||
28 | #endif | ||
diff --git a/include/litmus/trace.h b/include/litmus/trace.h new file mode 100644 index 000000000000..d6829c416912 --- /dev/null +++ b/include/litmus/trace.h | |||
@@ -0,0 +1,129 @@ | |||
1 | #ifndef _SYS_TRACE_H_ | ||
2 | #define _SYS_TRACE_H_ | ||
3 | |||
4 | #ifdef CONFIG_SCHED_OVERHEAD_TRACE | ||
5 | |||
6 | #include <litmus/feather_trace.h> | ||
7 | #include <litmus/feather_buffer.h> | ||
8 | |||
9 | |||
10 | /*********************** TIMESTAMPS ************************/ | ||
11 | |||
12 | enum task_type_marker { | ||
13 | TSK_BE, | ||
14 | TSK_RT, | ||
15 | TSK_UNKNOWN | ||
16 | }; | ||
17 | |||
18 | struct timestamp { | ||
19 | uint64_t timestamp; | ||
20 | uint32_t seq_no; | ||
21 | uint8_t cpu; | ||
22 | uint8_t event; | ||
23 | uint8_t task_type; | ||
24 | }; | ||
25 | |||
26 | /* tracing callbacks */ | ||
27 | feather_callback void save_timestamp(unsigned long event); | ||
28 | feather_callback void save_timestamp_def(unsigned long event, unsigned long type); | ||
29 | feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr); | ||
30 | feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu); | ||
31 | feather_callback void save_task_latency(unsigned long event, unsigned long when_ptr); | ||
32 | feather_callback void save_timestamp_time(unsigned long event, unsigned long time_ptr); | ||
33 | |||
34 | #define TIMESTAMP(id) ft_event0(id, save_timestamp) | ||
35 | |||
36 | #define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def) | ||
37 | |||
38 | #define TTIMESTAMP(id, task) \ | ||
39 | ft_event1(id, save_timestamp_task, (unsigned long) task) | ||
40 | |||
41 | #define CTIMESTAMP(id, cpu) \ | ||
42 | ft_event1(id, save_timestamp_cpu, (unsigned long) cpu) | ||
43 | |||
44 | #define LTIMESTAMP(id, task) \ | ||
45 | ft_event1(id, save_task_latency, (unsigned long) task) | ||
46 | |||
47 | #define TIMESTAMP_TIME(id, time_ptr) \ | ||
48 | ft_event1(id, save_timestamp_time, (unsigned long) time_ptr) | ||
49 | |||
50 | #define TIMESTAMP_PID(id) ft_event0(id, save_timestamp_pid) | ||
51 | |||
52 | #else /* !CONFIG_SCHED_OVERHEAD_TRACE */ | ||
53 | |||
54 | #define TIMESTAMP(id) /* no tracing */ | ||
55 | |||
56 | #define DTIMESTAMP(id, def) /* no tracing */ | ||
57 | |||
58 | #define TTIMESTAMP(id, task) /* no tracing */ | ||
59 | |||
60 | #define CTIMESTAMP(id, cpu) /* no tracing */ | ||
61 | |||
62 | #define LTIMESTAMP(id, when_ptr) /* no tracing */ | ||
63 | |||
64 | #define TIMESTAMP_TIME(id, time_ptr) /* no tracing */ | ||
65 | |||
66 | #define TIMESTAMP_PID(id) /* no tracing */ | ||
67 | |||
68 | #endif | ||
69 | |||
70 | |||
71 | /* Convention for timestamps | ||
72 | * ========================= | ||
73 | * | ||
74 | * In order to process the trace files with a common tool, we use the following | ||
75 | * convention to measure execution times: The end time id of a code segment is | ||
76 | * always the next number after the start time event id. | ||
77 | */ | ||
78 | |||
79 | #define __TS_SYSCALL_IN_START(p) TIMESTAMP_TIME(10, p) | ||
80 | #define TS_SYSCALL_IN_END TIMESTAMP_PID(11) | ||
81 | |||
82 | #define TS_SYSCALL_OUT_START TIMESTAMP_PID(20) | ||
83 | #define TS_SYSCALL_OUT_END TIMESTAMP_PID(21) | ||
84 | |||
85 | #define TS_LOCK_START TIMESTAMP_PID(30) | ||
86 | #define TS_LOCK_END TIMESTAMP_PID(31) | ||
87 | |||
88 | #define TS_LOCK_SUSPEND TIMESTAMP_PID(38) | ||
89 | #define TS_LOCK_RESUME TIMESTAMP_PID(39) | ||
90 | |||
91 | #define TS_UNLOCK_START TIMESTAMP_PID(40) | ||
92 | #define TS_UNLOCK_END TIMESTAMP_PID(41) | ||
93 | |||
94 | #define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only | ||
95 | * care | ||
96 | * about | ||
97 | * next */ | ||
98 | #define TS_SCHED_END(t) TTIMESTAMP(101, t) | ||
99 | #define TS_SCHED2_START(t) TTIMESTAMP(102, t) | ||
100 | #define TS_SCHED2_END(t) TTIMESTAMP(103, t) | ||
101 | |||
102 | #define TS_CXS_START(t) TTIMESTAMP(104, t) | ||
103 | #define TS_CXS_END(t) TTIMESTAMP(105, t) | ||
104 | |||
105 | #define TS_RELEASE_START DTIMESTAMP(106, TSK_RT) | ||
106 | #define TS_RELEASE_END DTIMESTAMP(107, TSK_RT) | ||
107 | |||
108 | #define TS_TICK_START(t) TTIMESTAMP(110, t) | ||
109 | #define TS_TICK_END(t) TTIMESTAMP(111, t) | ||
110 | |||
111 | |||
112 | #define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ | ||
113 | #define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ | ||
114 | |||
115 | #define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */ | ||
116 | #define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */ | ||
117 | |||
118 | #define TS_ENTER_NP_START TIMESTAMP(140) | ||
119 | #define TS_ENTER_NP_END TIMESTAMP(141) | ||
120 | |||
121 | #define TS_EXIT_NP_START TIMESTAMP(150) | ||
122 | #define TS_EXIT_NP_END TIMESTAMP(151) | ||
123 | |||
124 | #define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) | ||
125 | #define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) | ||
126 | |||
127 | #define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when)) | ||
128 | |||
129 | #endif /* !_SYS_TRACE_H_ */ | ||
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h new file mode 100644 index 000000000000..94264c27d9ac --- /dev/null +++ b/include/litmus/unistd_32.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * included from arch/x86/include/asm/unistd_32.h | ||
3 | * | ||
4 | * LITMUS^RT syscalls with "relative" numbers | ||
5 | */ | ||
6 | #define __LSC(x) (__NR_LITMUS + x) | ||
7 | |||
8 | #define __NR_set_rt_task_param __LSC(0) | ||
9 | #define __NR_get_rt_task_param __LSC(1) | ||
10 | #define __NR_complete_job __LSC(2) | ||
11 | #define __NR_od_open __LSC(3) | ||
12 | #define __NR_od_close __LSC(4) | ||
13 | #define __NR_litmus_lock __LSC(5) | ||
14 | #define __NR_litmus_unlock __LSC(6) | ||
15 | #define __NR_query_job_no __LSC(7) | ||
16 | #define __NR_wait_for_job_release __LSC(8) | ||
17 | #define __NR_wait_for_ts_release __LSC(9) | ||
18 | #define __NR_release_ts __LSC(10) | ||
19 | #define __NR_null_call __LSC(11) | ||
20 | |||
21 | #define NR_litmus_syscalls 12 | ||
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h new file mode 100644 index 000000000000..d5ced0d2642c --- /dev/null +++ b/include/litmus/unistd_64.h | |||
@@ -0,0 +1,33 @@ | |||
1 | /* | ||
2 | * included from arch/x86/include/asm/unistd_64.h | ||
3 | * | ||
4 | * LITMUS^RT syscalls with "relative" numbers | ||
5 | */ | ||
6 | #define __LSC(x) (__NR_LITMUS + x) | ||
7 | |||
8 | #define __NR_set_rt_task_param __LSC(0) | ||
9 | __SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param) | ||
10 | #define __NR_get_rt_task_param __LSC(1) | ||
11 | __SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param) | ||
12 | #define __NR_complete_job __LSC(2) | ||
13 | __SYSCALL(__NR_complete_job, sys_complete_job) | ||
14 | #define __NR_od_open __LSC(3) | ||
15 | __SYSCALL(__NR_od_open, sys_od_open) | ||
16 | #define __NR_od_close __LSC(4) | ||
17 | __SYSCALL(__NR_od_close, sys_od_close) | ||
18 | #define __NR_litmus_lock __LSC(5) | ||
19 | __SYSCALL(__NR_litmus_lock, sys_litmus_lock) | ||
20 | #define __NR_litmus_unlock __LSC(6) | ||
21 | __SYSCALL(__NR_litmus_unlock, sys_litmus_unlock) | ||
22 | #define __NR_query_job_no __LSC(7) | ||
23 | __SYSCALL(__NR_query_job_no, sys_query_job_no) | ||
24 | #define __NR_wait_for_job_release __LSC(8) | ||
25 | __SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release) | ||
26 | #define __NR_wait_for_ts_release __LSC(9) | ||
27 | __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) | ||
28 | #define __NR_release_ts __LSC(10) | ||
29 | __SYSCALL(__NR_release_ts, sys_release_ts) | ||
30 | #define __NR_null_call __LSC(11) | ||
31 | __SYSCALL(__NR_null_call, sys_null_call) | ||
32 | |||
33 | #define NR_litmus_syscalls 12 | ||
diff --git a/include/litmus/wait.h b/include/litmus/wait.h new file mode 100644 index 000000000000..ce1347c355f8 --- /dev/null +++ b/include/litmus/wait.h | |||
@@ -0,0 +1,57 @@ | |||
1 | #ifndef _LITMUS_WAIT_H_ | ||
2 | #define _LITMUS_WAIT_H_ | ||
3 | |||
4 | struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); | ||
5 | |||
6 | /* wrap regular wait_queue_t head */ | ||
7 | struct __prio_wait_queue { | ||
8 | wait_queue_t wq; | ||
9 | |||
10 | /* some priority point */ | ||
11 | lt_t priority; | ||
12 | /* break ties in priority by lower tie_breaker */ | ||
13 | unsigned int tie_breaker; | ||
14 | }; | ||
15 | |||
16 | typedef struct __prio_wait_queue prio_wait_queue_t; | ||
17 | |||
18 | static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq, | ||
19 | struct task_struct* t, | ||
20 | lt_t priority) | ||
21 | { | ||
22 | init_waitqueue_entry(&pwq->wq, t); | ||
23 | pwq->priority = priority; | ||
24 | pwq->tie_breaker = 0; | ||
25 | } | ||
26 | |||
27 | static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq, | ||
28 | struct task_struct* t, | ||
29 | lt_t priority, | ||
30 | unsigned int tie_breaker) | ||
31 | { | ||
32 | init_waitqueue_entry(&pwq->wq, t); | ||
33 | pwq->priority = priority; | ||
34 | pwq->tie_breaker = tie_breaker; | ||
35 | } | ||
36 | |||
37 | unsigned int __add_wait_queue_prio_exclusive( | ||
38 | wait_queue_head_t* head, | ||
39 | prio_wait_queue_t *new); | ||
40 | |||
41 | static inline unsigned int add_wait_queue_prio_exclusive( | ||
42 | wait_queue_head_t* head, | ||
43 | prio_wait_queue_t *new) | ||
44 | { | ||
45 | unsigned long flags; | ||
46 | unsigned int passed; | ||
47 | |||
48 | spin_lock_irqsave(&head->lock, flags); | ||
49 | passed = __add_wait_queue_prio_exclusive(head, new); | ||
50 | |||
51 | spin_unlock_irqrestore(&head->lock, flags); | ||
52 | |||
53 | return passed; | ||
54 | } | ||
55 | |||
56 | |||
57 | #endif | ||
diff --git a/kernel/exit.c b/kernel/exit.c index 03120229db28..b9d3bc6c21ec 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -56,6 +56,8 @@ | |||
56 | #include <asm/pgtable.h> | 56 | #include <asm/pgtable.h> |
57 | #include <asm/mmu_context.h> | 57 | #include <asm/mmu_context.h> |
58 | 58 | ||
59 | extern void exit_od_table(struct task_struct *t); | ||
60 | |||
59 | static void exit_mm(struct task_struct * tsk); | 61 | static void exit_mm(struct task_struct * tsk); |
60 | 62 | ||
61 | static void __unhash_process(struct task_struct *p, bool group_dead) | 63 | static void __unhash_process(struct task_struct *p, bool group_dead) |
@@ -960,6 +962,8 @@ NORET_TYPE void do_exit(long code) | |||
960 | if (unlikely(tsk->audit_context)) | 962 | if (unlikely(tsk->audit_context)) |
961 | audit_free(tsk); | 963 | audit_free(tsk); |
962 | 964 | ||
965 | exit_od_table(tsk); | ||
966 | |||
963 | tsk->exit_code = code; | 967 | tsk->exit_code = code; |
964 | taskstats_exit(tsk, group_dead); | 968 | taskstats_exit(tsk, group_dead); |
965 | 969 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index c445f8cc408d..ab7f29d906c7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -75,6 +75,9 @@ | |||
75 | 75 | ||
76 | #include <trace/events/sched.h> | 76 | #include <trace/events/sched.h> |
77 | 77 | ||
78 | #include <litmus/litmus.h> | ||
79 | #include <litmus/sched_plugin.h> | ||
80 | |||
78 | /* | 81 | /* |
79 | * Protected counters by write_lock_irq(&tasklist_lock) | 82 | * Protected counters by write_lock_irq(&tasklist_lock) |
80 | */ | 83 | */ |
@@ -183,6 +186,7 @@ void __put_task_struct(struct task_struct *tsk) | |||
183 | WARN_ON(atomic_read(&tsk->usage)); | 186 | WARN_ON(atomic_read(&tsk->usage)); |
184 | WARN_ON(tsk == current); | 187 | WARN_ON(tsk == current); |
185 | 188 | ||
189 | exit_litmus(tsk); | ||
186 | exit_creds(tsk); | 190 | exit_creds(tsk); |
187 | delayacct_tsk_free(tsk); | 191 | delayacct_tsk_free(tsk); |
188 | put_signal_struct(tsk->signal); | 192 | put_signal_struct(tsk->signal); |
@@ -266,6 +270,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
266 | 270 | ||
267 | tsk->stack = ti; | 271 | tsk->stack = ti; |
268 | 272 | ||
273 | /* Don't let the new task be a real-time task. */ | ||
274 | litmus_fork(tsk); | ||
275 | |||
269 | err = prop_local_init_single(&tsk->dirties); | 276 | err = prop_local_init_single(&tsk->dirties); |
270 | if (err) | 277 | if (err) |
271 | goto out; | 278 | goto out; |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 72206cf5c6cf..cb49883b64e5 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -46,6 +46,8 @@ | |||
46 | #include <linux/sched.h> | 46 | #include <linux/sched.h> |
47 | #include <linux/timer.h> | 47 | #include <linux/timer.h> |
48 | 48 | ||
49 | #include <litmus/litmus.h> | ||
50 | |||
49 | #include <asm/uaccess.h> | 51 | #include <asm/uaccess.h> |
50 | 52 | ||
51 | #include <trace/events/timer.h> | 53 | #include <trace/events/timer.h> |
@@ -1042,6 +1044,98 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
1042 | } | 1044 | } |
1043 | EXPORT_SYMBOL_GPL(hrtimer_start); | 1045 | EXPORT_SYMBOL_GPL(hrtimer_start); |
1044 | 1046 | ||
1047 | #ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS | ||
1048 | |||
1049 | /** | ||
1050 | * hrtimer_start_on_info_init - Initialize hrtimer_start_on_info | ||
1051 | */ | ||
1052 | void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info) | ||
1053 | { | ||
1054 | memset(info, 0, sizeof(struct hrtimer_start_on_info)); | ||
1055 | atomic_set(&info->state, HRTIMER_START_ON_INACTIVE); | ||
1056 | } | ||
1057 | |||
1058 | /** | ||
1059 | * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu | ||
1060 | */ | ||
1061 | void hrtimer_pull(void) | ||
1062 | { | ||
1063 | struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); | ||
1064 | struct hrtimer_start_on_info *info; | ||
1065 | struct list_head *pos, *safe, list; | ||
1066 | |||
1067 | raw_spin_lock(&base->lock); | ||
1068 | list_replace_init(&base->to_pull, &list); | ||
1069 | raw_spin_unlock(&base->lock); | ||
1070 | |||
1071 | list_for_each_safe(pos, safe, &list) { | ||
1072 | info = list_entry(pos, struct hrtimer_start_on_info, list); | ||
1073 | TRACE("pulled timer 0x%x\n", info->timer); | ||
1074 | list_del(pos); | ||
1075 | hrtimer_start(info->timer, info->time, info->mode); | ||
1076 | } | ||
1077 | } | ||
1078 | |||
1079 | /** | ||
1080 | * hrtimer_start_on - trigger timer arming on remote cpu | ||
1081 | * @cpu: remote cpu | ||
1082 | * @info: save timer information for enqueuing on remote cpu | ||
1083 | * @timer: timer to be pulled | ||
1084 | * @time: expire time | ||
1085 | * @mode: timer mode | ||
1086 | */ | ||
1087 | int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info, | ||
1088 | struct hrtimer *timer, ktime_t time, | ||
1089 | const enum hrtimer_mode mode) | ||
1090 | { | ||
1091 | unsigned long flags; | ||
1092 | struct hrtimer_cpu_base* base; | ||
1093 | int in_use = 0, was_empty; | ||
1094 | |||
1095 | /* serialize access to info through the timer base */ | ||
1096 | lock_hrtimer_base(timer, &flags); | ||
1097 | |||
1098 | in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE); | ||
1099 | if (!in_use) { | ||
1100 | INIT_LIST_HEAD(&info->list); | ||
1101 | info->timer = timer; | ||
1102 | info->time = time; | ||
1103 | info->mode = mode; | ||
1104 | /* mark as in use */ | ||
1105 | atomic_set(&info->state, HRTIMER_START_ON_QUEUED); | ||
1106 | } | ||
1107 | |||
1108 | unlock_hrtimer_base(timer, &flags); | ||
1109 | |||
1110 | if (!in_use) { | ||
1111 | /* initiate pull */ | ||
1112 | preempt_disable(); | ||
1113 | if (cpu == smp_processor_id()) { | ||
1114 | /* start timer locally; we may get called | ||
1115 | * with rq->lock held, do not wake up anything | ||
1116 | */ | ||
1117 | TRACE("hrtimer_start_on: starting on local CPU\n"); | ||
1118 | __hrtimer_start_range_ns(info->timer, info->time, | ||
1119 | 0, info->mode, 0); | ||
1120 | } else { | ||
1121 | TRACE("hrtimer_start_on: pulling to remote CPU\n"); | ||
1122 | base = &per_cpu(hrtimer_bases, cpu); | ||
1123 | raw_spin_lock_irqsave(&base->lock, flags); | ||
1124 | was_empty = list_empty(&base->to_pull); | ||
1125 | list_add(&info->list, &base->to_pull); | ||
1126 | raw_spin_unlock_irqrestore(&base->lock, flags); | ||
1127 | if (was_empty) | ||
1128 | /* only send IPI if other no else | ||
1129 | * has done so already | ||
1130 | */ | ||
1131 | smp_send_pull_timers(cpu); | ||
1132 | } | ||
1133 | preempt_enable(); | ||
1134 | } | ||
1135 | return in_use; | ||
1136 | } | ||
1137 | |||
1138 | #endif | ||
1045 | 1139 | ||
1046 | /** | 1140 | /** |
1047 | * hrtimer_try_to_cancel - try to deactivate a timer | 1141 | * hrtimer_try_to_cancel - try to deactivate a timer |
@@ -1634,6 +1728,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) | |||
1634 | cpu_base->clock_base[i].cpu_base = cpu_base; | 1728 | cpu_base->clock_base[i].cpu_base = cpu_base; |
1635 | 1729 | ||
1636 | hrtimer_init_hres(cpu_base); | 1730 | hrtimer_init_hres(cpu_base); |
1731 | INIT_LIST_HEAD(&cpu_base->to_pull); | ||
1637 | } | 1732 | } |
1638 | 1733 | ||
1639 | #ifdef CONFIG_HOTPLUG_CPU | 1734 | #ifdef CONFIG_HOTPLUG_CPU |
diff --git a/kernel/printk.c b/kernel/printk.c index 8fe465ac008a..9dc8ea140426 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -74,6 +74,13 @@ int console_printk[4] = { | |||
74 | }; | 74 | }; |
75 | 75 | ||
76 | /* | 76 | /* |
77 | * divert printk() messages when there is a LITMUS^RT debug listener | ||
78 | */ | ||
79 | #include <litmus/litmus.h> | ||
80 | int trace_override = 0; | ||
81 | int trace_recurse = 0; | ||
82 | |||
83 | /* | ||
77 | * Low level drivers may need that to know if they can schedule in | 84 | * Low level drivers may need that to know if they can schedule in |
78 | * their unblank() callback or not. So let's export it. | 85 | * their unblank() callback or not. So let's export it. |
79 | */ | 86 | */ |
@@ -735,6 +742,9 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
735 | /* Emit the output into the temporary buffer */ | 742 | /* Emit the output into the temporary buffer */ |
736 | printed_len += vscnprintf(printk_buf + printed_len, | 743 | printed_len += vscnprintf(printk_buf + printed_len, |
737 | sizeof(printk_buf) - printed_len, fmt, args); | 744 | sizeof(printk_buf) - printed_len, fmt, args); |
745 | /* if LITMUS^RT tracer is active divert printk() msgs */ | ||
746 | if (trace_override && !trace_recurse) | ||
747 | TRACE("%s", printk_buf); | ||
738 | 748 | ||
739 | 749 | ||
740 | p = printk_buf; | 750 | p = printk_buf; |
@@ -804,7 +814,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
804 | * Try to acquire and then immediately release the | 814 | * Try to acquire and then immediately release the |
805 | * console semaphore. The release will do all the | 815 | * console semaphore. The release will do all the |
806 | * actual magic (print out buffers, wake up klogd, | 816 | * actual magic (print out buffers, wake up klogd, |
807 | * etc). | 817 | * etc). |
808 | * | 818 | * |
809 | * The acquire_console_semaphore_for_printk() function | 819 | * The acquire_console_semaphore_for_printk() function |
810 | * will release 'logbuf_lock' regardless of whether it | 820 | * will release 'logbuf_lock' regardless of whether it |
@@ -1067,7 +1077,7 @@ int printk_needs_cpu(int cpu) | |||
1067 | 1077 | ||
1068 | void wake_up_klogd(void) | 1078 | void wake_up_klogd(void) |
1069 | { | 1079 | { |
1070 | if (waitqueue_active(&log_wait)) | 1080 | if (!trace_override && waitqueue_active(&log_wait)) |
1071 | __raw_get_cpu_var(printk_pending) = 1; | 1081 | __raw_get_cpu_var(printk_pending) = 1; |
1072 | } | 1082 | } |
1073 | 1083 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index dc85ceb90832..1f5327f8c012 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -79,6 +79,11 @@ | |||
79 | #include "sched_cpupri.h" | 79 | #include "sched_cpupri.h" |
80 | #include "workqueue_sched.h" | 80 | #include "workqueue_sched.h" |
81 | 81 | ||
82 | #include <litmus/sched_trace.h> | ||
83 | #include <litmus/trace.h> | ||
84 | |||
85 | static void litmus_tick(struct rq*, struct task_struct*); | ||
86 | |||
82 | #define CREATE_TRACE_POINTS | 87 | #define CREATE_TRACE_POINTS |
83 | #include <trace/events/sched.h> | 88 | #include <trace/events/sched.h> |
84 | 89 | ||
@@ -405,6 +410,12 @@ struct rt_rq { | |||
405 | #endif | 410 | #endif |
406 | }; | 411 | }; |
407 | 412 | ||
413 | /* Litmus related fields in a runqueue */ | ||
414 | struct litmus_rq { | ||
415 | unsigned long nr_running; | ||
416 | struct task_struct *prev; | ||
417 | }; | ||
418 | |||
408 | #ifdef CONFIG_SMP | 419 | #ifdef CONFIG_SMP |
409 | 420 | ||
410 | /* | 421 | /* |
@@ -471,6 +482,7 @@ struct rq { | |||
471 | 482 | ||
472 | struct cfs_rq cfs; | 483 | struct cfs_rq cfs; |
473 | struct rt_rq rt; | 484 | struct rt_rq rt; |
485 | struct litmus_rq litmus; | ||
474 | 486 | ||
475 | #ifdef CONFIG_FAIR_GROUP_SCHED | 487 | #ifdef CONFIG_FAIR_GROUP_SCHED |
476 | /* list of leaf cfs_rq on this cpu: */ | 488 | /* list of leaf cfs_rq on this cpu: */ |
@@ -566,8 +578,14 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
566 | * A queue event has occurred, and we're going to schedule. In | 578 | * A queue event has occurred, and we're going to schedule. In |
567 | * this case, we can save a useless back to back clock update. | 579 | * this case, we can save a useless back to back clock update. |
568 | */ | 580 | */ |
581 | /* LITMUS^RT: turning off the clock update is buggy in Linux 2.6.36; | ||
582 | * the scheduler can "forget" to renable the runqueue clock in some | ||
583 | * cases. LITMUS^RT amplifies the effects of this problem. Hence, we | ||
584 | * turn it off to avoid stalling clocks. */ | ||
585 | /* | ||
569 | if (test_tsk_need_resched(p)) | 586 | if (test_tsk_need_resched(p)) |
570 | rq->skip_clock_update = 1; | 587 | rq->skip_clock_update = 1; |
588 | */ | ||
571 | } | 589 | } |
572 | 590 | ||
573 | static inline int cpu_of(struct rq *rq) | 591 | static inline int cpu_of(struct rq *rq) |
@@ -1042,6 +1060,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
1042 | raw_spin_lock(&rq->lock); | 1060 | raw_spin_lock(&rq->lock); |
1043 | update_rq_clock(rq); | 1061 | update_rq_clock(rq); |
1044 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); | 1062 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); |
1063 | litmus_tick(rq, rq->curr); | ||
1045 | raw_spin_unlock(&rq->lock); | 1064 | raw_spin_unlock(&rq->lock); |
1046 | 1065 | ||
1047 | return HRTIMER_NORESTART; | 1066 | return HRTIMER_NORESTART; |
@@ -1840,7 +1859,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
1840 | 1859 | ||
1841 | static const struct sched_class rt_sched_class; | 1860 | static const struct sched_class rt_sched_class; |
1842 | 1861 | ||
1843 | #define sched_class_highest (&rt_sched_class) | 1862 | #define sched_class_highest (&litmus_sched_class) |
1844 | #define for_each_class(class) \ | 1863 | #define for_each_class(class) \ |
1845 | for (class = sched_class_highest; class; class = class->next) | 1864 | for (class = sched_class_highest; class; class = class->next) |
1846 | 1865 | ||
@@ -1920,6 +1939,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
1920 | #include "sched_idletask.c" | 1939 | #include "sched_idletask.c" |
1921 | #include "sched_fair.c" | 1940 | #include "sched_fair.c" |
1922 | #include "sched_rt.c" | 1941 | #include "sched_rt.c" |
1942 | #include "../litmus/sched_litmus.c" | ||
1923 | #ifdef CONFIG_SCHED_DEBUG | 1943 | #ifdef CONFIG_SCHED_DEBUG |
1924 | # include "sched_debug.c" | 1944 | # include "sched_debug.c" |
1925 | #endif | 1945 | #endif |
@@ -2352,6 +2372,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2352 | unsigned long en_flags = ENQUEUE_WAKEUP; | 2372 | unsigned long en_flags = ENQUEUE_WAKEUP; |
2353 | struct rq *rq; | 2373 | struct rq *rq; |
2354 | 2374 | ||
2375 | if (is_realtime(p)) | ||
2376 | TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); | ||
2377 | |||
2355 | this_cpu = get_cpu(); | 2378 | this_cpu = get_cpu(); |
2356 | 2379 | ||
2357 | smp_wmb(); | 2380 | smp_wmb(); |
@@ -2366,7 +2389,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2366 | orig_cpu = cpu; | 2389 | orig_cpu = cpu; |
2367 | 2390 | ||
2368 | #ifdef CONFIG_SMP | 2391 | #ifdef CONFIG_SMP |
2369 | if (unlikely(task_running(rq, p))) | 2392 | if (unlikely(task_running(rq, p)) || is_realtime(p)) |
2370 | goto out_activate; | 2393 | goto out_activate; |
2371 | 2394 | ||
2372 | /* | 2395 | /* |
@@ -2428,6 +2451,8 @@ out_activate: | |||
2428 | out_running: | 2451 | out_running: |
2429 | ttwu_post_activation(p, rq, wake_flags, success); | 2452 | ttwu_post_activation(p, rq, wake_flags, success); |
2430 | out: | 2453 | out: |
2454 | if (is_realtime(p)) | ||
2455 | TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state); | ||
2431 | task_rq_unlock(rq, &flags); | 2456 | task_rq_unlock(rq, &flags); |
2432 | put_cpu(); | 2457 | put_cpu(); |
2433 | 2458 | ||
@@ -2532,7 +2557,8 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2532 | * Revert to default priority/policy on fork if requested. | 2557 | * Revert to default priority/policy on fork if requested. |
2533 | */ | 2558 | */ |
2534 | if (unlikely(p->sched_reset_on_fork)) { | 2559 | if (unlikely(p->sched_reset_on_fork)) { |
2535 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { | 2560 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR || |
2561 | p->policy == SCHED_LITMUS) { | ||
2536 | p->policy = SCHED_NORMAL; | 2562 | p->policy = SCHED_NORMAL; |
2537 | p->normal_prio = p->static_prio; | 2563 | p->normal_prio = p->static_prio; |
2538 | } | 2564 | } |
@@ -2748,6 +2774,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
2748 | */ | 2774 | */ |
2749 | prev_state = prev->state; | 2775 | prev_state = prev->state; |
2750 | finish_arch_switch(prev); | 2776 | finish_arch_switch(prev); |
2777 | litmus->finish_switch(prev); | ||
2778 | prev->rt_param.stack_in_use = NO_CPU; | ||
2751 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 2779 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
2752 | local_irq_disable(); | 2780 | local_irq_disable(); |
2753 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 2781 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ |
@@ -2777,6 +2805,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev) | |||
2777 | { | 2805 | { |
2778 | if (prev->sched_class->pre_schedule) | 2806 | if (prev->sched_class->pre_schedule) |
2779 | prev->sched_class->pre_schedule(rq, prev); | 2807 | prev->sched_class->pre_schedule(rq, prev); |
2808 | |||
2809 | /* LITMUS^RT not very clean hack: we need to save the prev task | ||
2810 | * as our scheduling decision rely on it (as we drop the rq lock | ||
2811 | * something in prev can change...); there is no way to escape | ||
2812 | * this ack apart from modifying pick_nex_task(rq, _prev_) or | ||
2813 | * falling back on the previous solution of decoupling | ||
2814 | * scheduling decisions | ||
2815 | */ | ||
2816 | rq->litmus.prev = prev; | ||
2780 | } | 2817 | } |
2781 | 2818 | ||
2782 | /* rq->lock is NOT held, but preemption is disabled */ | 2819 | /* rq->lock is NOT held, but preemption is disabled */ |
@@ -3578,18 +3615,26 @@ void scheduler_tick(void) | |||
3578 | 3615 | ||
3579 | sched_clock_tick(); | 3616 | sched_clock_tick(); |
3580 | 3617 | ||
3618 | TS_TICK_START(current); | ||
3619 | |||
3581 | raw_spin_lock(&rq->lock); | 3620 | raw_spin_lock(&rq->lock); |
3582 | update_rq_clock(rq); | 3621 | update_rq_clock(rq); |
3583 | update_cpu_load_active(rq); | 3622 | update_cpu_load_active(rq); |
3584 | curr->sched_class->task_tick(rq, curr, 0); | 3623 | curr->sched_class->task_tick(rq, curr, 0); |
3624 | |||
3625 | /* litmus_tick may force current to resched */ | ||
3626 | litmus_tick(rq, curr); | ||
3627 | |||
3585 | raw_spin_unlock(&rq->lock); | 3628 | raw_spin_unlock(&rq->lock); |
3586 | 3629 | ||
3587 | perf_event_task_tick(curr); | 3630 | perf_event_task_tick(curr); |
3588 | 3631 | ||
3589 | #ifdef CONFIG_SMP | 3632 | #ifdef CONFIG_SMP |
3590 | rq->idle_at_tick = idle_cpu(cpu); | 3633 | rq->idle_at_tick = idle_cpu(cpu); |
3591 | trigger_load_balance(rq, cpu); | 3634 | if (!is_realtime(current)) |
3635 | trigger_load_balance(rq, cpu); | ||
3592 | #endif | 3636 | #endif |
3637 | TS_TICK_END(current); | ||
3593 | } | 3638 | } |
3594 | 3639 | ||
3595 | notrace unsigned long get_parent_ip(unsigned long addr) | 3640 | notrace unsigned long get_parent_ip(unsigned long addr) |
@@ -3716,12 +3761,20 @@ pick_next_task(struct rq *rq) | |||
3716 | /* | 3761 | /* |
3717 | * Optimization: we know that if all tasks are in | 3762 | * Optimization: we know that if all tasks are in |
3718 | * the fair class we can call that function directly: | 3763 | * the fair class we can call that function directly: |
3719 | */ | 3764 | |
3720 | if (likely(rq->nr_running == rq->cfs.nr_running)) { | 3765 | * NOT IN LITMUS^RT! |
3766 | |||
3767 | * This breaks many assumptions in the plugins. | ||
3768 | * Do not uncomment without thinking long and hard | ||
3769 | * about how this affects global plugins such as GSN-EDF. | ||
3770 | |||
3771 | if (rq->nr_running == rq->cfs.nr_running) { | ||
3772 | TRACE("taking shortcut in pick_next_task()\n"); | ||
3721 | p = fair_sched_class.pick_next_task(rq); | 3773 | p = fair_sched_class.pick_next_task(rq); |
3722 | if (likely(p)) | 3774 | if (likely(p)) |
3723 | return p; | 3775 | return p; |
3724 | } | 3776 | } |
3777 | */ | ||
3725 | 3778 | ||
3726 | class = sched_class_highest; | 3779 | class = sched_class_highest; |
3727 | for ( ; ; ) { | 3780 | for ( ; ; ) { |
@@ -3748,6 +3801,7 @@ asmlinkage void __sched schedule(void) | |||
3748 | 3801 | ||
3749 | need_resched: | 3802 | need_resched: |
3750 | preempt_disable(); | 3803 | preempt_disable(); |
3804 | sched_state_entered_schedule(); | ||
3751 | cpu = smp_processor_id(); | 3805 | cpu = smp_processor_id(); |
3752 | rq = cpu_rq(cpu); | 3806 | rq = cpu_rq(cpu); |
3753 | rcu_note_context_switch(cpu); | 3807 | rcu_note_context_switch(cpu); |
@@ -3755,6 +3809,8 @@ need_resched: | |||
3755 | 3809 | ||
3756 | release_kernel_lock(prev); | 3810 | release_kernel_lock(prev); |
3757 | need_resched_nonpreemptible: | 3811 | need_resched_nonpreemptible: |
3812 | TS_SCHED_START; | ||
3813 | sched_trace_task_switch_away(prev); | ||
3758 | 3814 | ||
3759 | schedule_debug(prev); | 3815 | schedule_debug(prev); |
3760 | 3816 | ||
@@ -3803,7 +3859,10 @@ need_resched_nonpreemptible: | |||
3803 | rq->curr = next; | 3859 | rq->curr = next; |
3804 | ++*switch_count; | 3860 | ++*switch_count; |
3805 | 3861 | ||
3862 | TS_SCHED_END(next); | ||
3863 | TS_CXS_START(next); | ||
3806 | context_switch(rq, prev, next); /* unlocks the rq */ | 3864 | context_switch(rq, prev, next); /* unlocks the rq */ |
3865 | TS_CXS_END(current); | ||
3807 | /* | 3866 | /* |
3808 | * The context switch have flipped the stack from under us | 3867 | * The context switch have flipped the stack from under us |
3809 | * and restored the local variables which were saved when | 3868 | * and restored the local variables which were saved when |
@@ -3812,17 +3871,23 @@ need_resched_nonpreemptible: | |||
3812 | */ | 3871 | */ |
3813 | cpu = smp_processor_id(); | 3872 | cpu = smp_processor_id(); |
3814 | rq = cpu_rq(cpu); | 3873 | rq = cpu_rq(cpu); |
3815 | } else | 3874 | } else { |
3875 | TS_SCHED_END(prev); | ||
3816 | raw_spin_unlock_irq(&rq->lock); | 3876 | raw_spin_unlock_irq(&rq->lock); |
3877 | } | ||
3878 | |||
3879 | sched_trace_task_switch_to(current); | ||
3817 | 3880 | ||
3818 | post_schedule(rq); | 3881 | post_schedule(rq); |
3819 | 3882 | ||
3820 | if (unlikely(reacquire_kernel_lock(prev))) | 3883 | if (sched_state_validate_switch() || unlikely(reacquire_kernel_lock(prev))) |
3821 | goto need_resched_nonpreemptible; | 3884 | goto need_resched_nonpreemptible; |
3822 | 3885 | ||
3823 | preempt_enable_no_resched(); | 3886 | preempt_enable_no_resched(); |
3824 | if (need_resched()) | 3887 | if (need_resched()) |
3825 | goto need_resched; | 3888 | goto need_resched; |
3889 | |||
3890 | srp_ceiling_block(); | ||
3826 | } | 3891 | } |
3827 | EXPORT_SYMBOL(schedule); | 3892 | EXPORT_SYMBOL(schedule); |
3828 | 3893 | ||
@@ -4108,6 +4173,17 @@ void complete_all(struct completion *x) | |||
4108 | } | 4173 | } |
4109 | EXPORT_SYMBOL(complete_all); | 4174 | EXPORT_SYMBOL(complete_all); |
4110 | 4175 | ||
4176 | void complete_n(struct completion *x, int n) | ||
4177 | { | ||
4178 | unsigned long flags; | ||
4179 | |||
4180 | spin_lock_irqsave(&x->wait.lock, flags); | ||
4181 | x->done += n; | ||
4182 | __wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL); | ||
4183 | spin_unlock_irqrestore(&x->wait.lock, flags); | ||
4184 | } | ||
4185 | EXPORT_SYMBOL(complete_n); | ||
4186 | |||
4111 | static inline long __sched | 4187 | static inline long __sched |
4112 | do_wait_for_common(struct completion *x, long timeout, int state) | 4188 | do_wait_for_common(struct completion *x, long timeout, int state) |
4113 | { | 4189 | { |
@@ -4550,7 +4626,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
4550 | p->normal_prio = normal_prio(p); | 4626 | p->normal_prio = normal_prio(p); |
4551 | /* we are holding p->pi_lock already */ | 4627 | /* we are holding p->pi_lock already */ |
4552 | p->prio = rt_mutex_getprio(p); | 4628 | p->prio = rt_mutex_getprio(p); |
4553 | if (rt_prio(p->prio)) | 4629 | if (p->policy == SCHED_LITMUS) |
4630 | p->sched_class = &litmus_sched_class; | ||
4631 | else if (rt_prio(p->prio)) | ||
4554 | p->sched_class = &rt_sched_class; | 4632 | p->sched_class = &rt_sched_class; |
4555 | else | 4633 | else |
4556 | p->sched_class = &fair_sched_class; | 4634 | p->sched_class = &fair_sched_class; |
@@ -4595,7 +4673,7 @@ recheck: | |||
4595 | 4673 | ||
4596 | if (policy != SCHED_FIFO && policy != SCHED_RR && | 4674 | if (policy != SCHED_FIFO && policy != SCHED_RR && |
4597 | policy != SCHED_NORMAL && policy != SCHED_BATCH && | 4675 | policy != SCHED_NORMAL && policy != SCHED_BATCH && |
4598 | policy != SCHED_IDLE) | 4676 | policy != SCHED_IDLE && policy != SCHED_LITMUS) |
4599 | return -EINVAL; | 4677 | return -EINVAL; |
4600 | } | 4678 | } |
4601 | 4679 | ||
@@ -4610,6 +4688,8 @@ recheck: | |||
4610 | return -EINVAL; | 4688 | return -EINVAL; |
4611 | if (rt_policy(policy) != (param->sched_priority != 0)) | 4689 | if (rt_policy(policy) != (param->sched_priority != 0)) |
4612 | return -EINVAL; | 4690 | return -EINVAL; |
4691 | if (policy == SCHED_LITMUS && policy == p->policy) | ||
4692 | return -EINVAL; | ||
4613 | 4693 | ||
4614 | /* | 4694 | /* |
4615 | * Allow unprivileged RT tasks to decrease priority: | 4695 | * Allow unprivileged RT tasks to decrease priority: |
@@ -4650,6 +4730,12 @@ recheck: | |||
4650 | return retval; | 4730 | return retval; |
4651 | } | 4731 | } |
4652 | 4732 | ||
4733 | if (policy == SCHED_LITMUS) { | ||
4734 | retval = litmus_admit_task(p); | ||
4735 | if (retval) | ||
4736 | return retval; | ||
4737 | } | ||
4738 | |||
4653 | /* | 4739 | /* |
4654 | * make sure no PI-waiters arrive (or leave) while we are | 4740 | * make sure no PI-waiters arrive (or leave) while we are |
4655 | * changing the priority of the task: | 4741 | * changing the priority of the task: |
@@ -4692,10 +4778,19 @@ recheck: | |||
4692 | 4778 | ||
4693 | p->sched_reset_on_fork = reset_on_fork; | 4779 | p->sched_reset_on_fork = reset_on_fork; |
4694 | 4780 | ||
4781 | if (p->policy == SCHED_LITMUS) | ||
4782 | litmus_exit_task(p); | ||
4783 | |||
4695 | oldprio = p->prio; | 4784 | oldprio = p->prio; |
4696 | prev_class = p->sched_class; | 4785 | prev_class = p->sched_class; |
4697 | __setscheduler(rq, p, policy, param->sched_priority); | 4786 | __setscheduler(rq, p, policy, param->sched_priority); |
4698 | 4787 | ||
4788 | if (policy == SCHED_LITMUS) { | ||
4789 | p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU; | ||
4790 | p->rt_param.present = running; | ||
4791 | litmus->task_new(p, on_rq, running); | ||
4792 | } | ||
4793 | |||
4699 | if (running) | 4794 | if (running) |
4700 | p->sched_class->set_curr_task(rq); | 4795 | p->sched_class->set_curr_task(rq); |
4701 | if (on_rq) { | 4796 | if (on_rq) { |
@@ -4755,6 +4850,13 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) | |||
4755 | if (copy_from_user(&lparam, param, sizeof(struct sched_param))) | 4850 | if (copy_from_user(&lparam, param, sizeof(struct sched_param))) |
4756 | return -EFAULT; | 4851 | return -EFAULT; |
4757 | 4852 | ||
4853 | #ifdef CONFIG_LITMUS_LOCKING | ||
4854 | /* Hack to allow plugin to call into schedule | ||
4855 | * prio to a setscheduler() call. */ | ||
4856 | if (is_realtime(current)) | ||
4857 | litmus->pre_setsched(current, policy); | ||
4858 | #endif | ||
4859 | |||
4758 | rcu_read_lock(); | 4860 | rcu_read_lock(); |
4759 | retval = -ESRCH; | 4861 | retval = -ESRCH; |
4760 | p = find_process_by_pid(pid); | 4862 | p = find_process_by_pid(pid); |
@@ -4865,10 +4967,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
4865 | rcu_read_lock(); | 4967 | rcu_read_lock(); |
4866 | 4968 | ||
4867 | p = find_process_by_pid(pid); | 4969 | p = find_process_by_pid(pid); |
4868 | if (!p) { | 4970 | /* Don't set affinity if task not found and for LITMUS tasks */ |
4971 | if (!p || is_realtime(p)) { | ||
4869 | rcu_read_unlock(); | 4972 | rcu_read_unlock(); |
4870 | put_online_cpus(); | 4973 | put_online_cpus(); |
4871 | return -ESRCH; | 4974 | return p ? -EPERM : -ESRCH; |
4872 | } | 4975 | } |
4873 | 4976 | ||
4874 | /* Prevent p going away */ | 4977 | /* Prevent p going away */ |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index db3f674ca49d..e0e8d5ca3c98 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1654,7 +1654,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
1656 | 1656 | ||
1657 | if (unlikely(rt_prio(p->prio))) | 1657 | if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS) |
1658 | goto preempt; | 1658 | goto preempt; |
1659 | 1659 | ||
1660 | if (unlikely(p->sched_class != &fair_sched_class)) | 1660 | if (unlikely(p->sched_class != &fair_sched_class)) |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d10c80ebb67a..e40e7fe43170 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1013,7 +1013,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
1013 | */ | 1013 | */ |
1014 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) | 1014 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) |
1015 | { | 1015 | { |
1016 | if (p->prio < rq->curr->prio) { | 1016 | if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) { |
1017 | resched_task(rq->curr); | 1017 | resched_task(rq->curr); |
1018 | return; | 1018 | return; |
1019 | } | 1019 | } |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3e216e01bbd1..bb2d8b7850a3 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -768,12 +768,53 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
768 | } | 768 | } |
769 | 769 | ||
770 | /** | 770 | /** |
771 | * tick_set_quanta_type - get the quanta type as a boot option | ||
772 | * Default is standard setup with ticks staggered over first | ||
773 | * half of tick period. | ||
774 | */ | ||
775 | int quanta_type = LINUX_DEFAULT_TICKS; | ||
776 | static int __init tick_set_quanta_type(char *str) | ||
777 | { | ||
778 | if (strcmp("aligned", str) == 0) { | ||
779 | quanta_type = LITMUS_ALIGNED_TICKS; | ||
780 | printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n"); | ||
781 | } | ||
782 | else if (strcmp("staggered", str) == 0) { | ||
783 | quanta_type = LITMUS_STAGGERED_TICKS; | ||
784 | printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n"); | ||
785 | } | ||
786 | return 1; | ||
787 | } | ||
788 | __setup("quanta=", tick_set_quanta_type); | ||
789 | |||
790 | u64 cpu_stagger_offset(int cpu) | ||
791 | { | ||
792 | u64 offset = 0; | ||
793 | switch (quanta_type) { | ||
794 | case LITMUS_ALIGNED_TICKS: | ||
795 | offset = 0; | ||
796 | break; | ||
797 | case LITMUS_STAGGERED_TICKS: | ||
798 | offset = ktime_to_ns(tick_period); | ||
799 | do_div(offset, num_possible_cpus()); | ||
800 | offset *= cpu; | ||
801 | break; | ||
802 | default: | ||
803 | offset = ktime_to_ns(tick_period) >> 1; | ||
804 | do_div(offset, num_possible_cpus()); | ||
805 | offset *= cpu; | ||
806 | } | ||
807 | return offset; | ||
808 | } | ||
809 | |||
810 | /** | ||
771 | * tick_setup_sched_timer - setup the tick emulation timer | 811 | * tick_setup_sched_timer - setup the tick emulation timer |
772 | */ | 812 | */ |
773 | void tick_setup_sched_timer(void) | 813 | void tick_setup_sched_timer(void) |
774 | { | 814 | { |
775 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 815 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
776 | ktime_t now = ktime_get(); | 816 | ktime_t now = ktime_get(); |
817 | u64 offset; | ||
777 | 818 | ||
778 | /* | 819 | /* |
779 | * Emulate tick processing via per-CPU hrtimers: | 820 | * Emulate tick processing via per-CPU hrtimers: |
@@ -784,6 +825,12 @@ void tick_setup_sched_timer(void) | |||
784 | /* Get the next period (per cpu) */ | 825 | /* Get the next period (per cpu) */ |
785 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); | 826 | hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); |
786 | 827 | ||
828 | /* Offset must be set correctly to achieve desired quanta type. */ | ||
829 | offset = cpu_stagger_offset(smp_processor_id()); | ||
830 | |||
831 | /* Add the correct offset to expiration time */ | ||
832 | hrtimer_add_expires_ns(&ts->sched_timer, offset); | ||
833 | |||
787 | for (;;) { | 834 | for (;;) { |
788 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 835 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
789 | hrtimer_start_expires(&ts->sched_timer, | 836 | hrtimer_start_expires(&ts->sched_timer, |
diff --git a/litmus/Kconfig b/litmus/Kconfig new file mode 100644 index 000000000000..ad8dc8308cf0 --- /dev/null +++ b/litmus/Kconfig | |||
@@ -0,0 +1,185 @@ | |||
1 | menu "LITMUS^RT" | ||
2 | |||
3 | menu "Scheduling" | ||
4 | |||
5 | config PLUGIN_CEDF | ||
6 | bool "Clustered-EDF" | ||
7 | depends on X86 && SYSFS | ||
8 | default y | ||
9 | help | ||
10 | Include the Clustered EDF (C-EDF) plugin in the kernel. | ||
11 | This is appropriate for large platforms with shared caches. | ||
12 | On smaller platforms (e.g., ARM PB11MPCore), using C-EDF | ||
13 | makes little sense since there aren't any shared caches. | ||
14 | |||
15 | config PLUGIN_PFAIR | ||
16 | bool "PFAIR" | ||
17 | depends on HIGH_RES_TIMERS && !NO_HZ | ||
18 | default y | ||
19 | help | ||
20 | Include the PFAIR plugin (i.e., the PD^2 scheduler) in the kernel. | ||
21 | The PFAIR plugin requires high resolution timers (for staggered quanta) | ||
22 | and does not support NO_HZ (quanta could be missed when the system is idle). | ||
23 | |||
24 | If unsure, say Yes. | ||
25 | |||
26 | config RELEASE_MASTER | ||
27 | bool "Release-master Support" | ||
28 | depends on ARCH_HAS_SEND_PULL_TIMERS | ||
29 | default n | ||
30 | help | ||
31 | Allow one processor to act as a dedicated interrupt processor | ||
32 | that services all timer interrupts, but that does not schedule | ||
33 | real-time tasks. See RTSS'09 paper for details | ||
34 | (http://www.cs.unc.edu/~anderson/papers.html). | ||
35 | Currently only supported by GSN-EDF. | ||
36 | |||
37 | endmenu | ||
38 | |||
39 | menu "Real-Time Synchronization" | ||
40 | |||
41 | config NP_SECTION | ||
42 | bool "Non-preemptive section support" | ||
43 | default n | ||
44 | help | ||
45 | Allow tasks to become non-preemptable. | ||
46 | Note that plugins still need to explicitly support non-preemptivity. | ||
47 | Currently, only GSN-EDF and PSN-EDF have such support. | ||
48 | |||
49 | This is required to support locking protocols such as the FMLP. | ||
50 | If disabled, all tasks will be considered preemptable at all times. | ||
51 | |||
52 | config LITMUS_LOCKING | ||
53 | bool "Support for real-time locking protocols" | ||
54 | depends on NP_SECTION | ||
55 | default n | ||
56 | help | ||
57 | Enable LITMUS^RT's deterministic multiprocessor real-time | ||
58 | locking protocols. | ||
59 | |||
60 | Say Yes if you want to include locking protocols such as the FMLP and | ||
61 | Baker's SRP. | ||
62 | |||
63 | endmenu | ||
64 | |||
65 | menu "Tracing" | ||
66 | |||
67 | config FEATHER_TRACE | ||
68 | bool "Feather-Trace Infrastructure" | ||
69 | default y | ||
70 | help | ||
71 | Feather-Trace basic tracing infrastructure. Includes device file | ||
72 | driver and instrumentation point support. | ||
73 | |||
74 | There are actually two implementations of Feather-Trace. | ||
75 | 1) A slower, but portable, default implementation. | ||
76 | 2) Architecture-specific implementations that rewrite kernel .text at runtime. | ||
77 | |||
78 | If enabled, Feather-Trace will be based on 2) if available (currently only for x86). | ||
79 | However, if DEBUG_RODATA=y, then Feather-Trace will choose option 1) in any case | ||
80 | to avoid problems with write-protected .text pages. | ||
81 | |||
82 | Bottom line: to avoid increased overheads, choose DEBUG_RODATA=n. | ||
83 | |||
84 | Note that this option only enables the basic Feather-Trace infrastructure; | ||
85 | you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to | ||
86 | actually enable any events. | ||
87 | |||
88 | config SCHED_TASK_TRACE | ||
89 | bool "Trace real-time tasks" | ||
90 | depends on FEATHER_TRACE | ||
91 | default y | ||
92 | help | ||
93 | Include support for the sched_trace_XXX() tracing functions. This | ||
94 | allows the collection of real-time task events such as job | ||
95 | completions, job releases, early completions, etc. This results in a | ||
96 | small overhead in the scheduling code. Disable if the overhead is not | ||
97 | acceptable (e.g., benchmarking). | ||
98 | |||
99 | Say Yes for debugging. | ||
100 | Say No for overhead tracing. | ||
101 | |||
102 | config SCHED_TASK_TRACE_SHIFT | ||
103 | int "Buffer size for sched_trace_xxx() events" | ||
104 | depends on SCHED_TASK_TRACE | ||
105 | range 8 13 | ||
106 | default 9 | ||
107 | help | ||
108 | |||
109 | Select the buffer size of sched_trace_xxx() events as a power of two. | ||
110 | These buffers are statically allocated as per-CPU data. Each event | ||
111 | requires 24 bytes storage plus one additional flag byte. Too large | ||
112 | buffers can cause issues with the per-cpu allocator (and waste | ||
113 | memory). Too small buffers can cause scheduling events to be lost. The | ||
114 | "right" size is workload dependent and depends on the number of tasks, | ||
115 | each task's period, each task's number of suspensions, and how often | ||
116 | the buffer is flushed. | ||
117 | |||
118 | Examples: 12 => 4k events | ||
119 | 10 => 1k events | ||
120 | 8 => 512 events | ||
121 | |||
122 | config SCHED_OVERHEAD_TRACE | ||
123 | bool "Record timestamps for overhead measurements" | ||
124 | depends on FEATHER_TRACE | ||
125 | default n | ||
126 | help | ||
127 | Export event stream for overhead tracing. | ||
128 | Say Yes for overhead tracing. | ||
129 | |||
130 | config SCHED_DEBUG_TRACE | ||
131 | bool "TRACE() debugging" | ||
132 | default y | ||
133 | help | ||
134 | Include support for sched_trace_log_messageg(), which is used to | ||
135 | implement TRACE(). If disabled, no TRACE() messages will be included | ||
136 | in the kernel, and no overheads due to debugging statements will be | ||
137 | incurred by the scheduler. Disable if the overhead is not acceptable | ||
138 | (e.g. benchmarking). | ||
139 | |||
140 | Say Yes for debugging. | ||
141 | Say No for overhead tracing. | ||
142 | |||
143 | config SCHED_DEBUG_TRACE_SHIFT | ||
144 | int "Buffer size for TRACE() buffer" | ||
145 | depends on SCHED_DEBUG_TRACE | ||
146 | range 14 22 | ||
147 | default 18 | ||
148 | help | ||
149 | |||
150 | Select the amount of memory needed per for the TRACE() buffer, as a | ||
151 | power of two. The TRACE() buffer is global and statically allocated. If | ||
152 | the buffer is too small, there will be holes in the TRACE() log if the | ||
153 | buffer-flushing task is starved. | ||
154 | |||
155 | The default should be sufficient for most systems. Increase the buffer | ||
156 | size if the log contains holes. Reduce the buffer size when running on | ||
157 | a memory-constrained system. | ||
158 | |||
159 | Examples: 14 => 16KB | ||
160 | 18 => 256KB | ||
161 | 20 => 1MB | ||
162 | |||
163 | This buffer is exported to usespace using a misc device as | ||
164 | 'litmus/log'. On a system with default udev rules, a corresponding | ||
165 | character device node should be created at /dev/litmus/log. The buffer | ||
166 | can be flushed using cat, e.g., 'cat /dev/litmus/log > my_log_file.txt'. | ||
167 | |||
168 | config SCHED_DEBUG_TRACE_CALLER | ||
169 | bool "Include [function@file:line] tag in TRACE() log" | ||
170 | depends on SCHED_DEBUG_TRACE | ||
171 | default n | ||
172 | help | ||
173 | With this option enabled, TRACE() prepends | ||
174 | |||
175 | "[<function name>@<filename>:<line number>]" | ||
176 | |||
177 | to each message in the debug log. Enable this to aid in figuring out | ||
178 | what was called in which order. The downside is that it adds a lot of | ||
179 | clutter. | ||
180 | |||
181 | If unsure, say No. | ||
182 | |||
183 | endmenu | ||
184 | |||
185 | endmenu | ||
diff --git a/litmus/Makefile b/litmus/Makefile new file mode 100644 index 000000000000..e86fad8c25ec --- /dev/null +++ b/litmus/Makefile | |||
@@ -0,0 +1,30 @@ | |||
1 | # | ||
2 | # Makefile for LITMUS^RT | ||
3 | # | ||
4 | |||
5 | obj-y = sched_plugin.o litmus.o \ | ||
6 | preempt.o \ | ||
7 | litmus_proc.o \ | ||
8 | budget.o \ | ||
9 | clustered.o \ | ||
10 | jobs.o \ | ||
11 | sync.o \ | ||
12 | rt_domain.o \ | ||
13 | edf_common.o \ | ||
14 | fp_common.o \ | ||
15 | fdso.o \ | ||
16 | locking.o \ | ||
17 | srp.o \ | ||
18 | bheap.o \ | ||
19 | ctrldev.o \ | ||
20 | sched_gsn_edf.o \ | ||
21 | sched_psn_edf.o \ | ||
22 | sched_pfp.o | ||
23 | |||
24 | obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o | ||
25 | obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o | ||
26 | |||
27 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o | ||
28 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o | ||
29 | obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o | ||
30 | obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o | ||
diff --git a/litmus/bheap.c b/litmus/bheap.c new file mode 100644 index 000000000000..528af97f18a6 --- /dev/null +++ b/litmus/bheap.c | |||
@@ -0,0 +1,314 @@ | |||
1 | #include "linux/kernel.h" | ||
2 | #include "litmus/bheap.h" | ||
3 | |||
4 | void bheap_init(struct bheap* heap) | ||
5 | { | ||
6 | heap->head = NULL; | ||
7 | heap->min = NULL; | ||
8 | } | ||
9 | |||
10 | void bheap_node_init(struct bheap_node** _h, void* value) | ||
11 | { | ||
12 | struct bheap_node* h = *_h; | ||
13 | h->parent = NULL; | ||
14 | h->next = NULL; | ||
15 | h->child = NULL; | ||
16 | h->degree = NOT_IN_HEAP; | ||
17 | h->value = value; | ||
18 | h->ref = _h; | ||
19 | } | ||
20 | |||
21 | |||
22 | /* make child a subtree of root */ | ||
23 | static void __bheap_link(struct bheap_node* root, | ||
24 | struct bheap_node* child) | ||
25 | { | ||
26 | child->parent = root; | ||
27 | child->next = root->child; | ||
28 | root->child = child; | ||
29 | root->degree++; | ||
30 | } | ||
31 | |||
32 | /* merge root lists */ | ||
33 | static struct bheap_node* __bheap_merge(struct bheap_node* a, | ||
34 | struct bheap_node* b) | ||
35 | { | ||
36 | struct bheap_node* head = NULL; | ||
37 | struct bheap_node** pos = &head; | ||
38 | |||
39 | while (a && b) { | ||
40 | if (a->degree < b->degree) { | ||
41 | *pos = a; | ||
42 | a = a->next; | ||
43 | } else { | ||
44 | *pos = b; | ||
45 | b = b->next; | ||
46 | } | ||
47 | pos = &(*pos)->next; | ||
48 | } | ||
49 | if (a) | ||
50 | *pos = a; | ||
51 | else | ||
52 | *pos = b; | ||
53 | return head; | ||
54 | } | ||
55 | |||
56 | /* reverse a linked list of nodes. also clears parent pointer */ | ||
57 | static struct bheap_node* __bheap_reverse(struct bheap_node* h) | ||
58 | { | ||
59 | struct bheap_node* tail = NULL; | ||
60 | struct bheap_node* next; | ||
61 | |||
62 | if (!h) | ||
63 | return h; | ||
64 | |||
65 | h->parent = NULL; | ||
66 | while (h->next) { | ||
67 | next = h->next; | ||
68 | h->next = tail; | ||
69 | tail = h; | ||
70 | h = next; | ||
71 | h->parent = NULL; | ||
72 | } | ||
73 | h->next = tail; | ||
74 | return h; | ||
75 | } | ||
76 | |||
77 | static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap, | ||
78 | struct bheap_node** prev, struct bheap_node** node) | ||
79 | { | ||
80 | struct bheap_node *_prev, *cur; | ||
81 | *prev = NULL; | ||
82 | |||
83 | if (!heap->head) { | ||
84 | *node = NULL; | ||
85 | return; | ||
86 | } | ||
87 | |||
88 | *node = heap->head; | ||
89 | _prev = heap->head; | ||
90 | cur = heap->head->next; | ||
91 | while (cur) { | ||
92 | if (higher_prio(cur, *node)) { | ||
93 | *node = cur; | ||
94 | *prev = _prev; | ||
95 | } | ||
96 | _prev = cur; | ||
97 | cur = cur->next; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap, | ||
102 | struct bheap_node* h2) | ||
103 | { | ||
104 | struct bheap_node* h1; | ||
105 | struct bheap_node *prev, *x, *next; | ||
106 | if (!h2) | ||
107 | return; | ||
108 | h1 = heap->head; | ||
109 | if (!h1) { | ||
110 | heap->head = h2; | ||
111 | return; | ||
112 | } | ||
113 | h1 = __bheap_merge(h1, h2); | ||
114 | prev = NULL; | ||
115 | x = h1; | ||
116 | next = x->next; | ||
117 | while (next) { | ||
118 | if (x->degree != next->degree || | ||
119 | (next->next && next->next->degree == x->degree)) { | ||
120 | /* nothing to do, advance */ | ||
121 | prev = x; | ||
122 | x = next; | ||
123 | } else if (higher_prio(x, next)) { | ||
124 | /* x becomes the root of next */ | ||
125 | x->next = next->next; | ||
126 | __bheap_link(x, next); | ||
127 | } else { | ||
128 | /* next becomes the root of x */ | ||
129 | if (prev) | ||
130 | prev->next = next; | ||
131 | else | ||
132 | h1 = next; | ||
133 | __bheap_link(next, x); | ||
134 | x = next; | ||
135 | } | ||
136 | next = x->next; | ||
137 | } | ||
138 | heap->head = h1; | ||
139 | } | ||
140 | |||
141 | static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio, | ||
142 | struct bheap* heap) | ||
143 | { | ||
144 | struct bheap_node *prev, *node; | ||
145 | __bheap_min(higher_prio, heap, &prev, &node); | ||
146 | if (!node) | ||
147 | return NULL; | ||
148 | if (prev) | ||
149 | prev->next = node->next; | ||
150 | else | ||
151 | heap->head = node->next; | ||
152 | __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); | ||
153 | return node; | ||
154 | } | ||
155 | |||
156 | /* insert (and reinitialize) a node into the heap */ | ||
157 | void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap, | ||
158 | struct bheap_node* node) | ||
159 | { | ||
160 | struct bheap_node *min; | ||
161 | node->child = NULL; | ||
162 | node->parent = NULL; | ||
163 | node->next = NULL; | ||
164 | node->degree = 0; | ||
165 | if (heap->min && higher_prio(node, heap->min)) { | ||
166 | /* swap min cache */ | ||
167 | min = heap->min; | ||
168 | min->child = NULL; | ||
169 | min->parent = NULL; | ||
170 | min->next = NULL; | ||
171 | min->degree = 0; | ||
172 | __bheap_union(higher_prio, heap, min); | ||
173 | heap->min = node; | ||
174 | } else | ||
175 | __bheap_union(higher_prio, heap, node); | ||
176 | } | ||
177 | |||
178 | void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap) | ||
179 | { | ||
180 | struct bheap_node* min; | ||
181 | if (heap->min) { | ||
182 | min = heap->min; | ||
183 | heap->min = NULL; | ||
184 | bheap_insert(higher_prio, heap, min); | ||
185 | } | ||
186 | } | ||
187 | |||
188 | /* merge addition into target */ | ||
189 | void bheap_union(bheap_prio_t higher_prio, | ||
190 | struct bheap* target, struct bheap* addition) | ||
191 | { | ||
192 | /* first insert any cached minima, if necessary */ | ||
193 | bheap_uncache_min(higher_prio, target); | ||
194 | bheap_uncache_min(higher_prio, addition); | ||
195 | __bheap_union(higher_prio, target, addition->head); | ||
196 | /* this is a destructive merge */ | ||
197 | addition->head = NULL; | ||
198 | } | ||
199 | |||
200 | struct bheap_node* bheap_peek(bheap_prio_t higher_prio, | ||
201 | struct bheap* heap) | ||
202 | { | ||
203 | if (!heap->min) | ||
204 | heap->min = __bheap_extract_min(higher_prio, heap); | ||
205 | return heap->min; | ||
206 | } | ||
207 | |||
208 | struct bheap_node* bheap_take(bheap_prio_t higher_prio, | ||
209 | struct bheap* heap) | ||
210 | { | ||
211 | struct bheap_node *node; | ||
212 | if (!heap->min) | ||
213 | heap->min = __bheap_extract_min(higher_prio, heap); | ||
214 | node = heap->min; | ||
215 | heap->min = NULL; | ||
216 | if (node) | ||
217 | node->degree = NOT_IN_HEAP; | ||
218 | return node; | ||
219 | } | ||
220 | |||
221 | int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node) | ||
222 | { | ||
223 | struct bheap_node *parent; | ||
224 | struct bheap_node** tmp_ref; | ||
225 | void* tmp; | ||
226 | |||
227 | /* bubble up */ | ||
228 | parent = node->parent; | ||
229 | while (parent && higher_prio(node, parent)) { | ||
230 | /* swap parent and node */ | ||
231 | tmp = parent->value; | ||
232 | parent->value = node->value; | ||
233 | node->value = tmp; | ||
234 | /* swap references */ | ||
235 | *(parent->ref) = node; | ||
236 | *(node->ref) = parent; | ||
237 | tmp_ref = parent->ref; | ||
238 | parent->ref = node->ref; | ||
239 | node->ref = tmp_ref; | ||
240 | /* step up */ | ||
241 | node = parent; | ||
242 | parent = node->parent; | ||
243 | } | ||
244 | |||
245 | return parent != NULL; | ||
246 | } | ||
247 | |||
248 | void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, | ||
249 | struct bheap_node* node) | ||
250 | { | ||
251 | struct bheap_node *parent, *prev, *pos; | ||
252 | struct bheap_node** tmp_ref; | ||
253 | void* tmp; | ||
254 | |||
255 | if (heap->min != node) { | ||
256 | /* bubble up */ | ||
257 | parent = node->parent; | ||
258 | while (parent) { | ||
259 | /* swap parent and node */ | ||
260 | tmp = parent->value; | ||
261 | parent->value = node->value; | ||
262 | node->value = tmp; | ||
263 | /* swap references */ | ||
264 | *(parent->ref) = node; | ||
265 | *(node->ref) = parent; | ||
266 | tmp_ref = parent->ref; | ||
267 | parent->ref = node->ref; | ||
268 | node->ref = tmp_ref; | ||
269 | /* step up */ | ||
270 | node = parent; | ||
271 | parent = node->parent; | ||
272 | } | ||
273 | /* now delete: | ||
274 | * first find prev */ | ||
275 | prev = NULL; | ||
276 | pos = heap->head; | ||
277 | while (pos != node) { | ||
278 | prev = pos; | ||
279 | pos = pos->next; | ||
280 | } | ||
281 | /* we have prev, now remove node */ | ||
282 | if (prev) | ||
283 | prev->next = node->next; | ||
284 | else | ||
285 | heap->head = node->next; | ||
286 | __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); | ||
287 | } else | ||
288 | heap->min = NULL; | ||
289 | node->degree = NOT_IN_HEAP; | ||
290 | } | ||
291 | |||
292 | /* allocate a heap node for value and insert into the heap */ | ||
293 | int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, | ||
294 | void* value, int gfp_flags) | ||
295 | { | ||
296 | struct bheap_node* hn = bheap_node_alloc(gfp_flags); | ||
297 | if (likely(hn)) { | ||
298 | bheap_node_init(&hn, value); | ||
299 | bheap_insert(higher_prio, heap, hn); | ||
300 | } | ||
301 | return hn != NULL; | ||
302 | } | ||
303 | |||
304 | void* bheap_take_del(bheap_prio_t higher_prio, | ||
305 | struct bheap* heap) | ||
306 | { | ||
307 | struct bheap_node* hn = bheap_take(higher_prio, heap); | ||
308 | void* ret = NULL; | ||
309 | if (hn) { | ||
310 | ret = hn->value; | ||
311 | bheap_node_free(hn); | ||
312 | } | ||
313 | return ret; | ||
314 | } | ||
diff --git a/litmus/budget.c b/litmus/budget.c new file mode 100644 index 000000000000..310e9a3d4172 --- /dev/null +++ b/litmus/budget.c | |||
@@ -0,0 +1,111 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/percpu.h> | ||
3 | #include <linux/hrtimer.h> | ||
4 | |||
5 | #include <litmus/litmus.h> | ||
6 | #include <litmus/preempt.h> | ||
7 | |||
8 | struct enforcement_timer { | ||
9 | /* The enforcement timer is used to accurately police | ||
10 | * slice budgets. */ | ||
11 | struct hrtimer timer; | ||
12 | int armed; | ||
13 | }; | ||
14 | |||
15 | DEFINE_PER_CPU(struct enforcement_timer, budget_timer); | ||
16 | |||
17 | static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer) | ||
18 | { | ||
19 | struct enforcement_timer* et = container_of(timer, | ||
20 | struct enforcement_timer, | ||
21 | timer); | ||
22 | unsigned long flags; | ||
23 | |||
24 | local_irq_save(flags); | ||
25 | TRACE("enforcement timer fired.\n"); | ||
26 | et->armed = 0; | ||
27 | /* activate scheduler */ | ||
28 | litmus_reschedule_local(); | ||
29 | local_irq_restore(flags); | ||
30 | |||
31 | return HRTIMER_NORESTART; | ||
32 | } | ||
33 | |||
34 | /* assumes called with IRQs off */ | ||
35 | static void cancel_enforcement_timer(struct enforcement_timer* et) | ||
36 | { | ||
37 | int ret; | ||
38 | |||
39 | TRACE("cancelling enforcement timer.\n"); | ||
40 | |||
41 | /* Since interrupts are disabled and et->armed is only | ||
42 | * modified locally, we do not need any locks. | ||
43 | */ | ||
44 | |||
45 | if (et->armed) { | ||
46 | ret = hrtimer_try_to_cancel(&et->timer); | ||
47 | /* Should never be inactive. */ | ||
48 | BUG_ON(ret == 0); | ||
49 | /* Should never be running concurrently. */ | ||
50 | BUG_ON(ret == -1); | ||
51 | |||
52 | et->armed = 0; | ||
53 | } | ||
54 | } | ||
55 | |||
56 | /* assumes called with IRQs off */ | ||
57 | static void arm_enforcement_timer(struct enforcement_timer* et, | ||
58 | struct task_struct* t) | ||
59 | { | ||
60 | lt_t when_to_fire; | ||
61 | TRACE_TASK(t, "arming enforcement timer.\n"); | ||
62 | |||
63 | /* Calling this when there is no budget left for the task | ||
64 | * makes no sense, unless the task is non-preemptive. */ | ||
65 | BUG_ON(budget_exhausted(t) && (!is_np(t))); | ||
66 | |||
67 | /* __hrtimer_start_range_ns() cancels the timer | ||
68 | * anyway, so we don't have to check whether it is still armed */ | ||
69 | |||
70 | if (likely(!is_np(t))) { | ||
71 | when_to_fire = litmus_clock() + budget_remaining(t); | ||
72 | __hrtimer_start_range_ns(&et->timer, | ||
73 | ns_to_ktime(when_to_fire), | ||
74 | 0 /* delta */, | ||
75 | HRTIMER_MODE_ABS_PINNED, | ||
76 | 0 /* no wakeup */); | ||
77 | et->armed = 1; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | |||
82 | /* expects to be called with IRQs off */ | ||
83 | void update_enforcement_timer(struct task_struct* t) | ||
84 | { | ||
85 | struct enforcement_timer* et = &__get_cpu_var(budget_timer); | ||
86 | |||
87 | if (t && budget_precisely_enforced(t)) { | ||
88 | /* Make sure we call into the scheduler when this budget | ||
89 | * expires. */ | ||
90 | arm_enforcement_timer(et, t); | ||
91 | } else if (et->armed) { | ||
92 | /* Make sure we don't cause unnecessary interrupts. */ | ||
93 | cancel_enforcement_timer(et); | ||
94 | } | ||
95 | } | ||
96 | |||
97 | |||
98 | static int __init init_budget_enforcement(void) | ||
99 | { | ||
100 | int cpu; | ||
101 | struct enforcement_timer* et; | ||
102 | |||
103 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
104 | et = &per_cpu(budget_timer, cpu); | ||
105 | hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
106 | et->timer.function = on_enforcement_timeout; | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | module_init(init_budget_enforcement); | ||
diff --git a/litmus/clustered.c b/litmus/clustered.c new file mode 100644 index 000000000000..6fe1b512f628 --- /dev/null +++ b/litmus/clustered.c | |||
@@ -0,0 +1,111 @@ | |||
1 | #include <linux/gfp.h> | ||
2 | #include <linux/cpumask.h> | ||
3 | #include <linux/list.h> | ||
4 | |||
5 | #include <litmus/clustered.h> | ||
6 | |||
7 | #ifndef CONFIG_X86 | ||
8 | /* fake get_shared_cpu_map() on non-x86 architectures */ | ||
9 | |||
10 | int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) | ||
11 | { | ||
12 | if (index != 1) | ||
13 | return 1; | ||
14 | else { | ||
15 | /* Fake L1: CPU is all by itself. */ | ||
16 | cpumask_clear(mask); | ||
17 | cpumask_set_cpu(cpu, mask); | ||
18 | return 0; | ||
19 | } | ||
20 | } | ||
21 | |||
22 | #endif | ||
23 | |||
24 | int get_cluster_size(enum cache_level level) | ||
25 | { | ||
26 | cpumask_var_t mask; | ||
27 | int ok; | ||
28 | int num_cpus; | ||
29 | |||
30 | if (level == GLOBAL_CLUSTER) | ||
31 | return num_online_cpus(); | ||
32 | else { | ||
33 | if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
34 | return -ENOMEM; | ||
35 | /* assumes CPU 0 is representative of all CPUs */ | ||
36 | ok = get_shared_cpu_map(mask, 0, level); | ||
37 | /* ok == 0 means we got the map; otherwise it's an invalid cache level */ | ||
38 | if (ok == 0) | ||
39 | num_cpus = cpumask_weight(mask); | ||
40 | free_cpumask_var(mask); | ||
41 | |||
42 | if (ok == 0) | ||
43 | return num_cpus; | ||
44 | else | ||
45 | return -EINVAL; | ||
46 | } | ||
47 | } | ||
48 | |||
49 | int assign_cpus_to_clusters(enum cache_level level, | ||
50 | struct scheduling_cluster* clusters[], | ||
51 | unsigned int num_clusters, | ||
52 | struct cluster_cpu* cpus[], | ||
53 | unsigned int num_cpus) | ||
54 | { | ||
55 | cpumask_var_t mask; | ||
56 | unsigned int i, free_cluster = 0, low_cpu; | ||
57 | int err = 0; | ||
58 | |||
59 | if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
60 | return -ENOMEM; | ||
61 | |||
62 | /* clear cluster pointers */ | ||
63 | for (i = 0; i < num_cpus; i++) { | ||
64 | cpus[i]->id = i; | ||
65 | cpus[i]->cluster = NULL; | ||
66 | } | ||
67 | |||
68 | /* initialize clusters */ | ||
69 | for (i = 0; i < num_clusters; i++) { | ||
70 | clusters[i]->id = i; | ||
71 | INIT_LIST_HEAD(&clusters[i]->cpus); | ||
72 | } | ||
73 | |||
74 | /* Assign each CPU. Two assumtions are made: | ||
75 | * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask). | ||
76 | * 2) All cpus that belong to some cluster are online. | ||
77 | */ | ||
78 | for_each_online_cpu(i) { | ||
79 | /* get lowest-id CPU in cluster */ | ||
80 | if (level != GLOBAL_CLUSTER) { | ||
81 | err = get_shared_cpu_map(mask, cpus[i]->id, level); | ||
82 | if (err != 0) { | ||
83 | /* ugh... wrong cache level? Either caller screwed up | ||
84 | * or the CPU topology is weird. */ | ||
85 | printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n", | ||
86 | level, err); | ||
87 | err = -EINVAL; | ||
88 | goto out; | ||
89 | } | ||
90 | low_cpu = cpumask_first(mask); | ||
91 | } else | ||
92 | low_cpu = 0; | ||
93 | if (low_cpu == i) { | ||
94 | /* caller must provide an appropriate number of clusters */ | ||
95 | BUG_ON(free_cluster >= num_clusters); | ||
96 | |||
97 | /* create new cluster */ | ||
98 | cpus[i]->cluster = clusters[free_cluster++]; | ||
99 | } else { | ||
100 | /* low_cpu points to the right cluster | ||
101 | * Assumption: low_cpu is actually online and was processed earlier. */ | ||
102 | cpus[i]->cluster = cpus[low_cpu]->cluster; | ||
103 | } | ||
104 | /* enqueue in cpus list */ | ||
105 | list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); | ||
106 | printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id); | ||
107 | } | ||
108 | out: | ||
109 | free_cpumask_var(mask); | ||
110 | return err; | ||
111 | } | ||
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c new file mode 100644 index 000000000000..6677a67cc945 --- /dev/null +++ b/litmus/ctrldev.c | |||
@@ -0,0 +1,150 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/mm.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/miscdevice.h> | ||
5 | #include <linux/module.h> | ||
6 | |||
7 | #include <litmus/litmus.h> | ||
8 | |||
9 | /* only one page for now, but we might want to add a RO version at some point */ | ||
10 | |||
11 | #define CTRL_NAME "litmus/ctrl" | ||
12 | |||
13 | /* allocate t->rt_param.ctrl_page*/ | ||
14 | static int alloc_ctrl_page(struct task_struct *t) | ||
15 | { | ||
16 | int err = 0; | ||
17 | |||
18 | /* only allocate if the task doesn't have one yet */ | ||
19 | if (!tsk_rt(t)->ctrl_page) { | ||
20 | tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL); | ||
21 | if (!tsk_rt(t)->ctrl_page) | ||
22 | err = -ENOMEM; | ||
23 | /* will get de-allocated in task teardown */ | ||
24 | TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__, | ||
25 | tsk_rt(t)->ctrl_page); | ||
26 | } | ||
27 | return err; | ||
28 | } | ||
29 | |||
30 | static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma) | ||
31 | { | ||
32 | int err; | ||
33 | unsigned long pfn; | ||
34 | |||
35 | struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page); | ||
36 | |||
37 | /* Increase ref count. Is decreased when vma is destroyed. */ | ||
38 | get_page(ctrl); | ||
39 | |||
40 | /* compute page frame number */ | ||
41 | pfn = page_to_pfn(ctrl); | ||
42 | |||
43 | TRACE_CUR(CTRL_NAME | ||
44 | ": mapping %p (pfn:%lx, %lx) to 0x%lx (prot:%lx)\n", | ||
45 | tsk_rt(t)->ctrl_page, pfn, page_to_pfn(ctrl), vma->vm_start, | ||
46 | vma->vm_page_prot); | ||
47 | |||
48 | /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise | ||
49 | * userspace actually gets a copy-on-write page. */ | ||
50 | err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED); | ||
51 | |||
52 | if (err) | ||
53 | TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err); | ||
54 | |||
55 | return err; | ||
56 | } | ||
57 | |||
58 | static void litmus_ctrl_vm_close(struct vm_area_struct* vma) | ||
59 | { | ||
60 | TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__, | ||
61 | vma->vm_flags, vma->vm_page_prot); | ||
62 | |||
63 | TRACE_CUR(CTRL_NAME | ||
64 | ": %p:%p vma:%p vma->vm_private_data:%p closed.\n", | ||
65 | (void*) vma->vm_start, (void*) vma->vm_end, vma, | ||
66 | vma->vm_private_data, current->comm, | ||
67 | current->pid); | ||
68 | } | ||
69 | |||
70 | static int litmus_ctrl_vm_fault(struct vm_area_struct* vma, | ||
71 | struct vm_fault* vmf) | ||
72 | { | ||
73 | /* This function should never be called, since | ||
74 | * all pages should have been mapped by mmap() | ||
75 | * already. */ | ||
76 | TRACE_CUR("%s flags=0x%x\n", __FUNCTION__, vma->vm_flags); | ||
77 | |||
78 | /* nope, you only get one page */ | ||
79 | return VM_FAULT_SIGBUS; | ||
80 | } | ||
81 | |||
82 | static struct vm_operations_struct litmus_ctrl_vm_ops = { | ||
83 | .close = litmus_ctrl_vm_close, | ||
84 | .fault = litmus_ctrl_vm_fault, | ||
85 | }; | ||
86 | |||
87 | static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma) | ||
88 | { | ||
89 | int err = 0; | ||
90 | |||
91 | /* first make sure mapper knows what he's doing */ | ||
92 | |||
93 | /* you can only get one page */ | ||
94 | if (vma->vm_end - vma->vm_start != PAGE_SIZE) | ||
95 | return -EINVAL; | ||
96 | |||
97 | /* you can only map the "first" page */ | ||
98 | if (vma->vm_pgoff != 0) | ||
99 | return -EINVAL; | ||
100 | |||
101 | /* you can't share it with anyone */ | ||
102 | if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) | ||
103 | return -EINVAL; | ||
104 | |||
105 | vma->vm_ops = &litmus_ctrl_vm_ops; | ||
106 | /* this mapping should not be kept across forks, | ||
107 | * and cannot be expanded */ | ||
108 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; | ||
109 | |||
110 | err = alloc_ctrl_page(current); | ||
111 | if (!err) | ||
112 | err = map_ctrl_page(current, vma); | ||
113 | |||
114 | TRACE_CUR("%s flags=0x%x prot=0x%lx\n", | ||
115 | __FUNCTION__, vma->vm_flags, vma->vm_page_prot); | ||
116 | |||
117 | return err; | ||
118 | } | ||
119 | |||
120 | static struct file_operations litmus_ctrl_fops = { | ||
121 | .owner = THIS_MODULE, | ||
122 | .mmap = litmus_ctrl_mmap, | ||
123 | }; | ||
124 | |||
125 | static struct miscdevice litmus_ctrl_dev = { | ||
126 | .name = CTRL_NAME, | ||
127 | .minor = MISC_DYNAMIC_MINOR, | ||
128 | .fops = &litmus_ctrl_fops, | ||
129 | }; | ||
130 | |||
131 | static int __init init_litmus_ctrl_dev(void) | ||
132 | { | ||
133 | int err; | ||
134 | |||
135 | BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE); | ||
136 | |||
137 | printk("Initializing LITMUS^RT control device.\n"); | ||
138 | err = misc_register(&litmus_ctrl_dev); | ||
139 | if (err) | ||
140 | printk("Could not allocate %s device (%d).\n", CTRL_NAME, err); | ||
141 | return err; | ||
142 | } | ||
143 | |||
144 | static void __exit exit_litmus_ctrl_dev(void) | ||
145 | { | ||
146 | misc_deregister(&litmus_ctrl_dev); | ||
147 | } | ||
148 | |||
149 | module_init(init_litmus_ctrl_dev); | ||
150 | module_exit(exit_litmus_ctrl_dev); | ||
diff --git a/litmus/edf_common.c b/litmus/edf_common.c new file mode 100644 index 000000000000..c7d02ec2e15b --- /dev/null +++ b/litmus/edf_common.c | |||
@@ -0,0 +1,143 @@ | |||
1 | /* | ||
2 | * kernel/edf_common.c | ||
3 | * | ||
4 | * Common functions for EDF based scheduler. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #include <litmus/edf_common.h> | ||
16 | |||
17 | |||
#ifdef CONFIG_LITMUS_LOCKING
/* Like edf_higher_prio() below, but compares base priorities only --
 * inheritance and boosting are ignored. Ties broken by PID; either
 * argument may be NULL. */
int edf_higher_base_prio(struct task_struct* first,
			 struct task_struct* second)
{
	struct task_struct *first_task = first;
	struct task_struct *second_task = second;

	/* a non-NULL task beats a NULL one */
	if (!first || !second)
		return first && !second;

	return !is_realtime(second_task) ||
		earlier_deadline(first_task, second_task) ||
		(get_deadline(first_task) == get_deadline(second_task) &&
		 first_task->pid < second_task->pid);
}

/* bheap ordering callback based on base priorities */
int edf_pending_order(struct bheap_node* a, struct bheap_node* b)
{
	return edf_higher_base_prio(bheap2task(a), bheap2task(b));
}

#endif
41 | |||
42 | /* edf_higher_prio - returns true if first has a higher EDF priority | ||
43 | * than second. Deadline ties are broken by PID. | ||
44 | * | ||
45 | * both first and second may be NULL | ||
46 | */ | ||
47 | int edf_higher_prio(struct task_struct* first, | ||
48 | struct task_struct* second) | ||
49 | { | ||
50 | struct task_struct *first_task = first; | ||
51 | struct task_struct *second_task = second; | ||
52 | |||
53 | /* There is no point in comparing a task to itself. */ | ||
54 | if (first && first == second) { | ||
55 | TRACE_TASK(first, | ||
56 | "WARNING: pointless edf priority comparison.\n"); | ||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | |||
61 | /* check for NULL tasks */ | ||
62 | if (!first || !second) | ||
63 | return first && !second; | ||
64 | |||
65 | #ifdef CONFIG_LITMUS_LOCKING | ||
66 | |||
67 | /* Check for inherited priorities. Change task | ||
68 | * used for comparison in such a case. | ||
69 | */ | ||
70 | if (unlikely(first->rt_param.inh_task)) | ||
71 | first_task = first->rt_param.inh_task; | ||
72 | if (unlikely(second->rt_param.inh_task)) | ||
73 | second_task = second->rt_param.inh_task; | ||
74 | |||
75 | /* Check for priority boosting. Tie-break by start of boosting. | ||
76 | */ | ||
77 | if (unlikely(is_priority_boosted(first_task))) { | ||
78 | /* first_task is boosted, how about second_task? */ | ||
79 | if (!is_priority_boosted(second_task) || | ||
80 | lt_before(get_boost_start(first_task), | ||
81 | get_boost_start(second_task))) | ||
82 | return 1; | ||
83 | else | ||
84 | return 0; | ||
85 | } else if (unlikely(is_priority_boosted(second_task))) | ||
86 | /* second_task is boosted, first is not*/ | ||
87 | return 0; | ||
88 | |||
89 | #endif | ||
90 | |||
91 | |||
92 | return !is_realtime(second_task) || | ||
93 | |||
94 | /* is the deadline of the first task earlier? | ||
95 | * Then it has higher priority. | ||
96 | */ | ||
97 | earlier_deadline(first_task, second_task) || | ||
98 | |||
99 | /* Do we have a deadline tie? | ||
100 | * Then break by PID. | ||
101 | */ | ||
102 | (get_deadline(first_task) == get_deadline(second_task) && | ||
103 | (first_task->pid < second_task->pid || | ||
104 | |||
105 | /* If the PIDs are the same then the task with the inherited | ||
106 | * priority wins. | ||
107 | */ | ||
108 | (first_task->pid == second_task->pid && | ||
109 | !second->rt_param.inh_task))); | ||
110 | } | ||
111 | |||
/* bheap ordering callback based on effective EDF priorities */
int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
{
	return edf_higher_prio(bheap2task(a), bheap2task(b));
}
116 | |||
117 | void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
118 | release_jobs_t release) | ||
119 | { | ||
120 | rt_domain_init(rt, edf_ready_order, resched, release); | ||
121 | } | ||
122 | |||
123 | /* need_to_preempt - check whether the task t needs to be preempted | ||
124 | * call only with irqs disabled and with ready_lock acquired | ||
125 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
126 | */ | ||
127 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
128 | { | ||
129 | /* we need the read lock for edf_ready_queue */ | ||
130 | /* no need to preempt if there is nothing pending */ | ||
131 | if (!__jobs_pending(rt)) | ||
132 | return 0; | ||
133 | /* we need to reschedule if t doesn't exist */ | ||
134 | if (!t) | ||
135 | return 1; | ||
136 | |||
137 | /* NOTE: We cannot check for non-preemptibility since we | ||
138 | * don't know what address space we're currently in. | ||
139 | */ | ||
140 | |||
141 | /* make sure to get non-rt stuff out of the way */ | ||
142 | return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t); | ||
143 | } | ||
diff --git a/litmus/fdso.c b/litmus/fdso.c new file mode 100644 index 000000000000..2c629598e3c9 --- /dev/null +++ b/litmus/fdso.c | |||
@@ -0,0 +1,297 @@ | |||
1 | /* fdso.c - file descriptor attached shared objects | ||
2 | * | ||
3 | * (c) 2007 B. Brandenburg, LITMUS^RT project | ||
4 | * | ||
5 | * Notes: | ||
6 | * - objects descriptor (OD) tables are not cloned during a fork. | ||
7 | * - objects are created on-demand, and freed after the last reference | ||
8 | * is dropped. | ||
9 | * - for now, object types are hard coded. | ||
10 | * - As long as we have live objects, we keep a reference to the inode. | ||
11 | */ | ||
12 | |||
13 | #include <linux/errno.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/mutex.h> | ||
16 | #include <linux/file.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | |||
19 | #include <litmus/fdso.h> | ||
20 | |||
extern struct fdso_ops generic_lock_ops;

/* Per-type dispatch table, indexed by obj_type_t. All current object
 * types are locking protocols and share the generic lock operations.
 * NOTE(review): entry order must match the obj_type_t enum in
 * litmus/fdso.h — verify when adding a type.
 */
static const struct fdso_ops* fdso_ops[] = {
	&generic_lock_ops, /* FMLP_SEM */
	&generic_lock_ops, /* SRP_SEM */
	&generic_lock_ops, /* MPCP_SEM */
	&generic_lock_ops, /* MPCP_VS_SEM */
	&generic_lock_ops, /* DPCP_SEM */
	&generic_lock_ops, /* OMLP_SEM */
};
31 | |||
32 | static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) | ||
33 | { | ||
34 | if (fdso_ops[type]->create) | ||
35 | return fdso_ops[type]->create(obj_ref, type, config); | ||
36 | else | ||
37 | return -EINVAL; | ||
38 | } | ||
39 | |||
/* Tear down a raw object via its class destructor. Unlike create/open/
 * close, destroy is assumed to be provided by every class. */
static void fdso_destroy(obj_type_t type, void* obj)
{
	fdso_ops[type]->destroy(type, obj);
}
44 | |||
45 | static int fdso_open(struct od_table_entry* entry, void* __user config) | ||
46 | { | ||
47 | if (fdso_ops[entry->obj->type]->open) | ||
48 | return fdso_ops[entry->obj->type]->open(entry, config); | ||
49 | else | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | static int fdso_close(struct od_table_entry* entry) | ||
54 | { | ||
55 | if (fdso_ops[entry->obj->type]->close) | ||
56 | return fdso_ops[entry->obj->type]->close(entry); | ||
57 | else | ||
58 | return 0; | ||
59 | } | ||
60 | |||
/* inode must be locked already (i_obj_mutex held by caller).
 *
 * Allocate a new shared object of @type/@id, attach it to @inode's
 * object list, and return it via @obj_ref with an initial refcount of 1
 * (owned by the caller). Takes an extra reference on the inode so the
 * inode outlives the object; released in put_inode_obj().
 */
static int alloc_inode_obj(struct inode_obj_id** obj_ref,
			   struct inode* inode,
			   obj_type_t type,
			   unsigned int id,
			   void* __user config)
{
	struct inode_obj_id* obj;
	void* raw_obj;
	int err;

	obj = kmalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj) {
		return -ENOMEM;
	}

	/* construct the class-specific payload first so we can bail
	 * out without touching the inode on failure */
	err = fdso_create(&raw_obj, type, config);
	if (err != 0) {
		kfree(obj);
		return err;
	}

	INIT_LIST_HEAD(&obj->list);
	atomic_set(&obj->count, 1);
	obj->type  = type;
	obj->id    = id;
	obj->obj   = raw_obj;
	obj->inode = inode;

	/* publish on the inode's list; safe because i_obj_mutex is held */
	list_add(&obj->list, &inode->i_obj_list);
	atomic_inc(&inode->i_count);

	printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);

	*obj_ref = obj;
	return 0;
}
98 | |||
99 | /* inode must be locked already */ | ||
100 | static struct inode_obj_id* get_inode_obj(struct inode* inode, | ||
101 | obj_type_t type, | ||
102 | unsigned int id) | ||
103 | { | ||
104 | struct list_head* pos; | ||
105 | struct inode_obj_id* obj = NULL; | ||
106 | |||
107 | list_for_each(pos, &inode->i_obj_list) { | ||
108 | obj = list_entry(pos, struct inode_obj_id, list); | ||
109 | if (obj->id == id && obj->type == type) { | ||
110 | atomic_inc(&obj->count); | ||
111 | return obj; | ||
112 | } | ||
113 | } | ||
114 | printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id); | ||
115 | return NULL; | ||
116 | } | ||
117 | |||
118 | |||
/* Drop one reference to @obj; on the last put, unlink it from the
 * inode, destroy the payload, and release the inode reference taken in
 * alloc_inode_obj().
 *
 * The count is decremented OUTSIDE i_obj_mutex and then re-checked
 * under the mutex: a concurrent get_inode_obj() (which runs with the
 * mutex held) may have resurrected the object between the dec and the
 * lock acquisition, in which case we must not free it.
 */
static void put_inode_obj(struct inode_obj_id* obj)
{
	struct inode* inode;
	int let_go = 0;

	inode = obj->inode;
	if (atomic_dec_and_test(&obj->count)) {

		mutex_lock(&inode->i_obj_mutex);
		/* no new references can be obtained */
		if (!atomic_read(&obj->count)) {
			list_del(&obj->list);
			fdso_destroy(obj->type, obj->obj);
			kfree(obj);
			let_go = 1;
		}
		mutex_unlock(&inode->i_obj_mutex);
		/* iput() may sleep / take inode locks, so do it only
		 * after dropping i_obj_mutex */
		if (let_go)
			iput(inode);
	}
}
140 | |||
141 | static struct od_table_entry* get_od_entry(struct task_struct* t) | ||
142 | { | ||
143 | struct od_table_entry* table; | ||
144 | int i; | ||
145 | |||
146 | |||
147 | table = t->od_table; | ||
148 | if (!table) { | ||
149 | table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS, | ||
150 | GFP_KERNEL); | ||
151 | t->od_table = table; | ||
152 | } | ||
153 | |||
154 | for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++) | ||
155 | if (!table[i].used) { | ||
156 | table[i].used = 1; | ||
157 | return table + i; | ||
158 | } | ||
159 | return NULL; | ||
160 | } | ||
161 | |||
/* Release the object referenced by descriptor slot @od and mark the
 * slot free for reuse. Always returns 0. */
static int put_od_entry(struct od_table_entry* od)
{
	put_inode_obj(od->obj);
	od->used = 0;
	return 0;
}
168 | |||
169 | void exit_od_table(struct task_struct* t) | ||
170 | { | ||
171 | int i; | ||
172 | |||
173 | if (t->od_table) { | ||
174 | for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) | ||
175 | if (t->od_table[i].used) | ||
176 | put_od_entry(t->od_table + i); | ||
177 | kfree(t->od_table); | ||
178 | t->od_table = NULL; | ||
179 | } | ||
180 | } | ||
181 | |||
/* Core of sys_od_open(): attach (or create) the shared object @type/@id
 * living on @file's inode and bind it to a fresh descriptor slot of the
 * current task.
 *
 * Returns the descriptor index (>= 0) on success, or a negative errno.
 * On any failure the reserved slot is released again.
 */
static int do_sys_od_open(struct file* file, obj_type_t type, int id,
			  void* __user config)
{
	int idx = 0, err = 0;
	struct inode* inode;
	struct inode_obj_id* obj = NULL;
	struct od_table_entry* entry;

	inode = file->f_dentry->d_inode;

	/* reserve a slot before taking the inode mutex */
	entry = get_od_entry(current);
	if (!entry)
		return -ENOMEM;

	mutex_lock(&inode->i_obj_mutex);
	obj = get_inode_obj(inode, type, id);
	if (!obj)
		/* first opener: create the object on demand */
		err = alloc_inode_obj(&obj, inode, type, id, config);
	if (err != 0) {
		/* creation failed: give the slot back, report errno */
		obj = NULL;
		idx = err;
		entry->used = 0;
	} else {
		entry->obj = obj;
		entry->class = fdso_ops[type];
		idx = entry - current->od_table;
	}

	mutex_unlock(&inode->i_obj_mutex);

	/* open only if creation succeeded */
	if (!err)
		err = fdso_open(entry, config);
	if (err < 0) {
		/* The class rejected the open call.
		 * We need to clean up and tell user space.
		 */
		if (obj)
			put_od_entry(entry);
		idx = err;
	}

	return idx;
}
226 | |||
227 | |||
228 | struct od_table_entry* get_entry_for_od(int od) | ||
229 | { | ||
230 | struct task_struct *t = current; | ||
231 | |||
232 | if (!t->od_table) | ||
233 | return NULL; | ||
234 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
235 | return NULL; | ||
236 | if (!t->od_table[od].used) | ||
237 | return NULL; | ||
238 | return t->od_table + od; | ||
239 | } | ||
240 | |||
241 | |||
/* Syscall: open an object descriptor for the shared object @type/@obj_id
 * attached to the inode behind @fd. Returns the OD index or -errno. */
asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
{
	int ret = 0;
	struct file*  file;

	/*
	   1) get file from fd, get inode from file
	   2) lock inode
	   3) try to lookup object
	   4) if not present create and enqueue object, inc inode refcnt
	   5) increment refcnt of object
	   6) alloc od_table_entry, setup ptrs
	   7) unlock inode
	   8) return offset in od_table as OD
	 */

	if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
		ret = -EINVAL;
		goto out;
	}

	file = fget(fd);
	if (!file) {
		ret = -EBADF;
		goto out;
	}

	ret = do_sys_od_open(file, type, obj_id, config);

	/* the object holds its own inode reference; the struct file is
	 * only needed for the duration of the call */
	fput(file);

out:
	return ret;
}
276 | |||
277 | |||
278 | asmlinkage long sys_od_close(int od) | ||
279 | { | ||
280 | int ret = -EINVAL; | ||
281 | struct task_struct *t = current; | ||
282 | |||
283 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
284 | return ret; | ||
285 | |||
286 | if (!t->od_table || !t->od_table[od].used) | ||
287 | return ret; | ||
288 | |||
289 | |||
290 | /* give the class a chance to reject the close | ||
291 | */ | ||
292 | ret = fdso_close(t->od_table + od); | ||
293 | if (ret == 0) | ||
294 | ret = put_od_entry(t->od_table + od); | ||
295 | |||
296 | return ret; | ||
297 | } | ||
diff --git a/litmus/fp_common.c b/litmus/fp_common.c new file mode 100644 index 000000000000..31fc2db20adf --- /dev/null +++ b/litmus/fp_common.c | |||
@@ -0,0 +1,119 @@ | |||
1 | /* | ||
2 | * litmus/fp_common.c | ||
3 | * | ||
4 | * Common functions for fixed-priority scheduler. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #include <litmus/fp_common.h> | ||
16 | |||
/* fp_higher_prio - returns true if first has a higher static priority
 * than second. Priority ties are broken by PID.
 * (Note: this is fixed-priority scheduling; there are no deadlines here.)
 *
 * both first and second may be NULL
 */
int fp_higher_prio(struct task_struct* first,
		   struct task_struct* second)
{
	struct task_struct *first_task = first;
	struct task_struct *second_task = second;

	/* There is no point in comparing a task to itself. */
	if (unlikely(first && first == second)) {
		TRACE_TASK(first,
			   "WARNING: pointless FP priority comparison.\n");
		return 0;
	}


	/* check for NULL tasks */
	if (!first || !second)
		return first && !second;

#ifdef CONFIG_LITMUS_LOCKING

	/* Check for inherited priorities. Change task
	 * used for comparison in such a case.
	 */
	if (unlikely(first->rt_param.inh_task))
		first_task = first->rt_param.inh_task;
	if (unlikely(second->rt_param.inh_task))
		second_task = second->rt_param.inh_task;

	/* Check for priority boosting. Tie-break by start of boosting.
	 */
	if (unlikely(is_priority_boosted(first_task))) {
		/* first_task is boosted, how about second_task? */
		if (!is_priority_boosted(second_task) ||
		    lt_before(get_boost_start(first_task),
			      get_boost_start(second_task)))
			return 1;
		else
			return 0;
	} else if (unlikely(is_priority_boosted(second_task)))
		/* second_task is boosted, first is not*/
		return 0;

#endif

	/* lower numeric value == higher priority */
	return !is_realtime(second_task) ||

		get_priority(first_task) < get_priority(second_task) ||

		/* Break by PID.
		 */
		(get_priority(first_task) == get_priority(second_task) &&
	        (first_task->pid < second_task->pid ||

		/* If the PIDs are the same then the task with the inherited
		 * priority wins.
		 * NOTE(review): this inspects second (not second_task),
		 * mirroring edf_common.c — verify that is intentional.
		 */
		(first_task->pid == second_task->pid &&
		 !second->rt_param.inh_task)));
}
82 | |||
/* bheap comparison callback: order heap nodes by the fixed priority of
 * the tasks they wrap.
 */
int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
{
	struct task_struct *ta = bheap2task(a);
	struct task_struct *tb = bheap2task(b);

	return fp_higher_prio(ta, tb);
}
87 | |||
/* Initialize an rt_domain whose ready queue is ordered by fixed
 * priority. The resched/release callbacks are forwarded verbatim. */
void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
		    release_jobs_t release)
{
	rt_domain_init(rt, fp_ready_order, resched, release);
}
93 | |||
/* need_to_preempt - check whether the task t needs to be preempted
 */
int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
{
	struct task_struct *hp = fp_prio_peek(q);

	/* nothing queued: no reason to preempt */
	if (!hp)
		return 0;
	/* CPU idle: always pick up pending work */
	if (!t)
		return 1;

	/* make sure to get non-rt stuff out of the way */
	return !is_realtime(t) || fp_higher_prio(hp, t);
}
110 | |||
111 | void fp_prio_queue_init(struct fp_prio_queue* q) | ||
112 | { | ||
113 | int i; | ||
114 | |||
115 | for (i = 0; i < FP_PRIO_BIT_WORDS; i++) | ||
116 | q->bitmask[i] = 0; | ||
117 | for (i = 0; i < LITMUS_MAX_PRIORITY; i++) | ||
118 | bheap_init(&q->queue[i]); | ||
119 | } | ||
diff --git a/litmus/ft_event.c b/litmus/ft_event.c new file mode 100644 index 000000000000..399a07becca5 --- /dev/null +++ b/litmus/ft_event.c | |||
@@ -0,0 +1,43 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include <litmus/feather_trace.h> | ||
4 | |||
5 | #if !defined(CONFIG_ARCH_HAS_FEATHER_TRACE) || defined(CONFIG_DEBUG_RODATA) | ||
6 | /* provide dummy implementation */ | ||
7 | |||
8 | int ft_events[MAX_EVENTS]; | ||
9 | |||
10 | int ft_enable_event(unsigned long id) | ||
11 | { | ||
12 | if (id < MAX_EVENTS) { | ||
13 | ft_events[id]++; | ||
14 | return 1; | ||
15 | } else | ||
16 | return 0; | ||
17 | } | ||
18 | |||
19 | int ft_disable_event(unsigned long id) | ||
20 | { | ||
21 | if (id < MAX_EVENTS && ft_events[id]) { | ||
22 | ft_events[id]--; | ||
23 | return 1; | ||
24 | } else | ||
25 | return 0; | ||
26 | } | ||
27 | |||
28 | int ft_disable_all_events(void) | ||
29 | { | ||
30 | int i; | ||
31 | |||
32 | for (i = 0; i < MAX_EVENTS; i++) | ||
33 | ft_events[i] = 0; | ||
34 | |||
35 | return MAX_EVENTS; | ||
36 | } | ||
37 | |||
/* Nonzero iff event @id is in range and has at least one active enable. */
int ft_is_event_enabled(unsigned long id)
{
	return id < MAX_EVENTS && ft_events[id];
}
42 | |||
43 | #endif | ||
diff --git a/litmus/ftdev.c b/litmus/ftdev.c new file mode 100644 index 000000000000..99bc39ffbcef --- /dev/null +++ b/litmus/ftdev.c | |||
@@ -0,0 +1,446 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/fs.h> | ||
3 | #include <linux/slab.h> | ||
4 | #include <linux/cdev.h> | ||
5 | #include <asm/uaccess.h> | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/device.h> | ||
8 | |||
9 | #include <litmus/litmus.h> | ||
10 | #include <litmus/feather_trace.h> | ||
11 | #include <litmus/ftdev.h> | ||
12 | |||
/* Allocate a feather-trace buffer with @count slots of @size bytes each,
 * plus one status byte (marker) per slot, backed by a single
 * power-of-two page allocation. Returns NULL on any failure.
 *
 * NOTE(review): (size + 1) * count can overflow size_t for adversarial
 * arguments, and a huge request yields an order beyond what
 * __get_free_pages() can satisfy (it then just fails). Callers are
 * in-kernel and pass small fixed sizes — confirm before exposing this
 * to untrusted inputs.
 */
struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
{
	struct ft_buffer* buf;
	size_t total = (size + 1) * count;
	char* mem;
	int order = 0, pages = 1;

	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return NULL;

	/* round the byte total up to whole pages, then up to the next
	 * power-of-two page count to get the allocation order */
	total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
	while (pages < total) {
		order++;
		pages *= 2;
	}

	mem = (char*) __get_free_pages(GFP_KERNEL, order);
	if (!mem) {
		kfree(buf);
		return NULL;
	}

	/* layout: slot payloads first, one marker byte per slot after */
	if (!init_ft_buffer(buf, count, size,
			    mem + (count * size),  /* markers at the end */
			    mem)) {                /* buffer objects     */
		free_pages((unsigned long) mem, order);
		kfree(buf);
		return NULL;
	}
	return buf;
}
45 | |||
46 | void free_ft_buffer(struct ft_buffer* buf) | ||
47 | { | ||
48 | int order = 0, pages = 1; | ||
49 | size_t total; | ||
50 | |||
51 | if (buf) { | ||
52 | total = (buf->slot_size + 1) * buf->slot_count; | ||
53 | total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0); | ||
54 | while (pages < total) { | ||
55 | order++; | ||
56 | pages *= 2; | ||
57 | } | ||
58 | free_pages((unsigned long) buf->buffer_mem, order); | ||
59 | kfree(buf); | ||
60 | } | ||
61 | } | ||
62 | |||
/* Node in the singly-linked list of events a reader has enabled;
 * one chain per minor device, torn down on last release. */
struct ftdev_event {
	int id;
	struct ftdev_event* next;
};
67 | |||
68 | static int activate(struct ftdev_event** chain, int id) | ||
69 | { | ||
70 | struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL); | ||
71 | if (ev) { | ||
72 | printk(KERN_INFO | ||
73 | "Enabling feather-trace event %d.\n", (int) id); | ||
74 | ft_enable_event(id); | ||
75 | ev->id = id; | ||
76 | ev->next = *chain; | ||
77 | *chain = ev; | ||
78 | } | ||
79 | return ev ? 0 : -ENOMEM; | ||
80 | } | ||
81 | |||
/* Disable event @id and remove its first matching node from @chain.
 * Uses a pointer-to-pointer walk so unlinking needs no prev pointer
 * and works uniformly for the head node. */
static void deactivate(struct ftdev_event** chain, int id)
{
	struct ftdev_event **cur = chain;
	struct ftdev_event *nxt;
	while (*cur) {
		if ((*cur)->id == id) {
			nxt = (*cur)->next;
			kfree(*cur);
			*cur = nxt;
			printk(KERN_INFO
			       "Disabling feather-trace event %d.\n", (int) id);
			ft_disable_event(id);
			break;
		}
		cur = &(*cur)->next;
	}
}
99 | |||
/* open() handler: bind the file to the minor's ftdev_minor state and,
 * for the first reader, allocate the trace buffer via the device's
 * alloc hook. Readers are counted so release can free on last close. */
static int ftdev_open(struct inode *in, struct file *filp)
{
	struct ftdev* ftdev;
	struct ftdev_minor* ftdm;
	unsigned int buf_idx = iminor(in);
	int err = 0;

	ftdev = container_of(in->i_cdev, struct ftdev, cdev);

	if (buf_idx >= ftdev->minor_cnt) {
		err = -ENODEV;
		goto out;
	}
	/* device-specific gate: may refuse the open outright */
	if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx)))
		goto out;

	ftdm = ftdev->minor + buf_idx;
	ftdm->ftdev = ftdev;
	filp->private_data = ftdm;

	if (mutex_lock_interruptible(&ftdm->lock)) {
		err = -ERESTARTSYS;
		goto out;
	}

	/* first reader triggers buffer allocation */
	if (!ftdm->readers && ftdev->alloc)
		err = ftdev->alloc(ftdev, buf_idx);
	if (0 == err)
		ftdm->readers++;

	mutex_unlock(&ftdm->lock);
out:
	return err;
}
134 | |||
/* release() handler: on last close, disable all events this minor had
 * enabled, give in-flight writers time to drain, and free the buffer. */
static int ftdev_release(struct inode *in, struct file *filp)
{
	struct ftdev* ftdev;
	struct ftdev_minor* ftdm;
	unsigned int buf_idx = iminor(in);
	int err = 0;

	ftdev = container_of(in->i_cdev, struct ftdev, cdev);

	if (buf_idx >= ftdev->minor_cnt) {
		err = -ENODEV;
		goto out;
	}
	ftdm = ftdev->minor + buf_idx;

	if (mutex_lock_interruptible(&ftdm->lock)) {
		err = -ERESTARTSYS;
		goto out;
	}

	if (ftdm->readers == 1) {
		while (ftdm->events)
			deactivate(&ftdm->events, ftdm->events->id);

		/* wait for any pending events to complete */
		/* NOTE(review): a fixed 1s sleep is a heuristic drain,
		 * not a synchronization guarantee — confirm writers
		 * cannot outlive it. */
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(HZ);

		printk(KERN_ALERT "Failed trace writes: %u\n",
		       ftdm->buf->failed_writes);

		if (ftdev->free)
			ftdev->free(ftdev, buf_idx);
	}

	ftdm->readers--;
	mutex_unlock(&ftdm->lock);
out:
	return err;
}
175 | |||
/* based on ft_buffer_read
 * @returns < 0 : page fault
 *          = 0 : no data available
 *          = 1 : one slot copied
 *
 * Copies the oldest READY slot to user space; the slot is freed only
 * after the copy succeeded, so a faulting copy leaves the buffer intact.
 */
static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest)
{
	unsigned int idx;
	int err = 0;
	if (buf->free_count != buf->slot_count) {
		/* data available */
		idx = buf->read_idx % buf->slot_count;
		if (buf->slots[idx] == SLOT_READY) {
			err = copy_to_user(dest, ((char*) buf->buffer_mem) +
					   idx * buf->slot_size,
					   buf->slot_size);
			if (err == 0) {
				/* copy ok: retire the slot and publish the
				 * free count to concurrent writers */
				buf->slots[idx] = SLOT_FREE;
				buf->read_idx++;
				fetch_and_inc(&buf->free_count);
				err = 1;
			}
		}
	}
	return err;
}
203 | |||
/* read() handler: stream whole trace slots to user space. Returns the
 * number of bytes copied (a multiple of slot_size), 0 at "EOF", or a
 * negative errno. Sleeps in 50-jiffy naps while events are enabled and
 * no data is available yet. */
static ssize_t ftdev_read(struct file *filp,
			  char __user *to, size_t len, loff_t *f_pos)
{
	/* we ignore f_pos, this is strictly sequential */

	ssize_t err = 0;
	size_t chunk;
	int copied;
	struct ftdev_minor* ftdm = filp->private_data;

	if (mutex_lock_interruptible(&ftdm->lock)) {
		err = -ERESTARTSYS;
		goto out;
	}


	chunk = ftdm->buf->slot_size;
	while (len >= chunk) {
		copied = ft_buffer_copy_to_user(ftdm->buf, to);
		if (copied == 1) {
			len    -= chunk;
			to     += chunk;
			err    += chunk;
		} else if (err == 0 && copied == 0 && ftdm->events) {
			/* Only wait if there are any events enabled and only
			 * if we haven't copied some data yet. We cannot wait
			 * here with copied data because that data would get
			 * lost if the task is interrupted (e.g., killed).
			 */
			/* drop the lock across the sleep so writers/ioctls
			 * can make progress */
			mutex_unlock(&ftdm->lock);
			set_current_state(TASK_INTERRUPTIBLE);

			schedule_timeout(50);

			if (signal_pending(current)) {
				if (err == 0)
					/* nothing read yet, signal problem */
					err = -ERESTARTSYS;
				goto out;
			}
			if (mutex_lock_interruptible(&ftdm->lock)) {
				err = -ERESTARTSYS;
				goto out;
			}
		} else if (copied < 0) {
			/* page fault */
			err = copied;
			break;
		} else
			/* nothing left to get, return to user space */
			break;
	}
	mutex_unlock(&ftdm->lock);
out:
	return err;
}
260 | |||
261 | static long ftdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
262 | { | ||
263 | long err = -ENOIOCTLCMD; | ||
264 | struct ftdev_minor* ftdm = filp->private_data; | ||
265 | |||
266 | if (mutex_lock_interruptible(&ftdm->lock)) { | ||
267 | err = -ERESTARTSYS; | ||
268 | goto out; | ||
269 | } | ||
270 | |||
271 | /* FIXME: check id against list of acceptable events */ | ||
272 | |||
273 | switch (cmd) { | ||
274 | case FTDEV_ENABLE_CMD: | ||
275 | if (activate(&ftdm->events, arg)) | ||
276 | err = -ENOMEM; | ||
277 | else | ||
278 | err = 0; | ||
279 | break; | ||
280 | |||
281 | case FTDEV_DISABLE_CMD: | ||
282 | deactivate(&ftdm->events, arg); | ||
283 | err = 0; | ||
284 | break; | ||
285 | |||
286 | default: | ||
287 | printk(KERN_DEBUG "ftdev: strange ioctl (%u, %lu)\n", cmd, arg); | ||
288 | }; | ||
289 | |||
290 | mutex_unlock(&ftdm->lock); | ||
291 | out: | ||
292 | return err; | ||
293 | } | ||
294 | |||
295 | static ssize_t ftdev_write(struct file *filp, const char __user *from, | ||
296 | size_t len, loff_t *f_pos) | ||
297 | { | ||
298 | struct ftdev_minor* ftdm = filp->private_data; | ||
299 | ssize_t err = -EINVAL; | ||
300 | struct ftdev* ftdev = ftdm->ftdev; | ||
301 | |||
302 | /* dispatch write to buffer-specific code, if available */ | ||
303 | if (ftdev->write) | ||
304 | err = ftdev->write(ftdm->buf, len, from); | ||
305 | |||
306 | return err; | ||
307 | } | ||
308 | |||
/* VFS entry points shared by every ftdev character device. */
struct file_operations ftdev_fops = {
	.owner   = THIS_MODULE,
	.open    = ftdev_open,
	.release = ftdev_release,
	.write   = ftdev_write,
	.read    = ftdev_read,
	.unlocked_ioctl = ftdev_ioctl,
};
317 | |||
/* Initialize an ftdev: set up the embedded cdev, allocate and reset the
 * per-minor state, and create the sysfs class. Device hooks default to
 * NULL; callers fill them in before register_ftdev().
 * Returns 0 or a negative errno (allocated state freed on failure). */
int ftdev_init(	struct ftdev* ftdev, struct module* owner,
		const int minor_cnt, const char* name)
{
	int i, err;

	BUG_ON(minor_cnt < 1);

	cdev_init(&ftdev->cdev, &ftdev_fops);
	ftdev->name = name;
	ftdev->minor_cnt = minor_cnt;
	ftdev->cdev.owner = owner;
	ftdev->cdev.ops = &ftdev_fops;
	ftdev->alloc    = NULL;
	ftdev->free     = NULL;
	ftdev->can_open = NULL;
	ftdev->write    = NULL;

	ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor),
			GFP_KERNEL);
	if (!ftdev->minor) {
		printk(KERN_WARNING "ftdev(%s): Could not allocate memory\n",
			ftdev->name);
		err = -ENOMEM;
		goto err_out;
	}

	for (i = 0; i < ftdev->minor_cnt; i++) {
		mutex_init(&ftdev->minor[i].lock);
		ftdev->minor[i].readers = 0;
		ftdev->minor[i].buf     = NULL;
		ftdev->minor[i].events  = NULL;
	}

	ftdev->class = class_create(owner, ftdev->name);
	if (IS_ERR(ftdev->class)) {
		err = PTR_ERR(ftdev->class);
		printk(KERN_WARNING "ftdev(%s): "
			"Could not create device class.\n", ftdev->name);
		goto err_dealloc;
	}

	return 0;

	/* goto-based unwind: undo in reverse order of acquisition */
err_dealloc:
	kfree(ftdev->minor);
err_out:
	return err;
}
366 | |||
367 | /* | ||
368 | * Destroy minor devices up to, but not including, up_to. | ||
369 | */ | ||
370 | static void ftdev_device_destroy(struct ftdev* ftdev, unsigned int up_to) | ||
371 | { | ||
372 | dev_t minor_cntr; | ||
373 | |||
374 | if (up_to < 1) | ||
375 | up_to = (ftdev->minor_cnt < 1) ? 0 : ftdev->minor_cnt; | ||
376 | |||
377 | for (minor_cntr = 0; minor_cntr < up_to; ++minor_cntr) | ||
378 | device_destroy(ftdev->class, MKDEV(ftdev->major, minor_cntr)); | ||
379 | } | ||
380 | |||
381 | void ftdev_exit(struct ftdev* ftdev) | ||
382 | { | ||
383 | printk("ftdev(%s): Exiting\n", ftdev->name); | ||
384 | ftdev_device_destroy(ftdev, -1); | ||
385 | cdev_del(&ftdev->cdev); | ||
386 | unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt); | ||
387 | class_destroy(ftdev->class); | ||
388 | kfree(ftdev->minor); | ||
389 | } | ||
390 | |||
/* Register an initialized ftdev with the kernel: allocate a chrdev
 * region, add the cdev, and create one device node per minor
 * (/dev/litmus/<name><n>). On failure every stage is unwound in
 * reverse; returns 0 or a negative errno. */
int register_ftdev(struct ftdev* ftdev)
{
	struct device **device;
	dev_t trace_dev_tmp, minor_cntr;
	int err;

	err = alloc_chrdev_region(&trace_dev_tmp, 0, ftdev->minor_cnt,
			ftdev->name);
	if (err) {
		printk(KERN_WARNING "ftdev(%s): "
			"Could not allocate char. device region (%d minors)\n",
			ftdev->name, ftdev->minor_cnt);
		goto err_out;
	}

	ftdev->major = MAJOR(trace_dev_tmp);

	err = cdev_add(&ftdev->cdev, trace_dev_tmp, ftdev->minor_cnt);
	if (err) {
		printk(KERN_WARNING "ftdev(%s): "
			"Could not add cdev for major %u with %u minor(s).\n",
			ftdev->name, ftdev->major, ftdev->minor_cnt);
		goto err_unregister;
	}

	/* create the minor device(s) */
	for (minor_cntr = 0; minor_cntr < ftdev->minor_cnt; ++minor_cntr)
	{
		trace_dev_tmp = MKDEV(ftdev->major, minor_cntr);
		device = &ftdev->minor[minor_cntr].device;

		*device = device_create(ftdev->class, NULL, trace_dev_tmp, NULL,
				"litmus/%s%d", ftdev->name, minor_cntr);
		if (IS_ERR(*device)) {
			err = PTR_ERR(*device);
			printk(KERN_WARNING "ftdev(%s): "
				"Could not create device major/minor number "
				"%u/%u\n", ftdev->name, ftdev->major,
				minor_cntr);
			printk(KERN_WARNING "ftdev(%s): "
				"will attempt deletion of allocated devices.\n",
				ftdev->name);
			goto err_minors;
		}
	}

	return 0;

	/* unwind: destroy only the minors created so far (minor_cntr),
	 * then the cdev, then the chrdev region */
err_minors:
	ftdev_device_destroy(ftdev, minor_cntr);
	cdev_del(&ftdev->cdev);
err_unregister:
	unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
err_out:
	return err;
}
diff --git a/litmus/jobs.c b/litmus/jobs.c new file mode 100644 index 000000000000..36e314625d86 --- /dev/null +++ b/litmus/jobs.c | |||
@@ -0,0 +1,43 @@ | |||
1 | /* litmus/jobs.c - common job control code | ||
2 | */ | ||
3 | |||
4 | #include <linux/sched.h> | ||
5 | |||
6 | #include <litmus/litmus.h> | ||
7 | #include <litmus/jobs.h> | ||
8 | |||
/* Advance @t's job parameters to its next periodic job: the new release
 * is the old (absolute) deadline, the new deadline is one period later,
 * execution budget is reset, and the job sequence number is bumped. */
void prepare_for_next_period(struct task_struct *t)
{
	BUG_ON(!t);
	/* prepare next release */
	t->rt_param.job_params.release   = t->rt_param.job_params.deadline;
	t->rt_param.job_params.deadline += get_rt_period(t);
	t->rt_param.job_params.exec_time = 0;
	/* update job sequence number */
	t->rt_param.job_params.job_no++;

	/* don't confuse Linux */
	t->rt.time_slice = 1;
}
22 | |||
/* Schedule @t's first job to be released at absolute time @start.
 * Seeding the deadline with @start makes prepare_for_next_period()
 * compute release = start and deadline = start + period. */
void release_at(struct task_struct *t, lt_t start)
{
	t->rt_param.job_params.deadline = start;
	prepare_for_next_period(t);
	set_rt_flags(t, RT_F_RUNNING);
}
29 | |||
30 | |||
/*
 * Deactivate current task until the beginning of the next period.
 */
long complete_job(void)
{
	/* Mark that we do not execute anymore */
	set_rt_flags(current, RT_F_SLEEP);
	/* call schedule, this will return when a new job arrives
	 * it also takes care of preparing for the next release
	 */
	schedule();
	return 0;
}
diff --git a/litmus/litmus.c b/litmus/litmus.c new file mode 100644 index 000000000000..b22f84a02010 --- /dev/null +++ b/litmus/litmus.c | |||
@@ -0,0 +1,555 @@ | |||
1 | /* | ||
2 | * litmus.c -- Implementation of the LITMUS syscalls, | ||
3 | * the LITMUS initialization code, | ||
4 | * and the procfs interface. | ||
5 | */ | ||
6 | #include <asm/uaccess.h> | ||
7 | #include <linux/uaccess.h> | ||
8 | #include <linux/sysrq.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/slab.h> | ||
12 | |||
13 | #include <litmus/litmus.h> | ||
14 | #include <litmus/bheap.h> | ||
15 | #include <litmus/trace.h> | ||
16 | #include <litmus/rt_domain.h> | ||
17 | #include <litmus/litmus_proc.h> | ||
18 | #include <litmus/sched_trace.h> | ||
19 | |||
20 | /* Number of RT tasks that exist in the system */ | ||
21 | atomic_t rt_task_count = ATOMIC_INIT(0); | ||
22 | static DEFINE_RAW_SPINLOCK(task_transition_lock); | ||
23 | /* synchronize plugin switching */ | ||
24 | atomic_t cannot_use_plugin = ATOMIC_INIT(0); | ||
25 | |||
26 | /* Give log messages sequential IDs. */ | ||
27 | atomic_t __log_seq_no = ATOMIC_INIT(0); | ||
28 | |||
29 | #ifdef CONFIG_RELEASE_MASTER | ||
30 | /* current master CPU for handling timer IRQs */ | ||
31 | atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); | ||
32 | #endif | ||
33 | |||
34 | static struct kmem_cache * bheap_node_cache; | ||
35 | extern struct kmem_cache * release_heap_cache; | ||
36 | |||
37 | struct bheap_node* bheap_node_alloc(int gfp_flags) | ||
38 | { | ||
39 | return kmem_cache_alloc(bheap_node_cache, gfp_flags); | ||
40 | } | ||
41 | |||
42 | void bheap_node_free(struct bheap_node* hn) | ||
43 | { | ||
44 | kmem_cache_free(bheap_node_cache, hn); | ||
45 | } | ||
46 | |||
47 | struct release_heap* release_heap_alloc(int gfp_flags); | ||
48 | void release_heap_free(struct release_heap* rh); | ||
49 | |||
50 | /* | ||
51 | * sys_set_task_rt_param | ||
52 | * @pid: Pid of the task which scheduling parameters must be changed | ||
53 | * @param: New real-time extension parameters such as the execution cost and | ||
54 | * period | ||
55 | * Syscall for manipulating task rt extension params | ||
56 | * Returns EFAULT if param is NULL. | ||
57 | * ESRCH if pid is not corresponding | ||
58 | * to a valid task. | ||
59 | * EINVAL if either period or execution cost is <=0 | ||
60 | * EPERM if pid is a real-time task | ||
61 | * 0 if success | ||
62 | * | ||
63 | * Only non-real-time tasks may be configured with this system call | ||
64 | * to avoid races with the scheduler. In practice, this means that a | ||
65 | * task's parameters must be set _before_ calling sys_prepare_rt_task() | ||
66 | * | ||
67 | * find_task_by_vpid() assumes that we are in the same namespace of the | ||
68 | * target. | ||
69 | */ | ||
70 | asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
71 | { | ||
72 | struct rt_task tp; | ||
73 | struct task_struct *target; | ||
74 | int retval = -EINVAL; | ||
75 | |||
76 | printk("Setting up rt task parameters for process %d.\n", pid); | ||
77 | |||
78 | if (pid < 0 || param == 0) { | ||
79 | goto out; | ||
80 | } | ||
81 | if (copy_from_user(&tp, param, sizeof(tp))) { | ||
82 | retval = -EFAULT; | ||
83 | goto out; | ||
84 | } | ||
85 | |||
86 | /* Task search and manipulation must be protected */ | ||
87 | read_lock_irq(&tasklist_lock); | ||
88 | if (!(target = find_task_by_vpid(pid))) { | ||
89 | retval = -ESRCH; | ||
90 | goto out_unlock; | ||
91 | } | ||
92 | |||
93 | if (is_realtime(target)) { | ||
94 | /* The task is already a real-time task. | ||
95 | * We cannot allow parameter changes at this point. | ||
96 | */ | ||
97 | retval = -EBUSY; | ||
98 | goto out_unlock; | ||
99 | } | ||
100 | |||
101 | if (tp.exec_cost <= 0) | ||
102 | goto out_unlock; | ||
103 | if (tp.period <= 0) | ||
104 | goto out_unlock; | ||
105 | if (!cpu_online(tp.cpu)) | ||
106 | goto out_unlock; | ||
107 | if (tp.period < tp.exec_cost) | ||
108 | { | ||
109 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
110 | "because wcet > period\n", pid); | ||
111 | goto out_unlock; | ||
112 | } | ||
113 | if (tp.budget_policy != NO_ENFORCEMENT && | ||
114 | tp.budget_policy != QUANTUM_ENFORCEMENT && | ||
115 | tp.budget_policy != PRECISE_ENFORCEMENT) | ||
116 | { | ||
117 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
118 | "because unsupported budget enforcement policy " | ||
119 | "specified (%d)\n", | ||
120 | pid, tp.budget_policy); | ||
121 | goto out_unlock; | ||
122 | } | ||
123 | |||
124 | if (tp.priority >= LITMUS_MAX_PRIORITY) { | ||
125 | printk(KERN_INFO "litmus: invalid priority (%u); " | ||
126 | "task %s/%d rejected\n", | ||
127 | tp.priority, target->comm, target->pid); | ||
128 | goto out_unlock; | ||
129 | } | ||
130 | |||
131 | target->rt_param.task_params = tp; | ||
132 | |||
133 | retval = 0; | ||
134 | out_unlock: | ||
135 | read_unlock_irq(&tasklist_lock); | ||
136 | out: | ||
137 | return retval; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Getter of task's RT params | ||
142 | * returns EINVAL if param or pid is NULL | ||
143 | * returns ESRCH if pid does not correspond to a valid task | ||
144 | * returns EFAULT if copying of parameters has failed. | ||
145 | * | ||
146 | * find_task_by_vpid() assumes that we are in the same namespace of the | ||
147 | * target. | ||
148 | */ | ||
149 | asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
150 | { | ||
151 | int retval = -EINVAL; | ||
152 | struct task_struct *source; | ||
153 | struct rt_task lp; | ||
154 | if (param == 0 || pid < 0) | ||
155 | goto out; | ||
156 | read_lock(&tasklist_lock); | ||
157 | if (!(source = find_task_by_vpid(pid))) { | ||
158 | retval = -ESRCH; | ||
159 | goto out_unlock; | ||
160 | } | ||
161 | lp = source->rt_param.task_params; | ||
162 | read_unlock(&tasklist_lock); | ||
163 | /* Do copying outside the lock */ | ||
164 | retval = | ||
165 | copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0; | ||
166 | return retval; | ||
167 | out_unlock: | ||
168 | read_unlock(&tasklist_lock); | ||
169 | out: | ||
170 | return retval; | ||
171 | |||
172 | } | ||
173 | |||
174 | /* | ||
175 | * This is the crucial function for periodic task implementation. | ||
176 | * It checks if a task is periodic, checks if such kind of sleep | ||
177 | * is permitted and calls plugin-specific sleep, which puts the | ||
178 | * task into a wait array. | ||
179 | * returns 0 on successful wakeup | ||
180 | * returns EPERM if current conditions do not permit such sleep | ||
181 | * returns EINVAL if current task is not able to go to sleep | ||
182 | */ | ||
183 | asmlinkage long sys_complete_job(void) | ||
184 | { | ||
185 | int retval = -EPERM; | ||
186 | if (!is_realtime(current)) { | ||
187 | retval = -EINVAL; | ||
188 | goto out; | ||
189 | } | ||
190 | /* Task with negative or zero period cannot sleep */ | ||
191 | if (get_rt_period(current) <= 0) { | ||
192 | retval = -EINVAL; | ||
193 | goto out; | ||
194 | } | ||
195 | /* The plugin has to put the task into an | ||
196 | * appropriate queue and call schedule | ||
197 | */ | ||
198 | retval = litmus->complete_job(); | ||
199 | out: | ||
200 | return retval; | ||
201 | } | ||
202 | |||
203 | /* This is an "improved" version of sys_complete_job that | ||
204 | * addresses the problem of unintentionally missing a job after | ||
205 | * an overrun. | ||
206 | * | ||
207 | * returns 0 on successful wakeup | ||
208 | * returns EPERM if current conditions do not permit such sleep | ||
209 | * returns EINVAL if current task is not able to go to sleep | ||
210 | */ | ||
211 | asmlinkage long sys_wait_for_job_release(unsigned int job) | ||
212 | { | ||
213 | int retval = -EPERM; | ||
214 | if (!is_realtime(current)) { | ||
215 | retval = -EINVAL; | ||
216 | goto out; | ||
217 | } | ||
218 | |||
219 | /* Task with negative or zero period cannot sleep */ | ||
220 | if (get_rt_period(current) <= 0) { | ||
221 | retval = -EINVAL; | ||
222 | goto out; | ||
223 | } | ||
224 | |||
225 | retval = 0; | ||
226 | |||
227 | /* first wait until we have "reached" the desired job | ||
228 | * | ||
229 | * This implementation has at least two problems: | ||
230 | * | ||
231 | * 1) It doesn't gracefully handle the wrap around of | ||
232 | * job_no. Since LITMUS is a prototype, this is not much | ||
233 | * of a problem right now. | ||
234 | * | ||
235 | * 2) It is theoretically racy if a job release occurs | ||
236 | * between checking job_no and calling sleep_next_period(). | ||
237 | * A proper solution would require adding another callback | ||
238 | * in the plugin structure and testing the condition with | ||
239 | * interrupts disabled. | ||
240 | * | ||
241 | * FIXME: At least problem 2 should be taken care of eventually. | ||
242 | */ | ||
243 | while (!retval && job > current->rt_param.job_params.job_no) | ||
244 | /* If the last job overran then job <= job_no and we | ||
245 | * don't send the task to sleep. | ||
246 | */ | ||
247 | retval = litmus->complete_job(); | ||
248 | out: | ||
249 | return retval; | ||
250 | } | ||
251 | |||
252 | /* This is a helper syscall to query the current job sequence number. | ||
253 | * | ||
254 | * returns 0 on successful query | ||
255 | * returns EPERM if task is not a real-time task. | ||
256 | * returns EFAULT if &job is not a valid pointer. | ||
257 | */ | ||
258 | asmlinkage long sys_query_job_no(unsigned int __user *job) | ||
259 | { | ||
260 | int retval = -EPERM; | ||
261 | if (is_realtime(current)) | ||
262 | retval = put_user(current->rt_param.job_params.job_no, job); | ||
263 | |||
264 | return retval; | ||
265 | } | ||
266 | |||
267 | /* sys_null_call() is only used for determining raw system call | ||
268 | * overheads (kernel entry, kernel exit). It has no useful side effects. | ||
269 | * If ts is non-NULL, then the current Feather-Trace time is recorded. | ||
270 | */ | ||
271 | asmlinkage long sys_null_call(cycles_t __user *ts) | ||
272 | { | ||
273 | long ret = 0; | ||
274 | cycles_t now; | ||
275 | |||
276 | if (ts) { | ||
277 | now = get_cycles(); | ||
278 | ret = put_user(now, ts); | ||
279 | } | ||
280 | |||
281 | return ret; | ||
282 | } | ||
283 | |||
284 | /* p is a real-time task. Re-init its state as a best-effort task. */ | ||
285 | static void reinit_litmus_state(struct task_struct* p, int restore) | ||
286 | { | ||
287 | struct rt_task user_config = {}; | ||
288 | void* ctrl_page = NULL; | ||
289 | |||
290 | if (restore) { | ||
291 | /* Save user-space provided configuration data | ||
292 | * and allocated page. */ | ||
293 | user_config = p->rt_param.task_params; | ||
294 | ctrl_page = p->rt_param.ctrl_page; | ||
295 | } | ||
296 | |||
297 | /* We probably should not be inheriting any task's priority | ||
298 | * at this point in time. | ||
299 | */ | ||
300 | WARN_ON(p->rt_param.inh_task); | ||
301 | |||
302 | /* Cleanup everything else. */ | ||
303 | memset(&p->rt_param, 0, sizeof(p->rt_param)); | ||
304 | |||
305 | /* Restore preserved fields. */ | ||
306 | if (restore) { | ||
307 | p->rt_param.task_params = user_config; | ||
308 | p->rt_param.ctrl_page = ctrl_page; | ||
309 | } | ||
310 | } | ||
311 | |||
312 | long litmus_admit_task(struct task_struct* tsk) | ||
313 | { | ||
314 | long retval = 0; | ||
315 | unsigned long flags; | ||
316 | |||
317 | BUG_ON(is_realtime(tsk)); | ||
318 | |||
319 | if (get_rt_period(tsk) == 0 || | ||
320 | get_exec_cost(tsk) > get_rt_period(tsk)) { | ||
321 | TRACE_TASK(tsk, "litmus admit: invalid task parameters " | ||
322 | "(%lu, %lu)\n", | ||
323 | get_exec_cost(tsk), get_rt_period(tsk)); | ||
324 | retval = -EINVAL; | ||
325 | goto out; | ||
326 | } | ||
327 | |||
328 | if (!cpu_online(get_partition(tsk))) { | ||
329 | TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", | ||
330 | get_partition(tsk)); | ||
331 | retval = -EINVAL; | ||
332 | goto out; | ||
333 | } | ||
334 | |||
335 | INIT_LIST_HEAD(&tsk_rt(tsk)->list); | ||
336 | |||
337 | /* avoid scheduler plugin changing underneath us */ | ||
338 | raw_spin_lock_irqsave(&task_transition_lock, flags); | ||
339 | |||
340 | /* allocate heap node for this task */ | ||
341 | tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC); | ||
342 | tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC); | ||
343 | |||
344 | if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) { | ||
345 | printk(KERN_WARNING "litmus: no more heap node memory!?\n"); | ||
346 | |||
347 | bheap_node_free(tsk_rt(tsk)->heap_node); | ||
348 | release_heap_free(tsk_rt(tsk)->rel_heap); | ||
349 | |||
350 | retval = -ENOMEM; | ||
351 | goto out_unlock; | ||
352 | } else { | ||
353 | bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); | ||
354 | } | ||
355 | |||
356 | retval = litmus->admit_task(tsk); | ||
357 | |||
358 | if (!retval) { | ||
359 | sched_trace_task_name(tsk); | ||
360 | sched_trace_task_param(tsk); | ||
361 | atomic_inc(&rt_task_count); | ||
362 | } | ||
363 | |||
364 | out_unlock: | ||
365 | raw_spin_unlock_irqrestore(&task_transition_lock, flags); | ||
366 | out: | ||
367 | return retval; | ||
368 | } | ||
369 | |||
370 | void litmus_exit_task(struct task_struct* tsk) | ||
371 | { | ||
372 | if (is_realtime(tsk)) { | ||
373 | sched_trace_task_completion(tsk, 1); | ||
374 | |||
375 | litmus->task_exit(tsk); | ||
376 | |||
377 | BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); | ||
378 | bheap_node_free(tsk_rt(tsk)->heap_node); | ||
379 | release_heap_free(tsk_rt(tsk)->rel_heap); | ||
380 | |||
381 | atomic_dec(&rt_task_count); | ||
382 | reinit_litmus_state(tsk, 1); | ||
383 | } | ||
384 | } | ||
385 | |||
386 | /* IPI callback to synchronize plugin switching */ | ||
387 | static void synch_on_plugin_switch(void* info) | ||
388 | { | ||
389 | atomic_inc(&cannot_use_plugin); | ||
390 | while (atomic_read(&cannot_use_plugin) > 0) | ||
391 | cpu_relax(); | ||
392 | } | ||
393 | |||
394 | /* Switching a plugin in use is tricky. | ||
395 | * We must watch out that no real-time tasks exist | ||
396 | * (and that none is created in parallel) and that the plugin is not | ||
397 | * currently in use on any processor (in theory). | ||
398 | */ | ||
399 | int switch_sched_plugin(struct sched_plugin* plugin) | ||
400 | { | ||
401 | unsigned long flags; | ||
402 | int ret = 0; | ||
403 | |||
404 | BUG_ON(!plugin); | ||
405 | |||
406 | /* forbid other cpus to use the plugin */ | ||
407 | atomic_set(&cannot_use_plugin, 1); | ||
408 | /* send IPI to force other CPUs to synch with us */ | ||
409 | smp_call_function(synch_on_plugin_switch, NULL, 0); | ||
410 | |||
411 | /* wait until all other CPUs have started synch */ | ||
412 | while (atomic_read(&cannot_use_plugin) < num_online_cpus()) | ||
413 | cpu_relax(); | ||
414 | |||
415 | /* stop task transitions */ | ||
416 | raw_spin_lock_irqsave(&task_transition_lock, flags); | ||
417 | |||
418 | /* don't switch if there are active real-time tasks */ | ||
419 | if (atomic_read(&rt_task_count) == 0) { | ||
420 | ret = litmus->deactivate_plugin(); | ||
421 | if (0 != ret) | ||
422 | goto out; | ||
423 | ret = plugin->activate_plugin(); | ||
424 | if (0 != ret) { | ||
425 | printk(KERN_INFO "Can't activate %s (%d).\n", | ||
426 | plugin->plugin_name, ret); | ||
427 | plugin = &linux_sched_plugin; | ||
428 | } | ||
429 | printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); | ||
430 | litmus = plugin; | ||
431 | } else | ||
432 | ret = -EBUSY; | ||
433 | out: | ||
434 | raw_spin_unlock_irqrestore(&task_transition_lock, flags); | ||
435 | atomic_set(&cannot_use_plugin, 0); | ||
436 | return ret; | ||
437 | } | ||
438 | |||
439 | /* Called upon fork. | ||
440 | * p is the newly forked task. | ||
441 | */ | ||
442 | void litmus_fork(struct task_struct* p) | ||
443 | { | ||
444 | if (is_realtime(p)) { | ||
445 | /* clean out any litmus related state, don't preserve anything */ | ||
446 | reinit_litmus_state(p, 0); | ||
447 | /* Don't let the child be a real-time task. */ | ||
448 | p->sched_reset_on_fork = 1; | ||
449 | } else | ||
450 | /* non-rt tasks might have ctrl_page set */ | ||
451 | tsk_rt(p)->ctrl_page = NULL; | ||
452 | |||
453 | /* od tables are never inherited across a fork */ | ||
454 | p->od_table = NULL; | ||
455 | } | ||
456 | |||
457 | /* Called upon execve(). | ||
458 | * current is doing the exec. | ||
459 | * Don't let address space specific stuff leak. | ||
460 | */ | ||
461 | void litmus_exec(void) | ||
462 | { | ||
463 | struct task_struct* p = current; | ||
464 | |||
465 | if (is_realtime(p)) { | ||
466 | WARN_ON(p->rt_param.inh_task); | ||
467 | if (tsk_rt(p)->ctrl_page) { | ||
468 | free_page((unsigned long) tsk_rt(p)->ctrl_page); | ||
469 | tsk_rt(p)->ctrl_page = NULL; | ||
470 | } | ||
471 | } | ||
472 | } | ||
473 | |||
474 | void exit_litmus(struct task_struct *dead_tsk) | ||
475 | { | ||
476 | /* We also allow non-RT tasks to | ||
477 | * allocate control pages to allow | ||
478 | * measurements with non-RT tasks. | ||
479 | * So check if we need to free the page | ||
480 | * in any case. | ||
481 | */ | ||
482 | if (tsk_rt(dead_tsk)->ctrl_page) { | ||
483 | TRACE_TASK(dead_tsk, | ||
484 | "freeing ctrl_page %p\n", | ||
485 | tsk_rt(dead_tsk)->ctrl_page); | ||
486 | free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); | ||
487 | } | ||
488 | |||
489 | /* main cleanup only for RT tasks */ | ||
490 | if (is_realtime(dead_tsk)) | ||
491 | litmus_exit_task(dead_tsk); | ||
492 | } | ||
493 | |||
494 | |||
495 | #ifdef CONFIG_MAGIC_SYSRQ | ||
496 | int sys_kill(int pid, int sig); | ||
497 | |||
498 | static void sysrq_handle_kill_rt_tasks(int key) | ||
499 | { | ||
500 | struct task_struct *t; | ||
501 | read_lock(&tasklist_lock); | ||
502 | for_each_process(t) { | ||
503 | if (is_realtime(t)) { | ||
504 | sys_kill(t->pid, SIGKILL); | ||
505 | } | ||
506 | } | ||
507 | read_unlock(&tasklist_lock); | ||
508 | } | ||
509 | |||
510 | static struct sysrq_key_op sysrq_kill_rt_tasks_op = { | ||
511 | .handler = sysrq_handle_kill_rt_tasks, | ||
512 | .help_msg = "quit-rt-tasks(X)", | ||
513 | .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks", | ||
514 | }; | ||
515 | #endif | ||
516 | |||
517 | extern struct sched_plugin linux_sched_plugin; | ||
518 | |||
519 | static int __init _init_litmus(void) | ||
520 | { | ||
521 | /* Common initializers, | ||
522 | * mode change lock is used to enforce single mode change | ||
523 | * operation. | ||
524 | */ | ||
525 | printk("Starting LITMUS^RT kernel\n"); | ||
526 | |||
527 | BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t)); | ||
528 | |||
529 | register_sched_plugin(&linux_sched_plugin); | ||
530 | |||
531 | bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); | ||
532 | release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC); | ||
533 | |||
534 | #ifdef CONFIG_MAGIC_SYSRQ | ||
535 | /* offer some debugging help */ | ||
536 | if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op)) | ||
537 | printk("Registered kill rt tasks magic sysrq.\n"); | ||
538 | else | ||
539 | printk("Could not register kill rt tasks magic sysrq.\n"); | ||
540 | #endif | ||
541 | |||
542 | init_litmus_proc(); | ||
543 | |||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | static void _exit_litmus(void) | ||
548 | { | ||
549 | exit_litmus_proc(); | ||
550 | kmem_cache_destroy(bheap_node_cache); | ||
551 | kmem_cache_destroy(release_heap_cache); | ||
552 | } | ||
553 | |||
554 | module_init(_init_litmus); | ||
555 | module_exit(_exit_litmus); | ||
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c new file mode 100644 index 000000000000..4bf725a36c9c --- /dev/null +++ b/litmus/litmus_proc.c | |||
@@ -0,0 +1,347 @@ | |||
1 | /* | ||
2 | * litmus_proc.c -- Implementation of the /proc/litmus directory tree. | ||
3 | */ | ||
4 | |||
5 | #include <linux/sched.h> | ||
6 | #include <linux/uaccess.h> | ||
7 | |||
8 | #include <litmus/litmus.h> | ||
9 | #include <litmus/litmus_proc.h> | ||
10 | |||
11 | #include <litmus/clustered.h> | ||
12 | |||
13 | /* in litmus/litmus.c */ | ||
14 | extern atomic_t rt_task_count; | ||
15 | |||
16 | static struct proc_dir_entry *litmus_dir = NULL, | ||
17 | *curr_file = NULL, | ||
18 | *stat_file = NULL, | ||
19 | *plugs_dir = NULL, | ||
20 | #ifdef CONFIG_RELEASE_MASTER | ||
21 | *release_master_file = NULL, | ||
22 | #endif | ||
23 | *plugs_file = NULL; | ||
24 | |||
25 | /* in litmus/sync.c */ | ||
26 | int count_tasks_waiting_for_release(void); | ||
27 | |||
28 | static int proc_read_stats(char *page, char **start, | ||
29 | off_t off, int count, | ||
30 | int *eof, void *data) | ||
31 | { | ||
32 | int len; | ||
33 | |||
34 | len = snprintf(page, PAGE_SIZE, | ||
35 | "real-time tasks = %d\n" | ||
36 | "ready for release = %d\n", | ||
37 | atomic_read(&rt_task_count), | ||
38 | count_tasks_waiting_for_release()); | ||
39 | return len; | ||
40 | } | ||
41 | |||
42 | static int proc_read_plugins(char *page, char **start, | ||
43 | off_t off, int count, | ||
44 | int *eof, void *data) | ||
45 | { | ||
46 | int len; | ||
47 | |||
48 | len = print_sched_plugins(page, PAGE_SIZE); | ||
49 | return len; | ||
50 | } | ||
51 | |||
52 | static int proc_read_curr(char *page, char **start, | ||
53 | off_t off, int count, | ||
54 | int *eof, void *data) | ||
55 | { | ||
56 | int len; | ||
57 | |||
58 | len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name); | ||
59 | return len; | ||
60 | } | ||
61 | |||
62 | /* in litmus/litmus.c */ | ||
63 | int switch_sched_plugin(struct sched_plugin*); | ||
64 | |||
65 | static int proc_write_curr(struct file *file, | ||
66 | const char *buffer, | ||
67 | unsigned long count, | ||
68 | void *data) | ||
69 | { | ||
70 | int len, ret; | ||
71 | char name[65]; | ||
72 | struct sched_plugin* found; | ||
73 | |||
74 | len = copy_and_chomp(name, sizeof(name), buffer, count); | ||
75 | if (len < 0) | ||
76 | return len; | ||
77 | |||
78 | found = find_sched_plugin(name); | ||
79 | |||
80 | if (found) { | ||
81 | ret = switch_sched_plugin(found); | ||
82 | if (ret != 0) | ||
83 | printk(KERN_INFO "Could not switch plugin: %d\n", ret); | ||
84 | } else | ||
85 | printk(KERN_INFO "Plugin '%s' is unknown.\n", name); | ||
86 | |||
87 | return len; | ||
88 | } | ||
89 | |||
90 | #ifdef CONFIG_RELEASE_MASTER | ||
91 | static int proc_read_release_master(char *page, char **start, | ||
92 | off_t off, int count, | ||
93 | int *eof, void *data) | ||
94 | { | ||
95 | int len, master; | ||
96 | master = atomic_read(&release_master_cpu); | ||
97 | if (master == NO_CPU) | ||
98 | len = snprintf(page, PAGE_SIZE, "NO_CPU\n"); | ||
99 | else | ||
100 | len = snprintf(page, PAGE_SIZE, "%d\n", master); | ||
101 | return len; | ||
102 | } | ||
103 | |||
104 | static int proc_write_release_master(struct file *file, | ||
105 | const char *buffer, | ||
106 | unsigned long count, | ||
107 | void *data) | ||
108 | { | ||
109 | int cpu, err, len, online = 0; | ||
110 | char msg[64]; | ||
111 | |||
112 | len = copy_and_chomp(msg, sizeof(msg), buffer, count); | ||
113 | |||
114 | if (len < 0) | ||
115 | return len; | ||
116 | |||
117 | if (strcmp(msg, "NO_CPU") == 0) | ||
118 | atomic_set(&release_master_cpu, NO_CPU); | ||
119 | else { | ||
120 | err = sscanf(msg, "%d", &cpu); | ||
121 | if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) { | ||
122 | atomic_set(&release_master_cpu, cpu); | ||
123 | } else { | ||
124 | TRACE("invalid release master: '%s' " | ||
125 | "(err:%d cpu:%d online:%d)\n", | ||
126 | msg, err, cpu, online); | ||
127 | len = -EINVAL; | ||
128 | } | ||
129 | } | ||
130 | return len; | ||
131 | } | ||
132 | #endif | ||
133 | |||
134 | int __init init_litmus_proc(void) | ||
135 | { | ||
136 | litmus_dir = proc_mkdir("litmus", NULL); | ||
137 | if (!litmus_dir) { | ||
138 | printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); | ||
139 | return -ENOMEM; | ||
140 | } | ||
141 | |||
142 | curr_file = create_proc_entry("active_plugin", | ||
143 | 0644, litmus_dir); | ||
144 | if (!curr_file) { | ||
145 | printk(KERN_ERR "Could not allocate active_plugin " | ||
146 | "procfs entry.\n"); | ||
147 | return -ENOMEM; | ||
148 | } | ||
149 | curr_file->read_proc = proc_read_curr; | ||
150 | curr_file->write_proc = proc_write_curr; | ||
151 | |||
152 | #ifdef CONFIG_RELEASE_MASTER | ||
153 | release_master_file = create_proc_entry("release_master", | ||
154 | 0644, litmus_dir); | ||
155 | if (!release_master_file) { | ||
156 | printk(KERN_ERR "Could not allocate release_master " | ||
157 | "procfs entry.\n"); | ||
158 | return -ENOMEM; | ||
159 | } | ||
160 | release_master_file->read_proc = proc_read_release_master; | ||
161 | release_master_file->write_proc = proc_write_release_master; | ||
162 | #endif | ||
163 | |||
164 | stat_file = create_proc_read_entry("stats", 0444, litmus_dir, | ||
165 | proc_read_stats, NULL); | ||
166 | |||
167 | plugs_dir = proc_mkdir("plugins", litmus_dir); | ||
168 | if (!plugs_dir){ | ||
169 | printk(KERN_ERR "Could not allocate plugins directory " | ||
170 | "procfs entry.\n"); | ||
171 | return -ENOMEM; | ||
172 | } | ||
173 | |||
174 | plugs_file = create_proc_read_entry("loaded", 0444, plugs_dir, | ||
175 | proc_read_plugins, NULL); | ||
176 | |||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | void exit_litmus_proc(void) | ||
181 | { | ||
182 | if (plugs_file) | ||
183 | remove_proc_entry("loaded", plugs_dir); | ||
184 | if (plugs_dir) | ||
185 | remove_proc_entry("plugins", litmus_dir); | ||
186 | if (stat_file) | ||
187 | remove_proc_entry("stats", litmus_dir); | ||
188 | if (curr_file) | ||
189 | remove_proc_entry("active_plugin", litmus_dir); | ||
190 | #ifdef CONFIG_RELEASE_MASTER | ||
191 | if (release_master_file) | ||
192 | remove_proc_entry("release_master", litmus_dir); | ||
193 | #endif | ||
194 | if (litmus_dir) | ||
195 | remove_proc_entry("litmus", NULL); | ||
196 | } | ||
197 | |||
198 | long make_plugin_proc_dir(struct sched_plugin* plugin, | ||
199 | struct proc_dir_entry** pde_in) | ||
200 | { | ||
201 | struct proc_dir_entry *pde_new = NULL; | ||
202 | long rv; | ||
203 | |||
204 | if (!plugin || !plugin->plugin_name){ | ||
205 | printk(KERN_ERR "Invalid plugin struct passed to %s.\n", | ||
206 | __func__); | ||
207 | rv = -EINVAL; | ||
208 | goto out_no_pde; | ||
209 | } | ||
210 | |||
211 | if (!plugs_dir){ | ||
212 | printk(KERN_ERR "Could not make plugin sub-directory, because " | ||
213 | "/proc/litmus/plugins does not exist.\n"); | ||
214 | rv = -ENOENT; | ||
215 | goto out_no_pde; | ||
216 | } | ||
217 | |||
218 | pde_new = proc_mkdir(plugin->plugin_name, plugs_dir); | ||
219 | if (!pde_new){ | ||
220 | printk(KERN_ERR "Could not make plugin sub-directory: " | ||
221 | "out of memory?.\n"); | ||
222 | rv = -ENOMEM; | ||
223 | goto out_no_pde; | ||
224 | } | ||
225 | |||
226 | rv = 0; | ||
227 | *pde_in = pde_new; | ||
228 | goto out_ok; | ||
229 | |||
230 | out_no_pde: | ||
231 | *pde_in = NULL; | ||
232 | out_ok: | ||
233 | return rv; | ||
234 | } | ||
235 | |||
236 | void remove_plugin_proc_dir(struct sched_plugin* plugin) | ||
237 | { | ||
238 | if (!plugin || !plugin->plugin_name){ | ||
239 | printk(KERN_ERR "Invalid plugin struct passed to %s.\n", | ||
240 | __func__); | ||
241 | return; | ||
242 | } | ||
243 | remove_proc_entry(plugin->plugin_name, plugs_dir); | ||
244 | } | ||
245 | |||
246 | |||
247 | |||
248 | /* misc. I/O helper functions */ | ||
249 | |||
250 | int copy_and_chomp(char *kbuf, unsigned long ksize, | ||
251 | __user const char* ubuf, unsigned long ulength) | ||
252 | { | ||
253 | /* caller must provide buffer space */ | ||
254 | BUG_ON(!ksize); | ||
255 | |||
256 | ksize--; /* leave space for null byte */ | ||
257 | |||
258 | if (ksize > ulength) | ||
259 | ksize = ulength; | ||
260 | |||
261 | if(copy_from_user(kbuf, ubuf, ksize)) | ||
262 | return -EFAULT; | ||
263 | |||
264 | kbuf[ksize] = '\0'; | ||
265 | |||
266 | /* chomp kbuf */ | ||
267 | if (ksize > 0 && kbuf[ksize - 1] == '\n') | ||
268 | kbuf[ksize - 1] = '\0'; | ||
269 | |||
270 | return ksize; | ||
271 | } | ||
272 | |||
273 | /* helper functions for clustered plugins */ | ||
274 | static const char* cache_level_names[] = { | ||
275 | "ALL", | ||
276 | "L1", | ||
277 | "L2", | ||
278 | "L3", | ||
279 | }; | ||
280 | |||
281 | int parse_cache_level(const char *cache_name, enum cache_level *level) | ||
282 | { | ||
283 | int err = -EINVAL; | ||
284 | int i; | ||
285 | /* do a quick and dirty comparison to find the cluster size */ | ||
286 | for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++) | ||
287 | if (!strcmp(cache_name, cache_level_names[i])) { | ||
288 | *level = (enum cache_level) i; | ||
289 | err = 0; | ||
290 | break; | ||
291 | } | ||
292 | return err; | ||
293 | } | ||
294 | |||
295 | const char* cache_level_name(enum cache_level level) | ||
296 | { | ||
297 | int idx = level; | ||
298 | |||
299 | if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER) | ||
300 | return cache_level_names[idx]; | ||
301 | else | ||
302 | return "INVALID"; | ||
303 | } | ||
304 | |||
305 | |||
306 | /* proc file interface to configure the cluster size */ | ||
307 | static int proc_read_cluster_size(char *page, char **start, | ||
308 | off_t off, int count, | ||
309 | int *eof, void *data) | ||
310 | { | ||
311 | return snprintf(page, PAGE_SIZE, "%s\n", | ||
312 | cache_level_name(*((enum cache_level*) data)));; | ||
313 | } | ||
314 | |||
315 | static int proc_write_cluster_size(struct file *file, | ||
316 | const char *buffer, | ||
317 | unsigned long count, | ||
318 | void *data) | ||
319 | { | ||
320 | int len; | ||
321 | char cache_name[8]; | ||
322 | |||
323 | len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count); | ||
324 | |||
325 | if (len > 0 && parse_cache_level(cache_name, (enum cache_level*) data)) | ||
326 | printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name); | ||
327 | |||
328 | return len; | ||
329 | } | ||
330 | |||
331 | struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent, | ||
332 | enum cache_level* level) | ||
333 | { | ||
334 | struct proc_dir_entry* cluster_file; | ||
335 | |||
336 | cluster_file = create_proc_entry("cluster", 0644, parent); | ||
337 | if (!cluster_file) { | ||
338 | printk(KERN_ERR "Could not allocate %s/cluster " | ||
339 | "procfs entry.\n", parent->name); | ||
340 | } else { | ||
341 | cluster_file->read_proc = proc_read_cluster_size; | ||
342 | cluster_file->write_proc = proc_write_cluster_size; | ||
343 | cluster_file->data = level; | ||
344 | } | ||
345 | return cluster_file; | ||
346 | } | ||
347 | |||
diff --git a/litmus/locking.c b/litmus/locking.c new file mode 100644 index 000000000000..84a1d8309699 --- /dev/null +++ b/litmus/locking.c | |||
@@ -0,0 +1,186 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <litmus/litmus.h> | ||
3 | #include <litmus/fdso.h> | ||
4 | |||
5 | #ifdef CONFIG_LITMUS_LOCKING | ||
6 | |||
7 | #include <litmus/sched_plugin.h> | ||
8 | #include <litmus/trace.h> | ||
9 | #include <litmus/wait.h> | ||
10 | |||
/* Forward declarations for the fdso (file-descriptor-attached shared
 * object) callbacks implemented below. */
static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
static int close_generic_lock(struct od_table_entry* entry);
static void destroy_generic_lock(obj_type_t type, void* sem);

/* Dispatch table that plugs generic LITMUS locks into the fdso
 * infrastructure; referenced by the fdso type table elsewhere. */
struct fdso_ops generic_lock_ops = {
	.create  = create_generic_lock,
	.open    = open_generic_lock,
	.close   = close_generic_lock,
	.destroy = destroy_generic_lock
};
22 | |||
23 | static inline bool is_lock(struct od_table_entry* entry) | ||
24 | { | ||
25 | return entry->class == &generic_lock_ops; | ||
26 | } | ||
27 | |||
/* Extract the litmus_lock from an od-table entry.
 * Only valid for entries that actually wrap a lock (BUG otherwise). */
static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
{
	BUG_ON(!is_lock(entry));
	return (struct litmus_lock*) entry->obj->obj;
}
33 | |||
34 | static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg) | ||
35 | { | ||
36 | struct litmus_lock* lock; | ||
37 | int err; | ||
38 | |||
39 | err = litmus->allocate_lock(&lock, type, arg); | ||
40 | if (err == 0) | ||
41 | *obj_ref = lock; | ||
42 | return err; | ||
43 | } | ||
44 | |||
45 | static int open_generic_lock(struct od_table_entry* entry, void* __user arg) | ||
46 | { | ||
47 | struct litmus_lock* lock = get_lock(entry); | ||
48 | if (lock->ops->open) | ||
49 | return lock->ops->open(lock, arg); | ||
50 | else | ||
51 | return 0; /* default: any task can open it */ | ||
52 | } | ||
53 | |||
54 | static int close_generic_lock(struct od_table_entry* entry) | ||
55 | { | ||
56 | struct litmus_lock* lock = get_lock(entry); | ||
57 | if (lock->ops->close) | ||
58 | return lock->ops->close(lock); | ||
59 | else | ||
60 | return 0; /* default: closing succeeds */ | ||
61 | } | ||
62 | |||
63 | static void destroy_generic_lock(obj_type_t type, void* obj) | ||
64 | { | ||
65 | struct litmus_lock* lock = (struct litmus_lock*) obj; | ||
66 | lock->ops->deallocate(lock); | ||
67 | } | ||
68 | |||
/* System call: acquire the LITMUS lock referenced by object descriptor
 * lock_od. Returns the lock operation's result, or -EINVAL if lock_od
 * does not refer to a lock object. The TS_* macros are overhead-tracing
 * probe points bracketing syscall entry and the lock operation. */
asmlinkage long sys_litmus_lock(int lock_od)
{
	long err = -EINVAL;
	struct od_table_entry* entry;
	struct litmus_lock* l;

	TS_SYSCALL_IN_START;

	TS_SYSCALL_IN_END;

	TS_LOCK_START;

	entry = get_entry_for_od(lock_od);
	if (entry && is_lock(entry)) {
		l = get_lock(entry);
		TRACE_CUR("attempts to lock 0x%p\n", l);
		err = l->ops->lock(l);
	}

	/* Note: task may have been suspended or preempted in between! Take
	 * this into account when computing overheads. */
	TS_LOCK_END;

	TS_SYSCALL_OUT_START;

	return err;
}
96 | |||
/* System call: release the LITMUS lock referenced by object descriptor
 * lock_od. Returns the unlock operation's result, or -EINVAL if lock_od
 * does not refer to a lock object. Mirrors sys_litmus_lock(). */
asmlinkage long sys_litmus_unlock(int lock_od)
{
	long err = -EINVAL;
	struct od_table_entry* entry;
	struct litmus_lock* l;

	TS_SYSCALL_IN_START;

	TS_SYSCALL_IN_END;

	TS_UNLOCK_START;

	entry = get_entry_for_od(lock_od);
	if (entry && is_lock(entry)) {
		l = get_lock(entry);
		TRACE_CUR("attempts to unlock 0x%p\n", l);
		err = l->ops->unlock(l);
	}

	/* Note: task may have been preempted in between! Take this into
	 * account when computing overheads. */
	TS_UNLOCK_END;

	TS_SYSCALL_OUT_START;

	return err;
}
124 | |||
125 | struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) | ||
126 | { | ||
127 | wait_queue_t* q; | ||
128 | struct task_struct* t = NULL; | ||
129 | |||
130 | if (waitqueue_active(wq)) { | ||
131 | q = list_entry(wq->task_list.next, | ||
132 | wait_queue_t, task_list); | ||
133 | t = (struct task_struct*) q->private; | ||
134 | __remove_wait_queue(wq, q); | ||
135 | } | ||
136 | return(t); | ||
137 | } | ||
138 | |||
/* Insert 'new' into the priority-ordered exclusive wait queue 'head'.
 * Entries are kept sorted by (priority, tie_breaker); among equal keys
 * the new entry is queued *after* existing ones (FIFO within a key).
 * Returns the number of already-queued entries that precede the
 * insertion point. Caller must hold the waitqueue's lock. */
unsigned int __add_wait_queue_prio_exclusive(
	wait_queue_head_t* head,
	prio_wait_queue_t *new)
{
	struct list_head *pos;
	unsigned int passed = 0;

	new->wq.flags |= WQ_FLAG_EXCLUSIVE;

	/* find a spot where the new entry is less than the next */
	list_for_each(pos, &head->task_list) {
		prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
						       wq.task_list);

		if (unlikely(lt_before(new->priority, queued->priority) ||
			     (new->priority == queued->priority &&
			      new->tie_breaker < queued->tie_breaker))) {
			/* pos is not less than new, thus insert here */
			__list_add(&new->wq.task_list, pos->prev, pos);
			goto out;
		}
		passed++;
	}

	/* if we get to this point, either the list is empty or every
	 * queued element is less than new.
	 * Let's add new to the end. */
	list_add_tail(&new->wq.task_list, &head->task_list);
out:
	return passed;
}
170 | |||
171 | |||
#else

/* CONFIG_LITMUS_LOCKING disabled: provide empty/stub definitions so
 * the fdso type table and the syscall table still link. */
struct fdso_ops generic_lock_ops = {};

asmlinkage long sys_litmus_lock(int sem_od)
{
	return -ENOSYS;
}

asmlinkage long sys_litmus_unlock(int sem_od)
{
	return -ENOSYS;
}

#endif
diff --git a/litmus/preempt.c b/litmus/preempt.c new file mode 100644 index 000000000000..90e09d091e30 --- /dev/null +++ b/litmus/preempt.c | |||
@@ -0,0 +1,131 @@ | |||
1 | #include <linux/sched.h> | ||
2 | |||
3 | #include <litmus/litmus.h> | ||
4 | #include <litmus/preempt.h> | ||
5 | |||
6 | /* The rescheduling state of each processor. | ||
7 | */ | ||
8 | DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); | ||
9 | |||
/* Hook invoked from set_tsk_need_resched(): records that this CPU is
 * about to reschedule. Only processor-local invocations update the
 * sched state (see comment below); a transition out of TASK_PICKED or
 * PICKED_WRONG_TASK lands in PICKED_WRONG_TASK, everything else in
 * WILL_SCHEDULE. */
void sched_state_will_schedule(struct task_struct* tsk)
{
	/* Litmus hack: we only care about processor-local invocations of
	 * set_tsk_need_resched(). We can't reliably set the flag remotely
	 * since it might race with other updates to the scheduling state. We
	 * can't rely on the runqueue lock protecting updates to the sched
	 * state since processors do not acquire the runqueue locks for all
	 * updates to the sched state (to avoid acquiring two runqueue locks at
	 * the same time). Further, if tsk is residing on a remote processor,
	 * then that processor doesn't actually know yet that it is going to
	 * reschedule; it still must receive an IPI (unless a local invocation
	 * races).
	 */
	if (likely(task_cpu(tsk) == smp_processor_id())) {
		VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
		if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
			set_sched_state(PICKED_WRONG_TASK);
		else
			set_sched_state(WILL_SCHEDULE);
	} else
		/* Litmus tasks should never be subject to a remote
		 * set_tsk_need_resched(). */
		BUG_ON(is_realtime(tsk));
	//	TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
	//		   __builtin_return_address(0));
}
36 | |||
/* Called by the IPI handler after another CPU called smp_send_resched(). */
void sched_state_ipi(void)
{
	/* If the IPI was slow, we might be in any state right now. The IPI is
	 * only meaningful if we are in SHOULD_SCHEDULE. */
	if (is_in_sched_state(SHOULD_SCHEDULE)) {
		/* Cause scheduler to be invoked.
		 * This will cause a transition to WILL_SCHEDULE. */
		set_tsk_need_resched(current);
		TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
			    current->comm, current->pid);
	} else {
		/* ignore: a stale IPI in any other state carries no
		 * information, since the local CPU already acted. */
		TRACE_STATE("ignoring IPI in state %x (%s)\n",
			    get_sched_state(),
			    sched_state_name(get_sched_state()));
	}
}
55 | |||
/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
 * hold the lock that is used to serialize scheduling decisions.
 *
 * Attempts one of two atomic state transitions on the target CPU:
 *   TASK_PICKED    -> PICKED_WRONG_TASK  (context switch in progress;
 *                                         the switching CPU will notice)
 *   TASK_SCHEDULED -> SHOULD_SCHEDULE    (CPU idle w.r.t. scheduling;
 *                                         we must poke it ourselves)
 * Only the second case requires actively triggering the scheduler
 * (locally via need_resched, remotely via IPI). */
void litmus_reschedule(int cpu)
{
	int picked_transition_ok = 0;
	int scheduled_transition_ok = 0;

	/* The (remote) CPU could be in any state. */

	/* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
	 * is not aware of the need to reschedule at this point. */

	/* is a context switch in progress? */
	if (cpu_is_in_sched_state(cpu, TASK_PICKED))
		picked_transition_ok = sched_state_transition_on(
			cpu, TASK_PICKED, PICKED_WRONG_TASK);

	if (!picked_transition_ok &&
	    cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
		/* We either raced with the end of the context switch, or the
		 * CPU was in TASK_SCHEDULED anyway. */
		scheduled_transition_ok = sched_state_transition_on(
			cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
	}

	/* If the CPU was in state TASK_SCHEDULED, then we need to cause the
	 * scheduler to be invoked. */
	if (scheduled_transition_ok) {
		if (smp_processor_id() == cpu)
			set_tsk_need_resched(current);
		else
			smp_send_reschedule(cpu);
	}

	TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
		    __FUNCTION__,
		    picked_transition_ok,
		    scheduled_transition_ok);
}
95 | |||
/* Cause the local CPU to reschedule. Same transitions as
 * litmus_reschedule(), but since we are on the target CPU we can set
 * need_resched directly instead of sending an IPI. */
void litmus_reschedule_local(void)
{
	if (is_in_sched_state(TASK_PICKED))
		/* schedule() is mid-flight; mark its pick stale */
		set_sched_state(PICKED_WRONG_TASK);
	else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) {
		set_sched_state(WILL_SCHEDULE);
		set_tsk_need_resched(current);
	}
}
105 | |||
106 | #ifdef CONFIG_DEBUG_KERNEL | ||
107 | |||
/* Debug aid: verify that the plugin's schedule() callback performed the
 * mandatory transition to TASK_PICKED (or PICKED_WRONG_TASK). If not,
 * complain loudly and force the state so the machine keeps going. */
void sched_state_plugin_check(void)
{
	if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
		TRACE("!!!! plugin did not call sched_state_task_picked()!"
		      "Calling sched_state_task_picked() is mandatory---fix this.\n");
		set_sched_state(TASK_PICKED);
	}
}
116 | |||
117 | #define NAME_CHECK(x) case x: return #x | ||
118 | const char* sched_state_name(int s) | ||
119 | { | ||
120 | switch (s) { | ||
121 | NAME_CHECK(TASK_SCHEDULED); | ||
122 | NAME_CHECK(SHOULD_SCHEDULE); | ||
123 | NAME_CHECK(WILL_SCHEDULE); | ||
124 | NAME_CHECK(TASK_PICKED); | ||
125 | NAME_CHECK(PICKED_WRONG_TASK); | ||
126 | default: | ||
127 | return "UNKNOWN"; | ||
128 | }; | ||
129 | } | ||
130 | |||
131 | #endif | ||
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c new file mode 100644 index 000000000000..d405854cd39c --- /dev/null +++ b/litmus/rt_domain.c | |||
@@ -0,0 +1,357 @@ | |||
1 | /* | ||
2 | * litmus/rt_domain.c | ||
3 | * | ||
4 | * LITMUS real-time infrastructure. This file contains the | ||
5 | * functions that manipulate RT domains. RT domains are an abstraction | ||
6 | * of a ready queue and a release queue. | ||
7 | */ | ||
8 | |||
9 | #include <linux/percpu.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/list.h> | ||
12 | #include <linux/slab.h> | ||
13 | |||
14 | #include <litmus/litmus.h> | ||
15 | #include <litmus/sched_plugin.h> | ||
16 | #include <litmus/sched_trace.h> | ||
17 | |||
18 | #include <litmus/rt_domain.h> | ||
19 | |||
20 | #include <litmus/trace.h> | ||
21 | |||
22 | #include <litmus/bheap.h> | ||
23 | |||
24 | /* Uncomment when debugging timer races... */ | ||
25 | #if 0 | ||
26 | #define VTRACE_TASK TRACE_TASK | ||
27 | #define VTRACE TRACE | ||
28 | #else | ||
29 | #define VTRACE_TASK(t, fmt, args...) /* shut up */ | ||
30 | #define VTRACE(fmt, args...) /* be quiet already */ | ||
31 | #endif | ||
32 | |||
/* No-op check_resched callback installed when a domain supplies none. */
static int dummy_resched(rt_domain_t *rt)
{
	return 0;
}
37 | |||
/* No-op heap order (treats all nodes as equal) installed when a domain
 * supplies no priority order. */
static int dummy_order(struct bheap_node* a, struct bheap_node* b)
{
	return 0;
}
42 | |||
/* default implementation: use default lock */
/* Default release_jobs callback: merge the released jobs straight into
 * the domain's ready queue (merge_ready takes the ready lock). */
static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
	merge_ready(rt, tasks);
}
48 | |||
/* Hash a release time into a release-queue slot index.
 * Note: the cast truncates the quantum count to unsigned int *before*
 * the modulo; this only affects which slot a time hashes to, and the
 * slots are searched by exact release time anyway, so collisions are
 * benign. */
static unsigned int time2slot(lt_t time)
{
	return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
}
53 | |||
/* hrtimer callback: fires when a batch of jobs reaches its release
 * time. Unlinks the release_heap from the domain's release queue and
 * hands the accumulated heap of jobs to the domain's release_jobs()
 * callback. */
static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
{
	unsigned long flags;
	struct release_heap* rh;
	rh = container_of(timer, struct release_heap, timer);

	TS_RELEASE_LATENCY(rh->release_time);

	VTRACE("on_release_timer(0x%p) starts.\n", timer);

	TS_RELEASE_START;


	raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
	VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
	/* remove from release queue */
	list_del(&rh->list);
	raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
	VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);

	/* call release callback */
	rh->dom->release_jobs(rh->dom, &rh->heap);
	/* WARNING: rh can be referenced from other CPUs from now on. */

	TS_RELEASE_END;

	VTRACE("on_release_timer(0x%p) ends.\n", timer);

	return  HRTIMER_NORESTART;
}
84 | |||
85 | /* allocated in litmus.c */ | ||
86 | struct kmem_cache * release_heap_cache; | ||
87 | |||
88 | struct release_heap* release_heap_alloc(int gfp_flags) | ||
89 | { | ||
90 | struct release_heap* rh; | ||
91 | rh= kmem_cache_alloc(release_heap_cache, gfp_flags); | ||
92 | if (rh) { | ||
93 | /* initialize timer */ | ||
94 | hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
95 | rh->timer.function = on_release_timer; | ||
96 | } | ||
97 | return rh; | ||
98 | } | ||
99 | |||
/* Release a release_heap back to the slab cache, waiting for any
 * in-flight timer callback to finish first. */
void release_heap_free(struct release_heap* rh)
{
	/* make sure timer is no longer in use */
	hrtimer_cancel(&rh->timer);
	kmem_cache_free(release_heap_cache, rh);
}
106 | |||
/* Caller must hold release lock.
 * Will return heap for given time. If no such heap exists prior to
 * the invocation it will be created (from t's pre-allocated heap) when
 * use_task_heap is set; otherwise NULL is returned so the caller can
 * drop the lock, prepare t's heap, and retry.
 */
static struct release_heap* get_release_heap(rt_domain_t *rt,
					     struct task_struct* t,
					     int use_task_heap)
{
	struct list_head* pos;
	struct release_heap* heap = NULL;
	struct release_heap* rh;
	lt_t release_time = get_release(t);
	unsigned int slot = time2slot(release_time);

	/* initialize pos for the case that the list is empty */
	pos = rt->release_queue.slot[slot].next;
	/* Slot lists are kept sorted by release time: scan for an exact
	 * match or stop at the first later entry (insertion point). */
	list_for_each(pos, &rt->release_queue.slot[slot]) {
		rh = list_entry(pos, struct release_heap, list);
		if (release_time == rh->release_time) {
			/* perfect match -- this happens on hyperperiod
			 * boundaries
			 */
			heap = rh;
			break;
		} else if (lt_before(release_time, rh->release_time)) {
			/* we need to insert a new node since rh is
			 * already in the future
			 */
			break;
		}
	}
	if (!heap && use_task_heap) {
		/* use pre-allocated release heap */
		rh = tsk_rt(t)->rel_heap;

		rh->dom = rt;
		rh->release_time = release_time;

		/* add to release queue; pos still marks the insertion
		 * point found by the scan above */
		list_add(&rh->list, pos->prev);
		heap = rh;
	}
	return heap;
}
151 | |||
/* Prepare t's pre-allocated release_heap for reuse: wait for any
 * still-running timer callback and reset the heap. Must not be called
 * while holding the release_lock (see WARNING below). */
static void reinit_release_heap(struct task_struct* t)
{
	struct release_heap* rh;

	/* use pre-allocated release heap */
	rh = tsk_rt(t)->rel_heap;

	/* Make sure it is safe to use.  The timer callback could still
	 * be executing on another CPU; hrtimer_cancel() will wait
	 * until the timer callback has completed.  However, under no
	 * circumstances should the timer be active (= yet to be
	 * triggered).
	 *
	 * WARNING: If the CPU still holds the release_lock at this point,
	 *          deadlock may occur!
	 */
	BUG_ON(hrtimer_cancel(&rh->timer));

	/* initialize */
	bheap_init(&rh->heap);
#ifdef CONFIG_RELEASE_MASTER
	atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
#endif
}
/* arm_release_timer() - start local release timer or trigger
 *     remote timer (pull timer)
 *
 * Called by add_release() with:
 * - tobe_lock taken
 * - IRQ disabled
 *
 * Drains rt->tobe_released: each task is inserted into the release
 * heap matching its release time, and the heap's timer is armed by
 * whichever task owns the heap (the first to reference it).
 */
#ifdef CONFIG_RELEASE_MASTER
#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
#else
static void arm_release_timer(rt_domain_t *_rt)
#endif
{
	rt_domain_t *rt = _rt;
	struct list_head list;
	struct list_head *pos, *safe;
	struct task_struct* t;
	struct release_heap* rh;

	VTRACE("arm_release_timer() at %llu\n", litmus_clock());
	/* snapshot the tobe_released list so we can walk it without
	 * holding up further additions */
	list_replace_init(&rt->tobe_released, &list);

	list_for_each_safe(pos, safe, &list) {
		/* pick task of work list */
		t = list_entry(pos, struct task_struct, rt_param.list);
		sched_trace_task_release(t);
		list_del(pos);

		/* put into release heap while holding release_lock */
		raw_spin_lock(&rt->release_lock);
		VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);

		rh = get_release_heap(rt, t, 0);
		if (!rh) {
			/* need to use our own, but drop lock first */
			raw_spin_unlock(&rt->release_lock);
			VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
				    &rt->release_lock);

			/* reinit requires hrtimer_cancel(), which must
			 * not run under release_lock (deadlock risk) */
			reinit_release_heap(t);
			VTRACE_TASK(t, "release_heap ready\n");

			raw_spin_lock(&rt->release_lock);
			VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
				    &rt->release_lock);

			/* retry: another CPU may have added a matching
			 * heap while the lock was dropped; otherwise
			 * our own heap is inserted now */
			rh = get_release_heap(rt, t, 1);
		}
		bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
		VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");

		raw_spin_unlock(&rt->release_lock);
		VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);

		/* To avoid arming the timer multiple times, we only let the
		 * owner do the arming (which is the "first" task to reference
		 * this release_heap anyway).
		 */
		if (rh == tsk_rt(t)->rel_heap) {
			VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
			/* we cannot arm the timer using hrtimer_start()
			 * as it may deadlock on rq->lock
			 *
			 * PINNED mode is ok on both local and remote CPU
			 */
#ifdef CONFIG_RELEASE_MASTER
			if (rt->release_master == NO_CPU &&
			    target_cpu == NO_CPU)
#endif
				__hrtimer_start_range_ns(&rh->timer,
						ns_to_ktime(rh->release_time),
						0, HRTIMER_MODE_ABS_PINNED, 0);
#ifdef CONFIG_RELEASE_MASTER
			else
				hrtimer_start_on(
					/* target_cpu overrides release master */
					(target_cpu != NO_CPU ?
					 target_cpu : rt->release_master),
					&rh->info, &rh->timer,
					ns_to_ktime(rh->release_time),
					HRTIMER_MODE_ABS_PINNED);
#endif
		} else
			VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
	}
}
263 | |||
264 | void rt_domain_init(rt_domain_t *rt, | ||
265 | bheap_prio_t order, | ||
266 | check_resched_needed_t check, | ||
267 | release_jobs_t release | ||
268 | ) | ||
269 | { | ||
270 | int i; | ||
271 | |||
272 | BUG_ON(!rt); | ||
273 | if (!check) | ||
274 | check = dummy_resched; | ||
275 | if (!release) | ||
276 | release = default_release_jobs; | ||
277 | if (!order) | ||
278 | order = dummy_order; | ||
279 | |||
280 | #ifdef CONFIG_RELEASE_MASTER | ||
281 | rt->release_master = NO_CPU; | ||
282 | #endif | ||
283 | |||
284 | bheap_init(&rt->ready_queue); | ||
285 | INIT_LIST_HEAD(&rt->tobe_released); | ||
286 | for (i = 0; i < RELEASE_QUEUE_SLOTS; i++) | ||
287 | INIT_LIST_HEAD(&rt->release_queue.slot[i]); | ||
288 | |||
289 | raw_spin_lock_init(&rt->ready_lock); | ||
290 | raw_spin_lock_init(&rt->release_lock); | ||
291 | raw_spin_lock_init(&rt->tobe_lock); | ||
292 | |||
293 | rt->check_resched = check; | ||
294 | rt->release_jobs = release; | ||
295 | rt->order = order; | ||
296 | } | ||
297 | |||
/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
 * Caller must hold the ready lock (double-underscore convention).
 * @new:       the newly released task
 */
void __add_ready(rt_domain_t* rt, struct task_struct *new)
{
	TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
	      new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
	      get_release(new), litmus_clock());

	/* the task's heap node must not be queued anywhere else */
	BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));

	bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
	/* let the domain owner decide whether a preemption is needed */
	rt->check_resched(rt);
}
312 | |||
/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
 * Caller must hold the ready lock.
 * @tasks      - the newly released tasks
 */
void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
{
	bheap_union(rt->order, &rt->ready_queue, tasks);
	rt->check_resched(rt);
}
321 | |||
322 | |||
#ifdef CONFIG_RELEASE_MASTER
/* Enqueue task for a future release and arm the release timer on a
 * specific CPU (release-master support). Per arm_release_timer()'s
 * contract, caller must hold tobe_lock with IRQs disabled. */
void __add_release_on(rt_domain_t* rt, struct task_struct *task,
		      int target_cpu)
{
	TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
		   get_release(task), target_cpu);
	list_add(&tsk_rt(task)->list, &rt->tobe_released);
	task->rt_param.domain = rt;

	/* start release timer */
	TS_SCHED2_START(task);

	arm_release_timer_on(rt, target_cpu);

	TS_SCHED2_END(task);
}
#endif
340 | |||
/* add_release - add a real-time task to the rt release queue.
 * Per arm_release_timer()'s contract, caller must hold tobe_lock with
 * IRQs disabled.
 * @task:        the sleeping task
 */
void __add_release(rt_domain_t* rt, struct task_struct *task)
{
	TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
	list_add(&tsk_rt(task)->list, &rt->tobe_released);
	task->rt_param.domain = rt;

	/* start release timer */
	TS_SCHED2_START(task);

	arm_release_timer(rt);

	TS_SCHED2_END(task);
}
357 | |||
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c new file mode 100644 index 000000000000..4f5bb26b339b --- /dev/null +++ b/litmus/sched_cedf.c | |||
@@ -0,0 +1,1526 @@ | |||
1 | /* | ||
2 | * litmus/sched_cedf.c | ||
3 | * | ||
4 | * Implementation of the C-EDF scheduling algorithm. | ||
5 | * | ||
6 | * This implementation is based on G-EDF: | ||
7 | * - CPUs are clustered around L2 or L3 caches. | ||
8 | * - Clusters topology is automatically detected (this is arch dependent | ||
9 | * and is working only on x86 at the moment --- and only with modern | ||
10 | * cpus that exports cpuid4 information) | ||
11 | * - The plugins _does not_ attempt to put tasks in the right cluster i.e. | ||
12 | * the programmer needs to be aware of the topology to place tasks | ||
13 | * in the desired cluster | ||
14 | * - default clustering is around L2 cache (cache index = 2) | ||
15 | * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all | ||
16 | * online_cpus are placed in a single cluster). | ||
17 | * | ||
18 | * For details on functions, take a look at sched_gsn_edf.c | ||
19 | * | ||
20 | * Currently, we do not support changes in the number of online cpus. | ||
21 | * If the num_online_cpus() dynamically changes, the plugin is broken. | ||
22 | * | ||
23 | * This version uses the simple approach and serializes all scheduling | ||
24 | * decisions by the use of a queue lock. This is probably not the | ||
25 | * best way to do it, but it should suffice for now. | ||
26 | */ | ||
27 | |||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/slab.h> | ||
32 | |||
33 | #include <linux/module.h> | ||
34 | |||
35 | #include <litmus/litmus.h> | ||
36 | #include <litmus/wait.h> | ||
37 | #include <litmus/jobs.h> | ||
38 | #include <litmus/preempt.h> | ||
39 | #include <litmus/sched_plugin.h> | ||
40 | #include <litmus/edf_common.h> | ||
41 | #include <litmus/sched_trace.h> | ||
42 | #include <litmus/trace.h> | ||
43 | |||
44 | #include <litmus/clustered.h> | ||
45 | |||
46 | #include <litmus/bheap.h> | ||
47 | |||
48 | /* to configure the cluster size */ | ||
49 | #include <litmus/litmus_proc.h> | ||
50 | #include <linux/uaccess.h> | ||
51 | |||
52 | /* Reference configuration variable. Determines which cache level is used to | ||
53 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | ||
54 | * all CPUs form a single cluster (just like GSN-EDF). | ||
55 | */ | ||
56 | static enum cache_level cluster_config = GLOBAL_CLUSTER; | ||
57 | |||
58 | struct clusterdomain; | ||
59 | |||
60 | /* cpu_entry_t - maintain the linked and scheduled state | ||
61 | * | ||
62 | * A cpu also contains a pointer to the cedf_domain_t cluster | ||
63 | * that owns it (struct clusterdomain*) | ||
64 | */ | ||
typedef struct  {
	int 			cpu;
	struct clusterdomain*	cluster;	/* owning cluster */
	struct task_struct*	linked;		/* only RT tasks */
	struct task_struct*	scheduled;	/* only RT tasks */
	atomic_t		will_schedule;	/* prevent unneeded IPIs */
	struct bheap_node*	hn;		/* node in the cluster's cpu_heap */
#ifdef CONFIG_LITMUS_LOCKING
	struct bheap_node*	pending_hn;	/* node in the cluster's pending_cpus heap */
	struct task_struct*	pending;	/* NOTE(review): presumably the job pending
						 * on this CPU under the locking protocol;
						 * confirm against update_pending_job() */
#endif
} cpu_entry_t;
77 | |||
78 | /* one cpu_entry_t per CPU */ | ||
79 | DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); | ||
80 | |||
81 | |||
82 | static struct bheap_node cpu_nodes[NR_CPUS]; | ||
83 | #ifdef CONFIG_LITMUS_LOCKING | ||
84 | static struct bheap_node pending_nodes[NR_CPUS]; | ||
85 | #endif | ||
86 | |||
87 | /* | ||
88 | * In C-EDF there is a cedf domain _per_ cluster | ||
89 | * The number of clusters is dynamically determined accordingly to the | ||
90 | * total cpu number and the cluster size | ||
91 | */ | ||
typedef struct clusterdomain {
	/* rt_domain for this cluster */
	rt_domain_t	domain;
	/* map of this cluster cpus */
	cpumask_var_t	cpu_map;
	unsigned int	num_cpus;
	/* the cpus queue themselves according to priority in here */
	struct bheap	cpu_heap;
#ifdef CONFIG_LITMUS_LOCKING
	/* NOTE(review): per-cluster state for the locking protocol;
	 * semantics defined by the locking code further below */
	struct bheap	pending_jobs;
	struct bheap	pending_cpus;
#endif
	/* lock for this cluster -- aliased to the rt_domain's ready
	 * lock so one lock serializes all scheduling decisions */
#define cluster_lock domain.ready_lock
} cedf_domain_t;
107 | |||
108 | /* a cedf_domain per cluster; allocation is done at init/activation time */ | ||
109 | cedf_domain_t *cedf; | ||
110 | |||
111 | #define remote_cpu(cpu) (&per_cpu(cedf_cpu_entries, cpu)) | ||
112 | #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) | ||
113 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
114 | |||
115 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | ||
116 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | ||
117 | * information during the initialization of the plugin (e.g., topology) | ||
118 | #define WANT_ALL_SCHED_EVENTS | ||
119 | */ | ||
120 | #define VERBOSE_INIT | ||
121 | |||
122 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
123 | { | ||
124 | cpu_entry_t *a, *b; | ||
125 | a = _a->value; | ||
126 | b = _b->value; | ||
127 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
128 | * the top of the heap. | ||
129 | */ | ||
130 | return edf_higher_prio(b->linked, a->linked); | ||
131 | } | ||
132 | |||
133 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
134 | * order in the cpu queue. Caller must hold cedf lock. | ||
135 | */ | ||
136 | static void update_cpu_position(cpu_entry_t *entry) | ||
137 | { | ||
138 | cedf_domain_t *cluster = entry->cluster; | ||
139 | |||
140 | if (likely(bheap_node_in_heap(entry->hn))) | ||
141 | bheap_delete(cpu_lower_prio, | ||
142 | &cluster->cpu_heap, | ||
143 | entry->hn); | ||
144 | |||
145 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); | ||
146 | } | ||
147 | |||
148 | /* caller must hold cedf lock */ | ||
149 | static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) | ||
150 | { | ||
151 | struct bheap_node* hn; | ||
152 | hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); | ||
153 | return hn->value; | ||
154 | } | ||
155 | |||
156 | |||
/* link_task_to_cpu - Update the link of a CPU.
 *                    Handles the case where the to-be-linked task is already
 *                    scheduled on a different CPU, in which case the two
 *                    CPUs' links are swapped and the displaced task is
 *                    linked here instead. Caller must hold cedf lock.
 */
static noinline void link_task_to_cpu(struct task_struct* linked,
				      cpu_entry_t *entry)
{
	cpu_entry_t *sched;
	struct task_struct* tmp;
	int on_cpu;

	BUG_ON(linked && !is_realtime(linked));

	/* Currently linked task is set to be unlinked. */
	if (entry->linked) {
		entry->linked->rt_param.linked_on = NO_CPU;
	}

	/* Link new task to CPU. */
	if (linked) {
		/* handle task is already scheduled somewhere! */
		on_cpu = linked->rt_param.scheduled_on;
		if (on_cpu != NO_CPU) {
			sched = &per_cpu(cedf_cpu_entries, on_cpu);
			/* this should only happen if not linked already */
			BUG_ON(sched->linked == linked);

			/* If we are already scheduled on the CPU to which we
			 * wanted to link, we don't need to do the swap --
			 * we just link ourselves to the CPU and depend on
			 * the caller to get things right.
			 */
			if (entry != sched) {
				TRACE_TASK(linked,
					   "already scheduled on %d, updating link.\n",
					   sched->cpu);
				/* swap: link 'linked' to the CPU it is
				 * running on and take over whatever was
				 * linked there (possibly NULL) */
				tmp = sched->linked;
				linked->rt_param.linked_on = sched->cpu;
				sched->linked = linked;
				update_cpu_position(sched);
				linked = tmp;
			}
		}
		if (linked) /* might be NULL due to swap */
			linked->rt_param.linked_on = entry->cpu;
	}
	entry->linked = linked;
#ifdef WANT_ALL_SCHED_EVENTS
	if (linked)
		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
	else
		TRACE("NULL linked to %d.\n", entry->cpu);
#endif
	update_cpu_position(entry);
}
212 | |||
/* unlink - Make sure a task is not linked any longer to an entry
 * where it was linked before. Must hold cedf_lock.
 */
static noinline void unlink(struct task_struct* t)
{
	cpu_entry_t *entry;

	if (t->rt_param.linked_on != NO_CPU) {
		/* unlink from the CPU it is linked to */
		entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
		t->rt_param.linked_on = NO_CPU;
		link_task_to_cpu(NULL, entry);
	} else if (is_queued(t)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked. It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 *
		 * In the C-EDF case it should be somewhere in the queue
		 * for its own domain, therefore we can obtain the domain
		 * via task_cpu_cluster().
		 */
		remove(&(task_cpu_cluster(t))->domain, t);
	}
}
240 | |||
241 | |||
/* preempt - force a CPU to reschedule
 *
 * Delegates to preempt_if_preemptable(), which handles the case where
 * the currently scheduled task on @entry's CPU is non-preemptable.
 */
static void preempt(cpu_entry_t *entry)
{
	preempt_if_preemptable(entry->scheduled, entry->cpu);
}
248 | |||
249 | #ifdef CONFIG_LITMUS_LOCKING | ||
250 | static int update_pending_job(cedf_domain_t* cluster, struct task_struct* t); | ||
251 | static void priodon_become_eligible(void); | ||
252 | static void priodon_complete_request(void); | ||
253 | |||
254 | static inline int in_pending_heap(struct task_struct* t) | ||
255 | { | ||
256 | return bheap_node_in_heap(tsk_rt(t)->pending_node); | ||
257 | } | ||
258 | |||
259 | /* has this task already been processed for pending */ | ||
260 | static inline int is_pending(struct task_struct* t) | ||
261 | { | ||
262 | return tsk_rt(t)->pending_on != NO_CPU || | ||
263 | in_pending_heap(t); | ||
264 | } | ||
265 | |||
266 | #endif | ||
267 | |||
268 | /* requeue - Put an unlinked task into gsn-edf domain. | ||
269 | * Caller must hold cedf_lock. | ||
270 | */ | ||
271 | static noinline void requeue(struct task_struct* task) | ||
272 | { | ||
273 | cedf_domain_t *cluster = task_cpu_cluster(task); | ||
274 | BUG_ON(!task); | ||
275 | /* sanity check before insertion */ | ||
276 | BUG_ON(is_queued(task)); | ||
277 | |||
278 | if (is_released(task, litmus_clock())) { | ||
279 | #ifdef CONFIG_LITMUS_LOCKING | ||
280 | if (!is_pending(task)) | ||
281 | update_pending_job(cluster, task); | ||
282 | #endif | ||
283 | __add_ready(&cluster->domain, task); | ||
284 | } else { | ||
285 | /* it has got to wait */ | ||
286 | add_release(&cluster->domain, task); | ||
287 | } | ||
288 | } | ||
289 | |||
/* check for any necessary preemptions
 *
 * Repeatedly takes the lowest-priority linked CPU in the cluster and,
 * while the head of the ready queue has higher priority, displaces that
 * CPU's linked task. Caller must hold the cluster lock.
 */
static void check_for_preemptions(cedf_domain_t *cluster)
{
	struct task_struct *task;
	cpu_entry_t* last;

	for(last = lowest_prio_cpu(cluster);
	    edf_preemption_needed(&cluster->domain, last->linked);
	    last = lowest_prio_cpu(cluster)) {
		/* preemption necessary */

#ifdef CONFIG_LITMUS_LOCKING
		/* peek first: updating the pending-job bookkeeping may
		 * change effective priorities, so preemption must be
		 * re-evaluated before committing */
		task = __peek_ready(&cluster->domain);
		if (update_pending_job(cluster, task)) {
			/* Something changed, re-evaluate priorities to
			 * see if we still need to preempt.
			 * */
			TRACE_TASK(task, "hitting continue\n");
			continue;
		}
#endif
		task = __take_ready(&cluster->domain);
		TRACE_TASK(task, "attempting to link task to P%d\n",
			   last->cpu);
		/* displaced task goes back into the ready/release queues */
		if (last->linked)
			requeue(last->linked);
		link_task_to_cpu(task, last);
		preempt(last);
	}
}
320 | |||
321 | #ifdef CONFIG_LITMUS_LOCKING | ||
322 | |||
323 | static int pending_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
324 | { | ||
325 | cpu_entry_t *a, *b; | ||
326 | a = _a->value; | ||
327 | b = _b->value; | ||
328 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
329 | * the top of the heap. | ||
330 | */ | ||
331 | return edf_higher_base_prio(b->pending, a->pending); | ||
332 | } | ||
333 | |||
/* update_pending_position - Move the cpu entry to the correct place to
 * maintain order in the pending-CPU heap after its ->pending job changed.
 * (Header previously mis-named this update_cpu_position -- copy-paste.)
 * Caller must hold cedf lock.
 */
static void update_pending_position(cpu_entry_t *entry)
{
	cedf_domain_t *cluster = entry->cluster;

	/* delete-then-reinsert: the priority may have moved either way */
	if (likely(bheap_node_in_heap(entry->pending_hn)))
		bheap_delete(pending_lower_prio,
			     &cluster->pending_cpus,
			     entry->pending_hn);

	bheap_insert(pending_lower_prio, &cluster->pending_cpus, entry->pending_hn);
}
348 | |||
349 | /* caller must hold cedf lock */ | ||
350 | static cpu_entry_t* lowest_pending_cpu(cedf_domain_t *cluster) | ||
351 | { | ||
352 | struct bheap_node* hn; | ||
353 | hn = bheap_peek(pending_lower_prio, &cluster->pending_cpus); | ||
354 | return hn->value; | ||
355 | } | ||
356 | |||
/* priority_raised - t's effective priority just increased; propagate the
 * change into whichever structure currently orders t (CPU heap if linked,
 * ready queue if queued). Caller must hold the cedf lock.
 */
static void priority_raised(struct task_struct* t)
{
	cedf_domain_t *cluster = task_cpu_cluster(t);
	int linked_on;

	linked_on = tsk_rt(t)->linked_on;

	/* If it is scheduled, then we need to reorder the CPU heap. */
	if (linked_on != NO_CPU) {
		TRACE_TASK(t, "%s: linked on %d\n",
			   __FUNCTION__, linked_on);
		/* Holder is scheduled; need to re-order CPUs.
		 * We can't use heap_decrease() here since
		 * the cpu_heap is ordered in reverse direction, so
		 * it is actually an increase. */
		bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
			     remote_cpu(linked_on)->hn);
		bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
			     remote_cpu(linked_on)->hn);
	} else {
		/* holder may be queued: first stop queue changes */
		raw_spin_lock(&cluster->domain.release_lock);
		if (is_queued(t)) {
			TRACE_TASK(t, "%s: is queued\n",
				   __FUNCTION__);
			/* a raised priority moves t towards the heap root,
			 * which bheap_decrease() can do in place */
			bheap_decrease(edf_ready_order,
				       tsk_rt(t)->heap_node);
		} else {
			/* Nothing to do: if it is not queued and not linked
			 * then it is either sleeping or currently being moved
			 * by other code (e.g., a timer interrupt handler) that
			 * will use the correct priority when enqueuing the
			 * task. */
			TRACE_TASK(t, "%s: is NOT queued => Done.\n",
				   __FUNCTION__);
		}
		raw_spin_unlock(&cluster->domain.release_lock);
	}
}
396 | |||
397 | static void priority_lowered(struct task_struct* t) | ||
398 | { | ||
399 | /* assumption: t is not in a release heap */ | ||
400 | if (is_queued(t) || tsk_rt(t)->linked_on != NO_CPU) { | ||
401 | unlink(t); | ||
402 | requeue(t); | ||
403 | } | ||
404 | } | ||
405 | |||
/* donate_priority - @donor lends its base priority to @recipient for the
 * remainder of the recipient's request. Both tasks must belong to the
 * same cluster and neither may already be part of a donation.
 * Caller must hold the cedf lock.
 */
static void donate_priority(struct task_struct* recipient, struct task_struct* donor)
{
	cedf_domain_t *cluster = task_cpu_cluster(donor);

	BUG_ON(task_cpu_cluster(recipient) != task_cpu_cluster(donor));
	BUG_ON(tsk_rt(donor)->is_donor);
	BUG_ON(tsk_rt(recipient)->is_donor);
	BUG_ON(tsk_rt(donor)->inh_task);
	BUG_ON(tsk_rt(recipient)->inh_task);

	TRACE_TASK(donor, "priodon: becomes priority donor for %s/%d\n",
		   recipient->comm, recipient->pid);

	/* swap priorities: inh_task cross-links the pair in both directions */
	tsk_rt(recipient)->inh_task = donor;
	tsk_rt(donor)->inh_task = recipient;
	tsk_rt(donor)->is_donor = 1;

	priority_lowered(donor);
	priority_raised(recipient);

	/* ready-queue order may have changed => drop the cached minimum */
	bheap_uncache_min(edf_ready_order,
			  &cluster->domain.ready_queue);
}
430 | |||
/* switch_donor - replace @old_donor with @new_donor as the priority donor
 * of @recipient, then re-position all three tasks under their new
 * effective priorities.
 * Assumption (per caller): new_donor has a higher priority than old_donor.
 * Caller must hold the cedf lock.
 */
static void switch_donor(struct task_struct* recipient,
			 struct task_struct* old_donor,
			 struct task_struct* new_donor)
{
	TRACE_TASK(new_donor, "becomes donor for %s/%d instead of %s/%d\n",
		   recipient->comm, recipient->pid, old_donor->comm, old_donor->pid);

	BUG_ON(tsk_rt(recipient)->inh_task != old_donor);
	BUG_ON(tsk_rt(old_donor)->inh_task != recipient);
	BUG_ON(tsk_rt(new_donor)->inh_task != NULL);
	BUG_ON(tsk_rt(new_donor)->is_donor);

	/* detach the old donor... */
	tsk_rt(old_donor)->inh_task = NULL;
	tsk_rt(old_donor)->is_donor = 0;

	/* ...and wire up the new one */
	tsk_rt(recipient)->inh_task = new_donor;
	tsk_rt(new_donor)->inh_task = recipient;
	tsk_rt(new_donor)->is_donor = 1;

	priority_raised(recipient);
	priority_raised(old_donor);
	priority_lowered(new_donor);
}
455 | |||
/* undonate_priority - end the donation from @donor to @recipient and
 * restore both tasks' own base priorities. Caller must hold the cedf lock.
 */
static void undonate_priority(struct task_struct* recipient, struct task_struct* donor)
{
	cedf_domain_t *cluster = task_cpu_cluster(donor);

	BUG_ON(tsk_rt(recipient)->inh_task != donor);
	BUG_ON(tsk_rt(donor)->inh_task != recipient);

	TRACE_TASK(donor, "priodon: is no longer priority donor of %s/%d\n",
		   recipient->comm, recipient->pid);

	tsk_rt(recipient)->inh_task = NULL;
	tsk_rt(donor)->inh_task = NULL;
	tsk_rt(donor)->is_donor = 0;

	/* recipient falls back to its own priority; donor regains its own */
	priority_lowered(recipient);
	priority_raised(donor);

	/* ready-queue order may have changed => drop the cached minimum */
	bheap_uncache_min(edf_ready_order,
			  &cluster->domain.ready_queue);
}
476 | |||
/* add_to_pending - insert t into the cluster's heap of pending jobs that
 * are not (yet) occupying one of the per-CPU high-priority slots.
 * Caller must hold the cedf lock.
 */
static inline void add_to_pending(cedf_domain_t* cluster, struct task_struct* t)
{
	TRACE_TASK(t, "priodon: adding to pending heap wait:%u donor:%u req:%u pend:%d\n",
		   tsk_rt(t)->waiting_eligible,
		   tsk_rt(t)->is_donor, tsk_rt(t)->request_incomplete,
		   tsk_rt(t)->pending_on);
	bheap_insert(edf_pending_order,
		     &cluster->pending_jobs,
		     tsk_rt(t)->pending_node);
}
487 | |||
488 | static inline struct task_struct* take_pending(cedf_domain_t* cluster) | ||
489 | { | ||
490 | struct bheap_node* node; | ||
491 | node = bheap_take(edf_pending_order, &cluster->pending_jobs); | ||
492 | return node ? (struct task_struct*) node->value : NULL; | ||
493 | } | ||
494 | |||
495 | static inline struct task_struct* peek_pending(cedf_domain_t* cluster) | ||
496 | { | ||
497 | struct bheap_node* node; | ||
498 | node = bheap_peek(edf_pending_order, &cluster->pending_jobs); | ||
499 | return node ? (struct task_struct*) node->value : NULL; | ||
500 | } | ||
501 | |||
/* fake_resume - wake a task that was "fake suspended" while waiting to
 * become eligible to issue a lock request.
 *
 * Returns 1 if the task was actually requeued (caller should re-check
 * priorities), 0 otherwise. Caller must hold the cedf lock.
 */
static inline int fake_resume(struct task_struct* t)
{
	TRACE_TASK(t, "priodon: fake resume wait:%u donor:%u\n",
		   tsk_rt(t)->waiting_eligible, tsk_rt(t)->is_donor);
	/* Fake suspended. Let's resume it. */
	if (tsk_rt(t)->waiting_eligible) {
		tsk_rt(t)->waiting_eligible = 0;
		if (tsk_rt(t)->scheduled_on == NO_CPU) {
			/* it was removed from the queue */
			requeue(t);
			return 1;
		}
	}
	return 0;
}
517 | |||
518 | |||
/* Lazily update set of highest-priority pending jobs.
 * Returns 1 if priority recheck is required.
 *
 * A cluster with c CPUs tracks the c highest-priority pending jobs, one
 * per CPU entry (->pending); all other pending jobs wait in the pending
 * heap. This checks whether the heap's head (after optionally adding
 * @to_be_linked) should displace the lowest of the c tracked jobs and,
 * if so, performs the displacement plus the resulting priority-donation
 * bookkeeping. Caller must hold the cedf lock.
 */
static int update_pending_job(cedf_domain_t* cluster,
			      struct task_struct* to_be_linked)
{
	cpu_entry_t* entry;
	struct task_struct* lowest_hp; /* lowest-priority high-priority task */
	struct task_struct* highest_lp; /* highest-priority low-priority task */
	int reeval = 0;

	entry = lowest_pending_cpu(cluster);
	lowest_hp = entry->pending;

	if (to_be_linked && !is_pending(to_be_linked))
		/* not yet accounted for, stick in heap */
		add_to_pending(cluster, to_be_linked);

	highest_lp = peek_pending(cluster);
	if (edf_higher_base_prio(highest_lp, lowest_hp)) {
		/* yes, it should become one of the c highest-priority
		 * pending jobs */

		TRACE_TASK(highest_lp,
			   "priodon: became one of the %u highest-prio tasks (P%d, req:%u) X\n",
			   cluster->num_cpus,
			   entry->cpu,
			   tsk_rt(highest_lp)->request_incomplete);

		/* get it out of the heap */
		highest_lp = take_pending(cluster);

		BUG_ON(highest_lp == lowest_hp);

		/* it should never be a priority donor at this point */
		BUG_ON(tsk_rt(highest_lp)->is_donor);

		/* install highest_lp in the freed-up high-priority slot */
		entry->pending = highest_lp;
		update_pending_position(entry);
		tsk_rt(highest_lp)->pending_on = entry->cpu;

		/* things that could happen:
		 *
		 * 1) lowest_hp has no donor, but is in a request => highest_lp becomes donor
		 * 2) lowest_hp is donor => highest_lp becomes new donor, old donor is resumed if suspended
		 * 3) lowest_hp is not in a request, and highest_lp is waiting => highest_lp is resumed
		 * 4) lowest_hp is not in a request, and highest_lp is not waiting => nothing to do
		 * 5) highest_lp has a priority donor => resume its donor
		 */

		/* do we need to put it back? */
		if (lowest_hp) {
			TRACE_TASK(lowest_hp,
				   "priodon: no longer among %u highest-prio tasks req:%u\n",
				   cluster->num_cpus,
				   tsk_rt(lowest_hp)->request_incomplete);
			tsk_rt(lowest_hp)->pending_on = NO_CPU;
			add_to_pending(cluster, lowest_hp);


			if (tsk_rt(lowest_hp)->request_incomplete) {
				/* case 1) */
				donate_priority(lowest_hp, highest_lp);
				reeval = 1;
			} else if (tsk_rt(lowest_hp)->inh_task) {
				/* case 2) */
				switch_donor(tsk_rt(lowest_hp)->inh_task,
					     lowest_hp, highest_lp);
				fake_resume(lowest_hp);
				reeval = 1;
			}
		}


		if (!tsk_rt(highest_lp)->is_donor) {
			if (tsk_rt(highest_lp)->waiting_eligible) {
				/* case 3) */
				reeval = fake_resume(highest_lp);
				BUG_ON(tsk_rt(highest_lp)->inh_task);
			} else if (tsk_rt(highest_lp)->inh_task) {
				/* case 5) */
				struct task_struct* donor = tsk_rt(highest_lp)->inh_task;
				undonate_priority(highest_lp, donor);
				reeval = fake_resume(donor);
			}
		}
	}

	return reeval;
}
608 | |||
/* job has exited => no longer pending
 *
 * Removes t either from its per-CPU high-priority slot (and promotes a
 * replacement) or from the pending heap. Caller must hold the cedf lock.
 */
static void job_pending_exit(struct task_struct* t)
{
	cedf_domain_t *cluster;
	cpu_entry_t* entry;

	TRACE_TASK(t, "priodon: is no longer pending (pending_on:%d, queued:%d)\n",
		   tsk_rt(t)->pending_on, in_pending_heap(t));

	cluster = task_cpu_cluster(t);

	if (tsk_rt(t)->pending_on != NO_CPU) {
		/* t occupied a high-priority slot => free it */
		entry = &per_cpu(cedf_cpu_entries, tsk_rt(t)->pending_on);
		tsk_rt(t)->pending_on = NO_CPU;
		entry->pending = NULL;
		update_pending_position(entry);

		/* let's see if anything changed */
		update_pending_job(cluster, NULL);
	} else if (in_pending_heap(t)) {
		bheap_delete(edf_pending_order, &cluster->pending_jobs,
			     tsk_rt(t)->pending_node);
	}
}
634 | |||
635 | #endif | ||
636 | |||
637 | |||
638 | /* cedf_job_arrival: task is either resumed or released */ | ||
639 | static noinline void cedf_job_arrival(struct task_struct* task) | ||
640 | { | ||
641 | cedf_domain_t *cluster = task_cpu_cluster(task); | ||
642 | BUG_ON(!task); | ||
643 | |||
644 | requeue(task); | ||
645 | check_for_preemptions(cluster); | ||
646 | } | ||
647 | |||
648 | |||
649 | static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
650 | { | ||
651 | cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); | ||
652 | unsigned long flags; | ||
653 | |||
654 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
655 | |||
656 | __merge_ready(&cluster->domain, tasks); | ||
657 | check_for_preemptions(cluster); | ||
658 | |||
659 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
660 | } | ||
661 | |||
/* job_completion - the current job of t completed. Caller holds cedf_lock.
 *
 * @forced: non-zero if the completion was forced (budget exhaustion)
 *          rather than signalled by the task itself.
 */
static noinline void job_completion(struct task_struct *t, int forced)
{
	BUG_ON(!t);

	sched_trace_task_completion(t, forced);

	TRACE_TASK(t, "job_completion().\n");

#ifdef CONFIG_LITMUS_LOCKING
	/* the finished job no longer counts as pending */
	job_pending_exit(t);
#endif

	/* prepare for next period */
	prepare_for_next_period(t);
	if (is_released(t, litmus_clock()))
		sched_trace_task_release(t);
	/* unlink */
	unlink(t);
	/* requeue
	 * But don't requeue a blocking task. */
	set_rt_flags(t, RT_F_RUNNING);
	if (is_running(t))
		cedf_job_arrival(t);
}
687 | |||
/* cedf_tick - this function is called for every local timer
 *             interrupt.
 *
 * checks whether the current task has expired and checks
 * whether we need to preempt it if it has not expired
 */
static void cedf_tick(struct task_struct* t)
{
	if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
		if (!is_np(t)) {
			/* np tasks will be preempted when they become
			 * preemptable again
			 */
			litmus_reschedule_local();
			TRACE("cedf_scheduler_tick: "
			      "%d is preemptable "
			      " => FORCE_RESCHED\n", t->pid);
		} else if (is_user_np(t)) {
			/* user-space non-preemptive section: ask the task
			 * to yield via its np-section exit flag instead */
			TRACE("cedf_scheduler_tick: "
			      "%d is non-preemptable, "
			      "preemption delayed.\n", t->pid);
			request_exit_np(t);
		}
	}
}
713 | |||
714 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
715 | * assumptions on the state of the current task since it may be called for a | ||
716 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
717 | * was necessary, because sys_exit_np() was called, because some Linux | ||
718 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
719 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
720 | * current state is. | ||
721 | * | ||
722 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
723 | * | ||
724 | * The following assertions for the scheduled task could hold: | ||
725 | * | ||
726 | * - !is_running(scheduled) // the job blocks | ||
727 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
728 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
729 | * - linked != scheduled // we need to reschedule (for any reason) | ||
730 | * - is_np(scheduled) // rescheduling must be delayed, | ||
731 | * sys_exit_np must be requested | ||
732 | * | ||
733 | * Any of these can occur together. | ||
734 | */ | ||
/* cedf_schedule - main scheduling entry point for this plugin.
 * @prev: the task currently scheduled on this CPU (may be non-RT).
 * Returns the real-time task to run next, or NULL to let Linux pick.
 * See the state discussion in the comment block above.
 */
static struct task_struct* cedf_schedule(struct task_struct * prev)
{
	cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
	cedf_domain_t *cluster = entry->cluster;
	int out_of_time, sleep, preempt, np, exists, blocks;
	struct task_struct* next = NULL;

#ifdef CONFIG_LITMUS_LOCKING
	int priodon;
#else
	/* without locking support there are no priority-donation
	 * fake suspensions */
#define priodon 0
#endif

#ifdef CONFIG_RELEASE_MASTER
	/* Bail out early if we are the release master.
	 * The release master never schedules any real-time tasks.
	 */
	if (cluster->domain.release_master == entry->cpu) {
		sched_state_task_picked();
		return NULL;
	}
#endif

	raw_spin_lock(&cluster->cluster_lock);

	/* sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev);
	BUG_ON(entry->scheduled && !is_realtime(prev));
	BUG_ON(is_realtime(prev) && !entry->scheduled);

	/* (0) Determine state */
	exists = entry->scheduled != NULL;
	blocks = exists && !is_running(entry->scheduled);
	out_of_time = exists &&
		budget_enforced(entry->scheduled) &&
		budget_exhausted(entry->scheduled);
	np = exists && is_np(entry->scheduled);
	sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
	preempt = entry->scheduled != entry->linked;

#ifdef CONFIG_LITMUS_LOCKING
	/* fake-suspend if the task must wait to become request-eligible,
	 * or if it wants to complete its job while still being a donor */
	priodon = exists && (tsk_rt(entry->scheduled)->waiting_eligible ||
			     /* can't allow job to exit until request is over */
			     (tsk_rt(entry->scheduled)->is_donor && sleep));

	/* this should never happen together (at least we don't handle it atm) */
	BUG_ON(priodon && blocks);
#endif

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE_TASK(prev, "invoked cedf_schedule.\n");
#endif

	if (exists)
		TRACE_TASK(prev,
			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
			   "state:%d sig:%d priodon:%d\n",
			   blocks, out_of_time, np, sleep, preempt,
			   prev->state, signal_pending(prev), priodon);
	if (entry->linked && preempt)
		TRACE_TASK(prev, "will be preempted by %s/%d\n",
			   entry->linked->comm, entry->linked->pid);


	/* If a task blocks we have no choice but to reschedule.
	 * Fake suspensions (priodon) are treated the same way.
	 */
	if (blocks || priodon)
		unlink(entry->scheduled);

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * Do not unlink since entry->scheduled is currently in the ready queue.
	 * We don't process out_of_time and sleep until the job is preemptive again.
	 */
	if (np && (out_of_time || preempt || sleep)) {
		request_exit_np(entry->scheduled);
	}

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this. Don't do a job completion if we block (can't have timers running
	 * for blocked jobs). Preemptions go first for the same reason.
	 */
	if (!np && (out_of_time || sleep) && !blocks && !preempt
	    && !priodon)
		/* note: priority donation prevents job completion */
		job_completion(entry->scheduled, !sleep);

	/* Link pending task if we became unlinked.
	 */

	if (!entry->linked) {
#ifdef CONFIG_LITMUS_LOCKING
		struct task_struct *pulled;
		int reeval;
		/* keep pulling until the head of the ready queue has been
		 * accounted for by the pending-job tracking and its
		 * effective priority is stable */
		do {
			pulled = __take_ready(&cluster->domain);
			reeval = 0;
			if (pulled && !is_pending(pulled)) {
				/* Pulled an un-processed task from the ready queue. */
				TRACE_TASK(pulled, "pulled unprocessed\n");
				reeval = update_pending_job(cluster, pulled);
				if (reeval)
					/* priority may have changed --- try again */
					requeue(pulled);
			}
		} while (reeval);
		link_task_to_cpu(pulled, entry);
#else
		link_task_to_cpu(__take_ready(&cluster->domain), entry);
#endif
	}

	/* The final scheduling decision. Do we need to switch for some reason?
	 * If linked is different from scheduled, then select linked as next.
	 */
	if ((!np || blocks || priodon) &&
	    entry->linked != entry->scheduled) {
		/* Schedule a linked job? */
		if (entry->linked) {
			entry->linked->rt_param.scheduled_on = entry->cpu;
			next = entry->linked;
		}
		if (entry->scheduled) {
			/* not gonna be scheduled soon */
			entry->scheduled->rt_param.scheduled_on = NO_CPU;
			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
		}
	} else
		/* Only override Linux scheduler if we have a real-time task
		 * scheduled that needs to continue.
		 */
		if (exists)
			next = prev;

	sched_state_task_picked();
	raw_spin_unlock(&cluster->cluster_lock);

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE("cedf_lock released, next=0x%p\n", next);

	if (next)
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	else if (exists && !next)
		TRACE("becomes idle at %llu.\n", litmus_clock());
#endif


	return next;
}
884 | |||
885 | |||
886 | /* _finish_switch - we just finished the switch away from prev | ||
887 | */ | ||
888 | static void cedf_finish_switch(struct task_struct *prev) | ||
889 | { | ||
890 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | ||
891 | |||
892 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
893 | #ifdef WANT_ALL_SCHED_EVENTS | ||
894 | TRACE_TASK(prev, "switched away from\n"); | ||
895 | #endif | ||
896 | } | ||
897 | |||
898 | |||
/* Prepare a task for running in RT mode
 *
 * @on_rq:   task is on a Linux runqueue
 * @running: task is currently executing
 * Sets up job parameters, pending-job tracking (locking builds), and
 * per-CPU scheduled/linked state, then triggers job arrival.
 */
static void cedf_task_new(struct task_struct * t, int on_rq, int running)
{
	unsigned long flags;
	cpu_entry_t* entry;
	cedf_domain_t* cluster;

	/* NOTE(review): message still says "gsn edf" -- looks like a
	 * copy-paste left-over from the GSN-EDF plugin */
	TRACE("gsn edf: task new %d\n", t->pid);

	/* the cluster doesn't change even if t is running */
	cluster = task_cpu_cluster(t);

	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);

	/* setup job params */
	release_at(t, litmus_clock());

#ifdef CONFIG_LITMUS_LOCKING
	/* GFP_ATOMIC | __GFP_NOFAIL: we hold a spinlock with IRQs off */
	tsk_rt(t)->pending_node = bheap_node_alloc(GFP_ATOMIC | __GFP_NOFAIL);
	bheap_node_init(&tsk_rt(t)->pending_node, t);
	tsk_rt(t)->pending_on = NO_CPU;
	add_to_pending(cluster, t);
#endif

	if (running) {
		entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
		BUG_ON(entry->scheduled);

#ifdef CONFIG_RELEASE_MASTER
		if (entry->cpu != cluster->domain.release_master) {
#endif
			entry->scheduled = t;
			tsk_rt(t)->scheduled_on = task_cpu(t);
#ifdef CONFIG_RELEASE_MASTER
		} else {
			/* do not schedule on release master */
			preempt(entry); /* force resched */
			tsk_rt(t)->scheduled_on = NO_CPU;
		}
#endif
	} else {
		t->rt_param.scheduled_on = NO_CPU;
	}
	t->rt_param.linked_on = NO_CPU;

	cedf_job_arrival(t);
	raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
}
948 | |||
/* cedf_task_wake_up - a real-time task resumed.
 * Distinguishes wake-ups after lock-related suspensions (not a new job)
 * from genuine sporadic releases.
 */
static void cedf_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	lt_t now;
	cedf_domain_t *cluster;

	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());

	cluster = task_cpu_cluster(task);

	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
	/* We need to take suspensions because of semaphores into
	 * account! If a job resumes after being suspended due to acquiring
	 * a semaphore, it should never be treated as a new job release.
	 */
	if (get_rt_flags(task) == RT_F_EXIT_SEM) {
		set_rt_flags(task, RT_F_RUNNING);
	} else {
		now = litmus_clock();
		if (is_tardy(task, now)) {
			/* new sporadic release */
			release_at(task, now);
			sched_trace_task_release(task);
		}
		else {
			if (task->rt.time_slice) {
				/* came back in time before deadline
				 */
				set_rt_flags(task, RT_F_RUNNING);
			}
		}
	}
	cedf_job_arrival(task);
	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
}
984 | |||
/* cedf_task_block - a real-time task is about to block (suspend).
 * Unlink it so no CPU stays linked to a sleeping task.
 */
static void cedf_task_block(struct task_struct *t)
{
	unsigned long flags;
	cedf_domain_t *cluster;

	TRACE_TASK(t, "block at %llu\n", litmus_clock());

	cluster = task_cpu_cluster(t);

	/* unlink if necessary */
	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
	unlink(t);
	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);

	BUG_ON(!is_realtime(t));
}
1001 | |||
1002 | #ifdef CONFIG_LITMUS_LOCKING | ||
/* cedf_pre_setsched - called before t leaves SCHED_LITMUS.
 * A task may not exit real-time mode while it acts as a priority donor;
 * poll (sleeping up to HZ jiffies per iteration) until the donation ends.
 *
 * NOTE(review): the loop tests tsk_rt(current), not tsk_rt(t) --
 * presumably this is only ever invoked with t == current; confirm
 * against the plugin callers.
 */
static void cedf_pre_setsched(struct task_struct *t, int policy)
{

	unsigned long flags;
	cedf_domain_t *cluster = task_cpu_cluster(t);

	int delay_donor_exit = 0;

	while (1) {
		raw_spin_lock_irqsave(&cluster->cluster_lock, flags);

		TRACE_CUR("cedf_pre_setsched wait:%u pend:%d donor:%u req:%u\n",
			  tsk_rt(t)->waiting_eligible,
			  tsk_rt(t)->pending_on, tsk_rt(t)->is_donor,
			  tsk_rt(t)->request_incomplete);

		/* sample the donor flag under the lock */
		delay_donor_exit = tsk_rt(current)->is_donor;

		raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);

		if (!delay_donor_exit)
			break;

		TRACE_CUR("donor exit delay\n");
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(HZ);
	}
}
1031 | #endif | ||
1032 | |||
/* cedf_task_exit - a real-time task is leaving the system.
 * Tears down all per-task scheduler state under the cluster lock.
 */
static void cedf_task_exit(struct task_struct * t)
{
	unsigned long flags;
	cedf_domain_t *cluster = task_cpu_cluster(t);

	/* unlink if necessary */
	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);

	unlink(t);

#ifdef CONFIG_LITMUS_LOCKING
	/* make sure it's not pending anymore */
	job_pending_exit(t);
	bheap_node_free(tsk_rt(t)->pending_node);
#endif

	if (tsk_rt(t)->scheduled_on != NO_CPU) {
		/* clear the stale pointer in the per-CPU entry */
		cpu_entry_t *cpu;
		cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
		cpu->scheduled = NULL;
		tsk_rt(t)->scheduled_on = NO_CPU;
	}
	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);


	BUG_ON(!is_realtime(t));
	TRACE_TASK(t, "RIP\n");
}
1061 | |||
1062 | #ifdef CONFIG_LITMUS_LOCKING | ||
1063 | |||
1064 | #include <litmus/fdso.h> | ||
1065 | #include <litmus/locking.h> | ||
1066 | |||
1067 | /* NOTE: we use fake suspensions because we must wake the task from within the | ||
1068 | * scheduler */ | ||
1069 | |||
/* suspend until the current task becomes eligible to issue a lock request */
static void priodon_become_eligible(void)
{
	struct task_struct* t = current;
	unsigned long flags;
	cedf_domain_t *cluster;

	cluster = task_cpu_cluster(t);

	do {
		TRACE_CUR("priodon: checking whether request may be issued\n");
		raw_spin_lock_irqsave(&cluster->cluster_lock, flags);

		/* Eligible only when pending on some CPU and not currently
		 * serving as a priority donor for another job. */
		if (tsk_rt(t)->pending_on == NO_CPU ||
		    tsk_rt(t)->is_donor) {
			/* nope, gotta wait */
			tsk_rt(t)->waiting_eligible = 1;
			TRACE_CUR("priodon: not eligible pend:%u donor:%u\n",
				  tsk_rt(t)->pending_on, tsk_rt(t)->is_donor);
		} else {
			/* alright! we are good to go! */
			tsk_rt(t)->request_incomplete = 1;
			TRACE_CUR("priodon: request issued\n");
		}

		raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);

		if (tsk_rt(t)->waiting_eligible) {
			/* Fake suspension: schedule() is invoked directly and
			 * the scheduler wakes us from within the scheduling
			 * path once we become eligible (see file header note). */
			TRACE_CUR("priodon: fake suspending\n");
			TS_LOCK_SUSPEND;
			schedule();
			TS_LOCK_RESUME;
		}

	} while (!tsk_rt(t)->request_incomplete);
}
1106 | |||
/* current task has completed its request */
static void priodon_complete_request(void)
{
	struct task_struct* t = current;
	struct task_struct* donor;
	unsigned long flags;
	cedf_domain_t *cluster;

	cluster = task_cpu_cluster(t);

	/* Disable preemption so a wakeup-triggered preemption cannot hit us
	 * between dropping the cluster lock and finishing up here. */
	preempt_disable();

	raw_spin_lock_irqsave(&cluster->cluster_lock, flags);

	TRACE_CUR("priodon: completing request\n");

	if (tsk_rt(t)->inh_task) {
		/* we have a donor job --- see if we need to wake it */
		donor = tsk_rt(t)->inh_task;
		undonate_priority(t, donor);

		/* If the donor was fake-suspended, resuming it may create a
		 * preemption opportunity in the cluster. */
		if (fake_resume(donor))
			check_for_preemptions(cluster);
	}

	tsk_rt(t)->request_incomplete = 0;

	raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);

	preempt_enable();
}
1138 | |||
/* struct for semaphore with priority inheritance */
struct omlp_semaphore {
	struct litmus_lock litmus_lock;

	/* current resource holder; protected by fifo_wait.lock */
	struct task_struct *owner;

	/* FIFO queue of waiting tasks */
	wait_queue_head_t fifo_wait;
};
1149 | |||
/* Recover the containing omlp_semaphore from its embedded litmus_lock. */
static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock)
{
	return container_of(lock, struct omlp_semaphore, litmus_lock);
}
1154 | |||
/* Acquire the OMLP semaphore.  Blocks (uninterruptibly) in FIFO order while
 * the resource is held by another task.  Returns 0 on success, -EPERM if the
 * caller is not a real-time task.
 */
static int cedf_omlp_lock(struct litmus_lock* l)
{
	struct task_struct* t = current;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	wait_queue_t wait;
	unsigned long flags;

	if (!is_realtime(t))
		return -EPERM;

	/* block until this job may issue a request under the priority
	 * donation protocol */
	priodon_become_eligible();

	spin_lock_irqsave(&sem->fifo_wait.lock, flags);

	if (sem->owner) {
		/* resource is not free => must suspend and wait */

		init_waitqueue_entry(&wait, t);

		/* Set state before enqueueing so a wakeup cannot be lost. */
		set_task_state(t, TASK_UNINTERRUPTIBLE);

		__add_wait_queue_tail_exclusive(&sem->fifo_wait, &wait);

		TS_LOCK_SUSPEND;

		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);

		schedule();

		TS_LOCK_RESUME;

		/* The unlocking task hands over ownership before waking us,
		 * so by the time we run we must already be the owner. */
		BUG_ON(sem->owner != t);
	} else {
		/* it's ours now */
		sem->owner = t;

		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
	}

	return 0;
}
1196 | |||
/* Release the OMLP semaphore.  Ownership is passed directly to the head of
 * the FIFO wait queue, if any.  Returns -EINVAL if the caller is not the
 * current owner.
 */
static int cedf_omlp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	unsigned long flags;
	int err = 0;

	spin_lock_irqsave(&sem->fifo_wait.lock, flags);

	if (sem->owner != t) {
		err = -EINVAL;
		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
		goto out;
	}

	/* check if there are jobs waiting for this resource */
	next = __waitqueue_remove_first(&sem->fifo_wait);
	if (next) {
		/* next becomes the resource holder */
		sem->owner = next;
		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);

		/* wake up next */
		wake_up_process(next);
	} else
		/* becomes available */
		sem->owner = NULL;

	spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);

	/* withdraw any priority donation tied to this request */
	priodon_complete_request();

out:
	return err;
}
1232 | |||
1233 | static int cedf_omlp_close(struct litmus_lock* l) | ||
1234 | { | ||
1235 | struct task_struct *t = current; | ||
1236 | struct omlp_semaphore *sem = omlp_from_lock(l); | ||
1237 | unsigned long flags; | ||
1238 | |||
1239 | int owner; | ||
1240 | |||
1241 | spin_lock_irqsave(&sem->fifo_wait.lock, flags); | ||
1242 | |||
1243 | owner = sem->owner == t; | ||
1244 | |||
1245 | spin_unlock_irqrestore(&sem->fifo_wait.lock, flags); | ||
1246 | |||
1247 | if (owner) | ||
1248 | cedf_omlp_unlock(l); | ||
1249 | |||
1250 | return 0; | ||
1251 | } | ||
1252 | |||
/* Deallocate the semaphore object backing this lock. */
static void cedf_omlp_free(struct litmus_lock* lock)
{
	struct omlp_semaphore *sem = omlp_from_lock(lock);

	kfree(sem);
}
1257 | |||
/* Operations table wiring the OMLP semaphore into the generic litmus
 * locking framework. */
static struct litmus_lock_ops cedf_omlp_lock_ops = {
	.close = cedf_omlp_close,
	.lock = cedf_omlp_lock,
	.unlock = cedf_omlp_unlock,
	.deallocate = cedf_omlp_free,
};
1264 | |||
1265 | static struct litmus_lock* cedf_new_omlp(void) | ||
1266 | { | ||
1267 | struct omlp_semaphore* sem; | ||
1268 | |||
1269 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
1270 | if (!sem) | ||
1271 | return NULL; | ||
1272 | |||
1273 | sem->owner = NULL; | ||
1274 | init_waitqueue_head(&sem->fifo_wait); | ||
1275 | sem->litmus_lock.ops = &cedf_omlp_lock_ops; | ||
1276 | |||
1277 | return &sem->litmus_lock; | ||
1278 | } | ||
1279 | |||
1280 | static long cedf_allocate_lock(struct litmus_lock **lock, int type, | ||
1281 | void* __user unused) | ||
1282 | { | ||
1283 | int err = -ENXIO; | ||
1284 | |||
1285 | switch (type) { | ||
1286 | |||
1287 | case OMLP_SEM: | ||
1288 | /* O(m) Multiprocessor Locking Protocol */ | ||
1289 | *lock = cedf_new_omlp(); | ||
1290 | if (*lock) | ||
1291 | err = 0; | ||
1292 | else | ||
1293 | err = -ENOMEM; | ||
1294 | break; | ||
1295 | |||
1296 | }; | ||
1297 | |||
1298 | return err; | ||
1299 | } | ||
1300 | |||
1301 | |||
1302 | #endif | ||
1303 | |||
1304 | static long cedf_admit_task(struct task_struct* tsk) | ||
1305 | { | ||
1306 | if (task_cpu(tsk) == tsk->rt_param.task_params.cpu) { | ||
1307 | #ifdef CONFIG_LITMUS_LOCKING | ||
1308 | |||
1309 | #endif | ||
1310 | return 0; | ||
1311 | } | ||
1312 | else | ||
1313 | return -EINVAL; | ||
1314 | } | ||
1315 | |||
/* total number of clusters (computed at plugin activation) */
static int num_clusters;
/* we do not support clusters of different sizes */
static unsigned int cluster_size;
1320 | |||
#ifdef VERBOSE_INIT
/* Log which CPUs share a cluster with the given CPU. */
static void print_cluster_topology(cpumask_var_t mask, int cpu)
{
	char buf[255];
	int n;

	/* Render the cpumask as a human-readable CPU list. */
	n = cpulist_scnprintf(buf, 254, mask);
	buf[n] = '\0';
	printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
}
#endif
1333 | |||
1334 | static int clusters_allocated = 0; | ||
1335 | |||
1336 | static void cleanup_cedf(void) | ||
1337 | { | ||
1338 | int i; | ||
1339 | |||
1340 | if (clusters_allocated) { | ||
1341 | for (i = 0; i < num_clusters; i++) { | ||
1342 | free_cpumask_var(cedf[i].cpu_map); | ||
1343 | } | ||
1344 | |||
1345 | kfree(cedf); | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1349 | static long cedf_activate_plugin(void) | ||
1350 | { | ||
1351 | int i, j, cpu, ccpu, cpu_count; | ||
1352 | cpu_entry_t *entry; | ||
1353 | |||
1354 | cpumask_var_t mask; | ||
1355 | int chk = 0; | ||
1356 | |||
1357 | /* de-allocate old clusters, if any */ | ||
1358 | cleanup_cedf(); | ||
1359 | |||
1360 | printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n", | ||
1361 | cluster_config); | ||
1362 | |||
1363 | /* need to get cluster_size first */ | ||
1364 | if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
1365 | return -ENOMEM; | ||
1366 | |||
1367 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) { | ||
1368 | cluster_size = num_online_cpus(); | ||
1369 | } else { | ||
1370 | chk = get_shared_cpu_map(mask, 0, cluster_config); | ||
1371 | if (chk) { | ||
1372 | /* if chk != 0 then it is the max allowed index */ | ||
1373 | printk(KERN_INFO "C-EDF: Cluster configuration = %d " | ||
1374 | "is not supported on this hardware.\n", | ||
1375 | cluster_config); | ||
1376 | /* User should notice that the configuration failed, so | ||
1377 | * let's bail out. */ | ||
1378 | return -EINVAL; | ||
1379 | } | ||
1380 | |||
1381 | cluster_size = cpumask_weight(mask); | ||
1382 | } | ||
1383 | |||
1384 | if ((num_online_cpus() % cluster_size) != 0) { | ||
1385 | /* this can't be right, some cpus are left out */ | ||
1386 | printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n", | ||
1387 | num_online_cpus(), cluster_size); | ||
1388 | return -1; | ||
1389 | } | ||
1390 | |||
1391 | num_clusters = num_online_cpus() / cluster_size; | ||
1392 | printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", | ||
1393 | num_clusters, cluster_size); | ||
1394 | |||
1395 | /* initialize clusters */ | ||
1396 | cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); | ||
1397 | for (i = 0; i < num_clusters; i++) { | ||
1398 | bheap_init(&(cedf[i].cpu_heap)); | ||
1399 | #ifdef CONFIG_LITMUS_LOCKING | ||
1400 | bheap_init(&(cedf[i].pending_jobs)); | ||
1401 | bheap_init(&(cedf[i].pending_cpus)); | ||
1402 | #endif | ||
1403 | edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); | ||
1404 | |||
1405 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) | ||
1406 | return -ENOMEM; | ||
1407 | #ifdef CONFIG_RELEASE_MASTER | ||
1408 | cedf[i].domain.release_master = atomic_read(&release_master_cpu); | ||
1409 | #endif | ||
1410 | } | ||
1411 | |||
1412 | /* cycle through cluster and add cpus to them */ | ||
1413 | for (i = 0; i < num_clusters; i++) { | ||
1414 | |||
1415 | for_each_online_cpu(cpu) { | ||
1416 | /* check if the cpu is already in a cluster */ | ||
1417 | for (j = 0; j < num_clusters; j++) | ||
1418 | if (cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
1419 | break; | ||
1420 | /* if it is in a cluster go to next cpu */ | ||
1421 | if (j < num_clusters && | ||
1422 | cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
1423 | continue; | ||
1424 | |||
1425 | /* this cpu isn't in any cluster */ | ||
1426 | /* get the shared cpus */ | ||
1427 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) | ||
1428 | cpumask_copy(mask, cpu_online_mask); | ||
1429 | else | ||
1430 | get_shared_cpu_map(mask, cpu, cluster_config); | ||
1431 | |||
1432 | cpumask_copy(cedf[i].cpu_map, mask); | ||
1433 | #ifdef VERBOSE_INIT | ||
1434 | print_cluster_topology(mask, cpu); | ||
1435 | #endif | ||
1436 | /* add cpus to current cluster and init cpu_entry_t */ | ||
1437 | cpu_count = 0; | ||
1438 | cedf[i].num_cpus = 0; | ||
1439 | for_each_cpu(ccpu, cedf[i].cpu_map) { | ||
1440 | |||
1441 | entry = &per_cpu(cedf_cpu_entries, ccpu); | ||
1442 | atomic_set(&entry->will_schedule, 0); | ||
1443 | entry->cpu = ccpu; | ||
1444 | entry->cluster = &cedf[i]; | ||
1445 | entry->hn = cpu_nodes + ccpu; | ||
1446 | bheap_node_init(&entry->hn, entry); | ||
1447 | |||
1448 | #ifdef CONFIG_LITMUS_LOCKING | ||
1449 | entry->pending_hn = pending_nodes + ccpu; | ||
1450 | bheap_node_init(&entry->pending_hn, entry); | ||
1451 | entry->pending = NULL; | ||
1452 | #endif | ||
1453 | |||
1454 | cpu_count++; | ||
1455 | |||
1456 | entry->linked = NULL; | ||
1457 | entry->scheduled = NULL; | ||
1458 | #ifdef CONFIG_RELEASE_MASTER | ||
1459 | /* only add CPUs that should schedule jobs */ | ||
1460 | if (entry->cpu != entry->cluster->domain.release_master) | ||
1461 | #endif | ||
1462 | { | ||
1463 | cedf[i].num_cpus++; | ||
1464 | update_cpu_position(entry); | ||
1465 | #ifdef CONFIG_LITMUS_LOCKING | ||
1466 | update_pending_position(entry); | ||
1467 | #endif | ||
1468 | } | ||
1469 | } | ||
1470 | /* done with this cluster */ | ||
1471 | break; | ||
1472 | } | ||
1473 | } | ||
1474 | |||
1475 | free_cpumask_var(mask); | ||
1476 | clusters_allocated = 1; | ||
1477 | return 0; | ||
1478 | } | ||
1479 | |||
/* Plugin object: callbacks registered with the LITMUS^RT core.  Locking
 * hooks are only wired in when CONFIG_LITMUS_LOCKING is enabled. */
static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
	.plugin_name		= "C-EDF",
	.finish_switch		= cedf_finish_switch,
	.tick			= cedf_tick,
	.task_new		= cedf_task_new,
	.complete_job		= complete_job,
	.task_exit		= cedf_task_exit,
	.schedule		= cedf_schedule,
	.task_wake_up		= cedf_task_wake_up,
	.task_block		= cedf_task_block,
	.admit_task		= cedf_admit_task,
	.activate_plugin	= cedf_activate_plugin,
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock		= cedf_allocate_lock,
	.pre_setsched		= cedf_pre_setsched,
#endif
};
1498 | |||
1499 | static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; | ||
1500 | |||
1501 | static int __init init_cedf(void) | ||
1502 | { | ||
1503 | int err, fs; | ||
1504 | |||
1505 | err = register_sched_plugin(&cedf_plugin); | ||
1506 | if (!err) { | ||
1507 | fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir); | ||
1508 | if (!fs) | ||
1509 | cluster_file = create_cluster_file(cedf_dir, &cluster_config); | ||
1510 | else | ||
1511 | printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n"); | ||
1512 | } | ||
1513 | return err; | ||
1514 | } | ||
1515 | |||
1516 | static void clean_cedf(void) | ||
1517 | { | ||
1518 | cleanup_cedf(); | ||
1519 | if (cluster_file) | ||
1520 | remove_proc_entry("cluster", cedf_dir); | ||
1521 | if (cedf_dir) | ||
1522 | remove_plugin_proc_dir(&cedf_plugin); | ||
1523 | } | ||
1524 | |||
1525 | module_init(init_cedf); | ||
1526 | module_exit(clean_cedf); | ||
diff --git a/litmus/sched_cedf.c.rej b/litmus/sched_cedf.c.rej new file mode 100644 index 000000000000..ec74da6c4a64 --- /dev/null +++ b/litmus/sched_cedf.c.rej | |||
@@ -0,0 +1,53 @@ | |||
1 | --- litmus/sched_cedf.c | ||
2 | +++ litmus/sched_cedf.c | ||
3 | @@ -739,6 +1100,12 @@ | ||
4 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
5 | struct task_struct* next = NULL; | ||
6 | |||
7 | +#ifdef CONFIG_LITMUS_LOCKING | ||
8 | + int priodon; | ||
9 | +#else | ||
10 | +#define priodon 0 | ||
11 | +#endif | ||
12 | + | ||
13 | #ifdef CONFIG_RELEASE_MASTER | ||
14 | /* Bail out early if we are the release master. | ||
15 | * The release master never schedules any real-time tasks. | ||
16 | @@ -750,7 +1117,6 @@ | ||
17 | #endif | ||
18 | |||
19 | raw_spin_lock(&cluster->cluster_lock); | ||
20 | - clear_will_schedule(); | ||
21 | |||
22 | /* sanity checking */ | ||
23 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
24 | @@ -1032,7 +1466,15 @@ | ||
25 | |||
26 | /* unlink if necessary */ | ||
27 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
28 | + | ||
29 | unlink(t); | ||
30 | + | ||
31 | +#ifdef CONFIG_LITMUS_LOCKING | ||
32 | + /* make sure it's not pending anymore */ | ||
33 | + job_pending_exit(t); | ||
34 | + bheap_node_free(tsk_rt(t)->pending_node); | ||
35 | +#endif | ||
36 | + | ||
37 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
38 | cpu_entry_t *cpu; | ||
39 | cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on); | ||
40 | @@ -1446,7 +2140,13 @@ | ||
41 | /* only add CPUs that should schedule jobs */ | ||
42 | if (entry->cpu != entry->cluster->domain.release_master) | ||
43 | #endif | ||
44 | + { | ||
45 | + cedf[i].num_cpus++; | ||
46 | update_cpu_position(entry); | ||
47 | +#ifdef CONFIG_LITMUS_LOCKING | ||
48 | + update_pending_position(entry); | ||
49 | +#endif | ||
50 | + } | ||
51 | } | ||
52 | /* done with this cluster */ | ||
53 | break; | ||
diff --git a/litmus/sched_gfl_split_namechange.c b/litmus/sched_gfl_split_namechange.c new file mode 100644 index 000000000000..c154b115a00e --- /dev/null +++ b/litmus/sched_gfl_split_namechange.c | |||
@@ -0,0 +1,1149 @@ | |||
1 | /* | ||
2 | * litmus/sched_gfl_split.c | ||
3 | * | ||
4 | * Implementation of the G-FL with job splitting. See the Erickson/Anderson | ||
5 | * paper at ECRTS 2012 for a description of G-FL. | ||
6 | * | ||
7 | * This plugin is a modified version of the prior GSN-EDF-split plugin in | ||
8 | * litmus/sched_gsn_edf_split.c. Job splitting works the same way as in that | ||
9 | * plugin. The subjob "deadlines" (really priorities) are computed according | ||
10 | * to G-FL with respect to the post-split (smaller) jobs. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/percpu.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/slab.h> | ||
18 | |||
19 | #include <litmus/litmus.h> | ||
20 | #include <litmus/jobs.h> | ||
21 | #include <litmus/sched_plugin.h> | ||
22 | #include <litmus/edf_split_common.h> | ||
23 | #include <litmus/sched_trace.h> | ||
24 | #include <litmus/trace.h> | ||
25 | |||
26 | #include <litmus/preempt.h> | ||
27 | |||
28 | #include <litmus/bheap.h> | ||
29 | |||
30 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
31 | #include <litmus/affinity.h> | ||
32 | #endif | ||
33 | |||
34 | #include <linux/module.h> | ||
35 | |||
/* cpu_entry_t - maintain the linked and scheduled state of one CPU */
typedef struct {
	int 			cpu;
	struct task_struct*	linked;		/* only RT tasks */
	struct task_struct*	scheduled;	/* only RT tasks */
	struct bheap_node*	hn;		/* this CPU's node in the CPU heap */
	struct hrtimer		split_timer;	/* fires when the current subjob's deadline must move */
	int			timer_armed;	/* nonzero while split_timer is pending */
} cpu_entry_t;
DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);

cpu_entry_t* gsnedf_cpus[NR_CPUS];

/* the cpus queue themselves according to priority in here */
static struct bheap_node gsnedf_heap_node[NR_CPUS];
static struct bheap      gsnedf_cpu_heap;

/* single global real-time domain shared by all CPUs */
static rt_domain_t gsnedf;
#define gsnedf_lock (gsnedf.ready_lock)
56 | |||
57 | inline static int get_slice_num(struct task_struct* t) | ||
58 | { | ||
59 | int basic = ((t->rt_param.job_params.exec_time * | ||
60 | t->rt_param.task_params.split) / | ||
61 | t->rt_param.task_params.exec_cost) + 1; | ||
62 | if (basic <= t->rt_param.task_params.split){ | ||
63 | return basic; | ||
64 | } | ||
65 | else{ | ||
66 | /*Since we don't police budget, just leave where it's at.*/ | ||
67 | return t->rt_param.task_params.split; | ||
68 | } | ||
69 | } | ||
70 | |||
/* Returns the appropriate subjob deadline: the current subjob's share of the
 * period, shifted earlier by the G-FL correction term
 * (m-1)/m * C / split (all in integer arithmetic). */
inline static lt_t get_proper_deadline(struct task_struct* t)
{
	unsigned int num_cpus = num_online_cpus();
	return t->rt_param.job_params.release +
		((t->rt_param.task_params.period * get_slice_num(t))
		/ t->rt_param.task_params.split)
		/* G-FL correction */
		- (((num_cpus - 1) * t->rt_param.task_params.exec_cost)
		/ (num_cpus * t->rt_param.task_params.split));
}
82 | |||
/* Tells us if the current subjob deadline is stale, i.e. smaller than what
 * the current subjob index requires.  With locking enabled, deadlines are
 * never moved while the task is inside a critical section. */
inline static int needs_deadline_move(struct task_struct* t)
{
	/* deadlines only ever move later, never earlier */
	BUG_ON(get_proper_deadline(t) < t->rt_param.job_params.subjob_deadline);
#ifdef CONFIG_LITMUS_LOCKING
	return !is_in_crit_section(t) &&
		(get_proper_deadline(t) !=
		 tsk_rt(t)->job_params.subjob_deadline);
#else
	return get_proper_deadline(t) != tsk_rt(t)->job_params.subjob_deadline;
#endif
}
95 | |||
96 | /*Returns execution time until the next deadline move. | ||
97 | * 0 means the task has no more deadline moves | ||
98 | */ | ||
99 | inline static lt_t time_to_next_move(struct task_struct* t) | ||
100 | { | ||
101 | if (get_slice_num(t) == t->rt_param.task_params.split){ | ||
102 | return 0; | ||
103 | } | ||
104 | /* +1 upper bounds ceiling, since integer division is floor*/ | ||
105 | return ((get_slice_num(t) * t->rt_param.task_params.exec_cost) | ||
106 | / t->rt_param.task_params.split) + 1 | ||
107 | - t->rt_param.job_params.exec_time; | ||
108 | } | ||
109 | |||
/* Timer stuff - similar to budget.c. */
/* hrtimer callback: the running subjob's window expired, so trigger a local
 * reschedule to let the scheduler move its deadline. */
static enum hrtimer_restart on_split_timeout(struct hrtimer *timer)
{
	cpu_entry_t* st = container_of(timer,
				       cpu_entry_t,
				       split_timer);

	unsigned long flags;

	/* timer_armed is only touched with IRQs off on the local CPU */
	local_irq_save(flags);
	TRACE("split timer fired.\n");
	st->timer_armed = 0;
	/* Activate scheduler */
	litmus_reschedule_local();
	local_irq_restore(flags);

	return HRTIMER_NORESTART;
}
128 | |||
/* Cancel a pending split timer on this CPU, if armed.
 * Caller must have IRQs disabled. */
static void cancel_split_timer(cpu_entry_t* ce)
{
	int ret;

	TRACE("cancelling split time.\n");

	/* Since interrupts are disabled and et->timer_armed is only
	 * modified locally, we do not need any locks.
	 */

	if (ce->timer_armed) {
		ret = hrtimer_try_to_cancel(&ce->split_timer);
		/* Should never be inactive. */
		BUG_ON(ret == 0);
		/* Should never be running concurrently.*/
		BUG_ON(ret == -1);

		ce->timer_armed = 0;
	}
}
149 | |||
/* Arm the split timer so it fires when t's current subjob exhausts its
 * share of the budget.  No-op for non-preemptive tasks or when no deadline
 * moves remain.  Assumes called with IRQs off. */
static void arm_split_timer(cpu_entry_t *ce,
			    struct task_struct* t)
{
	lt_t when_to_fire;
	lt_t time_to_move;
	TRACE_TASK(t, "arming split timer.\n");

	/* __hrtimer_start_range_ns() cancels the timer
	 * anyway, so we don't have to check whether it is still armed */

	/* We won't do any new deadline moves if the budget has been exhausted
	 * (note: assignment inside the condition is intentional). */
	if (likely(!is_np(t) && (time_to_move = time_to_next_move(t)))) {
		when_to_fire = litmus_clock() + time_to_move;
		TRACE_TASK(t, "actually arming for %llu into the future\n",
			   time_to_move);
		__hrtimer_start_range_ns(&ce->split_timer,
					 ns_to_ktime(when_to_fire),
					 0 /* delta */,
					 HRTIMER_MODE_ABS_PINNED,
					 0 /* no wakeup */);
		ce->timer_armed = 1;
	}
}
174 | |||
175 | /* Uncomment this if you want to see all scheduling decisions in the | ||
176 | * TRACE() log. | ||
177 | #define WANT_ALL_SCHED_EVENTS | ||
178 | */ | ||
179 | |||
180 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
181 | { | ||
182 | cpu_entry_t *a, *b; | ||
183 | a = _a->value; | ||
184 | b = _b->value; | ||
185 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
186 | * the top of the heap. | ||
187 | */ | ||
188 | return edf_split_higher_prio(b->linked, a->linked); | ||
189 | } | ||
190 | |||
/* update_cpu_position - Move the cpu entry to the correct place to maintain
 * order in the cpu queue. Caller must hold gsnedf lock.
 */
static void update_cpu_position(cpu_entry_t *entry)
{
	/* remove-then-reinsert re-establishes the heap invariant */
	if (likely(bheap_node_in_heap(entry->hn)))
		bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
	bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
}
200 | |||
201 | /* caller must hold gsnedf lock */ | ||
202 | static cpu_entry_t* lowest_prio_cpu(void) | ||
203 | { | ||
204 | struct bheap_node* hn; | ||
205 | hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap); | ||
206 | return hn->value; | ||
207 | } | ||
208 | |||
209 | |||
/* link_task_to_cpu - Update the link of a CPU.
 * Handles the case where the to-be-linked task is already
 * scheduled on a different CPU, in which case the two CPUs' linked tasks
 * are swapped.  Caller must hold gsnedf_lock.
 */
static noinline void link_task_to_cpu(struct task_struct* linked,
				      cpu_entry_t *entry)
{
	cpu_entry_t *sched;
	struct task_struct* tmp;
	int on_cpu;

	BUG_ON(linked && !is_realtime(linked));

	/* Currently linked task is set to be unlinked. */
	if (entry->linked) {
		entry->linked->rt_param.linked_on = NO_CPU;
	}

	/* Link new task to CPU. */
	if (linked) {
		set_rt_flags(linked, RT_F_RUNNING);
		/* handle task is already scheduled somewhere! */
		on_cpu = linked->rt_param.scheduled_on;
		if (on_cpu != NO_CPU) {
			sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
			/* this should only happen if not linked already */
			BUG_ON(sched->linked == linked);

			/* If we are already scheduled on the CPU to which we
			 * wanted to link, we don't need to do the swap --
			 * we just link ourselves to the CPU and depend on
			 * the caller to get things right.
			 */
			if (entry != sched) {
				TRACE_TASK(linked,
					   "already scheduled on %d, updating link.\n",
					   sched->cpu);
				/* swap: link 'linked' to the CPU it is running
				 * on and fall through to link that CPU's
				 * former task to 'entry' instead */
				tmp = sched->linked;
				linked->rt_param.linked_on = sched->cpu;
				sched->linked = linked;
				update_cpu_position(sched);
				linked = tmp;
			}
		}
		if (linked) /* might be NULL due to swap */
			linked->rt_param.linked_on = entry->cpu;
	}
	entry->linked = linked;
#ifdef WANT_ALL_SCHED_EVENTS
	if (linked)
		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
	else
		TRACE("NULL linked to %d.\n", entry->cpu);
#endif
	update_cpu_position(entry);
}
266 | |||
/* unlink - Make sure a task is not linked any longer to an entry
 * where it was linked before. Must hold gsnedf_lock.
 */
static noinline void unlink(struct task_struct* t)
{
	cpu_entry_t *entry;

	if (t->rt_param.linked_on != NO_CPU) {
		/* unlink */
		entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
		t->rt_param.linked_on = NO_CPU;
		link_task_to_cpu(NULL, entry);
	} else if (is_queued(t)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked.  It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 */
		remove(&gsnedf, t);
	}
}
290 | |||

/* preempt - force a CPU to reschedule (no-op for non-preemptable sections;
 * handled inside preempt_if_preemptable())
 */
static void preempt(cpu_entry_t *entry)
{
	preempt_if_preemptable(entry->scheduled, entry->cpu);
}
298 | |||
299 | /* requeue - Put an unlinked task into gsn-edf domain. | ||
300 | * Caller must hold gsnedf_lock. | ||
301 | */ | ||
302 | static noinline void requeue(struct task_struct* task) | ||
303 | { | ||
304 | BUG_ON(!task); | ||
305 | /* sanity check before insertion */ | ||
306 | BUG_ON(is_queued(task)); | ||
307 | |||
308 | if (is_released(task, litmus_clock())) | ||
309 | __add_ready(&gsnedf, task); | ||
310 | else { | ||
311 | /* it has got to wait */ | ||
312 | add_release(&gsnedf, task); | ||
313 | } | ||
314 | } | ||
315 | |||
#ifdef CONFIG_SCHED_CPU_AFFINITY
/* Find an idle CPU topologically close to 'start', skipping the release
 * master when one is configured.  Returns NULL if none is available.
 * (get_nearest_available_cpu is a macro that assigns to 'affinity'.)
 */
static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
{
	cpu_entry_t *affinity;

	get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
#ifdef CONFIG_RELEASE_MASTER
			gsnedf.release_master
#else
			NO_CPU
#endif
			);

	return(affinity);
}
#endif
332 | |||
/* check for any necessary preemptions: repeatedly link the highest-priority
 * ready task onto the lowest-priority CPU until no ready task beats any
 * linked task.  Caller must hold gsnedf_lock. */
static void check_for_preemptions(void)
{
	struct task_struct *task;
	cpu_entry_t *last;

	for (last = lowest_prio_cpu();
	     edf_split_preemption_needed(&gsnedf, last->linked);
	     last = lowest_prio_cpu()) {
		/* preemption necessary */
		task = __take_ready(&gsnedf);
		TRACE("check_for_preemptions: attempting to link task %d to %d\n",
		      task->pid, last->cpu);

#ifdef CONFIG_SCHED_CPU_AFFINITY
		{
			/* prefer a nearby idle CPU over the globally
			 * lowest-priority one */
			cpu_entry_t *affinity =
				gsnedf_get_nearest_available_cpu(
					&per_cpu(gsnedf_cpu_entries,
						 task_cpu(task)));
			if (affinity)
				last = affinity;
			else if (last->linked)
				requeue(last->linked);
		}
#else
		if (last->linked)
			requeue(last->linked);
#endif

		link_task_to_cpu(task, last);
		preempt(last);
	}
}
367 | |||
368 | /* gsnedf_job_arrival: task is either resumed or released */ | ||
369 | static noinline void gsnedf_job_arrival(struct task_struct* task) | ||
370 | { | ||
371 | BUG_ON(!task); | ||
372 | |||
373 | requeue(task); | ||
374 | check_for_preemptions(); | ||
375 | } | ||
376 | |||
/* rt_domain callback: a batch of jobs reached their release time.
 * Merge them into the ready queue and preempt as needed. */
static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&gsnedf_lock, flags);

	__merge_ready(rt, tasks);
	check_for_preemptions();

	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
}
388 | |||
/* Job completed (voluntarily, or 'forced' by budget exhaustion).
 * Sets up the next job and its first subjob deadline.
 * Caller holds gsnedf_lock. */
static noinline void job_completion(struct task_struct *t, int forced)
{
	BUG_ON(!t);

	sched_trace_task_completion(t, forced);

	TRACE_TASK(t, "job_completion().\n");

	/* set flags */
	set_rt_flags(t, RT_F_SLEEP);
	/* prepare for next period */
	/* prepare_for_next_period assumes implicit deadlines and no splitting,
	 * so we call it with the job deadline it expects.
	 */
	t->rt_param.job_params.deadline = t->rt_param.job_params.release +
		t->rt_param.task_params.period;
	prepare_for_next_period(t);
	/* We now set the subjob deadline to what it should be for scheduling
	 * priority.
	 */
	t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
	if (is_released(t, litmus_clock()))
		sched_trace_task_release(t);
	/* unlink */
	unlink(t);
	/* requeue
	 * But don't requeue a blocking task. */
	if (is_running(t))
		gsnedf_job_arrival(t);
}
420 | |||
/* Advance t's splitting (subjob) deadline to its proper value and
 * re-evaluate t's position in the ready queue, since the new deadline
 * lowers its effective priority.
 * NOTE(review): appears to require gsnedf_lock held by the caller, as
 * with the other unlink()/gsnedf_job_arrival() users — confirm.
 */
static void move_deadline(struct task_struct *t)
{
	tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);
	/* Check if rescheduling needed with lower priority. */
	unlink(t);
	gsnedf_job_arrival(t);
}
428 | |||
429 | /* gsnedf_tick - this function is called for every local timer | ||
430 | * interrupt. | ||
431 | * | ||
432 | * checks whether the current task has expired and checks | ||
433 | * whether we need to preempt it if it has not expired | ||
434 | */ | ||
/* gsnedf_tick - this function is called for every local timer
 * interrupt.
 *
 * checks whether the current task has expired and checks
 * whether we need to preempt it if it has not expired
 */
static void gsnedf_tick(struct task_struct* t)
{
	/* Only budget-enforced real-time tasks that have exhausted their
	 * budget need any action here. */
	if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
		if (!is_np(t)) {
			/* np tasks will be preempted when they become
			 * preemptable again
			 */
			litmus_reschedule_local();
			TRACE("gsnedf_scheduler_tick: "
			      "%d is preemptable "
			      " => FORCE_RESCHED\n", t->pid);
		} else if (is_user_np(t)) {
			/* Ask the task to leave its non-preemptive
			 * section; preemption happens at sys_exit_np(). */
			TRACE("gsnedf_scheduler_tick: "
			      "%d is non-preemptable, "
			      "preemption delayed.\n", t->pid);
			request_exit_np(t);
		}
	}
}
454 | |||
455 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
456 | * assumptions on the state of the current task since it may be called for a | ||
457 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
458 | * was necessary, because sys_exit_np() was called, because some Linux | ||
459 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
460 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
461 | * current state is. | ||
462 | * | ||
463 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
464 | * | ||
465 | * The following assertions for the scheduled task could hold: | ||
466 | * | ||
467 | * - !is_running(scheduled) // the job blocks | ||
468 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
469 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
470 | * - linked != scheduled // we need to reschedule (for any reason) | ||
471 | * - is_np(scheduled) // rescheduling must be delayed, | ||
472 | * sys_exit_np must be requested | ||
473 | * | ||
474 | * Any of these can occur together. | ||
475 | */ | ||
/* Core scheduling decision for this CPU.  Determines the current state
 * of entry->scheduled (blocked, out of budget, sleeping, needing a
 * deadline move, non-preemptive, preempted), performs the resulting
 * completion/unlink/link transitions under gsnedf_lock, and returns the
 * task to run next (NULL = let Linux pick).  Also arms or cancels the
 * per-CPU splitting timer for the chosen task after dropping the lock.
 */
static struct task_struct* gsnedf_schedule(struct task_struct * prev)
{
	cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
	int out_of_time, sleep, preempt, np, exists, blocks, needs_move;
	struct task_struct* next = NULL;

#ifdef CONFIG_RELEASE_MASTER
	/* Bail out early if we are the release master.
	 * The release master never schedules any real-time tasks.
	 */
	if (unlikely(gsnedf.release_master == entry->cpu)) {
		sched_state_task_picked();
		return NULL;
	}
#endif

	raw_spin_lock(&gsnedf_lock);

	/* sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev);
	BUG_ON(entry->scheduled && !is_realtime(prev));
	BUG_ON(is_realtime(prev) && !entry->scheduled);

	/* (0) Determine state */
	exists      = entry->scheduled != NULL;
	blocks      = exists && !is_running(entry->scheduled);
	out_of_time = exists &&
				  budget_enforced(entry->scheduled) &&
				  budget_exhausted(entry->scheduled);
	needs_move  = exists && needs_deadline_move(entry->scheduled);
	np 	    = exists && is_np(entry->scheduled);
	sleep	    = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
	preempt     = entry->scheduled != entry->linked;

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
#endif

	if (exists)
		TRACE_TASK(prev,
			   "blocks:%d out_of_time:%d needs_move:%d np:%d"
			   " sleep:%d preempt:%d state:%d sig:%d\n",
			   blocks, out_of_time, needs_move, np, sleep, preempt,
			   prev->state, signal_pending(prev));
	if (entry->linked && preempt)
		TRACE_TASK(prev, "will be preempted by %s/%d\n",
			   entry->linked->comm, entry->linked->pid);


	/* If a task blocks we have no choice but to reschedule.
	 */
	if (blocks)
		unlink(entry->scheduled);

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * We need to make sure to update the link structure anyway in case
	 * that we are still linked. Multiple calls to request_exit_np() don't
	 * hurt.
	 *
	 * Job deadline moves handled similarly
	 */
	if (np && (out_of_time || preempt || sleep)) {
		unlink(entry->scheduled);
		request_exit_np(entry->scheduled);
	}
	else if (np && needs_move) {
		/* Deadline move is safe within a non-preemptive section. */
		move_deadline(entry->scheduled);
	}

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this. Don't do a job completion if we block (can't have timers running
	 * for blocked jobs). Preemptions go first for the same reason.
	 */
	if (!np && (out_of_time || sleep) && !blocks && !preempt)
		job_completion(entry->scheduled, !sleep);
	else if (!np && needs_move && !blocks && !preempt) {
		move_deadline(entry->scheduled);
	}

	/* Link pending task if we became unlinked.
	 */
	if (!entry->linked)
		link_task_to_cpu(__take_ready(&gsnedf), entry);

	/* The final scheduling decision. Do we need to switch for some reason?
	 * If linked is different from scheduled, then select linked as next.
	 */
	if ((!np || blocks) &&
	    entry->linked != entry->scheduled) {
		/* Schedule a linked job? */
		if (entry->linked) {
			entry->linked->rt_param.scheduled_on = entry->cpu;
			next = entry->linked;
			TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
		}
		if (entry->scheduled) {
			/* not gonna be scheduled soon */
			entry->scheduled->rt_param.scheduled_on = NO_CPU;
			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
		}
	} else
		/* Only override Linux scheduler if we have a real-time task
		 * scheduled that needs to continue.
		 */
		if (exists)
			next = prev;

	sched_state_task_picked();

	raw_spin_unlock(&gsnedf_lock);

	/* Manage the per-CPU split timer for the task we are about to run
	 * (or cancel it if this CPU goes idle / non-RT). */
	if (next) {
		arm_split_timer(entry, next);
	}
	else if (entry->timer_armed) {
		cancel_split_timer(entry);
	}

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE("gsnedf_lock released, next=0x%p\n", next);

	if (next)
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	else if (exists && !next)
		TRACE("becomes idle at %llu.\n", litmus_clock());
#endif


	return next;
}
607 | |||
608 | |||
609 | /* _finish_switch - we just finished the switch away from prev | ||
610 | */ | ||
/* _finish_switch - we just finished the switch away from prev.
 * Record what is now actually running on this CPU: current if it is a
 * real-time task, otherwise NULL (a non-RT task is in charge).
 */
static void gsnedf_finish_switch(struct task_struct *prev)
{
	cpu_entry_t* 	entry = &__get_cpu_var(gsnedf_cpu_entries);

	entry->scheduled = is_realtime(current) ? current : NULL;
#ifdef WANT_ALL_SCHED_EVENTS
	TRACE_TASK(prev, "switched away from\n");
#endif
}
620 | |||
/* Set up t so that its next job is released at time 'start'.
 * prepare_for_next_period() derives the new release/deadline from the
 * deadline field, so we seed it with 'start'; the subjob deadline used
 * for split scheduling is then recomputed from the new job parameters.
 */
static void gsnedf_release_at(struct task_struct *t, lt_t start)
{
	t->rt_param.job_params.deadline = start;
	prepare_for_next_period(t);
	t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
	set_rt_flags(t, RT_F_RUNNING);
}
628 | |||
629 | /* Prepare a task for running in RT mode | ||
630 | */ | ||
/* Prepare a task for running in RT mode.
 * Initializes t's job parameters with an immediate release, records
 * whether t is currently executing on a CPU (unless that CPU is the
 * release master, which never runs RT tasks), and enqueues t.
 */
static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
{
	unsigned long 		flags;
	cpu_entry_t* 		entry;

	TRACE("gsn edf: task new %d\n", t->pid);

	raw_spin_lock_irqsave(&gsnedf_lock, flags);

	/* setup job params */
	gsnedf_release_at(t, litmus_clock());

	if (running) {
		entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
		/* The CPU cannot already have an RT task scheduled. */
		BUG_ON(entry->scheduled);

#ifdef CONFIG_RELEASE_MASTER
		if (entry->cpu != gsnedf.release_master) {
#endif
			entry->scheduled = t;
			tsk_rt(t)->scheduled_on = task_cpu(t);
#ifdef CONFIG_RELEASE_MASTER
		} else {
			/* do not schedule on release master */
			preempt(entry); /* force resched */
			tsk_rt(t)->scheduled_on = NO_CPU;
		}
#endif
	} else {
		t->rt_param.scheduled_on = NO_CPU;
	}
	t->rt_param.linked_on          = NO_CPU;

	gsnedf_job_arrival(t);
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
}
667 | |||
/* A task resumed after a suspension.  Distinguish semaphore-related
 * suspensions (not a new job), tardy wake-ups (treated as a new
 * sporadic release), and in-time wake-ups, then requeue the task.
 */
static void gsnedf_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	lt_t now;

	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());

	raw_spin_lock_irqsave(&gsnedf_lock, flags);
	/* We need to take suspensions because of semaphores into
	 * account! If a job resumes after being suspended due to acquiring
	 * a semaphore, it should never be treated as a new job release.
	 */
	if (get_rt_flags(task) == RT_F_EXIT_SEM) {
		set_rt_flags(task, RT_F_RUNNING);
	} else {
		now = litmus_clock();
		if (is_tardy(task, now)) {
			/* new sporadic release */
			gsnedf_release_at(task, now);
			sched_trace_task_release(task);
		}
		else {
			/* NOTE(review): rt.time_slice is Linux's RR slice
			 * counter; its use as "came back in time" guard is
			 * inherited from GSN-EDF — confirm semantics. */
			if (task->rt.time_slice) {
				/* came back in time before deadline
				 */
				set_rt_flags(task, RT_F_RUNNING);
			}
		}
	}
	gsnedf_job_arrival(task);
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
}
700 | |||
/* A real-time task is about to block (suspend): remove it from all
 * scheduler linkage so no CPU considers it runnable.
 */
static void gsnedf_task_block(struct task_struct *t)
{
	unsigned long flags;

	TRACE_TASK(t, "block at %llu\n", litmus_clock());

	/* unlink if necessary */
	raw_spin_lock_irqsave(&gsnedf_lock, flags);
	unlink(t);
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);

	/* Only real-time tasks may reach this callback. */
	BUG_ON(!is_realtime(t));
}
714 | |||
715 | |||
/* A real-time task is exiting: unlink it and clear any stale
 * per-CPU 'scheduled' reference so the CPU state does not point at a
 * dead task.
 */
static void gsnedf_task_exit(struct task_struct * t)
{
	unsigned long flags;

	/* unlink if necessary */
	raw_spin_lock_irqsave(&gsnedf_lock, flags);
	unlink(t);
	if (tsk_rt(t)->scheduled_on != NO_CPU) {
		gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
		tsk_rt(t)->scheduled_on = NO_CPU;
	}
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);

	BUG_ON(!is_realtime(t));
	TRACE_TASK(t, "RIP\n");
}
732 | |||
733 | |||
/* Admission test: this plugin accepts every task unconditionally —
 * no schedulability check is performed here.
 */
static long gsnedf_admit_task(struct task_struct* tsk)
{
	/* Always admit. */
	return 0;
}
738 | |||
739 | #ifdef CONFIG_LITMUS_LOCKING | ||
740 | |||
741 | #include <litmus/fdso.h> | ||
742 | |||
743 | /* called with IRQs off */ | ||
/* Make t inherit the priority of prio_inh (priority inheritance for
 * the FMLP).  Depending on where t currently is — linked to a CPU,
 * queued in a ready/release heap, or neither — the appropriate data
 * structure is reordered to reflect t's raised priority.
 * called with IRQs off */
static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
{
	int linked_on;
	int check_preempt = 0;

	raw_spin_lock(&gsnedf_lock);

	TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
	tsk_rt(t)->inh_task = prio_inh;

	linked_on  = tsk_rt(t)->linked_on;

	/* If it is scheduled, then we need to reorder the CPU heap. */
	if (linked_on != NO_CPU) {
		TRACE_TASK(t, "%s: linked  on %d\n",
			   __FUNCTION__, linked_on);
		/* Holder is scheduled; need to re-order CPUs.
		 * We can't use heap_decrease() here since
		 * the cpu_heap is ordered in reverse direction, so
		 * it is actually an increase. */
		bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
			    gsnedf_cpus[linked_on]->hn);
		bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
			    gsnedf_cpus[linked_on]->hn);
	} else {
		/* holder may be queued: first stop queue changes */
		raw_spin_lock(&gsnedf.release_lock);
		if (is_queued(t)) {
			TRACE_TASK(t, "%s: is queued\n",
				   __FUNCTION__);
			/* We need to update the position of holder in some
			 * heap. Note that this could be a release heap if we
			 * budget enforcement is used and this job overran. */
			check_preempt =
				!bheap_decrease(edf_split_ready_order,
					       tsk_rt(t)->heap_node);
		} else {
			/* Nothing to do: if it is not queued and not linked
			 * then it is either sleeping or currently being moved
			 * by other code (e.g., a timer interrupt handler) that
			 * will use the correct priority when enqueuing the
			 * task. */
			TRACE_TASK(t, "%s: is NOT queued => Done.\n",
				   __FUNCTION__);
		}
		raw_spin_unlock(&gsnedf.release_lock);

		/* If holder was enqueued in a release heap, then the following
		 * preemption check is pointless, but we can't easily detect
		 * that case. If you want to fix this, then consider that
		 * simply adding a state flag requires O(n) time to update when
		 * releasing n tasks, which conflicts with the goal to have
		 * O(log n) merges. */
		if (check_preempt) {
			/* heap_decrease() hit the top level of the heap: make
			 * sure preemption checks get the right task, not the
			 * potentially stale cache. */
			bheap_uncache_min(edf_split_ready_order,
					 &gsnedf.ready_queue);
			check_for_preemptions();
		}
	}

	raw_spin_unlock(&gsnedf_lock);
}
809 | |||
810 | /* called with IRQs off */ | ||
/* Revoke any inherited priority from t (it just released a resource)
 * and recompute its splitting deadline, then re-evaluate its queue
 * position since its effective priority dropped.
 * called with IRQs off */
static void update_unlocked_priority(struct task_struct* t)
{
	raw_spin_lock(&gsnedf_lock);

	/* A job only stops inheriting a priority when it releases a
	 * resource. Thus we can make the following assumption.*/
	BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);

	/* Clear priority inheritance */
	TRACE_TASK(t, "priority restored\n");
	tsk_rt(t)->inh_task = NULL;

	/* Update splitting deadline */
	tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);

	/* Check if rescheduling is necessary. We can't use heap_decrease()
	 * since the priority was effectively lowered. */
	unlink(t);
	gsnedf_job_arrival(t);

	raw_spin_unlock(&gsnedf_lock);
}
833 | |||
834 | |||
835 | /* ******************** FMLP support ********************** */ | ||
836 | |||
/* struct for semaphore with priority inheritance.
 * The embedded wait-queue spinlock (wait.lock) also protects the
 * owner and hp_waiter fields. */
struct fmlp_semaphore {
	struct litmus_lock litmus_lock;

	/* current resource holder */
	struct task_struct *owner;

	/* highest-priority waiter */
	struct task_struct *hp_waiter;

	/* FIFO queue of waiting tasks */
	wait_queue_head_t wait;
};
850 | |||
/* Recover the enclosing fmlp_semaphore from its embedded litmus_lock. */
static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
{
	return container_of(lock, struct fmlp_semaphore, litmus_lock);
}
855 | |||
/* Scan the semaphore's wait queue and return the highest-priority
 * waiter (by edf_split_higher_prio), ignoring 'skip'; returns NULL if
 * no other waiter exists.
 * caller is responsible for locking */
static struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem,
					  struct task_struct* skip)
{
	struct list_head	*pos;
	struct task_struct 	*queued, *found = NULL;

	list_for_each(pos, &sem->wait.task_list) {
		queued  = (struct task_struct*) list_entry(pos, wait_queue_t,
							   task_list)->private;

		/* Compare task prios, find high prio task. */
		if (queued != skip && edf_split_higher_prio(queued, found))
			found = queued;
	}
	return found;
}
873 | |||
/* Acquire an FMLP semaphore.  If the resource is held, the caller
 * enqueues itself FIFO, possibly propagates its priority to the holder,
 * and suspends; FIFO hand-off in unlock() guarantees ownership on
 * wake-up.  Marks the task as in a critical section and cancels the
 * split timer, since subjob deadlines are not moved while locked.
 * Returns 0 on success, -EPERM for non-real-time callers.
 */
int gsnedf_fmlp_lock(struct litmus_lock* l)
{
	struct task_struct* t = current;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	cpu_entry_t* entry;
	wait_queue_t wait;
	unsigned long flags;

	if (!is_realtime(t))
		return -EPERM;

	spin_lock_irqsave(&sem->wait.lock, flags);
	entry = &__get_cpu_var(gsnedf_cpu_entries);

	tsk_rt(t)->in_crit_section = 1;
	if (entry->timer_armed) {
		cancel_split_timer(entry);
	}

	if (sem->owner) {
		/* resource is not free => must suspend and wait */

		init_waitqueue_entry(&wait, t);

		/* FIXME: interruptible would be nice some day */
		set_task_state(t, TASK_UNINTERRUPTIBLE);

		__add_wait_queue_tail_exclusive(&sem->wait, &wait);

		/* check if we need to activate priority inheritance */
		if (edf_split_higher_prio(t, sem->hp_waiter)) {
			sem->hp_waiter = t;
			if (edf_split_higher_prio(t, sem->owner))
				set_priority_inheritance(sem->owner, sem->hp_waiter);
		}

		TS_LOCK_SUSPEND;

		/* release lock before sleeping */
		spin_unlock_irqrestore(&sem->wait.lock, flags);

		/* We depend on the FIFO order.  Thus, we don't need to recheck
		 * when we wake up; we are guaranteed to have the lock since
		 * there is only one wake up per release.
		 */

		schedule();

		TS_LOCK_RESUME;

		/* Since we hold the lock, no other task will change
		 * ->owner. We can thus check it without acquiring the spin
		 * lock. */
		BUG_ON(sem->owner != t);
	} else {
		/* it's ours now */
		sem->owner = t;

		spin_unlock_irqrestore(&sem->wait.lock, flags);
	}

	return 0;
}
937 | |||
938 | int gsnedf_fmlp_unlock(struct litmus_lock* l) | ||
939 | { | ||
940 | struct task_struct *t = current, *next; | ||
941 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
942 | unsigned long flags; | ||
943 | int err = 0; | ||
944 | |||
945 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
946 | |||
947 | if (sem->owner != t) { | ||
948 | err = -EINVAL; | ||
949 | goto out; | ||
950 | } | ||
951 | |||
952 | /* check if there are jobs waiting for this resource */ | ||
953 | next = __waitqueue_remove_first(&sem->wait); | ||
954 | if (next) { | ||
955 | /* next becomes the resouce holder */ | ||
956 | sem->owner = next; | ||
957 | TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid); | ||
958 | |||
959 | /* determine new hp_waiter if necessary */ | ||
960 | if (next == sem->hp_waiter) { | ||
961 | TRACE_TASK(next, "was highest-prio waiter\n"); | ||
962 | /* next has the highest priority --- it doesn't need to | ||
963 | * inherit. However, we need to make sure that the | ||
964 | * next-highest priority in the queue is reflected in | ||
965 | * hp_waiter. */ | ||
966 | sem->hp_waiter = find_hp_waiter(sem, next); | ||
967 | if (sem->hp_waiter) | ||
968 | TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n"); | ||
969 | else | ||
970 | TRACE("no further waiters\n"); | ||
971 | } else { | ||
972 | /* Well, if next is not the highest-priority waiter, | ||
973 | * then it ought to inherit the highest-priority | ||
974 | * waiter's priority. */ | ||
975 | set_priority_inheritance(next, sem->hp_waiter); | ||
976 | } | ||
977 | |||
978 | /* wake up next */ | ||
979 | wake_up_process(next); | ||
980 | } else | ||
981 | /* becomes available */ | ||
982 | sem->owner = NULL; | ||
983 | |||
984 | /* We are no longer in the critical section */ | ||
985 | tsk_rt(t)->in_crit_section = 0; | ||
986 | |||
987 | /* we lose the benefit of priority inheritance (if any) and may need | ||
988 | * to move the deadline. In either case, may need to reschedule | ||
989 | * due to reduced priority. */ | ||
990 | if (tsk_rt(t)->inh_task || needs_deadline_move(t)) | ||
991 | update_unlocked_priority(t); | ||
992 | /* TODO: Check that schedule() gets called - it needs to arm the | ||
993 | * enforcement timer. Otherwise we should do it here or in | ||
994 | * update_unlocked_priority. */ | ||
995 | |||
996 | out: | ||
997 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
998 | |||
999 | return err; | ||
1000 | } | ||
1001 | |||
1002 | int gsnedf_fmlp_close(struct litmus_lock* l) | ||
1003 | { | ||
1004 | struct task_struct *t = current; | ||
1005 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
1006 | unsigned long flags; | ||
1007 | |||
1008 | int owner; | ||
1009 | |||
1010 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
1011 | |||
1012 | owner = sem->owner == t; | ||
1013 | |||
1014 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
1015 | |||
1016 | if (owner) | ||
1017 | gsnedf_fmlp_unlock(l); | ||
1018 | |||
1019 | return 0; | ||
1020 | } | ||
1021 | |||
/* Deallocate hook: free the semaphore allocated in gsnedf_new_fmlp(). */
void gsnedf_fmlp_free(struct litmus_lock* lock)
{
	kfree(fmlp_from_lock(lock));
}
1026 | |||
/* Operations table wiring the FMLP semaphore into the generic
 * litmus_lock framework. */
static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
	.close  = gsnedf_fmlp_close,
	.lock   = gsnedf_fmlp_lock,
	.unlock = gsnedf_fmlp_unlock,
	.deallocate = gsnedf_fmlp_free,
};
1033 | |||
/* Allocate and initialize a new FMLP semaphore.
 * Returns a pointer to the embedded litmus_lock, or NULL on allocation
 * failure (caller maps that to -ENOMEM).
 */
static struct litmus_lock* gsnedf_new_fmlp(void)
{
	struct fmlp_semaphore* sem;

	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
	if (!sem)
		return NULL;

	sem->owner   = NULL;
	sem->hp_waiter = NULL;
	init_waitqueue_head(&sem->wait);
	sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops;

	return &sem->litmus_lock;
}
1049 | |||
1050 | /* **** lock constructor **** */ | ||
1051 | |||
1052 | |||
1053 | static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, | ||
1054 | void* __user unused) | ||
1055 | { | ||
1056 | int err = -ENXIO; | ||
1057 | |||
1058 | /* GSN-EDF currently only supports the FMLP for global resources. */ | ||
1059 | switch (type) { | ||
1060 | |||
1061 | case FMLP_SEM: | ||
1062 | /* Flexible Multiprocessor Locking Protocol */ | ||
1063 | *lock = gsnedf_new_fmlp(); | ||
1064 | if (*lock) | ||
1065 | err = 0; | ||
1066 | else | ||
1067 | err = -ENOMEM; | ||
1068 | break; | ||
1069 | |||
1070 | }; | ||
1071 | |||
1072 | return err; | ||
1073 | } | ||
1074 | |||
1075 | #endif | ||
1076 | |||
1077 | |||
/* Plugin activation: (re)initialize the CPU heap and per-CPU linkage
 * state for all online CPUs.  The release master (if configured) is
 * excluded from the heap because it never schedules RT tasks.
 * Always returns 0.
 */
static long gsnedf_activate_plugin(void)
{
	int cpu;
	cpu_entry_t *entry;

	bheap_init(&gsnedf_cpu_heap);
#ifdef CONFIG_RELEASE_MASTER
	gsnedf.release_master = atomic_read(&release_master_cpu);
#endif

	for_each_online_cpu(cpu) {
		entry = &per_cpu(gsnedf_cpu_entries, cpu);
		bheap_node_init(&entry->hn, entry);
		entry->linked    = NULL;
		entry->scheduled = NULL;
#ifdef CONFIG_RELEASE_MASTER
		if (cpu != gsnedf.release_master) {
#endif
			TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
			update_cpu_position(entry);
#ifdef CONFIG_RELEASE_MASTER
		} else {
			TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
		}
#endif
	}
	return 0;
}
1106 | |||
1107 | /* Plugin object */ | ||
/* Plugin object.
 * NOTE(review): the object is named gfl_plugin but advertises itself as
 * "GSN-EDF"; this looks like a split-scheduling (GFL) variant derived
 * from GSN-EDF — confirm the intended user-visible plugin name. */
static struct sched_plugin gfl_plugin __cacheline_aligned_in_smp = {
	.plugin_name		= "GSN-EDF",
	.finish_switch		= gsnedf_finish_switch,
	.tick			= gsnedf_tick,
	.task_new		= gsnedf_task_new,
	.complete_job		= complete_job,
	.task_exit		= gsnedf_task_exit,
	.schedule		= gsnedf_schedule,
	.release_at		= gsnedf_release_at,
	.task_wake_up		= gsnedf_task_wake_up,
	.task_block		= gsnedf_task_block,
	.admit_task		= gsnedf_admit_task,
	.activate_plugin	= gsnedf_activate_plugin,
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock		= gsnedf_allocate_lock,
#endif
};
1125 | |||
1126 | |||
/* Module init: set up per-CPU entries (heap nodes and split timers),
 * initialize the EDF-split domain, and register the plugin with the
 * LITMUS^RT core.
 */
static int __init init_gfl(void)
{
	int cpu;
	cpu_entry_t *entry;

	bheap_init(&gsnedf_cpu_heap);
	/* initialize CPU state */
	for (cpu = 0; cpu < NR_CPUS; cpu++)  {
		entry = &per_cpu(gsnedf_cpu_entries, cpu);
		gsnedf_cpus[cpu] = entry;
		entry->cpu 	 = cpu;
		entry->hn        = &gsnedf_heap_node[cpu];
		/* Per-CPU high-resolution timer driving subjob splits. */
		hrtimer_init(&entry->split_timer,
			     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
		entry->split_timer.function = on_split_timeout;
		bheap_node_init(&entry->hn, entry);
	}
	edf_split_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
	return register_sched_plugin(&gfl_plugin);
}
1147 | |||
1148 | |||
1149 | module_init(init_gfl); | ||
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c new file mode 100644 index 000000000000..9debea981419 --- /dev/null +++ b/litmus/sched_gsn_edf.c | |||
@@ -0,0 +1,1286 @@ | |||
1 | /* | ||
2 | * litmus/sched_gsn_edf.c | ||
3 | * | ||
4 | * Implementation of the GSN-EDF scheduling algorithm. | ||
5 | * | ||
6 | * This version uses the simple approach and serializes all scheduling | ||
7 | * decisions by the use of a queue lock. This is probably not the | ||
8 | * best way to do it, but it should suffice for now. | ||
9 | */ | ||
10 | |||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/percpu.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/slab.h> | ||
15 | |||
16 | #include <litmus/litmus.h> | ||
17 | #include <litmus/wait.h> | ||
18 | #include <litmus/jobs.h> | ||
19 | #include <litmus/sched_plugin.h> | ||
20 | #include <litmus/edf_common.h> | ||
21 | #include <litmus/sched_trace.h> | ||
22 | #include <litmus/trace.h> | ||
23 | |||
24 | #include <litmus/preempt.h> | ||
25 | |||
26 | #include <litmus/bheap.h> | ||
27 | |||
28 | #include <linux/module.h> | ||
29 | |||
30 | /* Overview of GSN-EDF operations. | ||
31 | * | ||
32 | * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This | ||
33 | * description only covers how the individual operations are implemented in | ||
34 | * LITMUS. | ||
35 | * | ||
36 | * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage | ||
37 | * structure (NOT the actually scheduled | ||
38 | * task). If there is another linked task To | ||
39 | * already it will set To->linked_on = NO_CPU | ||
40 | * (thereby removing its association with this | ||
41 | * CPU). However, it will not requeue the | ||
42 | * previously linked task (if any). It will set | ||
43 | * T's state to RT_F_RUNNING and check whether | ||
44 | * it is already running somewhere else. If T | ||
45 | * is scheduled somewhere else it will link | ||
46 | * it to that CPU instead (and pull the linked | ||
47 | * task to cpu). T may be NULL. | ||
48 | * | ||
49 | * unlink(T) - Unlink removes T from all scheduler data | ||
50 | * structures. If it is linked to some CPU it | ||
51 | * will link NULL to that CPU. If it is | ||
52 | * currently queued in the gsnedf queue it will | ||
53 | * be removed from the rt_domain. It is safe to | ||
54 | * call unlink(T) if T is not linked. T may not | ||
55 | * be NULL. | ||
56 | * | ||
57 | * requeue(T) - Requeue will insert T into the appropriate | ||
58 | * queue. If the system is in real-time mode and | ||
59 | * the T is released already, it will go into the | ||
60 | * ready queue. If the system is not in | ||
61 | * real-time mode is T, then T will go into the | ||
62 | * release queue. If T's release time is in the | ||
63 | * future, it will go into the release | ||
64 | * queue. That means that T's release time/job | ||
 * no/etc. has to be updated before requeue(T) is
66 | * called. It is not safe to call requeue(T) | ||
67 | * when T is already queued. T may not be NULL. | ||
68 | * | ||
69 | * gsnedf_job_arrival(T) - This is the catch all function when T enters | ||
70 | * the system after either a suspension or at a | ||
71 | * job release. It will queue T (which means it | ||
72 | * is not safe to call gsnedf_job_arrival(T) if | ||
73 | * T is already queued) and then check whether a | ||
74 | * preemption is necessary. If a preemption is | ||
75 | * necessary it will update the linkage | ||
76 | * accordingly and cause scheduled to be called | ||
77 | * (either with an IPI or need_resched). It is | ||
78 | * safe to call gsnedf_job_arrival(T) if T's | ||
79 | * next job has not been actually released yet | ||
 * (release time in the future). T will be put
81 | * on the release queue in that case. | ||
82 | * | ||
83 | * job_completion(T) - Take care of everything that needs to be done | ||
84 | * to prepare T for its next release and place | ||
85 | * it in the right queue with | ||
86 | * gsnedf_job_arrival(). | ||
87 | * | ||
88 | * | ||
 * When we know that T is linked to CPU then link_task_to_cpu(NULL, CPU) is
90 | * equivalent to unlink(T). Note that if you unlink a task from a CPU none of | ||
91 | * the functions will automatically propagate pending task from the ready queue | ||
92 | * to a linked task. This is the job of the calling function ( by means of | ||
93 | * __take_ready). | ||
94 | */ | ||
95 | |||
96 | |||
/* cpu_entry_t - maintain the linked and scheduled state
 */
typedef struct {
	int cpu;
	struct task_struct* linked; /* only RT tasks */
	struct task_struct* scheduled; /* only RT tasks */
	struct bheap_node* hn; /* this CPU's node in gsnedf_cpu_heap */
} cpu_entry_t;
/* per-CPU scheduler state, indexed by CPU number */
DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);

/* direct pointers to each CPU's entry for O(1) lookup by CPU id.
 * NOTE(review): not static --- confirm no sibling plugin file defines
 * a symbol of the same name (would be a multiple-definition clash). */
cpu_entry_t* gsnedf_cpus[NR_CPUS];

/* the cpus queue themselves according to priority in here */
static struct bheap_node gsnedf_heap_node[NR_CPUS];
static struct bheap gsnedf_cpu_heap;

/* the single global EDF domain; its ready_lock serializes all
 * scheduling decisions (see file header comment) */
static rt_domain_t gsnedf;
#define gsnedf_lock (gsnedf.ready_lock)
115 | |||
116 | |||
117 | /* Uncomment this if you want to see all scheduling decisions in the | ||
118 | * TRACE() log. | ||
119 | #define WANT_ALL_SCHED_EVENTS | ||
120 | */ | ||
121 | |||
122 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
123 | { | ||
124 | cpu_entry_t *a, *b; | ||
125 | a = _a->value; | ||
126 | b = _b->value; | ||
127 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
128 | * the top of the heap. | ||
129 | */ | ||
130 | return edf_higher_prio(b->linked, a->linked); | ||
131 | } | ||
132 | |||
133 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
134 | * order in the cpu queue. Caller must hold gsnedf lock. | ||
135 | */ | ||
136 | static void update_cpu_position(cpu_entry_t *entry) | ||
137 | { | ||
138 | if (likely(bheap_node_in_heap(entry->hn))) | ||
139 | bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); | ||
140 | bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); | ||
141 | } | ||
142 | |||
143 | /* caller must hold gsnedf lock */ | ||
144 | static cpu_entry_t* lowest_prio_cpu(void) | ||
145 | { | ||
146 | struct bheap_node* hn; | ||
147 | hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap); | ||
148 | return hn->value; | ||
149 | } | ||
150 | |||
151 | |||
/* link_task_to_cpu - Update the link of a CPU.
 * Handles the case where the to-be-linked task is already
 * scheduled on a different CPU.
 *
 * 'linked' may be NULL (unlinks the entry). See the file header for the
 * full contract of this operation.
 */
static noinline void link_task_to_cpu(struct task_struct* linked,
				      cpu_entry_t *entry)
{
	cpu_entry_t *sched;
	struct task_struct* tmp;
	int on_cpu;

	/* only real-time tasks may ever be linked */
	BUG_ON(linked && !is_realtime(linked));

	/* Currently linked task is set to be unlinked. */
	if (entry->linked) {
		entry->linked->rt_param.linked_on = NO_CPU;
	}

	/* Link new task to CPU. */
	if (linked) {
		set_rt_flags(linked, RT_F_RUNNING);
		/* handle task is already scheduled somewhere! */
		on_cpu = linked->rt_param.scheduled_on;
		if (on_cpu != NO_CPU) {
			sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
			/* this should only happen if not linked already */
			BUG_ON(sched->linked == linked);

			/* If we are already scheduled on the CPU to which we
			 * wanted to link, we don't need to do the swap --
			 * we just link ourselves to the CPU and depend on
			 * the caller to get things right.
			 */
			if (entry != sched) {
				TRACE_TASK(linked,
					   "already scheduled on %d, updating link.\n",
					   sched->cpu);
				/* swap: link 'linked' to the CPU it is already
				 * running on, and continue below with that CPU's
				 * previously linked task instead */
				tmp = sched->linked;
				linked->rt_param.linked_on = sched->cpu;
				sched->linked = linked;
				update_cpu_position(sched);
				linked = tmp;
			}
		}
		if (linked) /* might be NULL due to swap */
			linked->rt_param.linked_on = entry->cpu;
	}
	entry->linked = linked;
#ifdef WANT_ALL_SCHED_EVENTS
	if (linked)
		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
	else
		TRACE("NULL linked to %d.\n", entry->cpu);
#endif
	/* entry's effective priority changed --- fix its heap position */
	update_cpu_position(entry);
}
208 | |||
/* unlink - Make sure a task is not linked any longer to an entry
 * where it was linked before. Must hold gsnedf_lock.
 *
 * Safe to call when t is not linked; t must not be NULL.
 */
static noinline void unlink(struct task_struct* t)
{
	cpu_entry_t *entry;

	if (t->rt_param.linked_on != NO_CPU) {
		/* unlink: the CPU it was linked to now links NULL */
		entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
		t->rt_param.linked_on = NO_CPU;
		link_task_to_cpu(NULL, entry);
	} else if (is_queued(t)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked. It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 */
		remove(&gsnedf, t);
	}
}
232 | |||
233 | |||
/* preempt - force a CPU to reschedule
 *
 * Delegates to preempt_if_preemptable(); the currently scheduled task on
 * entry->cpu (may be NULL) is passed so the preemption layer can decide
 * how to deliver the reschedule request.
 */
static void preempt(cpu_entry_t *entry)
{
	preempt_if_preemptable(entry->scheduled, entry->cpu);
}
240 | |||
241 | /* requeue - Put an unlinked task into gsn-edf domain. | ||
242 | * Caller must hold gsnedf_lock. | ||
243 | */ | ||
244 | static noinline void requeue(struct task_struct* task) | ||
245 | { | ||
246 | BUG_ON(!task); | ||
247 | /* sanity check before insertion */ | ||
248 | BUG_ON(is_queued(task)); | ||
249 | |||
250 | if (is_released(task, litmus_clock())) | ||
251 | __add_ready(&gsnedf, task); | ||
252 | else { | ||
253 | /* it has got to wait */ | ||
254 | add_release(&gsnedf, task); | ||
255 | } | ||
256 | } | ||
257 | |||
/* check for any necessary preemptions
 *
 * Repeatedly links the highest-priority ready job to the CPU whose linked
 * job has the lowest priority, for as long as the ready job wins the
 * comparison. Caller must hold gsnedf_lock.
 */
static void check_for_preemptions(void)
{
	struct task_struct *task;
	cpu_entry_t* last;

	for(last = lowest_prio_cpu();
	    edf_preemption_needed(&gsnedf, last->linked);
	    last = lowest_prio_cpu()) {
		/* preemption necessary */
		task = __take_ready(&gsnedf);
		TRACE_TASK(task, "attempting to link to P%d\n",
			   last->cpu);
		/* the displaced job goes back into the domain's queues */
		if (last->linked)
			requeue(last->linked);
		link_task_to_cpu(task, last);
		/* force the victim CPU to reschedule (possibly via IPI) */
		preempt(last);
	}
}
277 | |||
278 | /* gsnedf_job_arrival: task is either resumed or released */ | ||
279 | static noinline void gsnedf_job_arrival(struct task_struct* task) | ||
280 | { | ||
281 | BUG_ON(!task); | ||
282 | |||
283 | requeue(task); | ||
284 | check_for_preemptions(); | ||
285 | } | ||
286 | |||
287 | static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
288 | { | ||
289 | unsigned long flags; | ||
290 | |||
291 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
292 | |||
293 | __merge_ready(rt, tasks); | ||
294 | check_for_preemptions(); | ||
295 | |||
296 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
297 | } | ||
298 | |||
/* caller holds gsnedf_lock
 *
 * Finish the current job of t and set up its next release.
 * 'forced' is non-zero when the completion is due to budget exhaustion
 * rather than a voluntary job completion.
 */
static noinline void job_completion(struct task_struct *t, int forced)
{
	BUG_ON(!t);

	sched_trace_task_completion(t, forced);

	TRACE_TASK(t, "job_completion().\n");

	/* set flags */
	set_rt_flags(t, RT_F_SLEEP);
	/* prepare for next period */
	prepare_for_next_period(t);
	if (is_released(t, litmus_clock()))
		sched_trace_task_release(t);
	/* unlink */
	unlink(t);
	/* requeue
	 * But don't requeue a blocking task. */
	if (is_running(t))
		gsnedf_job_arrival(t);
}
321 | |||
/* gsnedf_tick - this function is called for every local timer
 *               interrupt.
 *
 *               checks whether the current task has expired and checks
 *               whether we need to preempt it if it has not expired
 */
static void gsnedf_tick(struct task_struct* t)
{
	/* only act on budget-enforced real-time tasks that overran */
	if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
		if (!is_np(t)) {
			/* np tasks will be preempted when they become
			 * preemptable again
			 */
			litmus_reschedule_local();
			TRACE("gsnedf_scheduler_tick: "
			      "%d is preemptable "
			      " => FORCE_RESCHED\n", t->pid);
		} else if (is_user_np(t)) {
			/* user-space non-preemptive section: ask the task
			 * to leave it instead of preempting now */
			TRACE("gsnedf_scheduler_tick: "
			      "%d is non-preemptable, "
			      "preemption delayed.\n", t->pid);
			request_exit_np(t);
		}
	}
}
347 | |||
348 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
349 | * assumptions on the state of the current task since it may be called for a | ||
350 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
351 | * was necessary, because sys_exit_np() was called, because some Linux | ||
352 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
353 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
354 | * current state is. | ||
355 | * | ||
356 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
357 | * | ||
358 | * The following assertions for the scheduled task could hold: | ||
359 | * | ||
360 | * - !is_running(scheduled) // the job blocks | ||
361 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
362 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
363 | * - linked != scheduled // we need to reschedule (for any reason) | ||
364 | * - is_np(scheduled) // rescheduling must be delayed, | ||
365 | * sys_exit_np must be requested | ||
366 | * | ||
367 | * Any of these can occur together. | ||
368 | */ | ||
/* Main scheduling entry point: picks the next task for this CPU.
 * See the large comment above for the possible states of 'prev'. */
static struct task_struct* gsnedf_schedule(struct task_struct * prev)
{
	cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
	int out_of_time, sleep, preempt, np, exists, blocks;
	struct task_struct* next = NULL;

#ifdef CONFIG_RELEASE_MASTER
	/* Bail out early if we are the release master.
	 * The release master never schedules any real-time tasks.
	 */
	if (gsnedf.release_master == entry->cpu) {
		sched_state_task_picked();
		return NULL;
	}
#endif

	raw_spin_lock(&gsnedf_lock);

	/* sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev);
	BUG_ON(entry->scheduled && !is_realtime(prev));
	BUG_ON(is_realtime(prev) && !entry->scheduled);

	/* (0) Determine state */
	exists = entry->scheduled != NULL;
	blocks = exists && !is_running(entry->scheduled);
	out_of_time = exists &&
		budget_enforced(entry->scheduled) &&
		budget_exhausted(entry->scheduled);
	np = exists && is_np(entry->scheduled);
	sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
	preempt = entry->scheduled != entry->linked;

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
#endif

	if (exists)
		TRACE_TASK(prev,
			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
			   "state:%d sig:%d\n",
			   blocks, out_of_time, np, sleep, preempt,
			   prev->state, signal_pending(prev));
	if (entry->linked && preempt && !np)
		TRACE_TASK(prev, "will be preempted by %s/%d\n",
			   entry->linked->comm, entry->linked->pid);


	/* If a task blocks we have no choice but to reschedule.
	 */
	if (blocks)
		unlink(entry->scheduled);

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * Do not unlink since entry->scheduled is currently in the ready queue.
	 * We don't process out_of_time and sleep until the job is preemptive again.
	 */
	if (np && (out_of_time || preempt || sleep)) {
		request_exit_np(entry->scheduled);
	}

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this. Don't do a job completion if we block (can't have timers running
	 * for blocked jobs). Preemption go first for the same reason.
	 */
	if (!np && (out_of_time || sleep) && !blocks && !preempt)
		job_completion(entry->scheduled, !sleep);

	/* Link pending task if we became unlinked.
	 */
	if (!entry->linked)
		link_task_to_cpu(__take_ready(&gsnedf), entry);

	/* The final scheduling decision. Do we need to switch for some reason?
	 * If linked is different from scheduled, then select linked as next.
	 */
	if ((!np || blocks) &&
	    entry->linked != entry->scheduled) {
		/* Schedule a linked job? */
		if (entry->linked) {
			entry->linked->rt_param.scheduled_on = entry->cpu;
			next = entry->linked;
			TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
		}
		if (entry->scheduled) {
			/* not gonna be scheduled soon */
			entry->scheduled->rt_param.scheduled_on = NO_CPU;
			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
		}
	} else
		/* Only override Linux scheduler if we have a real-time task
		 * scheduled that needs to continue.
		 */
		if (exists)
			next = prev;

	sched_state_task_picked();

	raw_spin_unlock(&gsnedf_lock);

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE("gsnedf_lock released, next=0x%p\n", next);

	if (next)
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	else if (exists && !next)
		TRACE("becomes idle at %llu.\n", litmus_clock());
#endif


	return next;
}
482 | |||
483 | |||
484 | /* _finish_switch - we just finished the switch away from prev | ||
485 | */ | ||
486 | static void gsnedf_finish_switch(struct task_struct *prev) | ||
487 | { | ||
488 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | ||
489 | |||
490 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
491 | #ifdef WANT_ALL_SCHED_EVENTS | ||
492 | TRACE_TASK(prev, "switched away from\n"); | ||
493 | #endif | ||
494 | } | ||
495 | |||
496 | |||
/* Prepare a task for running in RT mode
 *
 * 'running' indicates t is currently executing on some CPU; in that case
 * this CPU's entry takes ownership (unless it is the release master).
 */
static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
{
	unsigned long flags;
	cpu_entry_t* entry;

	TRACE("gsn edf: task new %d\n", t->pid);

	raw_spin_lock_irqsave(&gsnedf_lock, flags);

	/* setup job params */
	release_at(t, litmus_clock());

	if (running) {
		entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
		BUG_ON(entry->scheduled);

#ifdef CONFIG_RELEASE_MASTER
		if (entry->cpu != gsnedf.release_master) {
#endif
			entry->scheduled = t;
			tsk_rt(t)->scheduled_on = task_cpu(t);
#ifdef CONFIG_RELEASE_MASTER
		} else {
			/* do not schedule on release master */
			preempt(entry); /* force resched */
			tsk_rt(t)->scheduled_on = NO_CPU;
		}
#endif
	} else {
		t->rt_param.scheduled_on = NO_CPU;
	}
	t->rt_param.linked_on = NO_CPU;

	/* insert into the domain and check for preemptions */
	gsnedf_job_arrival(t);
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
}
535 | |||
/* Wake-up hook: distinguish lock-related resumptions from genuine
 * (possibly tardy) sporadic job releases before re-inserting the task. */
static void gsnedf_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	lt_t now;

	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());

	raw_spin_lock_irqsave(&gsnedf_lock, flags);
	/* We need to take suspensions because of semaphores into
	 * account! If a job resumes after being suspended due to acquiring
	 * a semaphore, it should never be treated as a new job release.
	 */
	if (get_rt_flags(task) == RT_F_EXIT_SEM) {
		set_rt_flags(task, RT_F_RUNNING);
	} else {
		now = litmus_clock();
		if (is_tardy(task, now)) {
			/* new sporadic release */
			release_at(task, now);
			sched_trace_task_release(task);
		}
		else {
			/* NOTE(review): reads Linux's task->rt.time_slice ---
			 * confirm this field is still meaningful for
			 * LITMUS-managed tasks in this kernel version */
			if (task->rt.time_slice) {
				/* came back in time before deadline
				 */
				set_rt_flags(task, RT_F_RUNNING);
			}
		}
	}
	gsnedf_job_arrival(task);
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
}
568 | |||
569 | static void gsnedf_task_block(struct task_struct *t) | ||
570 | { | ||
571 | unsigned long flags; | ||
572 | |||
573 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
574 | |||
575 | /* unlink if necessary */ | ||
576 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
577 | unlink(t); | ||
578 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
579 | |||
580 | BUG_ON(!is_realtime(t)); | ||
581 | } | ||
582 | |||
583 | |||
/* Exit hook: remove all traces of t from the scheduler state. */
static void gsnedf_task_exit(struct task_struct * t)
{
	unsigned long flags;

	/* unlink if necessary */
	raw_spin_lock_irqsave(&gsnedf_lock, flags);
	unlink(t);
	if (tsk_rt(t)->scheduled_on != NO_CPU) {
		/* clear the stale 'scheduled' pointer on the CPU running t */
		gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
		tsk_rt(t)->scheduled_on = NO_CPU;
	}
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);

	BUG_ON(!is_realtime(t));
	TRACE_TASK(t, "RIP\n");
}
600 | |||
601 | |||
/* Admission control: GSN-EDF accepts every task unconditionally. */
static long gsnedf_admit_task(struct task_struct* tsk)
{
	/* nothing to check */
	return 0;
}
606 | |||
607 | #ifdef CONFIG_LITMUS_LOCKING | ||
608 | |||
609 | #include <litmus/fdso.h> | ||
610 | |||
611 | |||
612 | |||
/* called with IRQs off
 *
 * Make t inherit prio_inh's priority and propagate the change to
 * whichever data structure currently orders t (CPU heap if linked,
 * otherwise a ready/release heap). Caller holds gsnedf_lock.
 */
static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
{
	int linked_on;
	int check_preempt = 0;

	TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
	tsk_rt(t)->inh_task = prio_inh;

	linked_on = tsk_rt(t)->linked_on;

	/* If it is scheduled, then we need to reorder the CPU heap. */
	if (linked_on != NO_CPU) {
		TRACE_TASK(t, "%s: linked on %d\n",
			   __FUNCTION__, linked_on);
		/* Holder is scheduled; need to re-order CPUs.
		 * We can't use heap_decrease() here since
		 * the cpu_heap is ordered in reverse direction, so
		 * it is actually an increase. */
		bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
			     gsnedf_cpus[linked_on]->hn);
		bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
			     gsnedf_cpus[linked_on]->hn);
	} else {
		/* holder may be queued: first stop queue changes */
		raw_spin_lock(&gsnedf.release_lock);
		if (is_queued(t)) {
			TRACE_TASK(t, "%s: is queued\n",
				   __FUNCTION__);
			/* We need to update the position of holder in some
			 * heap. Note that this could be a release heap if
			 * budget enforcement is used and this job overran. */
			check_preempt =
				!bheap_decrease(edf_ready_order,
						tsk_rt(t)->heap_node);
		} else {
			/* Nothing to do: if it is not queued and not linked
			 * then it is either sleeping or currently being moved
			 * by other code (e.g., a timer interrupt handler) that
			 * will use the correct priority when enqueuing the
			 * task. */
			TRACE_TASK(t, "%s: is NOT queued => Done.\n",
				   __FUNCTION__);
		}
		raw_spin_unlock(&gsnedf.release_lock);

		/* If holder was enqueued in a release heap, then the following
		 * preemption check is pointless, but we can't easily detect
		 * that case. If you want to fix this, then consider that
		 * simply adding a state flag requires O(n) time to update when
		 * releasing n tasks, which conflicts with the goal to have
		 * O(log n) merges. */
		if (check_preempt) {
			/* heap_decrease() hit the top level of the heap: make
			 * sure preemption checks get the right task, not the
			 * potentially stale cache. */
			bheap_uncache_min(edf_ready_order,
					  &gsnedf.ready_queue);
			check_for_preemptions();
		}
	}
}
675 | |||
/* Locked wrapper around __set_priority_inheritance(); called with IRQs off. */
static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
{
	raw_spin_lock(&gsnedf_lock);
	__set_priority_inheritance(t, prio_inh);
	raw_spin_unlock(&gsnedf_lock);
}
682 | |||
/* Drop t's inherited priority and re-insert it at its base priority.
 * Caller holds gsnedf_lock with IRQs off. */
static void __clear_priority_inheritance(struct task_struct* t)
{
	/* A job only stops inheriting a priority when it releases a
	 * resource. Thus we can make the following assumption.*/
	BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);

	TRACE_TASK(t, "priority restored\n");
	tsk_rt(t)->inh_task = NULL;

	/* Check if rescheduling is necessary. We can't use heap_decrease()
	 * since the priority was effectively lowered. */
	unlink(t);
	gsnedf_job_arrival(t);
}
697 | |||
/* set and clear at the same time to avoid having to
 * acquire the runqueue lock twice */
static void update_priority_inheritance(
	struct task_struct* deprived,
	struct task_struct* blocker,
	struct task_struct* blocked)
{
	/* things to do:
	 * 1) deprived no longer inherits anything.
	 * 2) blocker gets blocked's priority.
	 */

	raw_spin_lock(&gsnedf_lock);

	if (tsk_rt(deprived)->inh_task)
		__clear_priority_inheritance(deprived);

	/* 'blocked' may be NULL when the new owner is the hp waiter */
	if (blocked)
		__set_priority_inheritance(blocker, blocked);

	raw_spin_unlock(&gsnedf_lock);
}
720 | |||
721 | |||
722 | /* ******************** FMLP support ********************** */ | ||
723 | |||
/* struct for semaphore with priority inheritance
 *
 * All fields are protected by wait.lock (the wait queue's spinlock). */
struct fmlp_semaphore {
	struct litmus_lock litmus_lock;

	/* current resource holder */
	struct task_struct *owner;

	/* highest-priority waiter */
	struct task_struct *hp_waiter;

	/* FIFO queue of waiting tasks */
	wait_queue_head_t wait;
};
737 | |||
/* Map a generic litmus_lock back to its enclosing FMLP semaphore. */
static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
{
	return container_of(lock, struct fmlp_semaphore, litmus_lock);
}
742 | |||
743 | /* caller is responsible for locking */ | ||
744 | struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem, | ||
745 | struct task_struct* skip) | ||
746 | { | ||
747 | struct list_head *pos; | ||
748 | struct task_struct *queued, *found = NULL; | ||
749 | |||
750 | list_for_each(pos, &sem->wait.task_list) { | ||
751 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
752 | task_list)->private; | ||
753 | |||
754 | /* Compare task prios, find high prio task. */ | ||
755 | if (queued != skip && edf_higher_prio(queued, found)) | ||
756 | found = queued; | ||
757 | } | ||
758 | return found; | ||
759 | } | ||
760 | |||
/* FMLP lock acquisition: FIFO queueing with priority inheritance.
 * Returns 0 on success, -EPERM if the caller is not a real-time task.
 * May suspend the caller until ownership is handed over in unlock(). */
int gsnedf_fmlp_lock(struct litmus_lock* l)
{
	struct task_struct* t = current;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	wait_queue_t wait;
	unsigned long flags;

	if (!is_realtime(t))
		return -EPERM;

	spin_lock_irqsave(&sem->wait.lock, flags);

	if (sem->owner) {
		/* resource is not free => must suspend and wait */

		init_waitqueue_entry(&wait, t);

		/* FIXME: interruptible would be nice some day */
		set_task_state(t, TASK_UNINTERRUPTIBLE);

		__add_wait_queue_tail_exclusive(&sem->wait, &wait);

		/* check if we need to activate priority inheritance */
		if (edf_higher_prio(t, sem->hp_waiter)) {
			sem->hp_waiter = t;
			if (edf_higher_prio(t, sem->owner))
				set_priority_inheritance(sem->owner, sem->hp_waiter);
		}

		TS_LOCK_SUSPEND;

		/* release lock before sleeping */
		spin_unlock_irqrestore(&sem->wait.lock, flags);

		/* We depend on the FIFO order. Thus, we don't need to recheck
		 * when we wake up; we are guaranteed to have the lock since
		 * there is only one wake up per release.
		 */

		schedule();

		TS_LOCK_RESUME;

		/* Since we hold the lock, no other task will change
		 * ->owner. We can thus check it without acquiring the spin
		 * lock. */
		BUG_ON(sem->owner != t);
	} else {
		/* it's ours now */
		sem->owner = t;

		spin_unlock_irqrestore(&sem->wait.lock, flags);
	}

	return 0;
}
817 | |||
/* FMLP unlock: hand ownership to the first (FIFO) waiter, maintain the
 * hp_waiter cache, and update priority inheritance in one pass.
 * Returns 0 on success, -EINVAL if the caller does not hold the lock. */
int gsnedf_fmlp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next, *blocked = NULL;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	unsigned long flags;
	int err = 0;

	spin_lock_irqsave(&sem->wait.lock, flags);

	if (sem->owner != t) {
		err = -EINVAL;
		goto out;
	}

	/* check if there are jobs waiting for this resource */
	next = __waitqueue_remove_first(&sem->wait);
	if (next) {
		/* next becomes the resource holder */
		sem->owner = next;
		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);

		/* determine new hp_waiter if necessary */
		if (next == sem->hp_waiter) {
			TRACE_TASK(next, "was highest-prio waiter\n");
			/* next has the highest priority --- it doesn't need to
			 * inherit. However, we need to make sure that the
			 * next-highest priority in the queue is reflected in
			 * hp_waiter. */
			sem->hp_waiter = find_hp_waiter(sem, next);
			if (sem->hp_waiter)
				TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
			else
				TRACE("no further waiters\n");
		} else {
			/* Well, if next is not the highest-priority waiter,
			 * then it ought to inherit the highest-priority
			 * waiter's priority. */
			blocked = sem->hp_waiter;
		}

		/* wake up next */
		wake_up_process(next);
	} else
		/* becomes available */
		sem->owner = NULL;

	/* we lose the benefit of priority inheritance (if any) */
	if (tsk_rt(t)->inh_task || blocked)
		update_priority_inheritance(t, next, blocked);

out:
	spin_unlock_irqrestore(&sem->wait.lock, flags);

	return err;
}
873 | |||
874 | int gsnedf_fmlp_close(struct litmus_lock* l) | ||
875 | { | ||
876 | struct task_struct *t = current; | ||
877 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
878 | unsigned long flags; | ||
879 | |||
880 | int owner; | ||
881 | |||
882 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
883 | |||
884 | owner = sem->owner == t; | ||
885 | |||
886 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
887 | |||
888 | if (owner) | ||
889 | gsnedf_fmlp_unlock(l); | ||
890 | |||
891 | return 0; | ||
892 | } | ||
893 | |||
/* Free the semaphore's backing storage when the lock object is destroyed. */
void gsnedf_fmlp_free(struct litmus_lock* lock)
{
	kfree(fmlp_from_lock(lock));
}

/* Operations table tying the FMLP implementation to the generic
 * litmus_lock interface. */
static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
	.close = gsnedf_fmlp_close,
	.lock = gsnedf_fmlp_lock,
	.unlock = gsnedf_fmlp_unlock,
	.deallocate = gsnedf_fmlp_free,
};
905 | |||
906 | static struct litmus_lock* gsnedf_new_fmlp(void) | ||
907 | { | ||
908 | struct fmlp_semaphore* sem; | ||
909 | |||
910 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
911 | if (!sem) | ||
912 | return NULL; | ||
913 | |||
914 | sem->owner = NULL; | ||
915 | sem->hp_waiter = NULL; | ||
916 | init_waitqueue_head(&sem->wait); | ||
917 | sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops; | ||
918 | |||
919 | return &sem->litmus_lock; | ||
920 | } | ||
921 | |||
922 | |||
923 | /* ******************** OMLP support ********************** */ | ||
924 | |||
/* struct for semaphore with priority inheritance (O(m) Locking Protocol) */
struct omlp_semaphore {
	struct litmus_lock litmus_lock;

	/* current resource holder */
	struct task_struct *owner;

	/* highest-priority waiter */
	struct task_struct *hp_waiter;

	/* FIFO queue of waiting tasks */
	wait_queue_head_t fifo_wait;
	/* Priority queue of waiting tasks; overflow from the bounded FIFO
	 * queue waits here, ordered by deadline (see omlp_enqueue). */
	wait_queue_head_t prio_wait;

	/* How many slots remaining in FIFO queue? */
	unsigned int num_free;
};
943 | |||
944 | static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock) | ||
945 | { | ||
946 | return container_of(lock, struct omlp_semaphore, litmus_lock); | ||
947 | } | ||
948 | |||
949 | /* already locked */ | ||
950 | static void omlp_enqueue(struct omlp_semaphore *sem, prio_wait_queue_t* wait) | ||
951 | { | ||
952 | if (sem->num_free) { | ||
953 | /* there is space in the FIFO queue */ | ||
954 | sem->num_free--; | ||
955 | __add_wait_queue_tail_exclusive(&sem->fifo_wait, &wait->wq); | ||
956 | } else { | ||
957 | /* nope, gotta go to the priority queue */ | ||
958 | __add_wait_queue_prio_exclusive(&sem->prio_wait, wait); | ||
959 | } | ||
960 | } | ||
961 | |||
962 | /* already locked */ | ||
963 | static int omlp_move(struct omlp_semaphore *sem) | ||
964 | { | ||
965 | struct list_head* first; | ||
966 | |||
967 | if (waitqueue_active(&sem->prio_wait)) { | ||
968 | first = sem->prio_wait.task_list.next; | ||
969 | list_move_tail(first, &sem->fifo_wait.task_list); | ||
970 | return 1; | ||
971 | } | ||
972 | else | ||
973 | return 0; | ||
974 | } | ||
975 | |||
/* Remove the head of the FIFO queue. If no priority-queue waiter could be
 * promoted into the freed slot (omlp_move), record the slot in num_free.
 * Returns the dequeued task, or NULL if the FIFO queue was empty. */
static struct task_struct* omlp_dequeue(struct omlp_semaphore *sem)
{
	struct task_struct* first = __waitqueue_remove_first(&sem->fifo_wait);

	if (first && !omlp_move(sem))
		sem->num_free++;

	return first;
}
985 | |||
/* caller is responsible for locking.
 * Find the highest-priority waiter across both queues, excluding 'skip'.
 * The priority queue is deadline-ordered, so only its head needs checking.
 */
static struct task_struct* omlp_find_hp_waiter(struct omlp_semaphore *sem,
					       struct task_struct* skip)
{
	struct list_head *pos;
	struct task_struct *queued, *found = NULL;

	/* check FIFO queue first */
	list_for_each(pos, &sem->fifo_wait.task_list) {
		queued = (struct task_struct*) list_entry(pos, wait_queue_t,
							  task_list)->private;

		/* Compare task prios, find high prio task. */
		if (queued != skip && edf_higher_prio(queued, found))
			found = queued;
	}

	/* check priority queue next */
	if (waitqueue_active(&sem->prio_wait)) {
		/* first has highest priority */
		pos = sem->prio_wait.task_list.next;
		queued = (struct task_struct*) list_entry(pos, wait_queue_t,
							  task_list)->private;
		if (edf_higher_prio(queued, found))
			found = queued;
	}

	return found;
}
1015 | |||
/* Acquire the OMLP semaphore, suspending if it is currently held.
 *
 * Because ownership is passed directly by the unlock path (one wake-up per
 * release), a task that resumes from schedule() is guaranteed to hold the
 * lock and need not re-check. Returns -EPERM for non-real-time callers.
 */
int gsnedf_omlp_lock(struct litmus_lock* l)
{
	struct task_struct* t = current;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	prio_wait_queue_t wait;
	unsigned long flags;

	if (!is_realtime(t))
		return -EPERM;

	spin_lock_irqsave(&sem->fifo_wait.lock, flags);

	if (sem->owner) {
		/* resource is not free => must suspend and wait */

		init_prio_waitqueue_entry(&wait, t, get_deadline(t));

		/* must be set before enqueueing to avoid a lost wake-up */
		set_task_state(t, TASK_UNINTERRUPTIBLE);

		omlp_enqueue(sem, &wait);

		/* check if we need to activate priority inheritance */
		if (edf_higher_prio(t, sem->hp_waiter)) {
			sem->hp_waiter = t;
			if (edf_higher_prio(t, sem->owner))
				set_priority_inheritance(sem->owner, sem->hp_waiter);
		}

		TS_LOCK_SUSPEND;

		/* release lock before sleeping */
		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);

		schedule();

		TS_LOCK_RESUME;

		/* Since we hold the lock, no other task will change
		 * ->owner. We can thus check it without acquiring the spin
		 * lock. */
		BUG_ON(sem->owner != t);
	} else {
		/* it's ours now */
		sem->owner = t;

		spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
	}

	return 0;
}
1066 | |||
/* Release the OMLP semaphore: hand ownership to the next FIFO waiter (if
 * any) and recompute priority inheritance. Returns -EINVAL if the caller
 * is not the owner. */
static int gsnedf_omlp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next, *blocked = NULL;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	unsigned long flags;
	int err = 0;

	spin_lock_irqsave(&sem->fifo_wait.lock, flags);

	if (sem->owner != t) {
		err = -EINVAL;
		goto out;
	}

	/* check if there are jobs waiting for this resource */
	next = omlp_dequeue(sem);
	if (next) {
		/* next becomes the resource holder */
		sem->owner = next;
		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);

		/* determine new hp_waiter if necessary */
		if (next == sem->hp_waiter) {
			TRACE_TASK(next, "was highest-prio waiter\n");
			/* next has the highest priority --- it doesn't need to
			 * inherit. However, we need to make sure that the
			 * next-highest priority in the queue is reflected in
			 * hp_waiter. */
			sem->hp_waiter = omlp_find_hp_waiter(sem, next);
			if (sem->hp_waiter)
				TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
			else
				TRACE("no further waiters\n");
		} else {
			/* Well, if next is not the highest-priority waiter,
			 * then it ought to inherit the highest-priority
			 * waiter's priority. */
			blocked = sem->hp_waiter;
		}

		/* wake up next */
		wake_up_process(next);
	} else
		/* becomes available */
		sem->owner = NULL;

	/* we lose the benefit of priority inheritance (if any) */
	if (tsk_rt(t)->inh_task || blocked)
		update_priority_inheritance(t, next, blocked);

out:
	spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);

	return err;
}
1122 | |||
1123 | static int gsnedf_omlp_close(struct litmus_lock* l) | ||
1124 | { | ||
1125 | struct task_struct *t = current; | ||
1126 | struct omlp_semaphore *sem = omlp_from_lock(l); | ||
1127 | unsigned long flags; | ||
1128 | |||
1129 | int owner; | ||
1130 | |||
1131 | spin_lock_irqsave(&sem->fifo_wait.lock, flags); | ||
1132 | |||
1133 | owner = sem->owner == t; | ||
1134 | |||
1135 | spin_unlock_irqrestore(&sem->fifo_wait.lock, flags); | ||
1136 | |||
1137 | if (owner) | ||
1138 | gsnedf_omlp_unlock(l); | ||
1139 | |||
1140 | return 0; | ||
1141 | } | ||
1142 | |||
/* Free the semaphore's backing storage when the lock object is destroyed. */
static void gsnedf_omlp_free(struct litmus_lock* lock)
{
	kfree(omlp_from_lock(lock));
}

/* Operations table tying the OMLP implementation to the generic
 * litmus_lock interface. */
static struct litmus_lock_ops gsnedf_omlp_lock_ops = {
	.close = gsnedf_omlp_close,
	.lock = gsnedf_omlp_lock,
	.unlock = gsnedf_omlp_unlock,
	.deallocate = gsnedf_omlp_free,
};
1154 | |||
/* Allocate and initialize an OMLP semaphore. The FIFO queue is sized to
 * the number of CPUs that can actually schedule real-time tasks.
 * Returns NULL on allocation failure. */
static struct litmus_lock* gsnedf_new_omlp(void)
{
	struct omlp_semaphore* sem;

	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
	if (!sem)
		return NULL;

	sem->owner = NULL;
	sem->hp_waiter = NULL;
	init_waitqueue_head(&sem->fifo_wait);
	init_waitqueue_head(&sem->prio_wait);
	sem->litmus_lock.ops = &gsnedf_omlp_lock_ops;
	/* free = cpus -1 since ->owner is the head and also counted */
	sem->num_free = num_online_cpus() - 1;

#ifdef CONFIG_RELEASE_MASTER
	/* If we use dedicated interrupt handling, then there are actually
	 * only m - 1 CPUs around. */
	if (gsnedf.release_master != NO_CPU)
		sem->num_free -= 1;
#endif

	return &sem->litmus_lock;
}
1180 | |||
1181 | |||
1182 | /* **** lock constructor **** */ | ||
1183 | |||
1184 | |||
1185 | static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, | ||
1186 | void* __user unused) | ||
1187 | { | ||
1188 | int err = -ENXIO; | ||
1189 | |||
1190 | /* GSN-EDF currently only supports the FMLP for global resources. */ | ||
1191 | switch (type) { | ||
1192 | |||
1193 | case FMLP_SEM: | ||
1194 | /* Flexible Multiprocessor Locking Protocol */ | ||
1195 | *lock = gsnedf_new_fmlp(); | ||
1196 | if (*lock) | ||
1197 | err = 0; | ||
1198 | else | ||
1199 | err = -ENOMEM; | ||
1200 | break; | ||
1201 | |||
1202 | case OMLP_SEM: | ||
1203 | /* O(m) Multiprocessor Locking Protocol */ | ||
1204 | *lock = gsnedf_new_omlp(); | ||
1205 | if (*lock) | ||
1206 | err = 0; | ||
1207 | else | ||
1208 | err = -ENOMEM; | ||
1209 | break; | ||
1210 | |||
1211 | }; | ||
1212 | |||
1213 | return err; | ||
1214 | } | ||
1215 | |||
1216 | #endif | ||
1217 | |||
1218 | |||
/* Called when the plugin becomes the active scheduler: reset the CPU heap
 * and per-CPU link state. With CONFIG_RELEASE_MASTER, the release-master
 * CPU is left out of the heap so no real-time task is ever linked to it. */
static long gsnedf_activate_plugin(void)
{
	int cpu;
	cpu_entry_t *entry;

	bheap_init(&gsnedf_cpu_heap);
#ifdef CONFIG_RELEASE_MASTER
	gsnedf.release_master = atomic_read(&release_master_cpu);
#endif

	for_each_online_cpu(cpu) {
		entry = &per_cpu(gsnedf_cpu_entries, cpu);
		bheap_node_init(&entry->hn, entry);
		entry->linked = NULL;
		entry->scheduled = NULL;
#ifdef CONFIG_RELEASE_MASTER
		if (cpu != gsnedf.release_master) {
#endif
			TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
			update_cpu_position(entry);
#ifdef CONFIG_RELEASE_MASTER
		} else {
			TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
		}
#endif
	}
	return 0;
}
1247 | |||
/* Plugin object: callback table registered with the LITMUS^RT core.
 * Locking support is only compiled in with CONFIG_LITMUS_LOCKING. */
static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
	.plugin_name = "GSN-EDF",
	.finish_switch = gsnedf_finish_switch,
	.tick = gsnedf_tick,
	.task_new = gsnedf_task_new,
	.complete_job = complete_job,
	.task_exit = gsnedf_task_exit,
	.schedule = gsnedf_schedule,
	.task_wake_up = gsnedf_task_wake_up,
	.task_block = gsnedf_task_block,
	.admit_task = gsnedf_admit_task,
	.activate_plugin = gsnedf_activate_plugin,
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock = gsnedf_allocate_lock,
#endif
};
1265 | |||
1266 | |||
/* Module init: set up per-CPU entries and the EDF domain, then register
 * the plugin with the LITMUS^RT core. Runs once at boot/module load. */
static int __init init_gsn_edf(void)
{
	int cpu;
	cpu_entry_t *entry;

	bheap_init(&gsnedf_cpu_heap);
	/* initialize CPU state */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		entry = &per_cpu(gsnedf_cpu_entries, cpu);
		gsnedf_cpus[cpu] = entry;
		entry->cpu = cpu;
		entry->hn = &gsnedf_heap_node[cpu];
		bheap_node_init(&entry->hn, entry);
	}
	edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
	return register_sched_plugin(&gsn_edf_plugin);
}


module_init(init_gsn_edf);
diff --git a/litmus/sched_gsn_edf_split_namechange.c b/litmus/sched_gsn_edf_split_namechange.c new file mode 100644 index 000000000000..6839ae642b3a --- /dev/null +++ b/litmus/sched_gsn_edf_split_namechange.c | |||
@@ -0,0 +1,1165 @@ | |||
1 | /* | ||
2 | * litmus/sched_gsn_edf.c | ||
3 | * | ||
4 | * Implementation of the GSN-EDF scheduling algorithm with job splitting, i.e. | ||
5 | * GSN-EDF. | ||
6 | * | ||
7 | * This plugin is a modified version of the prior GSN-EDF plugin in | ||
8 | * litmus/sched_gsn_edf.c | ||
9 | * | ||
10 | * Splitting an implicit-deadline job simply means splitting each job into an | ||
11 | * integral number of subjobs. For example, a task with a period of 10 ms and | ||
12 | * a runtime of 4 ms could be re-organized as a task with a period of 5 ms and | ||
13 | * a runtime of 2 ms, with analytical benefit for bounded tardiness (ignoring | ||
14 | * overheads and assuming no critical sections). This would have a "splitting | ||
15 | * factor" of 2. | ||
16 | * | ||
17 | * Because our analysis works with early releasing, we actually only release | ||
18 | * each job once, but move the subjob deadline back when the appropriate amount | ||
19 | * of execution has been completed. (In the example above, a job released at | ||
 * time 0 would initially have a subjob deadline at time 5, but this deadline
21 | * would be moved to time 10 as soon as 2 ms of execution had completed.) | ||
22 | */ | ||
23 | |||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/percpu.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/slab.h> | ||
28 | |||
29 | #include <litmus/litmus.h> | ||
30 | #include <litmus/jobs.h> | ||
31 | #include <litmus/sched_plugin.h> | ||
32 | #include <litmus/edf_common.h> | ||
33 | #include <litmus/sched_trace.h> | ||
34 | #include <litmus/trace.h> | ||
35 | |||
36 | #include <litmus/preempt.h> | ||
37 | |||
38 | #include <litmus/bheap.h> | ||
39 | |||
40 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
41 | #include <litmus/affinity.h> | ||
42 | #endif | ||
43 | |||
44 | #include <linux/module.h> | ||
45 | |||
/* cpu_entry_t - maintain the linked and scheduled state,
 * plus the per-CPU timer that fires at subjob boundaries
 * (armed/cancelled in arm_split_timer/cancel_split_timer).
 */
typedef struct  {
	int cpu;
	struct task_struct* linked; /* only RT tasks */
	struct task_struct* scheduled; /* only RT tasks */
	struct bheap_node* hn;
	struct hrtimer split_timer; /* fires when the scheduled task's deadline must move */
	int timer_armed; /* accessed with IRQs off, locally only */
} cpu_entry_t;
DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);

cpu_entry_t* gsnedf_cpus[NR_CPUS];

/* the cpus queue themselves according to priority in here */
static struct bheap_node gsnedf_heap_node[NR_CPUS];
static struct bheap gsnedf_cpu_heap;

static rt_domain_t gsnedf;
#define gsnedf_lock (gsnedf.ready_lock)
66 | |||
67 | inline static int get_slice_num(struct task_struct* t) | ||
68 | { | ||
69 | int basic = ((t->rt_param.job_params.exec_time * | ||
70 | t->rt_param.task_params.split) / | ||
71 | t->rt_param.task_params.exec_cost) + 1; | ||
72 | if (basic <= t->rt_param.task_params.split){ | ||
73 | return basic; | ||
74 | } | ||
75 | else{ | ||
76 | /*Since we don't police budget, just leave where it's at.*/ | ||
77 | return t->rt_param.task_params.split; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | /* Returns the appropriate subjob deadline.*/ | ||
82 | inline static lt_t get_proper_deadline(struct task_struct* t) | ||
83 | { | ||
84 | return t->rt_param.job_params.release + | ||
85 | ((t->rt_param.task_params.period * get_slice_num(t)) | ||
86 | / t->rt_param.task_params.split); | ||
87 | } | ||
88 | |||
89 | /* Tells us if the current deadline is too small.*/ | ||
90 | inline static int needs_deadline_move(struct task_struct* t) | ||
91 | { | ||
92 | BUG_ON(get_proper_deadline(t) < t->rt_param.job_params.subjob_deadline); | ||
93 | #ifdef CONFIG_LITMUS_LOCKING | ||
94 | return !is_in_crit_section(t) && | ||
95 | (get_proper_deadline(t) != | ||
96 | tsk_rt(t)->job_params.subjob_deadline); | ||
97 | #else | ||
98 | return get_proper_deadline(t) != tsk_rt(t)->job_params.subjob_deadline; | ||
99 | #endif | ||
100 | } | ||
101 | |||
102 | /*Returns execution time until the next deadline move. | ||
103 | * 0 means the task has no more deadline moves | ||
104 | */ | ||
105 | inline static lt_t time_to_next_move(struct task_struct* t) | ||
106 | { | ||
107 | if (get_slice_num(t) == t->rt_param.task_params.split){ | ||
108 | return 0; | ||
109 | } | ||
110 | /* +1 upper bounds ceiling, since integer division is floor*/ | ||
111 | return ((get_slice_num(t) * t->rt_param.task_params.exec_cost) | ||
112 | / t->rt_param.task_params.split) + 1 | ||
113 | - t->rt_param.job_params.exec_time; | ||
114 | } | ||
115 | |||
/* Timer stuff - similar to budget.c.
 * hrtimer callback: a subjob boundary was reached on this CPU. Clear the
 * armed flag and trigger a local reschedule so the scheduler can move the
 * subjob deadline. */
static enum hrtimer_restart on_split_timeout(struct hrtimer *timer)
{
	cpu_entry_t* st = container_of(timer,
				       cpu_entry_t,
				       split_timer);

	unsigned long flags;

	local_irq_save(flags);
	TRACE("split timer fired.\n");
	st->timer_armed = 0;
	/* Activate scheduler */
	litmus_reschedule_local();
	local_irq_restore(flags);

	return HRTIMER_NORESTART;
}
134 | |||
135 | static void cancel_split_timer(cpu_entry_t* ce) | ||
136 | { | ||
137 | int ret; | ||
138 | |||
139 | TRACE("cancelling split time.\n"); | ||
140 | |||
141 | /* Since interrupts are disabled and et->timer_armed is only | ||
142 | * modified locally, we do not need any locks. | ||
143 | */ | ||
144 | |||
145 | if (ce->timer_armed) { | ||
146 | ret = hrtimer_try_to_cancel(&ce->split_timer); | ||
147 | /* Should never be inactive. */ | ||
148 | BUG_ON(ret == 0); | ||
149 | /* Should never be running concurrently.*/ | ||
150 | BUG_ON(ret == -1); | ||
151 | |||
152 | ce->timer_armed = 0; | ||
153 | } | ||
154 | } | ||
155 | |||
/* assumes called with IRQs off.
 * Arm the CPU's split timer to fire when task t reaches its next subjob
 * boundary. Does nothing if t is non-preemptive or has no moves left. */
static void arm_split_timer(cpu_entry_t *ce,
			    struct task_struct* t)
{
	lt_t when_to_fire;
	lt_t time_to_move;
	TRACE_TASK(t, "arming split timer.\n");

	/* __hrtimer_start_range_ns() cancels the timer
	 * anyway, so we don't have to check whether it is still armed */

	/*We won't do any new deadline moves if the budget has been exhausted*/
	/* note: the condition deliberately assigns time_to_move; 0 means no
	 * further moves are needed */
	if (likely(!is_np(t) && (time_to_move = time_to_next_move(t)))) {
		when_to_fire = litmus_clock() + time_to_move;
		TRACE_TASK(t, "actually arming for %llu into the future\n",
			   time_to_move);
		__hrtimer_start_range_ns(&ce->split_timer,
					 ns_to_ktime(when_to_fire),
					 0 /* delta */,
					 HRTIMER_MODE_ABS_PINNED,
					 0 /* no wakeup */);
		ce->timer_armed = 1;
	}
}
180 | |||
181 | /* Uncomment this if you want to see all scheduling decisions in the | ||
182 | * TRACE() log. | ||
183 | #define WANT_ALL_SCHED_EVENTS | ||
184 | */ | ||
185 | |||
186 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
187 | { | ||
188 | cpu_entry_t *a, *b; | ||
189 | a = _a->value; | ||
190 | b = _b->value; | ||
191 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
192 | * the top of the heap. | ||
193 | */ | ||
194 | return edf_higher_prio(b->linked, a->linked); | ||
195 | } | ||
196 | |||
/* update_cpu_position - Move the cpu entry to the correct place to maintain
 * order in the cpu queue. Caller must hold gsnedf lock.
 * Implemented as delete + re-insert; the entry may legitimately not be in
 * the heap yet (e.g. the release master is never inserted). */
static void update_cpu_position(cpu_entry_t *entry)
{
	if (likely(bheap_node_in_heap(entry->hn)))
		bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
	bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
}
206 | |||
207 | /* caller must hold gsnedf lock */ | ||
208 | static cpu_entry_t* lowest_prio_cpu(void) | ||
209 | { | ||
210 | struct bheap_node* hn; | ||
211 | hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap); | ||
212 | return hn->value; | ||
213 | } | ||
214 | |||
215 | |||
/* link_task_to_cpu - Update the link of a CPU.
 * Handles the case where the to-be-linked task is already
 * scheduled on a different CPU: in that situation the two CPUs
 * swap their linked tasks rather than forcing a migration.
 */
static noinline void link_task_to_cpu(struct task_struct* linked,
				      cpu_entry_t *entry)
{
	cpu_entry_t *sched;
	struct task_struct* tmp;
	int on_cpu;

	BUG_ON(linked && !is_realtime(linked));

	/* Currently linked task is set to be unlinked. */
	if (entry->linked) {
		entry->linked->rt_param.linked_on = NO_CPU;
	}

	/* Link new task to CPU. */
	if (linked) {
		set_rt_flags(linked, RT_F_RUNNING);
		/* handle task is already scheduled somewhere! */
		on_cpu = linked->rt_param.scheduled_on;
		if (on_cpu != NO_CPU) {
			sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
			/* this should only happen if not linked already */
			BUG_ON(sched->linked == linked);

			/* If we are already scheduled on the CPU to which we
			 * wanted to link, we don't need to do the swap --
			 * we just link ourselves to the CPU and depend on
			 * the caller to get things right.
			 */
			if (entry != sched) {
				TRACE_TASK(linked,
					   "already scheduled on %d, updating link.\n",
					   sched->cpu);
				/* swap: the task previously linked to 'sched'
				 * becomes the one we link to 'entry' below */
				tmp = sched->linked;
				linked->rt_param.linked_on = sched->cpu;
				sched->linked = linked;
				update_cpu_position(sched);
				linked = tmp;
			}
		}
		if (linked) /* might be NULL due to swap */
			linked->rt_param.linked_on = entry->cpu;
	}
	entry->linked = linked;
#ifdef WANT_ALL_SCHED_EVENTS
	if (linked)
		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
	else
		TRACE("NULL linked to %d.\n", entry->cpu);
#endif
	update_cpu_position(entry);
}
272 | |||
/* unlink - Make sure a task is not linked any longer to an entry
 * where it was linked before. Must hold gsnedf_lock.
 * Also removes the task from the ready/release queues if it is queued
 * there instead of being linked to a CPU. */
static noinline void unlink(struct task_struct* t)
{
	cpu_entry_t *entry;

	if (t->rt_param.linked_on != NO_CPU) {
		/* unlink */
		entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
		t->rt_param.linked_on = NO_CPU;
		link_task_to_cpu(NULL, entry);
	} else if (is_queued(t)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked. It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 */
		remove(&gsnedf, t);
	}
}
296 | |||
297 | |||
/* preempt - force a CPU to reschedule
 * (no-op if the currently scheduled task is non-preemptable; see
 * preempt_if_preemptable())
 */
static void preempt(cpu_entry_t *entry)
{
	preempt_if_preemptable(entry->scheduled, entry->cpu);
}
304 | |||
305 | /* requeue - Put an unlinked task into gsn-edf domain. | ||
306 | * Caller must hold gsnedf_lock. | ||
307 | */ | ||
308 | static noinline void requeue(struct task_struct* task) | ||
309 | { | ||
310 | BUG_ON(!task); | ||
311 | /* sanity check before insertion */ | ||
312 | BUG_ON(is_queued(task)); | ||
313 | |||
314 | if (is_released(task, litmus_clock())) | ||
315 | __add_ready(&gsnedf, task); | ||
316 | else { | ||
317 | /* it has got to wait */ | ||
318 | add_release(&gsnedf, task); | ||
319 | } | ||
320 | } | ||
321 | |||
#ifdef CONFIG_SCHED_CPU_AFFINITY
/* Find an available CPU near 'start' for cache-affinity-aware placement.
 * get_nearest_available_cpu() is a macro that assigns its first argument;
 * the release master (if configured) is excluded from consideration.
 * Returns NULL if no nearby CPU is available. */
static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
{
	cpu_entry_t *affinity;

	get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
#ifdef CONFIG_RELEASE_MASTER
				  gsnedf.release_master
#else
				  NO_CPU
#endif
		);

	return(affinity);
}
#endif
338 | |||
/* check for any necessary preemptions.
 * Repeatedly links the highest-priority ready task to the lowest-priority
 * CPU until no ready task beats any linked task. Caller must hold
 * gsnedf_lock. */
static void check_for_preemptions(void)
{
	struct task_struct *task;
	cpu_entry_t *last;

	for (last = lowest_prio_cpu();
	     edf_preemption_needed(&gsnedf, last->linked);
	     last = lowest_prio_cpu()) {
		/* preemption necessary; edf_preemption_needed() returning
		 * true implies a ready task exists, so task is non-NULL */
		task = __take_ready(&gsnedf);
		TRACE("check_for_preemptions: attempting to link task %d to %d\n",
		      task->pid, last->cpu);

#ifdef CONFIG_SCHED_CPU_AFFINITY
		{
			/* prefer a CPU near where the task last ran; only
			 * displace 'last' if no nearby CPU is available */
			cpu_entry_t *affinity =
				gsnedf_get_nearest_available_cpu(
					&per_cpu(gsnedf_cpu_entries,
						 task_cpu(task)));
			if (affinity)
				last = affinity;
			else if (last->linked)
				requeue(last->linked);
		}
#else
		if (last->linked)
			requeue(last->linked);
#endif

		link_task_to_cpu(task, last);
		preempt(last);
	}
}
373 | |||
/* gsnedf_job_arrival: task is either resumed or released.
 * Queue it and see whether it should preempt a linked task.
 * Caller must hold gsnedf_lock. */
static noinline void gsnedf_job_arrival(struct task_struct* task)
{
	BUG_ON(!task);

	requeue(task);
	check_for_preemptions();
}
382 | |||
/* rt_domain callback: a batch of jobs has reached its release time.
 * Merge them into the ready queue and check for preemptions. */
static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&gsnedf_lock, flags);

	__merge_ready(rt, tasks);
	check_for_preemptions();

	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
}
394 | |||
/* caller holds gsnedf_lock.
 * Complete the current job (forced == budget exhaustion) and set up the
 * next one, including the first subjob deadline of the new job. */
static noinline void job_completion(struct task_struct *t, int forced)
{
	BUG_ON(!t);

	sched_trace_task_completion(t, forced);

	TRACE_TASK(t, "job_completion().\n");

	/* set flags */
	set_rt_flags(t, RT_F_SLEEP);
	/* prepare for next period */
	/* prepare_for_next_period assumes implicit deadlines and no splitting,
	 * so we call it with the job deadline it expects.
	 */
	t->rt_param.job_params.deadline = t->rt_param.job_params.release +
		t->rt_param.task_params.period;
	prepare_for_next_period(t);
	/* We now set the subjob deadline to what it should be for scheduling
	 * priority.
	 */
	t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
	if (is_released(t, litmus_clock()))
		sched_trace_task_release(t);
	/* unlink */
	unlink(t);
	/* requeue
	 * But don't requeue a blocking task. */
	if (is_running(t))
		gsnedf_job_arrival(t);
}
426 | |||
/* Advance t's subjob deadline to the current proper value and re-evaluate
 * its scheduling position, since its effective priority just dropped.
 * Caller holds gsnedf_lock. */
static void move_deadline(struct task_struct *t)
{
	tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);
	TRACE_TASK(t, "move_deadline called\nRelease: %llu\nPeriod: %llu"
		   "\nRelease + Period: %llu\nDeadline: %llu"
		   "\nDeadline - Release: %llu\n",
		   t->rt_param.job_params.release,
		   t->rt_param.task_params.period,
		   t->rt_param.job_params.release
		   + t->rt_param.task_params.period,
		   t->rt_param.job_params.subjob_deadline,
		   t->rt_param.job_params.subjob_deadline
		   - t->rt_param.job_params.release);
	/* Check if rescheduling needed with lower priority. */
	unlink(t);
	gsnedf_job_arrival(t);
}
444 | |||
/* gsnedf_tick - this function is called for every local timer
 * interrupt.
 *
 * checks whether the current task has expired and checks
 * whether we need to preempt it if it has not expired
 */
static void gsnedf_tick(struct task_struct* t)
{
	if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
		if (!is_np(t)) {
			/* np tasks will be preempted when they become
			 * preemptable again
			 */
			litmus_reschedule_local();
			TRACE("gsnedf_scheduler_tick: "
			      "%d is preemptable "
			      " => FORCE_RESCHED\n", t->pid);
		} else if (is_user_np(t)) {
			/* user-space non-preemptive section: ask the task
			 * to yield via its exit-np flag instead */
			TRACE("gsnedf_scheduler_tick: "
			      "%d is non-preemptable, "
			      "preemption delayed.\n", t->pid);
			request_exit_np(t);
		}
	}
}
470 | |||
471 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
472 | * assumptions on the state of the current task since it may be called for a | ||
473 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
474 | * was necessary, because sys_exit_np() was called, because some Linux | ||
475 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
476 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
477 | * current state is. | ||
478 | * | ||
479 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
480 | * | ||
481 | * The following assertions for the scheduled task could hold: | ||
482 | * | ||
483 | * - !is_running(scheduled) // the job blocks | ||
484 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
485 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
486 | * - linked != scheduled // we need to reschedule (for any reason) | ||
487 | * - is_np(scheduled) // rescheduling must be delayed, | ||
488 | * sys_exit_np must be requested | ||
489 | * | ||
490 | * Any of these can occur together. | ||
491 | */ | ||
static struct task_struct* gsnedf_schedule(struct task_struct * prev)
{
	cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
	int out_of_time, sleep, preempt, np, exists, blocks, needs_move;
	struct task_struct* next = NULL;

#ifdef CONFIG_RELEASE_MASTER
	/* Bail out early if we are the release master.
	 * The release master never schedules any real-time tasks.
	 */
	if (unlikely(gsnedf.release_master == entry->cpu)) {
		sched_state_task_picked();
		return NULL;
	}
#endif

	raw_spin_lock(&gsnedf_lock);

	/* sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev);
	BUG_ON(entry->scheduled && !is_realtime(prev));
	BUG_ON(is_realtime(prev) && !entry->scheduled);

	/* (0) Determine state */
	exists = entry->scheduled != NULL;
	blocks = exists && !is_running(entry->scheduled);
	out_of_time = exists &&
		budget_enforced(entry->scheduled) &&
		budget_exhausted(entry->scheduled);
	/* needs_move: the split-job sub-deadline must be advanced */
	needs_move = exists && needs_deadline_move(entry->scheduled);
	np = exists && is_np(entry->scheduled);
	sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
	preempt = entry->scheduled != entry->linked;

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
#endif

	if (exists)
		TRACE_TASK(prev,
			   "blocks:%d out_of_time:%d needs_move:%d np:%d"
			   " sleep:%d preempt:%d state:%d sig:%d\n",
			   blocks, out_of_time, needs_move, np, sleep, preempt,
			   prev->state, signal_pending(prev));
	if (entry->linked && preempt)
		TRACE_TASK(prev, "will be preempted by %s/%d\n",
			   entry->linked->comm, entry->linked->pid);


	/* If a task blocks we have no choice but to reschedule.
	 */
	if (blocks)
		unlink(entry->scheduled);

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * We need to make sure to update the link structure anyway in case
	 * that we are still linked. Multiple calls to request_exit_np() don't
	 * hurt.
	 *
	 * Job deadline moves handled similarly
	 */
	if (np && (out_of_time || preempt || sleep)) {
		unlink(entry->scheduled);
		request_exit_np(entry->scheduled);
	}
	else if (np && needs_move) {
		/* safe while non-preemptive: only the deadline changes */
		move_deadline(entry->scheduled);
	}

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this. Don't do a job completion if we block (can't have timers running
	 * for blocked jobs). Preemption go first for the same reason.
	 */
	if (!np && (out_of_time || sleep) && !blocks && !preempt)
		job_completion(entry->scheduled, !sleep);
	else if (!np && needs_move && !blocks && !preempt) {
		move_deadline(entry->scheduled);
	}

	/* Link pending task if we became unlinked.
	 */
	if (!entry->linked)
		link_task_to_cpu(__take_ready(&gsnedf), entry);

	/* The final scheduling decision. Do we need to switch for some reason?
	 * If linked is different from scheduled, then select linked as next.
	 */
	if ((!np || blocks) &&
	    entry->linked != entry->scheduled) {
		/* Schedule a linked job? */
		if (entry->linked) {
			entry->linked->rt_param.scheduled_on = entry->cpu;
			next = entry->linked;
			TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
		}
		if (entry->scheduled) {
			/* not gonna be scheduled soon */
			entry->scheduled->rt_param.scheduled_on = NO_CPU;
			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
		}
	} else
		/* Only override Linux scheduler if we have a real-time task
		 * scheduled that needs to continue.
		 */
		if (exists)
			next = prev;

	sched_state_task_picked();

	raw_spin_unlock(&gsnedf_lock);

	/* (Re-)arm or cancel the split-job enforcement timer outside of
	 * gsnedf_lock, depending on whether a task will run next. */
	if (next) {
		arm_split_timer(entry, next);
	}
	else if (entry->timer_armed) {
		cancel_split_timer(entry);
	}

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE("gsnedf_lock released, next=0x%p\n", next);

	if (next)
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	else if (exists && !next)
		TRACE("becomes idle at %llu.\n", litmus_clock());
#endif


	return next;
}
623 | |||
624 | |||
625 | /* _finish_switch - we just finished the switch away from prev | ||
626 | */ | ||
627 | static void gsnedf_finish_switch(struct task_struct *prev) | ||
628 | { | ||
629 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | ||
630 | |||
631 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
632 | #ifdef WANT_ALL_SCHED_EVENTS | ||
633 | TRACE_TASK(prev, "switched away from\n"); | ||
634 | #endif | ||
635 | } | ||
636 | |||
637 | static void gsnedf_release_at(struct task_struct *t, lt_t start) | ||
638 | { | ||
639 | t->rt_param.job_params.deadline = start; | ||
640 | prepare_for_next_period(t); | ||
641 | t->rt_param.job_params.subjob_deadline = get_proper_deadline(t); | ||
642 | set_rt_flags(t, RT_F_RUNNING); | ||
643 | } | ||
644 | |||
/* Prepare a task for running in RT mode
 *
 * Releases the first job at the current time, records which CPU (if any)
 * the task is already running on, and enqueues it in the GSN-EDF domain.
 * 'running' indicates the task is currently executing on task_cpu(t).
 */
static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
{
	unsigned long flags;
	cpu_entry_t* entry;

	TRACE("gsn edf: task new %d\n", t->pid);

	raw_spin_lock_irqsave(&gsnedf_lock, flags);

	/* setup job params */
	gsnedf_release_at(t, litmus_clock());

	if (running) {
		entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
		BUG_ON(entry->scheduled);

#ifdef CONFIG_RELEASE_MASTER
		if (entry->cpu != gsnedf.release_master) {
#endif
			entry->scheduled = t;
			tsk_rt(t)->scheduled_on = task_cpu(t);
#ifdef CONFIG_RELEASE_MASTER
		} else {
			/* do not schedule on release master */
			preempt(entry); /* force resched */
			tsk_rt(t)->scheduled_on = NO_CPU;
		}
#endif
	} else {
		t->rt_param.scheduled_on = NO_CPU;
	}
	/* a new task is never linked to a CPU yet */
	t->rt_param.linked_on = NO_CPU;

	gsnedf_job_arrival(t);
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
}
683 | |||
/* A task resumed from suspension: distinguish semaphore-related
 * suspensions (not a new job) from genuine sporadic releases (job is
 * tardy => start a fresh job now), then requeue the task.
 */
static void gsnedf_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	lt_t now;

	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());

	raw_spin_lock_irqsave(&gsnedf_lock, flags);
	/* We need to take suspensions because of semaphores into
	 * account! If a job resumes after being suspended due to acquiring
	 * a semaphore, it should never be treated as a new job release.
	 */
	if (get_rt_flags(task) == RT_F_EXIT_SEM) {
		set_rt_flags(task, RT_F_RUNNING);
	} else {
		now = litmus_clock();
		if (is_tardy(task, now)) {
			/* new sporadic release */
			gsnedf_release_at(task, now);
			sched_trace_task_release(task);
		}
		else {
			/* NOTE(review): task->rt.time_slice is Linux RT-class
			 * state; presumably nonzero means the job suspended and
			 * resumed within its current period — TODO confirm. */
			if (task->rt.time_slice) {
				/* came back in time before deadline
				 */
				set_rt_flags(task, RT_F_RUNNING);
			}
		}
	}
	gsnedf_job_arrival(task);
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
}
716 | |||
717 | static void gsnedf_task_block(struct task_struct *t) | ||
718 | { | ||
719 | unsigned long flags; | ||
720 | |||
721 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
722 | |||
723 | /* unlink if necessary */ | ||
724 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
725 | unlink(t); | ||
726 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
727 | |||
728 | BUG_ON(!is_realtime(t)); | ||
729 | } | ||
730 | |||
731 | |||
732 | static void gsnedf_task_exit(struct task_struct * t) | ||
733 | { | ||
734 | unsigned long flags; | ||
735 | |||
736 | /* unlink if necessary */ | ||
737 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
738 | unlink(t); | ||
739 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
740 | gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; | ||
741 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
742 | } | ||
743 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
744 | |||
745 | BUG_ON(!is_realtime(t)); | ||
746 | TRACE_TASK(t, "RIP\n"); | ||
747 | } | ||
748 | |||
749 | |||
/* Admission test: GSN-EDF accepts every task unconditionally
 * (no schedulability check is performed here). Always returns 0.
 */
static long gsnedf_admit_task(struct task_struct* tsk)
{
	return 0;
}
754 | |||
755 | #ifdef CONFIG_LITMUS_LOCKING | ||
756 | |||
757 | #include <litmus/fdso.h> | ||
758 | |||
/* called with IRQs off */
/* Make t inherit prio_inh's priority and propagate the effective
 * priority increase into whichever structure currently holds t:
 * the CPU heap (if linked), the ready/release heaps (if queued),
 * or nothing (if t is in flight elsewhere).
 */
static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
{
	int linked_on;
	int check_preempt = 0;

	raw_spin_lock(&gsnedf_lock);

	TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
	tsk_rt(t)->inh_task = prio_inh;

	linked_on = tsk_rt(t)->linked_on;

	/* If it is scheduled, then we need to reorder the CPU heap. */
	if (linked_on != NO_CPU) {
		TRACE_TASK(t, "%s: linked on %d\n",
			   __FUNCTION__, linked_on);
		/* Holder is scheduled; need to re-order CPUs.
		 * We can't use heap_decrease() here since
		 * the cpu_heap is ordered in reverse direction, so
		 * it is actually an increase. */
		bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
			     gsnedf_cpus[linked_on]->hn);
		bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
			     gsnedf_cpus[linked_on]->hn);
	} else {
		/* holder may be queued: first stop queue changes */
		raw_spin_lock(&gsnedf.release_lock);
		if (is_queued(t)) {
			TRACE_TASK(t, "%s: is queued\n",
				   __FUNCTION__);
			/* We need to update the position of holder in some
			 * heap. Note that this could be a release heap if we
			 * budget enforcement is used and this job overran. */
			check_preempt =
				!bheap_decrease(edf_ready_order,
						tsk_rt(t)->heap_node);
		} else {
			/* Nothing to do: if it is not queued and not linked
			 * then it is either sleeping or currently being moved
			 * by other code (e.g., a timer interrupt handler) that
			 * will use the correct priority when enqueuing the
			 * task. */
			TRACE_TASK(t, "%s: is NOT queued => Done.\n",
				   __FUNCTION__);
		}
		raw_spin_unlock(&gsnedf.release_lock);

		/* If holder was enqueued in a release heap, then the following
		 * preemption check is pointless, but we can't easily detect
		 * that case. If you want to fix this, then consider that
		 * simply adding a state flag requires O(n) time to update when
		 * releasing n tasks, which conflicts with the goal to have
		 * O(log n) merges. */
		if (check_preempt) {
			/* heap_decrease() hit the top level of the heap: make
			 * sure preemption checks get the right task, not the
			 * potentially stale cache. */
			bheap_uncache_min(edf_ready_order,
					  &gsnedf.ready_queue);
			check_for_preemptions();
		}
	}

	raw_spin_unlock(&gsnedf_lock);
}
825 | |||
/* called with IRQs off */
/* Called when t releases a resource: drop any inherited priority,
 * refresh t's split-job sub-deadline, and requeue t so a potentially
 * lower effective priority takes effect.
 */
static void update_unlocked_priority(struct task_struct* t)
{
	raw_spin_lock(&gsnedf_lock);

	/* A job only stops inheriting a priority when it releases a
	 * resource. Thus we can make the following assumption.*/
	BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);

	/* Clear priority inheritance */
	TRACE_TASK(t, "priority restored\n");
	tsk_rt(t)->inh_task = NULL;

	/* Update splitting deadline */
	tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);

	/* Check if rescheduling is necessary. We can't use heap_decrease()
	 * since the priority was effectively lowered. */
	unlink(t);
	gsnedf_job_arrival(t);

	raw_spin_unlock(&gsnedf_lock);
}
849 | |||
850 | |||
851 | /* ******************** FMLP support ********************** */ | ||
852 | |||
/* struct for semaphore with priority inheritance */
/* Protected by wait.lock; owner additionally readable without the lock
 * by the owner itself (see gsnedf_fmlp_lock). */
struct fmlp_semaphore {
	struct litmus_lock litmus_lock;

	/* current resource holder */
	struct task_struct *owner;

	/* highest-priority waiter */
	struct task_struct *hp_waiter;

	/* FIFO queue of waiting tasks */
	wait_queue_head_t wait;
};
866 | |||
867 | static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock) | ||
868 | { | ||
869 | return container_of(lock, struct fmlp_semaphore, litmus_lock); | ||
870 | } | ||
871 | |||
872 | /* caller is responsible for locking */ | ||
873 | static struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem, | ||
874 | struct task_struct* skip) | ||
875 | { | ||
876 | struct list_head *pos; | ||
877 | struct task_struct *queued, *found = NULL; | ||
878 | |||
879 | list_for_each(pos, &sem->wait.task_list) { | ||
880 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
881 | task_list)->private; | ||
882 | |||
883 | /* Compare task prios, find high prio task. */ | ||
884 | if (queued != skip && edf_higher_prio(queued, found)) | ||
885 | found = queued; | ||
886 | } | ||
887 | return found; | ||
888 | } | ||
889 | |||
/* Acquire an FMLP semaphore. Real-time tasks only. If the resource is
 * held, the caller is appended to a FIFO wait queue and suspends; the
 * releasing task hands ownership over directly, so no re-check is
 * needed after wake-up. May propagate priority inheritance to the
 * current owner. Returns 0 on success, -EPERM for non-RT callers.
 */
int gsnedf_fmlp_lock(struct litmus_lock* l)
{
	struct task_struct* t = current;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	cpu_entry_t* entry;
	wait_queue_t wait;
	unsigned long flags;

	if (!is_realtime(t))
		return -EPERM;

	spin_lock_irqsave(&sem->wait.lock, flags);
	entry = &__get_cpu_var(gsnedf_cpu_entries);

	/* no split-job enforcement while inside a critical section */
	tsk_rt(t)->in_crit_section = 1;
	if (entry->timer_armed) {
		cancel_split_timer(entry);
	}

	if (sem->owner) {
		/* resource is not free => must suspend and wait */

		init_waitqueue_entry(&wait, t);

		/* FIXME: interruptible would be nice some day */
		set_task_state(t, TASK_UNINTERRUPTIBLE);

		__add_wait_queue_tail_exclusive(&sem->wait, &wait);

		/* check if we need to activate priority inheritance */
		if (edf_higher_prio(t, sem->hp_waiter)) {
			sem->hp_waiter = t;
			if (edf_higher_prio(t, sem->owner))
				set_priority_inheritance(sem->owner, sem->hp_waiter);
		}

		TS_LOCK_SUSPEND;

		/* release lock before sleeping */
		spin_unlock_irqrestore(&sem->wait.lock, flags);

		/* We depend on the FIFO order. Thus, we don't need to recheck
		 * when we wake up; we are guaranteed to have the lock since
		 * there is only one wake up per release.
		 */

		schedule();

		TS_LOCK_RESUME;

		/* Since we hold the lock, no other task will change
		 * ->owner. We can thus check it without acquiring the spin
		 * lock. */
		BUG_ON(sem->owner != t);
	} else {
		/* it's ours now */
		sem->owner = t;

		spin_unlock_irqrestore(&sem->wait.lock, flags);
	}

	return 0;
}
953 | |||
/* Release an FMLP semaphore. Ownership is handed directly to the first
 * FIFO waiter (if any), which inherits the highest-priority waiter's
 * priority unless it is the highest-priority waiter itself. The caller
 * then sheds any inherited priority. Returns 0, or -EINVAL if the
 * caller does not own the semaphore.
 */
int gsnedf_fmlp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	unsigned long flags;
	int err = 0;

	spin_lock_irqsave(&sem->wait.lock, flags);

	if (sem->owner != t) {
		err = -EINVAL;
		goto out;
	}

	/* check if there are jobs waiting for this resource */
	next = __waitqueue_remove_first(&sem->wait);
	if (next) {
		/* next becomes the resouce holder */
		sem->owner = next;
		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);

		/* determine new hp_waiter if necessary */
		if (next == sem->hp_waiter) {
			TRACE_TASK(next, "was highest-prio waiter\n");
			/* next has the highest priority --- it doesn't need to
			 * inherit. However, we need to make sure that the
			 * next-highest priority in the queue is reflected in
			 * hp_waiter. */
			sem->hp_waiter = find_hp_waiter(sem, next);
			if (sem->hp_waiter)
				TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
			else
				TRACE("no further waiters\n");
		} else {
			/* Well, if next is not the highest-priority waiter,
			 * then it ought to inherit the highest-priority
			 * waiter's priority. */
			set_priority_inheritance(next, sem->hp_waiter);
		}

		/* wake up next */
		wake_up_process(next);
	} else
		/* becomes available */
		sem->owner = NULL;

	/* We are no longer in a critical section */
	tsk_rt(t)->in_crit_section = 0;

	/* we lose the benefit of priority inheritance (if any) and may need
	 * to move the deadline. In either case, may need to reschedule
	 * due to reduced priority. */
	if (tsk_rt(t)->inh_task || needs_deadline_move(t))
		update_unlocked_priority(t);
	/* TODO: Check that schedule() gets called - it needs to arm the
	 * enforcement timer. Otherwise we should do it here or in
	 * update_unlocked_priority. */

out:
	spin_unlock_irqrestore(&sem->wait.lock, flags);

	return err;
}
1017 | |||
1018 | int gsnedf_fmlp_close(struct litmus_lock* l) | ||
1019 | { | ||
1020 | struct task_struct *t = current; | ||
1021 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
1022 | unsigned long flags; | ||
1023 | |||
1024 | int owner; | ||
1025 | |||
1026 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
1027 | |||
1028 | owner = sem->owner == t; | ||
1029 | |||
1030 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
1031 | |||
1032 | if (owner) | ||
1033 | gsnedf_fmlp_unlock(l); | ||
1034 | |||
1035 | return 0; | ||
1036 | } | ||
1037 | |||
/* Deallocate hook: free the enclosing FMLP semaphore. */
void gsnedf_fmlp_free(struct litmus_lock* lock)
{
	struct fmlp_semaphore *sem = fmlp_from_lock(lock);

	kfree(sem);
}
1042 | |||
/* Operations table hooking the FMLP implementation into the generic
 * LITMUS^RT locking layer. */
static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
	.close = gsnedf_fmlp_close,
	.lock = gsnedf_fmlp_lock,
	.unlock = gsnedf_fmlp_unlock,
	.deallocate = gsnedf_fmlp_free,
};
1049 | |||
1050 | static struct litmus_lock* gsnedf_new_fmlp(void) | ||
1051 | { | ||
1052 | struct fmlp_semaphore* sem; | ||
1053 | |||
1054 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
1055 | if (!sem) | ||
1056 | return NULL; | ||
1057 | |||
1058 | sem->owner = NULL; | ||
1059 | sem->hp_waiter = NULL; | ||
1060 | init_waitqueue_head(&sem->wait); | ||
1061 | sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops; | ||
1062 | |||
1063 | return &sem->litmus_lock; | ||
1064 | } | ||
1065 | |||
1066 | /* **** lock constructor **** */ | ||
1067 | |||
1068 | |||
1069 | static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, | ||
1070 | void* __user unused) | ||
1071 | { | ||
1072 | int err = -ENXIO; | ||
1073 | |||
1074 | /* GSN-EDF currently only supports the FMLP for global resources. */ | ||
1075 | switch (type) { | ||
1076 | |||
1077 | case FMLP_SEM: | ||
1078 | /* Flexible Multiprocessor Locking Protocol */ | ||
1079 | *lock = gsnedf_new_fmlp(); | ||
1080 | if (*lock) | ||
1081 | err = 0; | ||
1082 | else | ||
1083 | err = -ENOMEM; | ||
1084 | break; | ||
1085 | |||
1086 | }; | ||
1087 | |||
1088 | return err; | ||
1089 | } | ||
1090 | |||
1091 | #endif | ||
1092 | |||
1093 | |||
/* Plugin activation: reset the CPU heap and re-initialize all online
 * CPUs' per-CPU state. With CONFIG_RELEASE_MASTER, the designated
 * release-master CPU is excluded from the scheduling heap. Returns 0.
 */
static long gsnedf_activate_plugin(void)
{
	int cpu;
	cpu_entry_t *entry;

	bheap_init(&gsnedf_cpu_heap);
#ifdef CONFIG_RELEASE_MASTER
	gsnedf.release_master = atomic_read(&release_master_cpu);
#endif

	for_each_online_cpu(cpu) {
		entry = &per_cpu(gsnedf_cpu_entries, cpu);
		bheap_node_init(&entry->hn, entry);
		entry->linked = NULL;
		entry->scheduled = NULL;
#ifdef CONFIG_RELEASE_MASTER
		if (cpu != gsnedf.release_master) {
#endif
			TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
			update_cpu_position(entry);
#ifdef CONFIG_RELEASE_MASTER
		} else {
			TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
		}
#endif
	}
	return 0;
}
1122 | |||
/* Plugin object */
/* The hook table through which the LITMUS^RT core invokes the GSN-EDF
 * scheduling policy. */
static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
	.plugin_name = "GSN-EDF",
	.finish_switch = gsnedf_finish_switch,
	.tick = gsnedf_tick,
	.task_new = gsnedf_task_new,
	.complete_job = complete_job,
	.task_exit = gsnedf_task_exit,
	.schedule = gsnedf_schedule,
	.release_at = gsnedf_release_at,
	.task_wake_up = gsnedf_task_wake_up,
	.task_block = gsnedf_task_block,
	.admit_task = gsnedf_admit_task,
	.activate_plugin = gsnedf_activate_plugin,
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock = gsnedf_allocate_lock,
#endif
};
1141 | |||
1142 | |||
1143 | static int __init init_gsn_edf(void) | ||
1144 | { | ||
1145 | int cpu; | ||
1146 | cpu_entry_t *entry; | ||
1147 | |||
1148 | bheap_init(&gsnedf_cpu_heap); | ||
1149 | /* initialize CPU state */ | ||
1150 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
1151 | entry = &per_cpu(gsnedf_cpu_entries, cpu); | ||
1152 | gsnedf_cpus[cpu] = entry; | ||
1153 | entry->cpu = cpu; | ||
1154 | entry->hn = &gsnedf_heap_node[cpu]; | ||
1155 | hrtimer_init(&entry->split_timer, | ||
1156 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
1157 | entry->split_timer.function = on_split_timeout; | ||
1158 | bheap_node_init(&entry->hn, entry); | ||
1159 | } | ||
1160 | edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); | ||
1161 | return register_sched_plugin(&gsn_edf_plugin); | ||
1162 | } | ||
1163 | |||
1164 | |||
1165 | module_init(init_gsn_edf); | ||
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c new file mode 100644 index 000000000000..5877307a996b --- /dev/null +++ b/litmus/sched_litmus.c | |||
@@ -0,0 +1,328 @@ | |||
1 | /* This file is included from kernel/sched.c */ | ||
2 | |||
3 | #include <litmus/litmus.h> | ||
4 | #include <litmus/budget.h> | ||
5 | #include <litmus/sched_plugin.h> | ||
6 | #include <litmus/preempt.h> | ||
7 | |||
8 | static void update_time_litmus(struct rq *rq, struct task_struct *p) | ||
9 | { | ||
10 | u64 delta = rq->clock - p->se.exec_start; | ||
11 | if (unlikely((s64)delta < 0)) | ||
12 | delta = 0; | ||
13 | /* per job counter */ | ||
14 | p->rt_param.job_params.exec_time += delta; | ||
15 | /* task counter */ | ||
16 | p->se.sum_exec_runtime += delta; | ||
17 | /* sched_clock() */ | ||
18 | p->se.exec_start = rq->clock; | ||
19 | cpuacct_charge(p, delta); | ||
20 | } | ||
21 | |||
22 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
23 | static void double_rq_unlock(struct rq *rq1, struct rq *rq2); | ||
24 | |||
25 | /* | ||
26 | * litmus_tick gets called by scheduler_tick() with HZ freq | ||
27 | * Interrupts are disabled | ||
28 | */ | ||
29 | static void litmus_tick(struct rq *rq, struct task_struct *p) | ||
30 | { | ||
31 | TS_PLUGIN_TICK_START; | ||
32 | |||
33 | if (is_realtime(p)) | ||
34 | update_time_litmus(rq, p); | ||
35 | |||
36 | /* plugin tick */ | ||
37 | litmus->tick(p); | ||
38 | |||
39 | TS_PLUGIN_TICK_END; | ||
40 | |||
41 | return; | ||
42 | } | ||
43 | |||
/* Ask the active plugin for the next task and, if it lives on another
 * runqueue, migrate it here. Migration requires dropping rq->lock, so
 * prev's state is re-validated afterwards; a suspected cross-migration
 * deadlock (waiting > 10ms for the task's stack) abandons the pick.
 * Called with rq->lock held; returns with rq->lock held.
 */
static struct task_struct *
litmus_schedule(struct rq *rq, struct task_struct *prev)
{
	struct rq* other_rq;
	struct task_struct *next;

	long was_running;
	lt_t _maybe_deadlock = 0;

	/* let the plugin schedule */
	next = litmus->schedule(prev);

	sched_state_plugin_check();

	/* check if a global plugin pulled a task from a different RQ */
	if (next && task_rq(next) != rq) {
		/* we need to migrate the task */
		other_rq = task_rq(next);
		TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);

		/* while we drop the lock, the prev task could change its
		 * state
		 */
		was_running = is_running(prev);
		mb();
		raw_spin_unlock(&rq->lock);

		/* Don't race with a concurrent switch. This could deadlock in
		 * the case of cross or circular migrations. It's the job of
		 * the plugin to make sure that doesn't happen.
		 */
		TRACE_TASK(next, "stack_in_use=%d\n",
			   next->rt_param.stack_in_use);
		if (next->rt_param.stack_in_use != NO_CPU) {
			TRACE_TASK(next, "waiting to deschedule\n");
			_maybe_deadlock = litmus_clock();
		}
		/* busy-wait until next's stack is free on its old CPU */
		while (next->rt_param.stack_in_use != NO_CPU) {
			cpu_relax();
			mb();
			if (next->rt_param.stack_in_use == NO_CPU)
				TRACE_TASK(next,"descheduled. Proceeding.\n");

			if (lt_before(_maybe_deadlock + 10000000,
				      litmus_clock())) {
				/* We've been spinning for 10ms.
				 * Something can't be right!
				 * Let's abandon the task and bail out; at least
				 * we will have debug info instead of a hard
				 * deadlock.
				 */
				TRACE_TASK(next,"stack too long in use. "
					   "Deadlock?\n");
				next = NULL;

				/* bail out */
				raw_spin_lock(&rq->lock);
				return next;
			}
		}
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
		if (next->oncpu)
			TRACE_TASK(next, "waiting for !oncpu");
		while (next->oncpu) {
			cpu_relax();
			mb();
		}
#endif
		double_rq_lock(rq, other_rq);
		mb();
		if (is_realtime(prev) && is_running(prev) != was_running) {
			TRACE_TASK(prev,
				   "state changed while we dropped"
				   " the lock: is_running=%d, was_running=%d\n",
				   is_running(prev), was_running);
			if (is_running(prev) && !was_running) {
				/* prev task became unblocked
				 * we need to simulate normal sequence of events
				 * to scheduler plugins.
				 */
				litmus->task_block(prev);
				litmus->task_wake_up(prev);
			}
		}

		set_task_cpu(next, smp_processor_id());

		/* DEBUG: now that we have the lock we need to make sure a
		 * couple of things still hold:
		 * - it is still a real-time task
		 * - it is still runnable (could have been stopped)
		 * If either is violated, then the active plugin is
		 * doing something wrong.
		 */
		if (!is_realtime(next) || !is_running(next)) {
			/* BAD BAD BAD */
			TRACE_TASK(next,"BAD: migration invariant FAILED: "
				   "rt=%d running=%d\n",
				   is_realtime(next),
				   is_running(next));
			/* drop the task */
			next = NULL;
		}
		/* release the other CPU's runqueue, but keep ours */
		raw_spin_unlock(&other_rq->lock);
	}
	if (next) {
		/* claim this CPU's stack for next and restart accounting */
		next->rt_param.stack_in_use = rq->cpu;
		next->se.exec_start = rq->clock;
	}

	update_enforcement_timer(next);
	return next;
}
158 | |||
159 | static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, | ||
160 | int flags) | ||
161 | { | ||
162 | if (flags & ENQUEUE_WAKEUP) { | ||
163 | sched_trace_task_resume(p); | ||
164 | tsk_rt(p)->present = 1; | ||
165 | /* LITMUS^RT plugins need to update the state | ||
166 | * _before_ making it available in global structures. | ||
167 | * Linux gets away with being lazy about the task state | ||
168 | * update. We can't do that, hence we update the task | ||
169 | * state already here. | ||
170 | * | ||
171 | * WARNING: this needs to be re-evaluated when porting | ||
172 | * to newer kernel versions. | ||
173 | */ | ||
174 | p->state = TASK_RUNNING; | ||
175 | litmus->task_wake_up(p); | ||
176 | |||
177 | rq->litmus.nr_running++; | ||
178 | } else | ||
179 | TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); | ||
180 | } | ||
181 | |||
182 | static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, | ||
183 | int flags) | ||
184 | { | ||
185 | if (flags & DEQUEUE_SLEEP) { | ||
186 | litmus->task_block(p); | ||
187 | tsk_rt(p)->present = 0; | ||
188 | sched_trace_task_block(p); | ||
189 | |||
190 | rq->litmus.nr_running--; | ||
191 | } else | ||
192 | TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n"); | ||
193 | } | ||
194 | |||
/* sched_yield() handler: force a pass through the plugin's schedule()
 * so a delayed preemption can take effect.
 */
static void yield_task_litmus(struct rq *rq)
{
	/* timestamp the syscall entry for overhead tracing */
	TS_SYSCALL_IN_START;

	TS_SYSCALL_IN_END;

	TRACE_CUR("yields\n");

	BUG_ON(rq->curr != current);
	/* sched_yield() is called to trigger delayed preemptions.
	 * Thus, mark the current task as needing to be rescheduled.
	 * This will cause the scheduler plugin to be invoked, which can
	 * then determine if a preemption is still required.
	 */
	clear_exit_np(current);
	litmus_reschedule_local();

	TS_SYSCALL_OUT_START;
}
214 | |||
/* Plugins are responsible for this.
 */
static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
{
}

/* Nothing to clean up on switch-out; litmus_schedule() does the
 * bookkeeping. */
static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
{
}

/* Invoked (runqueue locked) before schedule() picks the next task:
 * charge the outgoing task's execution time, and if it is no longer
 * runnable, mark it not present so plugins skip it. */
static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev)
{
	update_time_litmus(rq, prev);
	if (!is_running(prev))
		tsk_rt(prev)->present = 0;
}
231 | |||
232 | /* pick_next_task_litmus() - litmus_schedule() function | ||
233 | * | ||
234 | * return the next task to be scheduled | ||
235 | */ | ||
236 | static struct task_struct *pick_next_task_litmus(struct rq *rq) | ||
237 | { | ||
238 | /* get the to-be-switched-out task (prev) */ | ||
239 | struct task_struct *prev = rq->litmus.prev; | ||
240 | struct task_struct *next; | ||
241 | |||
242 | /* if not called from schedule() but from somewhere | ||
243 | * else (e.g., migration), return now! | ||
244 | */ | ||
245 | if(!rq->litmus.prev) | ||
246 | return NULL; | ||
247 | |||
248 | rq->litmus.prev = NULL; | ||
249 | |||
250 | TS_PLUGIN_SCHED_START; | ||
251 | next = litmus_schedule(rq, prev); | ||
252 | TS_PLUGIN_SCHED_END; | ||
253 | |||
254 | return next; | ||
255 | } | ||
256 | |||
static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
{
	/* nothing to do; tick related tasks are done by litmus_tick() */
	return;
}

/* Class-transition hook: LITMUS^RT learns about transitions through its
 * own admission path, so nothing to do here. */
static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running)
{
}

/* Priorities are managed by the active plugin, not Linux prio values. */
static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
				int oldprio, int running)
{
}

unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
{
	/* return infinity */
	return 0;
}
277 | |||
/* This is called when a task became a real-time task, either due to a SCHED_*
 * class transition or due to PI mutex inheritance. We don't handle Linux PI
 * mutex inheritance yet (and probably never will). Use LITMUS provided
 * synchronization primitives instead.
 */
static void set_curr_task_litmus(struct rq *rq)
{
	/* restart execution-time accounting for the current task */
	rq->curr->se.exec_start = rq->clock;
}


#ifdef CONFIG_SMP
/* execve tries to rebalance task in this scheduling domain.
 * We don't care about the scheduling domain; this can get called from
 * exec, fork, wakeup.
 */
static int select_task_rq_litmus(struct rq *rq, struct task_struct *p,
				 int sd_flag, int flags)
{
	/* preemption is already disabled.
	 * We don't want to change cpu here
	 */
	return task_cpu(p);
}
#endif
303 | |||
/* The LITMUS^RT scheduling class. It is linked above the RT class via
 * .next so that LITMUS^RT tasks always take precedence over SCHED_FIFO/
 * SCHED_RR tasks. Unused hooks are wired to empty stubs above. */
static const struct sched_class litmus_sched_class = {
	.next			= &rt_sched_class,
	.enqueue_task		= enqueue_task_litmus,
	.dequeue_task		= dequeue_task_litmus,
	.yield_task		= yield_task_litmus,

	.check_preempt_curr	= check_preempt_curr_litmus,

	.pick_next_task		= pick_next_task_litmus,
	.put_prev_task		= put_prev_task_litmus,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_litmus,

	.pre_schedule		= pre_schedule_litmus,
#endif

	.set_curr_task		= set_curr_task_litmus,
	.task_tick		= task_tick_litmus,

	.get_rr_interval	= get_rr_interval_litmus,

	.prio_changed		= prio_changed_litmus,
	.switched_to		= switched_to_litmus,
};
diff --git a/litmus/sched_litmus.c.rej b/litmus/sched_litmus.c.rej new file mode 100644 index 000000000000..e0750ecbe7a2 --- /dev/null +++ b/litmus/sched_litmus.c.rej | |||
@@ -0,0 +1,11 @@ | |||
1 | --- litmus/sched_litmus.c | ||
2 | +++ litmus/sched_litmus.c | ||
3 | @@ -196,7 +196,7 @@ | ||
4 | { | ||
5 | TS_SYSCALL_IN_START; | ||
6 | |||
7 | - TS_SYSCALL_OUT_END; | ||
8 | + TS_SYSCALL_IN_END; | ||
9 | |||
10 | TRACE_CUR("yields\n"); | ||
11 | |||
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c new file mode 100644 index 000000000000..c95bde87b5d7 --- /dev/null +++ b/litmus/sched_pfair.c | |||
@@ -0,0 +1,1056 @@ | |||
1 | /* | ||
2 | * kernel/sched_pfair.c | ||
3 | * | ||
4 | * Implementation of the PD^2 pfair scheduling algorithm. This | ||
5 | * implementation realizes "early releasing," i.e., it is work-conserving. | ||
6 | * | ||
7 | */ | ||
8 | |||
9 | #include <asm/div64.h> | ||
10 | #include <linux/delay.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/spinlock.h> | ||
13 | #include <linux/percpu.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/list.h> | ||
16 | #include <linux/slab.h> | ||
17 | |||
18 | #include <litmus/litmus.h> | ||
19 | #include <litmus/jobs.h> | ||
20 | #include <litmus/preempt.h> | ||
21 | #include <litmus/rt_domain.h> | ||
22 | #include <litmus/sched_plugin.h> | ||
23 | #include <litmus/sched_trace.h> | ||
24 | |||
25 | #include <litmus/bheap.h> | ||
26 | |||
27 | /* to configure the cluster size */ | ||
28 | #include <litmus/litmus_proc.h> | ||
29 | |||
30 | #include <litmus/clustered.h> | ||
31 | |||
32 | static enum cache_level pfair_cluster_level = GLOBAL_CLUSTER; | ||
33 | |||
/* One PD^2 "window" of a job: all fields are quantum indices relative to
 * the job release. */
struct subtask {
	/* measured in quanta relative to job release */
	quanta_t release;
	quanta_t deadline;
	quanta_t overlap; /* called "b bit" by PD^2 */
	quanta_t group_deadline;
};

/* Per-task Pfair state, allocated together with its subtask table. */
struct pfair_param   {
	quanta_t	quanta;       /* number of subtasks */
	quanta_t	cur;          /* index of current subtask */

	quanta_t	release;      /* in quanta */
	quanta_t	period;       /* in quanta */

	quanta_t	last_quantum; /* when scheduled last */
	int		last_cpu;     /* where scheduled last */

	struct pfair_cluster* cluster; /* where this task is scheduled */

	struct subtask subtasks[0];   /* allocate together with pfair_param */
};

#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)

/* Per-CPU scheduling state. */
struct pfair_state {
	struct cluster_cpu topology;

	volatile quanta_t cur_tick;    /* updated by the CPU that is advancing
				        * the time */
	volatile quanta_t local_tick;  /* What tick is the local CPU currently
				        * executing? Updated only by the local
				        * CPU. In QEMU, this may lag behind the
				        * current tick. In a real system, with
				        * proper timers and aligned quanta,
				        * that should only be the case for a
				        * very short time after the time
				        * advanced. With staggered quanta, it
				        * will lag for the duration of the
				        * offset.
				        */

	struct task_struct* linked;    /* the task that should be executing */
	struct task_struct* local;     /* the local copy of linked */
	struct task_struct* scheduled; /* what is actually scheduled */

	lt_t offset;			/* stagger offset */
	unsigned int missed_updates;	/* stats: linked != local at a boundary */
	unsigned int missed_quanta;	/* stats: boundaries not acked in time */
};

/* One Pfair scheduling domain (all CPUs when clustering is GLOBAL). */
struct pfair_cluster {
	struct scheduling_cluster topology;

	/* The "global" time in this cluster. */
	quanta_t pfair_time; /* the "official" PFAIR clock */

	/* The ready queue for this cluster. */
	rt_domain_t pfair;

	/* The set of jobs that should have their release enacted at the next
	 * quantum boundary.
	 */
	struct bheap release_queue;
	raw_spinlock_t release_lock;
};
100 | |||
/* Map a CPU's state back to its enclosing cluster. */
static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
{
	return container_of(state->topology.cluster, struct pfair_cluster, topology);
}

/* Numeric CPU id of a per-CPU state record. */
static inline int cpu_id(struct pfair_state* state)
{
	return state->topology.id;
}

/* Recover the pfair_state from its position in a cluster's CPU list. */
static inline struct pfair_state* from_cluster_list(struct list_head* pos)
{
	return list_entry(pos, struct pfair_state, topology.cluster_list);
}

/* Recover the cluster from its embedded rt_domain (used by callbacks). */
static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
{
	return container_of(rt, struct pfair_cluster, pfair);
}

static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
{
	/* The ready_lock is used to serialize all scheduling events. */
	return &cluster->pfair.ready_lock;
}

/* Convenience: the cluster lock protecting a given CPU. */
static inline raw_spinlock_t* cpu_lock(struct pfair_state* state)
{
	return cluster_lock(cpu_cluster(state));
}
131 | |||
132 | DEFINE_PER_CPU(struct pfair_state, pfair_state); | ||
133 | struct pfair_state* *pstate; /* short cut */ | ||
134 | |||
135 | static struct pfair_cluster* pfair_clusters; | ||
136 | static int num_pfair_clusters; | ||
137 | |||
138 | /* Enable for lots of trace info. | ||
139 | * #define PFAIR_DEBUG | ||
140 | */ | ||
141 | |||
142 | #ifdef PFAIR_DEBUG | ||
143 | #define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args) | ||
144 | #define PTRACE(f, args...) TRACE(f, ## args) | ||
145 | #else | ||
146 | #define PTRACE_TASK(t, f, args...) | ||
147 | #define PTRACE(f, args...) | ||
148 | #endif | ||
149 | |||
150 | /* gcc will inline all of these accessor functions... */ | ||
/* Accessor for the subtask window currently being executed. */
static struct subtask* cur_subtask(struct task_struct* t)
{
	return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
}

/* Absolute (quantum) deadline of the current subtask. */
static quanta_t cur_deadline(struct task_struct* t)
{
	return cur_subtask(t)->deadline + tsk_pfair(t)->release;
}

static quanta_t cur_release(struct task_struct* t)
{
	/* This is early releasing: only the release of the first subtask
	 * counts. */
	return tsk_pfair(t)->release;
}

/* The PD^2 "b bit" of the current subtask. */
static quanta_t cur_overlap(struct task_struct* t)
{
	return cur_subtask(t)->overlap;
}

/* Absolute group deadline; 0 encodes "none" and stays 0. */
static quanta_t cur_group_deadline(struct task_struct* t)
{
	quanta_t gdl = cur_subtask(t)->group_deadline;
	if (gdl)
		return gdl + tsk_pfair(t)->release;
	else
		return gdl;
}
181 | |||
182 | |||
183 | static int pfair_higher_prio(struct task_struct* first, | ||
184 | struct task_struct* second) | ||
185 | { | ||
186 | return /* first task must exist */ | ||
187 | first && ( | ||
188 | /* Does the second task exist and is it a real-time task? If | ||
189 | * not, the first task (which is a RT task) has higher | ||
190 | * priority. | ||
191 | */ | ||
192 | !second || !is_realtime(second) || | ||
193 | |||
194 | /* Is the (subtask) deadline of the first task earlier? | ||
195 | * Then it has higher priority. | ||
196 | */ | ||
197 | time_before(cur_deadline(first), cur_deadline(second)) || | ||
198 | |||
199 | /* Do we have a deadline tie? | ||
200 | * Then break by B-bit. | ||
201 | */ | ||
202 | (cur_deadline(first) == cur_deadline(second) && | ||
203 | (cur_overlap(first) > cur_overlap(second) || | ||
204 | |||
205 | /* Do we have a B-bit tie? | ||
206 | * Then break by group deadline. | ||
207 | */ | ||
208 | (cur_overlap(first) == cur_overlap(second) && | ||
209 | (time_after(cur_group_deadline(first), | ||
210 | cur_group_deadline(second)) || | ||
211 | |||
212 | /* Do we have a group deadline tie? | ||
213 | * Then break by PID, which are unique. | ||
214 | */ | ||
215 | (cur_group_deadline(first) == | ||
216 | cur_group_deadline(second) && | ||
217 | first->pid < second->pid)))))); | ||
218 | } | ||
219 | |||
/* bheap comparison callback wrapping pfair_higher_prio(). */
int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
{
	return pfair_higher_prio(bheap2task(a), bheap2task(b));
}

/* rt_domain release callback: stage released jobs; they are merged into
 * the ready queue at the next quantum boundary by poll_releases(). */
static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
	struct pfair_cluster* cluster = from_domain(rt);
	unsigned long flags;

	raw_spin_lock_irqsave(&cluster->release_lock, flags);

	bheap_union(pfair_ready_order, &cluster->release_queue, tasks);

	raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
}

/* Reset a task to its first subtask, released at quantum 'at'. */
static void prepare_release(struct task_struct* t, quanta_t at)
{
	tsk_pfair(t)->release    = at;
	tsk_pfair(t)->cur        = 0;
}

/* pull released tasks from the release queue */
static void poll_releases(struct pfair_cluster* cluster)
{
	raw_spin_lock(&cluster->release_lock);
	__merge_ready(&cluster->pfair, &cluster->release_queue);
	raw_spin_unlock(&cluster->release_lock);
}
250 | |||
/* Kick the CPU where 't' is (or should be) running if its linkage and
 * its actual scheduling placement disagree. */
static void check_preempt(struct task_struct* t)
{
	int cpu = NO_CPU;
	if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
	    tsk_rt(t)->present) {
		/* the task can be scheduled and
		 * is not scheduled where it ought to be scheduled
		 */
		cpu = tsk_rt(t)->linked_on != NO_CPU ?
			tsk_rt(t)->linked_on         :
			tsk_rt(t)->scheduled_on;
		PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
			   tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
		/* preempt */
		litmus_reschedule(cpu);
	}
}

/* caller must hold pfair.ready_lock */
static void drop_all_references(struct task_struct *t)
{
	int cpu;
	struct pfair_state* s;
	struct pfair_cluster* cluster;
	if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
		/* It must be in the ready queue; drop references isn't called
		 * when the job is in a release queue. */
		cluster = tsk_pfair(t)->cluster;
		bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
			    tsk_rt(t)->heap_node);
	}
	/* NOTE(review): this assumes online CPU ids are contiguous in
	 * [0, num_online_cpus()) — confirm, or use for_each_online_cpu(). */
	for (cpu = 0; cpu < num_online_cpus(); cpu++) {
		s = &per_cpu(pfair_state, cpu);
		if (s->linked == t)
			s->linked = NULL;
		if (s->local  == t)
			s->local  = NULL;
		if (s->scheduled  == t)
			s->scheduled = NULL;
	}
}

/* Advance a task's budget to its next period and mark it runnable. */
static void pfair_prepare_next_period(struct task_struct* t)
{
	struct pfair_param* p = tsk_pfair(t);

	prepare_for_next_period(t);
	get_rt_flags(t) = RT_F_RUNNING;
	p->release += p->period;
}
301 | |||
/* returns 1 if the task needs to go the release queue */
static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
{
	struct pfair_param* p = tsk_pfair(t);
	int to_relq;
	p->cur = (p->cur + 1) % p->quanta;
	if (!p->cur) {
		/* wrapped around: the job consumed its last subtask */
		if (tsk_rt(t)->present) {
			/* The job overran; we start a new budget allocation. */
			pfair_prepare_next_period(t);
		} else {
			/* remove task from system until it wakes */
			drop_all_references(t);
			TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
				   cpu, p->cur);
			return 0;
		}
	}
	to_relq = time_after(cur_release(t), time);
	TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n",
		   cpu, p->cur, to_relq, cur_release(t), time);
	return to_relq;
}

/* Advance every linked task in the cluster past the quantum boundary.
 * Caller holds the cluster lock (called from schedule_next_quantum()). */
static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
{
	struct task_struct* l;
	struct pfair_param* p;
	struct list_head* pos;
	struct pfair_state* cpu;

	list_for_each(pos, &cluster->topology.cpus) {
		cpu = from_cluster_list(pos);
		l = cpu->linked;
		cpu->missed_updates += cpu->linked != cpu->local;
		if (l) {
			p = tsk_pfair(l);
			p->last_quantum = time;
			p->last_cpu     =  cpu_id(cpu);
			if (advance_subtask(time, l, cpu_id(cpu))) {
				//cpu->linked = NULL;
				PTRACE_TASK(l, "should go to release queue. "
					    "scheduled_on=%d present=%d\n",
					    tsk_rt(l)->scheduled_on,
					    tsk_rt(l)->present);
			}
		}
	}
}

/* Pick the CPU a task should be linked to, preferring (1) where it is
 * already scheduled and (2) its previous CPU for back-to-back quanta. */
static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
{
	int cpu;
	if (tsk_rt(t)->scheduled_on != NO_CPU) {
		/* always observe scheduled_on linkage */
		default_cpu = tsk_rt(t)->scheduled_on;
	} else if (tsk_pfair(t)->last_quantum == time - 1) {
		/* back2back quanta */
		/* Only observe last_quantum if no scheduled_on is in the way.
		 * This should only kick in if a CPU missed quanta, and that
		 * *should* only happen in QEMU.
		 */
		cpu = tsk_pfair(t)->last_cpu;
		if (!pstate[cpu]->linked ||
		    tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
			default_cpu = cpu;
		}
	}
	return default_cpu;
}
372 | |||
/* returns one if linking was redirected */
static int pfair_link(quanta_t time, int cpu,
		      struct task_struct* t)
{
	int target = target_cpu(time, t, cpu);
	struct task_struct* prev  = pstate[cpu]->linked;
	struct task_struct* other;
	struct pfair_cluster* cluster = cpu_cluster(pstate[cpu]);

	if (target != cpu) {
		/* redirection: 't' prefers a different CPU in this cluster */
		BUG_ON(pstate[target]->topology.cluster != pstate[cpu]->topology.cluster);
		other = pstate[target]->linked;
		pstate[target]->linked = t;
		tsk_rt(t)->linked_on   = target;
		if (!other)
			/* linked ok, but reschedule this CPU */
			return 1;
		if (target < cpu) {
			/* link other to cpu instead */
			tsk_rt(other)->linked_on = cpu;
			pstate[cpu]->linked      = other;
			if (prev) {
				/* prev got pushed back into the ready queue */
				tsk_rt(prev)->linked_on = NO_CPU;
				__add_ready(&cluster->pfair, prev);
			}
			/* we are done with this cpu */
			return 0;
		} else {
			/* re-add other, its original CPU was not considered yet */
			tsk_rt(other)->linked_on = NO_CPU;
			__add_ready(&cluster->pfair, other);
			/* reschedule this CPU */
			return 1;
		}
	} else {
		/* simple case: link 't' right here */
		pstate[cpu]->linked  = t;
		tsk_rt(t)->linked_on = cpu;
		if (prev) {
			/* prev got pushed back into the ready queue */
			tsk_rt(prev)->linked_on = NO_CPU;
			__add_ready(&cluster->pfair, prev);
		}
		/* we are done with this CPU */
		return 0;
	}
}

/* Greedily link the highest-priority ready tasks to each CPU of the
 * cluster; retries a CPU whenever pfair_link() redirected the linkage. */
static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
{
	int retry;
	struct list_head *pos;
	struct pfair_state *cpu_state;

	list_for_each(pos, &cluster->topology.cpus) {
		cpu_state = from_cluster_list(pos);
		retry = 1;
#ifdef CONFIG_RELEASE_MASTER
		/* skip release master */
		if (cluster->pfair.release_master == cpu_id(cpu_state))
			continue;
#endif
		while (retry) {
			if (pfair_higher_prio(__peek_ready(&cluster->pfair),
					      cpu_state->linked))
				retry = pfair_link(time, cpu_id(cpu_state),
						   __take_ready(&cluster->pfair));
			else
				retry = 0;
		}
	}
}
445 | |||
/* Enact one quantum boundary for the whole cluster: advance subtasks,
 * merge pending releases, re-link tasks, then publish the new tick to
 * all CPUs (which spin on cur_tick in wait_for_quantum()). */
static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
{
	struct pfair_state *cpu;
	struct list_head* pos;

	/* called with interrupts disabled */
	PTRACE("--- Q %lu at %llu PRE-SPIN\n",
	       time, litmus_clock());
	raw_spin_lock(cluster_lock(cluster));
	PTRACE("<<< Q %lu at %llu\n",
	       time, litmus_clock());

	sched_trace_quantum_boundary();

	advance_subtasks(cluster, time);
	poll_releases(cluster);
	schedule_subtasks(cluster, time);

	list_for_each(pos, &cluster->topology.cpus) {
		cpu = from_cluster_list(pos);
		if (cpu->linked)
			PTRACE_TASK(cpu->linked,
				    " linked on %d.\n", cpu_id(cpu));
		else
			PTRACE("(null) linked on %d.\n", cpu_id(cpu));
	}
	/* We are done. Advance time. */
	mb();
	list_for_each(pos, &cluster->topology.cpus) {
		cpu = from_cluster_list(pos);
		if (cpu->local_tick != cpu->cur_tick) {
			/* this CPU did not acknowledge the previous quantum */
			TRACE("BAD Quantum not acked on %d "
			      "(l:%lu c:%lu p:%lu)\n",
			      cpu_id(cpu),
			      cpu->local_tick,
			      cpu->cur_tick,
			      cluster->pfair_time);
			cpu->missed_quanta++;
		}
		cpu->cur_tick = time;
	}
	PTRACE(">>> Q %lu at %llu\n",
	       time, litmus_clock());
	raw_spin_unlock(cluster_lock(cluster));
}

/* Spin until this CPU's cur_tick reaches quantum 'q'. */
static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state)
{
	quanta_t loc;

	goto first; /* skip mb() on first iteration */
	do {
		cpu_relax();
		mb();
	first:	loc = state->cur_tick;
		/* FIXME: what if loc > cur? */
	} while (time_before(loc, q));
	PTRACE("observed cur_tick:%lu >= q:%lu\n",
	       loc, q);
}

/* Current quantum index according to the (stagger-adjusted) local clock. */
static quanta_t current_quantum(struct pfair_state* state)
{
	lt_t t = litmus_clock() - state->offset;
	return time2quanta(t, FLOOR);
}

/* Replay missed quantum boundaries one at a time until 'target'. */
static void catchup_quanta(quanta_t from, quanta_t target,
			   struct pfair_state* state)
{
	quanta_t cur = from, time;
	TRACE("+++< BAD catching up quanta from %lu to %lu\n",
	      from, target);
	while (time_before(cur, target)) {
		wait_for_quantum(cur, state);
		cur++;
		time = cmpxchg(&cpu_cluster(state)->pfair_time,
			       cur - 1,   /* expected */
			       cur        /* next */
			);
		if (time == cur - 1)
			schedule_next_quantum(cpu_cluster(state), cur);
	}
	TRACE("+++> catching up done\n");
}
531 | |||
/* pfair_tick - this function is called for every local timer
 * interrupt.
 */
static void pfair_tick(struct task_struct* t)
{
	struct pfair_state* state = &__get_cpu_var(pfair_state);
	quanta_t time, cur;
	int retry = 10;	/* bounded retries to avoid livelocking in IRQ context */

	do {
		cur  = current_quantum(state);
		PTRACE("q %lu at %llu\n", cur, litmus_clock());

		/* Attempt to advance time. First CPU to get here
		 * will prepare the next quantum.
		 */
		time = cmpxchg(&cpu_cluster(state)->pfair_time,
			       cur - 1,   /* expected */
			       cur        /* next */
			);
		if (time == cur - 1) {
			/* exchange succeeded */
			wait_for_quantum(cur - 1, state);
			schedule_next_quantum(cpu_cluster(state), cur);
			retry = 0;
		} else if (time_before(time, cur - 1)) {
			/* the whole system missed a tick !? */
			catchup_quanta(time, cur, state);
			retry--;
		} else if (time_after(time, cur)) {
			/* our timer lagging behind!? */
			TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
			retry--;
		} else {
			/* Some other CPU already started scheduling
			 * this quantum. Let it do its job and then update.
			 */
			retry = 0;
		}
	} while (retry);

	/* Spin locally until time advances. */
	wait_for_quantum(cur, state);

	/* copy assignment */
	/* FIXME: what if we race with a future update? Corrupted state? */
	state->local      = state->linked;
	/* signal that we are done */
	mb();
	state->local_tick = state->cur_tick;

	if (state->local != current
	    && (is_realtime(current) || is_present(state->local)))
		litmus_reschedule_local();
}

/* A task may only be dispatched here if it is not (still) scheduled on
 * some other CPU and is both present and marked runnable. */
static int safe_to_schedule(struct task_struct* t, int cpu)
{
	int where = tsk_rt(t)->scheduled_on;
	if (where != NO_CPU && where != cpu) {
		TRACE_TASK(t, "BAD: can't be scheduled on %d, "
			   "scheduled already on %d.\n", cpu, where);
		return 0;
	} else
		return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING;
}
598 | |||
/* Plugin schedule() callback: decide what this CPU should run, based on
 * the linkage computed at the last quantum boundary (state->local). */
static struct task_struct* pfair_schedule(struct task_struct * prev)
{
	struct pfair_state* state = &__get_cpu_var(pfair_state);
	struct pfair_cluster* cluster = cpu_cluster(state);
	int blocks, completion, out_of_time;
	struct task_struct* next = NULL;

#ifdef CONFIG_RELEASE_MASTER
	/* Bail out early if we are the release master.
	 * The release master never schedules any real-time tasks.
	 */
	if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
		sched_state_task_picked();
		return NULL;
	}
#endif

	raw_spin_lock(cpu_lock(state));

	/* classify why prev is leaving the CPU */
	blocks      = is_realtime(prev) && !is_running(prev);
	completion  = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP;
	out_of_time = is_realtime(prev) && time_after(cur_release(prev),
						      state->local_tick);

	if (is_realtime(prev))
		PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
			    blocks, completion, out_of_time);

	if (completion) {
		sched_trace_task_completion(prev, 0);
		pfair_prepare_next_period(prev);
		prepare_release(prev, cur_release(prev));
	}

	if (!blocks && (completion || out_of_time)) {
		drop_all_references(prev);
		sched_trace_task_release(prev);
		add_release(&cluster->pfair, prev);
	}

	if (state->local && safe_to_schedule(state->local, cpu_id(state)))
		next = state->local;

	if (prev != next) {
		tsk_rt(prev)->scheduled_on = NO_CPU;
		if (next)
			tsk_rt(next)->scheduled_on = cpu_id(state);
	}
	sched_state_task_picked();
	raw_spin_unlock(cpu_lock(state));

	if (next)
		TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
			   tsk_pfair(next)->release, cpu_cluster(state)->pfair_time, litmus_clock());
	else if (is_realtime(prev))
		TRACE("Becomes idle at %lu (%llu)\n", cpu_cluster(state)->pfair_time, litmus_clock());

	return next;
}
658 | |||
/* Plugin callback: a task joined the Pfair class. Its first release is
 * scheduled for the next quantum boundary of its cluster. */
static void pfair_task_new(struct task_struct * t, int on_rq, int running)
{
	unsigned long 		flags;
	struct pfair_cluster*	cluster;

	TRACE("pfair: task new %d state:%d\n", t->pid, t->state);

	cluster = tsk_pfair(t)->cluster;

	raw_spin_lock_irqsave(cluster_lock(cluster), flags);

	prepare_release(t, cluster->pfair_time + 1);

	t->rt_param.scheduled_on = NO_CPU;

	if (running) {
#ifdef CONFIG_RELEASE_MASTER
		if (task_cpu(t) != cluster->pfair.release_master)
#endif
			t->rt_param.scheduled_on = task_cpu(t);
		__add_ready(&cluster->pfair, t);
	}

	check_preempt(t);

	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
}

/* Plugin callback: a Pfair task resumed after blocking. */
static void pfair_task_wake_up(struct task_struct *t)
{
	unsigned long flags;
	lt_t now;
	struct pfair_cluster* cluster;

	cluster = tsk_pfair(t)->cluster;

	TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
		   litmus_clock(), cur_release(t), cluster->pfair_time);

	raw_spin_lock_irqsave(cluster_lock(cluster), flags);

	/* If a task blocks and wakes before its next job release,
	 * then it may resume if it is currently linked somewhere
	 * (as if it never blocked at all). Otherwise, we have a
	 * new sporadic job release.
	 */
	now = litmus_clock();
	if (lt_before(get_deadline(t), now)) {
		release_at(t, now);
		prepare_release(t, time2quanta(now, CEIL));
		sched_trace_task_release(t);
	}

	/* only add to ready queue if the task isn't still linked somewhere */
	if (tsk_rt(t)->linked_on == NO_CPU)
		__add_ready(&cluster->pfair, t);

	check_preempt(t);

	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
	TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
}

/* Plugin callback: nothing to do here — the linkage is cleaned up lazily
 * by advance_subtask()/drop_all_references(). */
static void pfair_task_block(struct task_struct *t)
{
	BUG_ON(!is_realtime(t));
	TRACE_TASK(t, "blocks at %llu, state:%d\n",
		   litmus_clock(), t->state);
}
728 | |||
/* Plugin callback: a Pfair task is leaving the system; purge every
 * reference to it and free its per-task Pfair state. */
static void pfair_task_exit(struct task_struct * t)
{
	unsigned long flags;
	struct pfair_cluster *cluster;

	BUG_ON(!is_realtime(t));

	cluster = tsk_pfair(t)->cluster;

	/* Remove task from release or ready queue, and ensure
	 * that it is not the scheduled task for ANY CPU. We
	 * do this blanket check because occasionally when
	 * tasks exit while blocked, the task_cpu of the task
	 * might not be the same as the CPU that the PFAIR scheduler
	 * has chosen for it.
	 */
	raw_spin_lock_irqsave(cluster_lock(cluster), flags);

	TRACE_TASK(t, "RIP, state:%d\n", t->state);
	drop_all_references(t);

	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);

	kfree(t->rt_param.pfair);
	t->rt_param.pfair = NULL;
}


/* Plugin callback for synchronous releases: re-arm the task so its first
 * subtask is released at wall-clock time 'start'. */
static void pfair_release_at(struct task_struct* task, lt_t start)
{
	unsigned long flags;
	quanta_t release;

	struct pfair_cluster *cluster;

	cluster = tsk_pfair(task)->cluster;

	BUG_ON(!is_realtime(task));

	raw_spin_lock_irqsave(cluster_lock(cluster), flags);
	release_at(task, start);
	release = time2quanta(start, CEIL);

	TRACE_TASK(task, "sys release at %lu\n", release);

	drop_all_references(task);
	prepare_release(task, release);
	add_release(&cluster->pfair, task);

	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
}
780 | |||
781 | static void init_subtask(struct subtask* sub, unsigned long i, | ||
782 | lt_t quanta, lt_t period) | ||
783 | { | ||
784 | /* since i is zero-based, the formulas are shifted by one */ | ||
785 | lt_t tmp; | ||
786 | |||
787 | /* release */ | ||
788 | tmp = period * i; | ||
789 | do_div(tmp, quanta); /* floor */ | ||
790 | sub->release = (quanta_t) tmp; | ||
791 | |||
792 | /* deadline */ | ||
793 | tmp = period * (i + 1); | ||
794 | if (do_div(tmp, quanta)) /* ceil */ | ||
795 | tmp++; | ||
796 | sub->deadline = (quanta_t) tmp; | ||
797 | |||
798 | /* next release */ | ||
799 | tmp = period * (i + 1); | ||
800 | do_div(tmp, quanta); /* floor */ | ||
801 | sub->overlap = sub->deadline - (quanta_t) tmp; | ||
802 | |||
803 | /* Group deadline. | ||
804 | * Based on the formula given in Uma's thesis. | ||
805 | */ | ||
806 | if (2 * quanta >= period) { | ||
807 | /* heavy */ | ||
808 | tmp = (sub->deadline - (i + 1)) * period; | ||
809 | if (period > quanta && | ||
810 | do_div(tmp, (period - quanta))) /* ceil */ | ||
811 | tmp++; | ||
812 | sub->group_deadline = (quanta_t) tmp; | ||
813 | } else | ||
814 | sub->group_deadline = 0; | ||
815 | } | ||
816 | |||
817 | static void dump_subtasks(struct task_struct* t) | ||
818 | { | ||
819 | unsigned long i; | ||
820 | for (i = 0; i < t->rt_param.pfair->quanta; i++) | ||
821 | TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n", | ||
822 | i + 1, | ||
823 | t->rt_param.pfair->subtasks[i].release, | ||
824 | t->rt_param.pfair->subtasks[i].deadline, | ||
825 | t->rt_param.pfair->subtasks[i].overlap, | ||
826 | t->rt_param.pfair->subtasks[i].group_deadline); | ||
827 | } | ||
828 | |||
829 | static long pfair_admit_task(struct task_struct* t) | ||
830 | { | ||
831 | lt_t quanta; | ||
832 | lt_t period; | ||
833 | s64 quantum_length = ktime_to_ns(tick_period); | ||
834 | struct pfair_param* param; | ||
835 | unsigned long i; | ||
836 | |||
837 | /* first check that the task is in the right cluster */ | ||
838 | if (cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]) != | ||
839 | cpu_cluster(pstate[task_cpu(t)])) | ||
840 | return -EINVAL; | ||
841 | |||
842 | /* Pfair is a tick-based method, so the time | ||
843 | * of interest is jiffies. Calculate tick-based | ||
844 | * times for everything. | ||
845 | * (Ceiling of exec cost, floor of period.) | ||
846 | */ | ||
847 | |||
848 | quanta = get_exec_cost(t); | ||
849 | period = get_rt_period(t); | ||
850 | |||
851 | quanta = time2quanta(get_exec_cost(t), CEIL); | ||
852 | |||
853 | if (do_div(period, quantum_length)) | ||
854 | printk(KERN_WARNING | ||
855 | "The period of %s/%d is not a multiple of %llu.\n", | ||
856 | t->comm, t->pid, (unsigned long long) quantum_length); | ||
857 | |||
858 | if (quanta == period) { | ||
859 | /* special case: task has weight 1.0 */ | ||
860 | printk(KERN_INFO | ||
861 | "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n", | ||
862 | t->comm, t->pid, quanta, period); | ||
863 | quanta = 1; | ||
864 | period = 1; | ||
865 | } | ||
866 | |||
867 | param = kmalloc(sizeof(*param) + | ||
868 | quanta * sizeof(struct subtask), GFP_ATOMIC); | ||
869 | |||
870 | if (!param) | ||
871 | return -ENOMEM; | ||
872 | |||
873 | param->quanta = quanta; | ||
874 | param->cur = 0; | ||
875 | param->release = 0; | ||
876 | param->period = period; | ||
877 | |||
878 | param->cluster = cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]); | ||
879 | |||
880 | for (i = 0; i < quanta; i++) | ||
881 | init_subtask(param->subtasks + i, i, quanta, period); | ||
882 | |||
883 | if (t->rt_param.pfair) | ||
884 | /* get rid of stale allocation */ | ||
885 | kfree(t->rt_param.pfair); | ||
886 | |||
887 | t->rt_param.pfair = param; | ||
888 | |||
889 | /* spew out some debug info */ | ||
890 | dump_subtasks(t); | ||
891 | |||
892 | return 0; | ||
893 | } | ||
894 | |||
/* Initialize a Pfair cluster: its rt_domain, release-queue heap,
 * release lock, and (initially empty) CPU topology list.
 */
static void pfair_init_cluster(struct pfair_cluster* cluster)
{
	rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
	bheap_init(&cluster->release_queue);
	raw_spin_lock_init(&cluster->release_lock);
	INIT_LIST_HEAD(&cluster->topology.cpus);
}
902 | |||
903 | static void cleanup_clusters(void) | ||
904 | { | ||
905 | int i; | ||
906 | |||
907 | if (num_pfair_clusters) | ||
908 | kfree(pfair_clusters); | ||
909 | pfair_clusters = NULL; | ||
910 | num_pfair_clusters = 0; | ||
911 | |||
912 | /* avoid stale pointers */ | ||
913 | for (i = 0; i < num_online_cpus(); i++) { | ||
914 | pstate[i]->topology.cluster = NULL; | ||
915 | printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]), | ||
916 | pstate[i]->missed_updates, pstate[i]->missed_quanta); | ||
917 | } | ||
918 | } | ||
919 | |||
920 | static long pfair_activate_plugin(void) | ||
921 | { | ||
922 | int err, i; | ||
923 | struct pfair_state* state; | ||
924 | struct pfair_cluster* cluster ; | ||
925 | quanta_t now; | ||
926 | int cluster_size; | ||
927 | struct cluster_cpu* cpus[NR_CPUS]; | ||
928 | struct scheduling_cluster* clust[NR_CPUS]; | ||
929 | |||
930 | cluster_size = get_cluster_size(pfair_cluster_level); | ||
931 | |||
932 | if (cluster_size <= 0 || num_online_cpus() % cluster_size != 0) | ||
933 | return -EINVAL; | ||
934 | |||
935 | num_pfair_clusters = num_online_cpus() / cluster_size; | ||
936 | |||
937 | pfair_clusters = kzalloc(num_pfair_clusters * sizeof(struct pfair_cluster), GFP_ATOMIC); | ||
938 | if (!pfair_clusters) { | ||
939 | num_pfair_clusters = 0; | ||
940 | printk(KERN_ERR "Could not allocate Pfair clusters!\n"); | ||
941 | return -ENOMEM; | ||
942 | } | ||
943 | |||
944 | state = &__get_cpu_var(pfair_state); | ||
945 | now = current_quantum(state); | ||
946 | TRACE("Activating PFAIR at q=%lu\n", now); | ||
947 | |||
948 | for (i = 0; i < num_pfair_clusters; i++) { | ||
949 | cluster = &pfair_clusters[i]; | ||
950 | pfair_init_cluster(cluster); | ||
951 | cluster->pfair_time = now; | ||
952 | clust[i] = &cluster->topology; | ||
953 | #ifdef CONFIG_RELEASE_MASTER | ||
954 | cluster->pfair.release_master = atomic_read(&release_master_cpu); | ||
955 | #endif | ||
956 | } | ||
957 | |||
958 | for (i = 0; i < num_online_cpus(); i++) { | ||
959 | state = &per_cpu(pfair_state, i); | ||
960 | state->cur_tick = now; | ||
961 | state->local_tick = now; | ||
962 | state->missed_quanta = 0; | ||
963 | state->missed_updates = 0; | ||
964 | state->offset = cpu_stagger_offset(i); | ||
965 | printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus()); | ||
966 | cpus[i] = &state->topology; | ||
967 | } | ||
968 | |||
969 | err = assign_cpus_to_clusters(pfair_cluster_level, clust, num_pfair_clusters, | ||
970 | cpus, num_online_cpus()); | ||
971 | |||
972 | if (err < 0) | ||
973 | cleanup_clusters(); | ||
974 | |||
975 | return err; | ||
976 | } | ||
977 | |||
/* Plugin deactivation: release all cluster state. */
static long pfair_deactivate_plugin(void)
{
	cleanup_clusters();
	return 0;
}
983 | |||
/* Plugin object: PFAIR's callback table registered with the LITMUS^RT core. */
static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
	.plugin_name = "PFAIR",
	.tick = pfair_tick,
	.task_new = pfair_task_new,
	.task_exit = pfair_task_exit,
	.schedule = pfair_schedule,
	.task_wake_up = pfair_task_wake_up,
	.task_block = pfair_task_block,
	.admit_task = pfair_admit_task,
	.release_at = pfair_release_at,
	.complete_job = complete_job,
	.activate_plugin = pfair_activate_plugin,
	.deactivate_plugin = pfair_deactivate_plugin,
};
999 | |||
1000 | |||
/* procfs handles created by init_pfair(): plugin dir and "cluster" file */
static struct proc_dir_entry *cluster_file = NULL, *pfair_dir = NULL;
1002 | |||
1003 | static int __init init_pfair(void) | ||
1004 | { | ||
1005 | int cpu, err, fs; | ||
1006 | struct pfair_state *state; | ||
1007 | |||
1008 | /* | ||
1009 | * initialize short_cut for per-cpu pfair state; | ||
1010 | * there may be a problem here if someone removes a cpu | ||
1011 | * while we are doing this initialization... and if cpus | ||
1012 | * are added / removed later... but we don't support CPU hotplug atm anyway. | ||
1013 | */ | ||
1014 | pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL); | ||
1015 | |||
1016 | /* initialize CPU state */ | ||
1017 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | ||
1018 | state = &per_cpu(pfair_state, cpu); | ||
1019 | state->topology.id = cpu; | ||
1020 | state->cur_tick = 0; | ||
1021 | state->local_tick = 0; | ||
1022 | state->linked = NULL; | ||
1023 | state->local = NULL; | ||
1024 | state->scheduled = NULL; | ||
1025 | state->missed_quanta = 0; | ||
1026 | state->offset = cpu_stagger_offset(cpu); | ||
1027 | pstate[cpu] = state; | ||
1028 | } | ||
1029 | |||
1030 | pfair_clusters = NULL; | ||
1031 | num_pfair_clusters = 0; | ||
1032 | |||
1033 | err = register_sched_plugin(&pfair_plugin); | ||
1034 | if (!err) { | ||
1035 | fs = make_plugin_proc_dir(&pfair_plugin, &pfair_dir); | ||
1036 | if (!fs) | ||
1037 | cluster_file = create_cluster_file(pfair_dir, &pfair_cluster_level); | ||
1038 | else | ||
1039 | printk(KERN_ERR "Could not allocate PFAIR procfs dir.\n"); | ||
1040 | } | ||
1041 | |||
1042 | return err; | ||
1043 | } | ||
1044 | |||
1045 | static void __exit clean_pfair(void) | ||
1046 | { | ||
1047 | kfree(pstate); | ||
1048 | |||
1049 | if (cluster_file) | ||
1050 | remove_proc_entry("cluster", pfair_dir); | ||
1051 | if (pfair_dir) | ||
1052 | remove_plugin_proc_dir(&pfair_plugin); | ||
1053 | } | ||
1054 | |||
1055 | module_init(init_pfair); | ||
1056 | module_exit(clean_pfair); | ||
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c new file mode 100644 index 000000000000..74a77e7a4959 --- /dev/null +++ b/litmus/sched_pfp.c | |||
@@ -0,0 +1,1542 @@ | |||
1 | /* | ||
2 | * litmus/sched_pfp.c | ||
3 | * | ||
4 | * Implementation of partitioned fixed-priority scheduling. | ||
5 | * Based on PSN-EDF. | ||
6 | */ | ||
7 | |||
8 | #include <linux/percpu.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/list.h> | ||
11 | #include <linux/spinlock.h> | ||
12 | #include <linux/module.h> | ||
13 | |||
14 | #include <litmus/litmus.h> | ||
15 | #include <litmus/wait.h> | ||
16 | #include <litmus/jobs.h> | ||
17 | #include <litmus/preempt.h> | ||
18 | #include <litmus/fp_common.h> | ||
19 | #include <litmus/sched_plugin.h> | ||
20 | #include <litmus/sched_trace.h> | ||
21 | #include <litmus/trace.h> | ||
22 | |||
23 | #include <linux/uaccess.h> | ||
24 | |||
25 | |||
/* Per-partition (per-CPU) P-FP scheduler state. */
typedef struct {
	rt_domain_t domain;	/* release queue + ready lock */
	struct fp_prio_queue ready_queue; /* fixed-priority ready jobs */
	int cpu;		/* the partition this domain serves */
	struct task_struct* scheduled; /* only RT tasks */
/*
 * scheduling lock slock
 * protects the domain and serializes scheduling decisions
 */
#define slock domain.ready_lock

} pfp_domain_t;
38 | |||
/* One P-FP domain per CPU, plus an index for remote access. */
DEFINE_PER_CPU(pfp_domain_t, pfp_domains);

pfp_domain_t* pfp_doms[NR_CPUS];

/* accessors for the local CPU's domain and for remote partitions */
#define local_pfp (&__get_cpu_var(pfp_domains))
#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain)
#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu))
#define task_dom(task) remote_dom(get_partition(task))
#define task_pfp(task) remote_pfp(get_partition(task))
48 | |||
/* we assume the lock is being held */
static void preempt(pfp_domain_t *pfp)
{
	/* trigger a reschedule on pfp's CPU unless the currently
	 * scheduled task is non-preemptable */
	preempt_if_preemptable(pfp->scheduled, pfp->cpu);
}
54 | |||
/* Map a task to its index in the fixed-priority ready queue.
 * Index 0 is reserved for priority-boosted jobs; otherwise the task's
 * (possibly inherited) fixed priority is used.
 *
 * Fix: the guard was "#ifdef CONFIG_LOCKING", a symbol that does not
 * exist; every other locking-related section of this file is guarded
 * by CONFIG_LITMUS_LOCKING, so boosting/inheritance handling here was
 * silently compiled out.
 */
static unsigned int priority_index(struct task_struct* t)
{
#ifdef CONFIG_LITMUS_LOCKING
	if (unlikely(t->rt_param.inh_task))
		/* use effective priority */
		t = t->rt_param.inh_task;

	if (is_priority_boosted(t)) {
		/* zero is reserved for priority-boosted tasks */
		return 0;
	} else
#endif
		return get_priority(t);
}
69 | |||
70 | |||
71 | static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
72 | { | ||
73 | pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain); | ||
74 | unsigned long flags; | ||
75 | struct task_struct* t; | ||
76 | struct bheap_node* hn; | ||
77 | |||
78 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
79 | |||
80 | while (!bheap_empty(tasks)) { | ||
81 | hn = bheap_take(fp_ready_order, tasks); | ||
82 | t = bheap2task(hn); | ||
83 | TRACE_TASK(t, "released (part:%d prio:%d)\n", | ||
84 | get_partition(t), get_priority(t)); | ||
85 | fp_prio_add(&pfp->ready_queue, t, priority_index(t)); | ||
86 | } | ||
87 | |||
88 | /* do we need to preempt? */ | ||
89 | if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) { | ||
90 | TRACE_CUR("preempted by new release\n"); | ||
91 | preempt(pfp); | ||
92 | } | ||
93 | |||
94 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
95 | } | ||
96 | |||
/* One-time setup of a per-CPU P-FP domain. */
static void pfp_domain_init(pfp_domain_t* pfp,
			    int cpu)
{
	fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
	pfp->cpu = cpu;
	pfp->scheduled = NULL;
	fp_prio_queue_init(&pfp->ready_queue);
}
105 | |||
106 | static void requeue(struct task_struct* t, pfp_domain_t *pfp) | ||
107 | { | ||
108 | if (t->state != TASK_RUNNING) | ||
109 | TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); | ||
110 | |||
111 | set_rt_flags(t, RT_F_RUNNING); | ||
112 | if (is_released(t, litmus_clock())) | ||
113 | fp_prio_add(&pfp->ready_queue, t, priority_index(t)); | ||
114 | else | ||
115 | add_release(&pfp->domain, t); /* it has got to wait */ | ||
116 | } | ||
117 | |||
118 | static void job_completion(struct task_struct* t, int forced) | ||
119 | { | ||
120 | sched_trace_task_completion(t,forced); | ||
121 | TRACE_TASK(t, "job_completion().\n"); | ||
122 | |||
123 | set_rt_flags(t, RT_F_SLEEP); | ||
124 | prepare_for_next_period(t); | ||
125 | } | ||
126 | |||
127 | static void pfp_tick(struct task_struct *t) | ||
128 | { | ||
129 | pfp_domain_t *pfp = local_pfp; | ||
130 | |||
131 | /* Check for inconsistency. We don't need the lock for this since | ||
132 | * ->scheduled is only changed in schedule, which obviously is not | ||
133 | * executing in parallel on this CPU | ||
134 | */ | ||
135 | BUG_ON(is_realtime(t) && t != pfp->scheduled); | ||
136 | |||
137 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
138 | if (!is_np(t)) { | ||
139 | litmus_reschedule_local(); | ||
140 | TRACE("pfp_scheduler_tick: " | ||
141 | "%d is preemptable " | ||
142 | " => FORCE_RESCHED\n", t->pid); | ||
143 | } else if (is_user_np(t)) { | ||
144 | TRACE("pfp_scheduler_tick: " | ||
145 | "%d is non-preemptable, " | ||
146 | "preemption delayed.\n", t->pid); | ||
147 | request_exit_np(t); | ||
148 | } | ||
149 | } | ||
150 | } | ||
151 | |||
/* Main scheduling callback for P-FP: pick the next task to run on
 * this CPU. Returns the highest-priority ready job, prev if it may
 * continue, or NULL to fall back to the Linux scheduler.
 */
static struct task_struct* pfp_schedule(struct task_struct * prev)
{
	pfp_domain_t* pfp = local_pfp;
	struct task_struct* next;

	int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;

	raw_spin_lock(&pfp->slock);

	/* sanity checking
	 * differently from gedf, when a task exits (dead)
	 * pfp->schedule may be null and prev _is_ realtime
	 */
	BUG_ON(pfp->scheduled && pfp->scheduled != prev);
	BUG_ON(pfp->scheduled && !is_realtime(prev));

	/* (0) Determine state */
	exists = pfp->scheduled != NULL;
	blocks = exists && !is_running(pfp->scheduled);
	out_of_time = exists &&
		budget_enforced(pfp->scheduled) &&
		budget_exhausted(pfp->scheduled);
	np = exists && is_np(pfp->scheduled);
	sleep = exists && get_rt_flags(pfp->scheduled) == RT_F_SLEEP;
	/* migrate: the task was re-assigned to another partition
	 * (e.g., by a locking protocol) while scheduled here */
	migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
	preempt = migrate || fp_preemption_needed(&pfp->ready_queue, prev);

	/* If we need to preempt do so.
	 * The following checks set resched to 1 in case of special
	 * circumstances.
	 */
	resched = preempt;

	/* If a task blocks we have no choice but to reschedule.
	 */
	if (blocks)
		resched = 1;

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * Multiple calls to request_exit_np() don't hurt.
	 */
	if (np && (out_of_time || preempt || sleep))
		request_exit_np(pfp->scheduled);

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this.
	 */
	if (!np && (out_of_time || sleep) && !blocks && !migrate) {
		job_completion(pfp->scheduled, !sleep);
		resched = 1;
	}

	/* The final scheduling decision. Do we need to switch for some reason?
	 * Switch if we are in RT mode and have no task or if we need to
	 * resched.
	 */
	next = NULL;
	if ((!np || blocks) && (resched || !exists)) {
		/* When preempting a task that does not block, then
		 * re-insert it into either the ready queue or the
		 * release queue (if it completed). requeue() picks
		 * the appropriate queue.
		 */
		if (pfp->scheduled && !blocks && !migrate)
			requeue(pfp->scheduled, pfp);
		next = fp_prio_take(&pfp->ready_queue);
	} else
		/* Only override Linux scheduler if we have a real-time task
		 * scheduled that needs to continue.
		 */
		if (exists)
			next = prev;

	if (next) {
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
		set_rt_flags(next, RT_F_RUNNING);
	} else {
		TRACE("becoming idle at %llu\n", litmus_clock());
	}

	pfp->scheduled = next;
	sched_state_task_picked();
	raw_spin_unlock(&pfp->slock);

	return next;
}
239 | |||
#ifdef CONFIG_LITMUS_LOCKING

/* prev is no longer scheduled --- see if it needs to migrate */
static void pfp_finish_switch(struct task_struct *prev)
{
	pfp_domain_t *to;

	/* A still-running task whose partition no longer matches this
	 * CPU was re-assigned while scheduled (locking-protocol
	 * migration); push it to its new partition's queue. */
	if (is_realtime(prev) &&
	    is_running(prev) &&
	    get_partition(prev) != smp_processor_id()) {
		TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
			   smp_processor_id(), get_partition(prev));

		to = task_pfp(prev);

		raw_spin_lock(&to->slock);

		TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
		requeue(prev, to);
		if (fp_preemption_needed(&to->ready_queue, to->scheduled))
			preempt(to);

		raw_spin_unlock(&to->slock);

	}
}

#endif
268 | |||
269 | /* Prepare a task for running in RT mode | ||
270 | */ | ||
271 | static void pfp_task_new(struct task_struct * t, int on_rq, int running) | ||
272 | { | ||
273 | pfp_domain_t* pfp = task_pfp(t); | ||
274 | unsigned long flags; | ||
275 | |||
276 | TRACE_TASK(t, "P-FP: task new, cpu = %d\n", | ||
277 | t->rt_param.task_params.cpu); | ||
278 | |||
279 | /* setup job parameters */ | ||
280 | release_at(t, litmus_clock()); | ||
281 | |||
282 | /* The task should be running in the queue, otherwise signal | ||
283 | * code will try to wake it up with fatal consequences. | ||
284 | */ | ||
285 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
286 | if (running) { | ||
287 | /* there shouldn't be anything else running at the time */ | ||
288 | BUG_ON(pfp->scheduled); | ||
289 | pfp->scheduled = t; | ||
290 | } else { | ||
291 | requeue(t, pfp); | ||
292 | /* maybe we have to reschedule */ | ||
293 | preempt(pfp); | ||
294 | } | ||
295 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
296 | } | ||
297 | |||
/* Called when a real-time task resumes. A tardy wake-up (that is not
 * merely a resumption after a semaphore-induced suspension) is
 * treated as a new sporadic job release; the task is then re-queued
 * unless it is still the scheduled task.
 */
static void pfp_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	pfp_domain_t* pfp = task_pfp(task);
	lt_t now;

	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
	raw_spin_lock_irqsave(&pfp->slock, flags);

#ifdef CONFIG_LITMUS_LOCKING
	/* Should only be queued when processing a fake-wake up due to a
	 * migration-related state change. */
	if (unlikely(is_queued(task))) {
		TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
		goto out_unlock;
	}
#else
	BUG_ON(is_queued(task));
#endif
	now = litmus_clock();
	if (is_tardy(task, now)
#ifdef CONFIG_LITMUS_LOCKING
	/* We need to take suspensions because of semaphores into
	 * account! If a job resumes after being suspended due to acquiring
	 * a semaphore, it should never be treated as a new job release.
	 */
	    && !is_priority_boosted(task)
#endif
		) {
		/* new sporadic release */
		release_at(task, now);
		sched_trace_task_release(task);
	}

	/* Only add to ready queue if it is not the currently-scheduled
	 * task. This could be the case if a task was woken up concurrently
	 * on a remote CPU before the executing CPU got around to actually
	 * de-scheduling the task, i.e., wake_up() raced with schedule()
	 * and won. Also, don't requeue if it is still queued, which can
	 * happen under the DPCP due wake-ups racing with migrations.
	 */
	if (pfp->scheduled != task)
		requeue(task, pfp);

	/* NOTE(review): without CONFIG_LITMUS_LOCKING this label is
	 * unreferenced and triggers an unused-label warning. */
out_unlock:
	raw_spin_unlock_irqrestore(&pfp->slock, flags);
	TRACE_TASK(task, "wake up done\n");
}
346 | |||
/* Called when a real-time task blocks. No queue manipulation is
 * needed because a running task is in no queue; only sanity checks.
 */
static void pfp_task_block(struct task_struct *t)
{
	/* only running tasks can block, thus t is in no queue */
	TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);

	BUG_ON(!is_realtime(t));

	/* If this task blocked normally, it shouldn't be queued. The exception is
	 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
	 * This should only happen if the DPCP is being used.
	 */
#ifdef CONFIG_LITMUS_LOCKING
	if (unlikely(is_queued(t)))
		TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
#else
	BUG_ON(is_queued(t));
#endif
}
365 | |||
/* Clean up when a real-time task exits: dequeue it (currently
 * unsupported while queued, hence BUG()) and vacate the CPU if it was
 * the scheduled task.
 */
static void pfp_task_exit(struct task_struct * t)
{
	unsigned long flags;
	pfp_domain_t* pfp = task_pfp(t);
	rt_domain_t* dom;

	raw_spin_lock_irqsave(&pfp->slock, flags);
	if (is_queued(t)) {
		BUG(); /* This currently doesn't work. */
		/* dequeue */
		dom = task_dom(t);
		remove(dom, t);
	}
	if (pfp->scheduled == t) {
		pfp->scheduled = NULL;
		preempt(pfp);
	}
	TRACE_TASK(t, "RIP, now reschedule\n");

	raw_spin_unlock_irqrestore(&pfp->slock, flags);
}
387 | |||
388 | #ifdef CONFIG_LITMUS_LOCKING | ||
389 | |||
390 | #include <litmus/fdso.h> | ||
391 | #include <litmus/srp.h> | ||
392 | |||
/* Remove t from the ready queue if it is queued. A task must never be
 * both scheduled and queued at the same time.
 */
static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
{
	BUG_ON(pfp->scheduled == t && is_queued(t));
	if (is_queued(t))
		fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
}
399 | |||
400 | static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t, | ||
401 | struct task_struct* prio_inh) | ||
402 | { | ||
403 | int requeue; | ||
404 | |||
405 | if (!t || t->rt_param.inh_task == prio_inh) { | ||
406 | /* no update required */ | ||
407 | if (t) | ||
408 | TRACE_TASK(t, "no prio-inh update required\n"); | ||
409 | return; | ||
410 | } | ||
411 | |||
412 | requeue = is_queued(t); | ||
413 | TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue); | ||
414 | |||
415 | if (requeue) | ||
416 | /* first remove */ | ||
417 | fp_dequeue(pfp, t); | ||
418 | |||
419 | t->rt_param.inh_task = prio_inh; | ||
420 | |||
421 | if (requeue) | ||
422 | /* add again to the right queue */ | ||
423 | fp_prio_add(&pfp->ready_queue, t, priority_index(t)); | ||
424 | } | ||
425 | |||
/* Shift an agent's priority so that agents always outrank regular tasks. */
static int effective_agent_priority(int prio)
{
	/* make sure agents have higher priority */
	return prio - LITMUS_MAX_PRIORITY;
}
431 | |||
/* Encode an effective priority as a non-negative "priority point"
 * (inverse of prio_from_point()).
 */
static lt_t prio_point(int eprio)
{
	/* make sure we have non-negative prio points */
	return eprio + LITMUS_MAX_PRIORITY;
}
437 | |||
/* Decode a priority point back into an effective priority
 * (inverse of prio_point()).
 */
static int prio_from_point(lt_t prio_point)
{
	return ((int) prio_point) - LITMUS_MAX_PRIORITY;
}
442 | |||
/* Boost t's priority (used by boosting-based locking protocols);
 * priority_point breaks ties among boosted jobs, typically FIFO by
 * request time. If t is queued, its heap position is updated and a
 * preemption check is performed.
 */
static void boost_priority(struct task_struct* t, lt_t priority_point)
{
	unsigned long flags;
	pfp_domain_t* pfp = task_pfp(t);

	raw_spin_lock_irqsave(&pfp->slock, flags);


	TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());

	tsk_rt(t)->priority_boosted = 1;
	/* tie-break by protocol-specific priority point */
	tsk_rt(t)->boost_start_time = priority_point;

	if (pfp->scheduled != t) {
		/* holder may be queued: first stop queue changes.
		 * NOTE(review): this takes the domain's release_lock while
		 * re-ordering the ready queue — confirm that this is the
		 * intended lock for heap-node updates. */
		raw_spin_lock(&pfp->domain.release_lock);
		if (is_queued(t) &&
		    /* If it is queued, then we need to re-order. */
		    bheap_decrease(fp_ready_order, tsk_rt(t)->heap_node) &&
		    /* If we bubbled to the top, then we need to check for preemptions. */
		    fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
			preempt(pfp);
		raw_spin_unlock(&pfp->domain.release_lock);
	} /* else: nothing to do since the job is not queued while scheduled */

	raw_spin_unlock_irqrestore(&pfp->slock, flags);
}
471 | |||
472 | static void unboost_priority(struct task_struct* t) | ||
473 | { | ||
474 | unsigned long flags; | ||
475 | pfp_domain_t* pfp = task_pfp(t); | ||
476 | lt_t now; | ||
477 | |||
478 | raw_spin_lock_irqsave(&pfp->slock, flags); | ||
479 | now = litmus_clock(); | ||
480 | |||
481 | /* assumption: this only happens when the job is scheduled */ | ||
482 | BUG_ON(pfp->scheduled != t); | ||
483 | |||
484 | TRACE_TASK(t, "priority restored at %llu\n", now); | ||
485 | |||
486 | /* priority boosted jobs must be scheduled */ | ||
487 | BUG_ON(pfp->scheduled != t); | ||
488 | |||
489 | tsk_rt(t)->priority_boosted = 0; | ||
490 | tsk_rt(t)->boost_start_time = 0; | ||
491 | |||
492 | /* check if this changes anything */ | ||
493 | if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled)) | ||
494 | preempt(pfp); | ||
495 | |||
496 | raw_spin_unlock_irqrestore(&pfp->slock, flags); | ||
497 | } | ||
498 | |||
499 | /* ******************** SRP support ************************ */ | ||
500 | |||
/* SRP callback: a task's SRP preemption level is its fixed priority. */
static unsigned int pfp_get_srp_prio(struct task_struct* t)
{
	return get_priority(t);
}
505 | |||
506 | /* ******************** FMLP support ********************** */ | ||
507 | |||
/* FMLP semaphore: a simple FIFO mutex with priority boosting. */
struct fmlp_semaphore {
	struct litmus_lock litmus_lock;

	/* current resource holder */
	struct task_struct *owner;

	/* FIFO queue of waiting tasks */
	wait_queue_head_t wait;
};
517 | |||
/* Recover the FMLP semaphore embedding a generic litmus_lock. */
static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
{
	return container_of(lock, struct fmlp_semaphore, litmus_lock);
}
/* Acquire an FMLP semaphore. The caller is priority-boosted before it
 * may suspend, and waiters are served in FIFO order — each unlock
 * wakes exactly one waiter, who then owns the lock without rechecking.
 * Returns 0 on success, -EPERM if the caller is not a real-time task.
 */
int pfp_fmlp_lock(struct litmus_lock* l)
{
	struct task_struct* t = current;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	wait_queue_t wait;
	unsigned long flags;
	lt_t time_of_request;

	if (!is_realtime(t))
		return -EPERM;

	spin_lock_irqsave(&sem->wait.lock, flags);

	/* tie-break by this point in time */
	time_of_request = litmus_clock();

	/* Priority-boost ourself *before* we suspend so that
	 * our priority is boosted when we resume. */
	boost_priority(t, time_of_request);

	if (sem->owner) {
		/* resource is not free => must suspend and wait */

		init_waitqueue_entry(&wait, t);

		/* FIXME: interruptible would be nice some day */
		set_task_state(t, TASK_UNINTERRUPTIBLE);

		__add_wait_queue_tail_exclusive(&sem->wait, &wait);

		TS_LOCK_SUSPEND;

		/* release lock before sleeping */
		spin_unlock_irqrestore(&sem->wait.lock, flags);

		/* We depend on the FIFO order. Thus, we don't need to recheck
		 * when we wake up; we are guaranteed to have the lock since
		 * there is only one wake up per release.
		 */

		schedule();

		TS_LOCK_RESUME;

		/* Since we hold the lock, no other task will change
		 * ->owner. We can thus check it without acquiring the spin
		 * lock. */
		BUG_ON(sem->owner != t);
	} else {
		/* it's ours now */
		sem->owner = t;

		spin_unlock_irqrestore(&sem->wait.lock, flags);
	}

	return 0;
}
579 | |||
/* Release an FMLP semaphore: unboost the caller and hand ownership to
 * the first waiter (FIFO), if any. Returns -EINVAL if the caller does
 * not hold the lock.
 */
int pfp_fmlp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	unsigned long flags;
	int err = 0;

	spin_lock_irqsave(&sem->wait.lock, flags);

	if (sem->owner != t) {
		err = -EINVAL;
		goto out;
	}

	/* we lose the benefit of priority boosting */

	unboost_priority(t);

	/* check if there are jobs waiting for this resource */
	next = __waitqueue_remove_first(&sem->wait);
	if (next) {
		/* next becomes the resource holder */
		sem->owner = next;

		/* Wake up next. The waiting job is already priority-boosted. */
		wake_up_process(next);
	} else
		/* resource becomes available */
		sem->owner = NULL;

out:
	spin_unlock_irqrestore(&sem->wait.lock, flags);
	return err;
}
614 | |||
615 | int pfp_fmlp_close(struct litmus_lock* l) | ||
616 | { | ||
617 | struct task_struct *t = current; | ||
618 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
619 | unsigned long flags; | ||
620 | |||
621 | int owner; | ||
622 | |||
623 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
624 | |||
625 | owner = sem->owner == t; | ||
626 | |||
627 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
628 | |||
629 | if (owner) | ||
630 | pfp_fmlp_unlock(l); | ||
631 | |||
632 | return 0; | ||
633 | } | ||
634 | |||
/* Deallocate an FMLP semaphore (litmus_lock 'deallocate' callback). */
void pfp_fmlp_free(struct litmus_lock* lock)
{
	kfree(fmlp_from_lock(lock));
}
639 | |||
/* litmus_lock operations for the FIFO Mutex Locking Protocol (FMLP).
 * No 'open' callback: FMLP needs no per-task configuration. */
static struct litmus_lock_ops pfp_fmlp_lock_ops = {
	.close = pfp_fmlp_close,
	.lock = pfp_fmlp_lock,
	.unlock = pfp_fmlp_unlock,
	.deallocate = pfp_fmlp_free,
};
646 | |||
647 | static struct litmus_lock* pfp_new_fmlp(void) | ||
648 | { | ||
649 | struct fmlp_semaphore* sem; | ||
650 | |||
651 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
652 | if (!sem) | ||
653 | return NULL; | ||
654 | |||
655 | sem->owner = NULL; | ||
656 | init_waitqueue_head(&sem->wait); | ||
657 | sem->litmus_lock.ops = &pfp_fmlp_lock_ops; | ||
658 | |||
659 | return &sem->litmus_lock; | ||
660 | } | ||
661 | |||
662 | /* ******************** MPCP support ********************** */ | ||
663 | |||
/* Multiprocessor Priority Ceiling Protocol (MPCP) semaphore state. */
struct mpcp_semaphore {
	struct litmus_lock litmus_lock;

	/* current resource holder; NULL when the resource is free */
	struct task_struct *owner;

	/* priority-ordered queue of waiting tasks
	 * (enqueued via __add_wait_queue_prio_exclusive) */
	wait_queue_head_t wait;

	/* priority ceiling per cpu: lowered in pfp_mpcp_open() to the
	 * minimum (= highest) priority of any *remote* task that uses
	 * this semaphore */
	unsigned int prio_ceiling[NR_CPUS];

	/* should jobs spin "virtually" for this resource? (MPCP-VS) */
	int vspin;
};
679 | |||
/* "no ceiling yet": lowest possible priority, see pfp_mpcp_open() */
#define OMEGA_CEILING UINT_MAX

/* Since jobs spin "virtually" while waiting to acquire a lock,
 * they first must acquire a local per-cpu resource.
 */
static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
687 | |||
/* Acquire this CPU's "virtual spin" token (MPCP-VS).
 *
 * Only one local job may issue a global request at a time; all others
 * suspend on a priority-ordered per-CPU queue until the token is free.
 * Called with preemptions off <=> no local modifications can race with
 * us while we inspect/modify the per-CPU token. */
static void mpcp_vspin_enter(void)
{
	struct task_struct* t = current;

	while (1) {
		if (__get_cpu_var(mpcpvs_vspin) == NULL) {
			/* good, we get to issue our request */
			__get_cpu_var(mpcpvs_vspin) = t;
			break;
		} else {
			/* some job is spinning => enqueue in request queue */
			prio_wait_queue_t wait;
			wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
			unsigned long flags;

			/* ordered by regular priority */
			init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));

			spin_lock_irqsave(&vspin->lock, flags);

			/* must go to sleep *before* publishing the queue
			 * entry, so a wake-up cannot be lost */
			set_task_state(t, TASK_UNINTERRUPTIBLE);

			__add_wait_queue_prio_exclusive(vspin, &wait);

			spin_unlock_irqrestore(&vspin->lock, flags);

			TS_LOCK_SUSPEND;

			preempt_enable_no_resched();

			schedule();

			preempt_disable();

			TS_LOCK_RESUME;
			/* Recheck if we got it --- some higher-priority process might
			 * have swooped in. */
		}
	}
	/* ok, now it is ours */
}
730 | |||
731 | /* called with preemptions off */ | ||
732 | static void mpcp_vspin_exit(void) | ||
733 | { | ||
734 | struct task_struct* t = current, *next; | ||
735 | unsigned long flags; | ||
736 | wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait); | ||
737 | |||
738 | BUG_ON(__get_cpu_var(mpcpvs_vspin) != t); | ||
739 | |||
740 | /* no spinning job */ | ||
741 | __get_cpu_var(mpcpvs_vspin) = NULL; | ||
742 | |||
743 | /* see if anyone is waiting for us to stop "spinning" */ | ||
744 | spin_lock_irqsave(&vspin->lock, flags); | ||
745 | next = __waitqueue_remove_first(vspin); | ||
746 | |||
747 | if (next) | ||
748 | wake_up_process(next); | ||
749 | |||
750 | spin_unlock_irqrestore(&vspin->lock, flags); | ||
751 | } | ||
752 | |||
/* Map a generic litmus_lock back to its enclosing MPCP semaphore. */
static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
{
	return container_of(lock, struct mpcp_semaphore, litmus_lock);
}
757 | |||
/* Acquire an MPCP semaphore.
 *
 * Under MPCP-VS the caller first claims the local vspin token; it then
 * boosts itself to the local partition's priority ceiling and either
 * takes the free resource or suspends (uninterruptibly) on the
 * priority-ordered wait queue until ownership is handed over.
 *
 * Returns 0 on success, -EPERM if the caller is not a real-time task. */
int pfp_mpcp_lock(struct litmus_lock* l)
{
	struct task_struct* t = current;
	struct mpcp_semaphore *sem = mpcp_from_lock(l);
	prio_wait_queue_t wait;
	unsigned long flags;

	if (!is_realtime(t))
		return -EPERM;

	preempt_disable();

	if (sem->vspin)
		mpcp_vspin_enter();

	/* Priority-boost ourself *before* we suspend so that
	 * our priority is boosted when we resume. Use the priority
	 * ceiling for the local partition. */
	boost_priority(t, sem->prio_ceiling[get_partition(t)]);

	spin_lock_irqsave(&sem->wait.lock, flags);

	/* interrupts are off now; safe to allow preemption again before
	 * we enqueue/claim under the wait-queue lock */
	preempt_enable_no_resched();

	if (sem->owner) {
		/* resource is not free => must suspend and wait */

		/* ordered by regular priority */
		init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));

		/* FIXME: interruptible would be nice some day */
		set_task_state(t, TASK_UNINTERRUPTIBLE);

		__add_wait_queue_prio_exclusive(&sem->wait, &wait);

		TS_LOCK_SUSPEND;

		/* release lock before sleeping */
		spin_unlock_irqrestore(&sem->wait.lock, flags);

		/* We depend on the FIFO order. Thus, we don't need to recheck
		 * when we wake up; we are guaranteed to have the lock since
		 * there is only one wake up per release.
		 */

		schedule();

		TS_LOCK_RESUME;

		/* Since we hold the lock, no other task will change
		 * ->owner. We can thus check it without acquiring the spin
		 * lock. */
		BUG_ON(sem->owner != t);
	} else {
		/* it's ours now */
		sem->owner = t;

		spin_unlock_irqrestore(&sem->wait.lock, flags);
	}

	return 0;
}
820 | |||
/* Release an MPCP semaphore: hand ownership to the highest-priority
 * waiter (or mark the resource free), then release the local vspin
 * token under MPCP-VS.
 *
 * Returns 0 on success, -EINVAL if the caller is not the owner. */
int pfp_mpcp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next;
	struct mpcp_semaphore *sem = mpcp_from_lock(l);
	unsigned long flags;
	int err = 0;

	spin_lock_irqsave(&sem->wait.lock, flags);

	if (sem->owner != t) {
		err = -EINVAL;
		goto out;
	}

	/* we lose the benefit of priority boosting */

	unboost_priority(t);

	/* check if there are jobs waiting for this resource */
	next = __waitqueue_remove_first(&sem->wait);
	if (next) {
		/* next becomes the resource holder */
		sem->owner = next;

		/* Wake up next. The waiting job is already priority-boosted. */
		wake_up_process(next);
	} else
		/* resource becomes available */
		sem->owner = NULL;

out:
	spin_unlock_irqrestore(&sem->wait.lock, flags);

	/* ->vspin is set once in pfp_new_mpcp() and never modified, so
	 * reading it without the lock is safe */
	if (sem->vspin && err == 0) {
		preempt_disable();
		mpcp_vspin_exit();
		preempt_enable();
	}

	return err;
}
862 | |||
863 | int pfp_mpcp_open(struct litmus_lock* l, void* config) | ||
864 | { | ||
865 | struct task_struct *t = current; | ||
866 | struct mpcp_semaphore *sem = mpcp_from_lock(l); | ||
867 | int cpu, local_cpu; | ||
868 | unsigned long flags; | ||
869 | |||
870 | if (!is_realtime(t)) | ||
871 | /* we need to know the real-time priority */ | ||
872 | return -EPERM; | ||
873 | |||
874 | local_cpu = get_partition(t); | ||
875 | |||
876 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
877 | |||
878 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
879 | if (cpu != local_cpu) | ||
880 | { | ||
881 | sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu], | ||
882 | get_priority(t)); | ||
883 | TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n", | ||
884 | sem, sem->prio_ceiling[cpu], cpu); | ||
885 | } | ||
886 | |||
887 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
888 | |||
889 | return 0; | ||
890 | } | ||
891 | |||
892 | int pfp_mpcp_close(struct litmus_lock* l) | ||
893 | { | ||
894 | struct task_struct *t = current; | ||
895 | struct mpcp_semaphore *sem = mpcp_from_lock(l); | ||
896 | unsigned long flags; | ||
897 | |||
898 | int owner; | ||
899 | |||
900 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
901 | |||
902 | owner = sem->owner == t; | ||
903 | |||
904 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
905 | |||
906 | if (owner) | ||
907 | pfp_mpcp_unlock(l); | ||
908 | |||
909 | return 0; | ||
910 | } | ||
911 | |||
/* Deallocate an MPCP semaphore (litmus_lock 'deallocate' callback). */
void pfp_mpcp_free(struct litmus_lock* lock)
{
	kfree(mpcp_from_lock(lock));
}
916 | |||
/* litmus_lock operations for MPCP; unlike FMLP, an 'open' callback is
 * needed to establish the per-CPU priority ceilings. */
static struct litmus_lock_ops pfp_mpcp_lock_ops = {
	.close = pfp_mpcp_close,
	.lock = pfp_mpcp_lock,
	.open = pfp_mpcp_open,
	.unlock = pfp_mpcp_unlock,
	.deallocate = pfp_mpcp_free,
};
924 | |||
925 | static struct litmus_lock* pfp_new_mpcp(int vspin) | ||
926 | { | ||
927 | struct mpcp_semaphore* sem; | ||
928 | int cpu; | ||
929 | |||
930 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
931 | if (!sem) | ||
932 | return NULL; | ||
933 | |||
934 | sem->owner = NULL; | ||
935 | init_waitqueue_head(&sem->wait); | ||
936 | sem->litmus_lock.ops = &pfp_mpcp_lock_ops; | ||
937 | |||
938 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
939 | sem->prio_ceiling[cpu] = OMEGA_CEILING; | ||
940 | |||
941 | /* mark as virtual spinning */ | ||
942 | sem->vspin = vspin; | ||
943 | |||
944 | return &sem->litmus_lock; | ||
945 | } | ||
946 | |||
947 | |||
948 | /* ******************** PCP support ********************** */ | ||
949 | |||
950 | |||
/* Priority Ceiling Protocol (PCP) semaphore, local to one CPU.
 * Not a litmus_lock itself; it is embedded in dpcp_semaphore. */
struct pcp_semaphore {
	/* links this semaphore into the per-CPU system-ceiling stack
	 * while it is held (sorted by pcp_add_ceiling) */
	struct list_head ceiling;

	/* current resource holder; NULL when free */
	struct task_struct *owner;

	/* priority ceiling --- can be negative due to DPCP support */
	int prio_ceiling;

	/* on which processor is this PCP semaphore allocated? */
	int on_cpu;
};
963 | |||
/* Per-CPU PCP bookkeeping. */
struct pcp_state {
	/* held PCP semaphores, sorted by ascending prio_ceiling value
	 * (head = current system ceiling) */
	struct list_head system_ceiling;

	/* highest-priority waiting task */
	struct task_struct* hp_waiter;

	/* list of jobs waiting to get past the system ceiling */
	wait_queue_head_t ceiling_blocked;
};
973 | |||
/* Reset a CPU's PCP state: empty ceiling stack, no waiters. */
static void pcp_init_state(struct pcp_state* s)
{
	INIT_LIST_HEAD(&s->system_ceiling);
	s->hp_waiter = NULL;
	init_waitqueue_head(&s->ceiling_blocked);
}
980 | |||
981 | static DEFINE_PER_CPU(struct pcp_state, pcp_state); | ||
982 | |||
983 | /* assumes preemptions are off */ | ||
984 | static struct pcp_semaphore* pcp_get_ceiling(void) | ||
985 | { | ||
986 | struct list_head* top = __get_cpu_var(pcp_state).system_ceiling.next; | ||
987 | |||
988 | if (top) | ||
989 | return list_entry(top, struct pcp_semaphore, ceiling); | ||
990 | else | ||
991 | return NULL; | ||
992 | } | ||
993 | |||
994 | /* assumes preempt off */ | ||
995 | static void pcp_add_ceiling(struct pcp_semaphore* sem) | ||
996 | { | ||
997 | struct list_head *pos; | ||
998 | struct list_head *in_use = &__get_cpu_var(pcp_state).system_ceiling; | ||
999 | struct pcp_semaphore* held; | ||
1000 | |||
1001 | BUG_ON(sem->on_cpu != smp_processor_id()); | ||
1002 | BUG_ON(in_list(&sem->ceiling)); | ||
1003 | |||
1004 | list_for_each(pos, in_use) { | ||
1005 | held = list_entry(pos, struct pcp_semaphore, ceiling); | ||
1006 | if (held->prio_ceiling >= sem->prio_ceiling) { | ||
1007 | __list_add(&sem->ceiling, pos->prev, pos); | ||
1008 | return; | ||
1009 | } | ||
1010 | } | ||
1011 | |||
1012 | /* we hit the end of the list */ | ||
1013 | |||
1014 | list_add_tail(&sem->ceiling, in_use); | ||
1015 | } | ||
1016 | |||
1017 | /* assumes preempt off */ | ||
1018 | static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling, | ||
1019 | struct task_struct* task, | ||
1020 | int effective_prio) | ||
1021 | { | ||
1022 | return ceiling == NULL || | ||
1023 | ceiling->prio_ceiling > effective_prio || | ||
1024 | ceiling->owner == task; | ||
1025 | } | ||
1026 | |||
/* Re-evaluate PCP priority inheritance on the local CPU: the current
 * ceiling holder (if any) inherits the priority of the highest-priority
 * ceiling-blocked waiter (if any), and a preemption is triggered when
 * the ready queue now beats the scheduled job.
 *
 * Assumes preemptions are off. */
static void pcp_priority_inheritance(void)
{
	unsigned long flags;
	pfp_domain_t* 	pfp = local_pfp;

	struct pcp_semaphore* ceiling = pcp_get_ceiling();
	struct task_struct *blocker, *blocked;

	blocker = ceiling ? ceiling->owner : NULL;
	blocked = __get_cpu_var(pcp_state).hp_waiter;

	raw_spin_lock_irqsave(&pfp->slock, flags);

	/* Current is no longer inheriting anything by default. This should be
	 * the currently scheduled job, and hence not currently queued. */
	BUG_ON(current != pfp->scheduled);

	/* clear all stale inheritance relations before re-establishing */
	fp_set_prio_inh(pfp, current, NULL);
	fp_set_prio_inh(pfp, blocked, NULL);
	fp_set_prio_inh(pfp, blocker, NULL);


	/* Let blocking job inherit priority of blocked job, if required. */
	if (blocker && blocked &&
	    fp_higher_prio(blocked, blocker)) {
		TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
			   blocked->comm, blocked->pid,
			   get_priority(blocker), get_priority(blocked));
		fp_set_prio_inh(pfp, blocker, blocked);
	}

	/* check if anything changed */
	if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
		preempt(pfp);

	raw_spin_unlock_irqrestore(&pfp->slock, flags);
}
1065 | |||
/* Acquire 'sem' under the PCP: block (suspended, uninterruptible) until
 * the caller's effective priority exceeds the local system ceiling, then
 * take ownership and push the semaphore onto the ceiling stack.
 *
 * @effective_prio may differ from the task's own priority (DPCP agents).
 * Called with preemptions off; preemption is re-enabled only around the
 * suspension itself. */
static void pcp_raise_ceiling(struct pcp_semaphore* sem,
			      int effective_prio)
{
	struct task_struct* t = current;
	struct pcp_semaphore* ceiling;
	prio_wait_queue_t wait;
	unsigned int waiting_higher_prio;

	do {
		ceiling = pcp_get_ceiling();
		if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
			break;

		TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
			  sem, ceiling->owner->comm, ceiling->owner->pid);

		/* we need to wait until the ceiling is lowered */

		/* enqueue in priority order */
		init_prio_waitqueue_entry(&wait, t, prio_point(effective_prio));
		set_task_state(t, TASK_UNINTERRUPTIBLE);
		waiting_higher_prio = add_wait_queue_prio_exclusive(
			&__get_cpu_var(pcp_state).ceiling_blocked, &wait);

		if (waiting_higher_prio == 0) {
			TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");

			/* we are the new highest-priority waiting job
			 * => update inheritance */
			__get_cpu_var(pcp_state).hp_waiter = t;
			pcp_priority_inheritance();
		}

		TS_LOCK_SUSPEND;

		preempt_enable_no_resched();
		schedule();
		preempt_disable();

		/* pcp_resume_unblocked() removed us from wait queue */

		TS_LOCK_RESUME;
		/* loop: the ceiling may have been re-raised meanwhile */
	} while(1);

	TRACE_CUR("PCP got the ceiling and sem %p\n", sem);

	/* We are good to go. The semaphore should be available. */
	BUG_ON(sem->owner != NULL);

	sem->owner = t;

	pcp_add_ceiling(sem);
}
1120 | |||
/* After the system ceiling was lowered, wake every ceiling-blocked job
 * (in priority order) that now passes the ceiling, and record the first
 * job that still does not pass as the new highest-priority waiter. */
static void pcp_resume_unblocked(void)
{
	wait_queue_head_t *blocked = &__get_cpu_var(pcp_state).ceiling_blocked;
	unsigned long flags;
	prio_wait_queue_t* q;
	struct task_struct* t = NULL;

	struct pcp_semaphore* ceiling = pcp_get_ceiling();

	spin_lock_irqsave(&blocked->lock, flags);

	while (waitqueue_active(blocked)) {
		/* check first == highest-priority waiting job */
		q = list_entry(blocked->task_list.next,
			       prio_wait_queue_t, wq.task_list);
		t = (struct task_struct*) q->wq.private;

		/* can it proceed now? => let it go */
		if (pcp_exceeds_ceiling(ceiling, t,
					prio_from_point(q->priority))) {
			__remove_wait_queue(blocked, &q->wq);
			wake_up_process(t);
		} else {
			/* We are done. Update highest-priority waiter. */
			__get_cpu_var(pcp_state).hp_waiter = t;
			goto out;
		}
	}
	/* If we get here, then there are no more waiting
	 * jobs. */
	__get_cpu_var(pcp_state).hp_waiter = NULL;
out:
	spin_unlock_irqrestore(&blocked->lock, flags);
}
1155 | |||
/* Release 'sem': pop it from the ceiling stack, wake any jobs that now
 * pass the (lowered) ceiling, and re-evaluate priority inheritance.
 * Assumes preemptions are off and that current owns 'sem'. */
static void pcp_lower_ceiling(struct pcp_semaphore* sem)
{
	BUG_ON(!in_list(&sem->ceiling));
	BUG_ON(sem->owner != current);
	BUG_ON(sem->on_cpu != smp_processor_id());

	/* remove from ceiling list */
	list_del(&sem->ceiling);

	/* release */
	sem->owner = NULL;

	TRACE_CUR("PCP released sem %p\n", sem);

	/* Wake up all ceiling-blocked jobs that now pass the ceiling. */
	pcp_resume_unblocked();

	pcp_priority_inheritance();
}
1176 | |||
1177 | static void pcp_update_prio_ceiling(struct pcp_semaphore* sem, | ||
1178 | int effective_prio) | ||
1179 | { | ||
1180 | /* This needs to be synchronized on something. | ||
1181 | * Might as well use waitqueue lock for the processor. | ||
1182 | * We assume this happens only before the task set starts execution, | ||
1183 | * (i.e., during initialization), but it may happen on multiple processors | ||
1184 | * at the same time. | ||
1185 | */ | ||
1186 | unsigned long flags; | ||
1187 | |||
1188 | struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu); | ||
1189 | |||
1190 | spin_lock_irqsave(&s->ceiling_blocked.lock, flags); | ||
1191 | |||
1192 | sem->prio_ceiling = min(sem->prio_ceiling, effective_prio); | ||
1193 | |||
1194 | spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags); | ||
1195 | } | ||
1196 | |||
/* Initialize a PCP semaphore bound to CPU 'cpu'. The ceiling starts at
 * INT_MAX (no user yet) and is lowered via pcp_update_prio_ceiling(). */
static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
{
	sem->owner = NULL;
	INIT_LIST_HEAD(&sem->ceiling);
	sem->prio_ceiling = INT_MAX;
	sem->on_cpu = cpu;
}
1204 | |||
1205 | |||
1206 | /* ******************** DPCP support ********************** */ | ||
1207 | |||
/* Distributed PCP semaphore: a PCP semaphore with a fixed home CPU,
 * plus the CPU the current owner invoked lock() from, so that it can
 * migrate back on unlock. */
struct dpcp_semaphore {
	struct litmus_lock litmus_lock;
	struct pcp_semaphore  pcp;
	/* partition the owner came from (NO_CPU when unowned) */
	int owner_cpu;
};

/* Map a generic litmus_lock back to its enclosing DPCP semaphore. */
static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
{
	return container_of(lock, struct dpcp_semaphore, litmus_lock);
}
1218 | |||
/* Migrate the invoking task to target_cpu by rewriting its partition
 * assignment and descheduling; the task resumes on the target CPU
 * (verified by the final BUG_ON). No-op if already there.
 * Called with preemptions disabled; they are briefly re-enabled around
 * the intervening schedule(). */
static void pfp_migrate_to(int target_cpu)
{
	struct task_struct* t = current;
	pfp_domain_t *from;

	if (get_partition(t) == target_cpu)
		return;

	/* make sure target_cpu makes sense */
	BUG_ON(!cpu_online(target_cpu));

	local_irq_disable();

	/* scheduled task should not be in any ready or release queue */
	BUG_ON(is_queued(t));

	/* Only the source domain's lock is taken to change the partition
	 * assignment; the target domain sees the task when it wakes up
	 * there. (An earlier comment spoke of locking both domains, but
	 * only one is actually locked here.) */
	from = task_pfp(t);

	raw_spin_lock(&from->slock);

	/* switch partitions */
	tsk_rt(t)->task_params.cpu = target_cpu;

	raw_spin_unlock(&from->slock);

	/* Don't trace scheduler costs as part of
	 * locking overhead. Scheduling costs are accounted for
	 * explicitly. */
	TS_LOCK_SUSPEND;

	local_irq_enable();
	preempt_enable_no_resched();

	/* deschedule to be migrated */
	schedule();

	/* we are now on the target processor */
	preempt_disable();

	/* start recording costs again */
	TS_LOCK_RESUME;

	BUG_ON(smp_processor_id() != target_cpu);
}
1265 | |||
/* Acquire a DPCP semaphore: boost, migrate to the resource's home CPU,
 * then acquire it via the local PCP. The caller's original partition is
 * recorded so pfp_dpcp_unlock() can migrate back.
 *
 * Returns 0 on success, -EPERM for non-real-time callers. */
int pfp_dpcp_lock(struct litmus_lock* l)
{
	struct task_struct* t = current;
	struct dpcp_semaphore *sem = dpcp_from_lock(l);
	int eprio = effective_agent_priority(get_priority(t));
	int from  = get_partition(t);
	int to    = sem->pcp.on_cpu;

	if (!is_realtime(t))
		return -EPERM;

	preempt_disable();

	/* Priority-boost ourself *before* we suspend so that
	 * our priority is boosted when we resume. */

	boost_priority(t, get_priority(t));

	pfp_migrate_to(to);

	pcp_raise_ceiling(&sem->pcp, eprio);

	/* yep, we got it => execute request */
	sem->owner_cpu = from;

	preempt_enable();

	return 0;
}
1295 | |||
/* Release a DPCP semaphore: lower the ceiling on the home CPU, drop the
 * priority boost, and migrate back to the caller's home partition.
 *
 * Returns 0 on success, -EINVAL if the caller does not hold the
 * semaphore (or is not on its home CPU). */
int pfp_dpcp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current;
	struct dpcp_semaphore *sem = dpcp_from_lock(l);
	int err = 0;
	int home;

	preempt_disable();

	/* ownership is only meaningful on the semaphore's home CPU */
	if (sem->pcp.on_cpu != smp_processor_id() || sem->pcp.owner != t) {
		err = -EINVAL;
		goto out;
	}

	home = sem->owner_cpu;

	/* give it back */
	pcp_lower_ceiling(&sem->pcp);

	/* we lose the benefit of priority boosting */
	unboost_priority(t);

	pfp_migrate_to(home);

out:
	preempt_enable();

	return err;
}
1325 | |||
1326 | int pfp_dpcp_open(struct litmus_lock* l, void* __user config) | ||
1327 | { | ||
1328 | struct task_struct *t = current; | ||
1329 | struct dpcp_semaphore *sem = dpcp_from_lock(l); | ||
1330 | int cpu, eprio; | ||
1331 | |||
1332 | if (!is_realtime(t)) | ||
1333 | /* we need to know the real-time priority */ | ||
1334 | return -EPERM; | ||
1335 | |||
1336 | if (get_user(cpu, (int*) config)) | ||
1337 | return -EFAULT; | ||
1338 | |||
1339 | /* make sure the resource location matches */ | ||
1340 | if (cpu != sem->pcp.on_cpu) | ||
1341 | return -EINVAL; | ||
1342 | |||
1343 | eprio = effective_agent_priority(get_priority(t)); | ||
1344 | |||
1345 | pcp_update_prio_ceiling(&sem->pcp, eprio); | ||
1346 | |||
1347 | return 0; | ||
1348 | } | ||
1349 | |||
1350 | int pfp_dpcp_close(struct litmus_lock* l) | ||
1351 | { | ||
1352 | struct task_struct *t = current; | ||
1353 | struct dpcp_semaphore *sem = dpcp_from_lock(l); | ||
1354 | int owner = 0; | ||
1355 | |||
1356 | preempt_disable(); | ||
1357 | |||
1358 | if (sem->pcp.on_cpu == smp_processor_id()) | ||
1359 | owner = sem->pcp.owner == t; | ||
1360 | |||
1361 | preempt_enable(); | ||
1362 | |||
1363 | if (owner) | ||
1364 | pfp_dpcp_unlock(l); | ||
1365 | |||
1366 | return 0; | ||
1367 | } | ||
1368 | |||
/* Deallocate a DPCP semaphore (litmus_lock 'deallocate' callback). */
void pfp_dpcp_free(struct litmus_lock* lock)
{
	kfree(dpcp_from_lock(lock));
}
1373 | |||
/* litmus_lock operations for DPCP; 'open' validates the home CPU and
 * establishes the priority ceiling. */
static struct litmus_lock_ops pfp_dpcp_lock_ops = {
	.close = pfp_dpcp_close,
	.lock = pfp_dpcp_lock,
	.open = pfp_dpcp_open,
	.unlock = pfp_dpcp_unlock,
	.deallocate = pfp_dpcp_free,
};
1381 | |||
1382 | static struct litmus_lock* pfp_new_dpcp(int on_cpu) | ||
1383 | { | ||
1384 | struct dpcp_semaphore* sem; | ||
1385 | |||
1386 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
1387 | if (!sem) | ||
1388 | return NULL; | ||
1389 | |||
1390 | sem->litmus_lock.ops = &pfp_dpcp_lock_ops; | ||
1391 | sem->owner_cpu = NO_CPU; | ||
1392 | pcp_init_semaphore(&sem->pcp, on_cpu); | ||
1393 | |||
1394 | return &sem->litmus_lock; | ||
1395 | } | ||
1396 | |||
1397 | |||
1398 | /* **** lock constructor **** */ | ||
1399 | |||
1400 | |||
1401 | static long pfp_allocate_lock(struct litmus_lock **lock, int type, | ||
1402 | void* __user config) | ||
1403 | { | ||
1404 | int err = -ENXIO, cpu; | ||
1405 | struct srp_semaphore* srp; | ||
1406 | |||
1407 | /* P-FP currently supports the SRP for local resources and the FMLP | ||
1408 | * for global resources. */ | ||
1409 | switch (type) { | ||
1410 | case FMLP_SEM: | ||
1411 | /* FIFO Mutex Locking Protocol */ | ||
1412 | *lock = pfp_new_fmlp(); | ||
1413 | if (*lock) | ||
1414 | err = 0; | ||
1415 | else | ||
1416 | err = -ENOMEM; | ||
1417 | break; | ||
1418 | |||
1419 | case MPCP_SEM: | ||
1420 | /* Multiprocesor Priority Ceiling Protocol */ | ||
1421 | *lock = pfp_new_mpcp(0); | ||
1422 | if (*lock) | ||
1423 | err = 0; | ||
1424 | else | ||
1425 | err = -ENOMEM; | ||
1426 | break; | ||
1427 | |||
1428 | case MPCP_VS_SEM: | ||
1429 | /* Multiprocesor Priority Ceiling Protocol with virtual spinning */ | ||
1430 | *lock = pfp_new_mpcp(1); | ||
1431 | if (*lock) | ||
1432 | err = 0; | ||
1433 | else | ||
1434 | err = -ENOMEM; | ||
1435 | break; | ||
1436 | |||
1437 | case DPCP_SEM: | ||
1438 | /* Distributed Priority Ceiling Protocol */ | ||
1439 | if (get_user(cpu, (int*) config)) | ||
1440 | return -EFAULT; | ||
1441 | |||
1442 | if (!cpu_online(cpu)) | ||
1443 | return -EINVAL; | ||
1444 | |||
1445 | *lock = pfp_new_dpcp(cpu); | ||
1446 | if (*lock) | ||
1447 | err = 0; | ||
1448 | else | ||
1449 | err = -ENOMEM; | ||
1450 | break; | ||
1451 | |||
1452 | case SRP_SEM: | ||
1453 | /* Baker's Stack Resource Policy */ | ||
1454 | srp = allocate_srp_semaphore(); | ||
1455 | if (srp) { | ||
1456 | *lock = &srp->litmus_lock; | ||
1457 | err = 0; | ||
1458 | } else | ||
1459 | err = -ENOMEM; | ||
1460 | break; | ||
1461 | }; | ||
1462 | |||
1463 | return err; | ||
1464 | } | ||
1465 | |||
1466 | #endif | ||
1467 | |||
/* Admission test: a task may join P-FP only if it currently runs on its
 * assigned partition, is not placed on the release-master CPU (when
 * configured), and has a valid (positive) fixed priority. */
static long pfp_admit_task(struct task_struct* tsk)
{
	if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
#ifdef CONFIG_RELEASE_MASTER
	    /* don't allow tasks on release master CPU */
	    task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
#endif
	    get_priority(tsk) > 0)
		return 0;
	else
		return -EINVAL;
}
1480 | |||
/* Plugin activation hook: propagate the release-master setting to every
 * partition and (re)initialize the per-CPU locking-protocol state.
 *
 * Fix: 'cpu' was declared only under CONFIG_RELEASE_MASTER but is also
 * used in the CONFIG_LITMUS_LOCKING block below, breaking the build
 * when LOCKING is enabled without RELEASE_MASTER. Declare it whenever
 * either option is set. */
static long pfp_activate_plugin(void)
{
#if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
	int cpu;
#endif

#ifdef CONFIG_RELEASE_MASTER
	for_each_online_cpu(cpu) {
		remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
	}
#endif

#ifdef CONFIG_LITMUS_LOCKING
	get_srp_prio = pfp_get_srp_prio;

	for_each_online_cpu(cpu) {
		/* reset MPCP-VS virtual-spin state */
		init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
		per_cpu(mpcpvs_vspin, cpu) = NULL;

		/* reset PCP state and cache the per-CPU domain pointer */
		pcp_init_state(&per_cpu(pcp_state, cpu));
		pfp_doms[cpu] = remote_pfp(cpu);
	}

#endif

	return 0;
}
1506 | |||
1507 | |||
1508 | /* Plugin object */ | ||
/* Plugin object: registers the P-FP (partitioned fixed-priority)
 * scheduler's callbacks with the LITMUS^RT core. */
static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
	.plugin_name		= "P-FP",
	.tick			= pfp_tick,
	.task_new		= pfp_task_new,
	.complete_job		= complete_job,
	.task_exit		= pfp_task_exit,
	.schedule		= pfp_schedule,
	.task_wake_up		= pfp_task_wake_up,
	.task_block		= pfp_task_block,
	.admit_task		= pfp_admit_task,
	.activate_plugin	= pfp_activate_plugin,
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock		= pfp_allocate_lock,
	.finish_switch		= pfp_finish_switch,
#endif
};
1525 | |||
1526 | |||
/* Module init: set up one pfp domain per online CPU and register the
 * plugin with the LITMUS^RT core. */
static int __init init_pfp(void)
{
	int i;

	/* We do not really want to support cpu hotplug, do we? ;)
	 * However, if we are so crazy to do so,
	 * we cannot use num_online_cpu()
	 */
	/* NOTE(review): despite the note above, the loop below does use
	 * num_online_cpus() and assumes online CPU ids are contiguous
	 * starting at 0 --- confirm this holds for supported configs. */
	for (i = 0; i < num_online_cpus(); i++) {
		pfp_domain_init(remote_pfp(i), i);
	}
	return register_sched_plugin(&pfp_plugin);
}

module_init(init_pfp);
1542 | |||
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c new file mode 100644 index 000000000000..950fe5e6a1ab --- /dev/null +++ b/litmus/sched_plugin.c | |||
@@ -0,0 +1,233 @@ | |||
1 | /* sched_plugin.c -- core infrastructure for the scheduler plugin system | ||
2 | * | ||
3 | * This file includes the initialization of the plugin system, the no-op Linux | ||
4 | * scheduler plugin, some dummy functions, and some helper functions. | ||
5 | */ | ||
6 | |||
7 | #include <linux/list.h> | ||
8 | #include <linux/spinlock.h> | ||
9 | #include <linux/sched.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/preempt.h> | ||
14 | #include <litmus/jobs.h> | ||
15 | |||
16 | /* | ||
17 | * Generic function to trigger preemption on either local or remote cpu | ||
18 | * from scheduler plugins. The key feature is that this function is | ||
19 | * non-preemptive section aware and does not invoke the scheduler / send | ||
20 | * IPIs if the to-be-preempted task is actually non-preemptive. | ||
21 | */ | ||
22 | void preempt_if_preemptable(struct task_struct* t, int cpu) | ||
23 | { | ||
24 | /* t is the real-time task executing on CPU on_cpu If t is NULL, then | ||
25 | * on_cpu is currently scheduling background work. | ||
26 | */ | ||
27 | |||
28 | int reschedule = 0; | ||
29 | |||
30 | if (!t) | ||
31 | /* move non-real-time task out of the way */ | ||
32 | reschedule = 1; | ||
33 | else { | ||
34 | if (smp_processor_id() == cpu) { | ||
35 | /* local CPU case */ | ||
36 | /* check if we need to poke userspace */ | ||
37 | if (is_user_np(t)) | ||
38 | /* Yes, poke it. This doesn't have to be atomic since | ||
39 | * the task is definitely not executing. */ | ||
40 | request_exit_np(t); | ||
41 | else if (!is_kernel_np(t)) | ||
42 | /* only if we are allowed to preempt the | ||
43 | * currently-executing task */ | ||
44 | reschedule = 1; | ||
45 | } else { | ||
46 | /* Remote CPU case. Only notify if it's not a kernel | ||
47 | * NP section and if we didn't set the userspace | ||
48 | * flag. */ | ||
49 | reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); | ||
50 | } | ||
51 | } | ||
52 | if (likely(reschedule)) | ||
53 | litmus_reschedule(cpu); | ||
54 | } | ||
55 | |||
56 | |||
57 | /************************************************************* | ||
58 | * Dummy plugin functions * | ||
59 | *************************************************************/ | ||
60 | |||
61 | static void litmus_dummy_finish_switch(struct task_struct * prev) | ||
62 | { | ||
63 | } | ||
64 | |||
65 | static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) | ||
66 | { | ||
67 | sched_state_task_picked(); | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
71 | static void litmus_dummy_tick(struct task_struct* tsk) | ||
72 | { | ||
73 | } | ||
74 | |||
75 | static long litmus_dummy_admit_task(struct task_struct* tsk) | ||
76 | { | ||
77 | printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", | ||
78 | tsk->comm, tsk->pid); | ||
79 | return -EINVAL; | ||
80 | } | ||
81 | |||
82 | static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) | ||
83 | { | ||
84 | } | ||
85 | |||
86 | static void litmus_dummy_task_wake_up(struct task_struct *task) | ||
87 | { | ||
88 | } | ||
89 | |||
90 | static void litmus_dummy_task_block(struct task_struct *task) | ||
91 | { | ||
92 | } | ||
93 | |||
94 | static void litmus_dummy_task_exit(struct task_struct *task) | ||
95 | { | ||
96 | } | ||
97 | |||
98 | static void litmus_dummy_pre_setsched(struct task_struct *task, int policy) | ||
99 | { | ||
100 | } | ||
101 | |||
102 | |||
103 | static long litmus_dummy_complete_job(void) | ||
104 | { | ||
105 | return -ENOSYS; | ||
106 | } | ||
107 | |||
108 | static long litmus_dummy_activate_plugin(void) | ||
109 | { | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | static long litmus_dummy_deactivate_plugin(void) | ||
114 | { | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | #ifdef CONFIG_LITMUS_LOCKING | ||
119 | |||
120 | static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, | ||
121 | void* __user config) | ||
122 | { | ||
123 | return -ENXIO; | ||
124 | } | ||
125 | |||
126 | #endif | ||
127 | |||
128 | |||
129 | /* The default scheduler plugin. It doesn't do anything and lets Linux do its | ||
130 | * job. | ||
131 | */ | ||
132 | struct sched_plugin linux_sched_plugin = { | ||
133 | .plugin_name = "Linux", | ||
134 | .tick = litmus_dummy_tick, | ||
135 | .task_new = litmus_dummy_task_new, | ||
136 | .task_exit = litmus_dummy_task_exit, | ||
137 | .task_wake_up = litmus_dummy_task_wake_up, | ||
138 | .task_block = litmus_dummy_task_block, | ||
139 | .complete_job = litmus_dummy_complete_job, | ||
140 | .schedule = litmus_dummy_schedule, | ||
141 | .finish_switch = litmus_dummy_finish_switch, | ||
142 | .activate_plugin = litmus_dummy_activate_plugin, | ||
143 | .deactivate_plugin = litmus_dummy_deactivate_plugin, | ||
144 | #ifdef CONFIG_LITMUS_LOCKING | ||
145 | .allocate_lock = litmus_dummy_allocate_lock, | ||
146 | #endif | ||
147 | .admit_task = litmus_dummy_admit_task | ||
148 | }; | ||
149 | |||
150 | /* | ||
151 | * The reference to current plugin that is used to schedule tasks within | ||
152 | * the system. It stores references to actual function implementations | ||
153 | * Should be initialized by calling "init_***_plugin()" | ||
154 | */ | ||
155 | struct sched_plugin *litmus = &linux_sched_plugin; | ||
156 | |||
157 | /* the list of registered scheduling plugins */ | ||
158 | static LIST_HEAD(sched_plugins); | ||
159 | static DEFINE_RAW_SPINLOCK(sched_plugins_lock); | ||
160 | |||
161 | #define CHECK(func) {\ | ||
162 | if (!plugin->func) \ | ||
163 | plugin->func = litmus_dummy_ ## func;} | ||
164 | |||
165 | /* FIXME: get reference to module */ | ||
166 | int register_sched_plugin(struct sched_plugin* plugin) | ||
167 | { | ||
168 | printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", | ||
169 | plugin->plugin_name); | ||
170 | |||
171 | /* make sure we don't trip over null pointers later */ | ||
172 | CHECK(finish_switch); | ||
173 | CHECK(schedule); | ||
174 | CHECK(tick); | ||
175 | CHECK(task_wake_up); | ||
176 | CHECK(task_exit); | ||
177 | CHECK(task_block); | ||
178 | CHECK(task_new); | ||
179 | CHECK(complete_job); | ||
180 | CHECK(activate_plugin); | ||
181 | CHECK(deactivate_plugin); | ||
182 | #ifdef CONFIG_LITMUS_LOCKING | ||
183 | CHECK(allocate_lock); | ||
184 | #endif | ||
185 | CHECK(admit_task); | ||
186 | CHECK(pre_setsched); | ||
187 | |||
188 | if (!plugin->release_at) | ||
189 | plugin->release_at = release_at; | ||
190 | |||
191 | raw_spin_lock(&sched_plugins_lock); | ||
192 | list_add(&plugin->list, &sched_plugins); | ||
193 | raw_spin_unlock(&sched_plugins_lock); | ||
194 | |||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | |||
199 | /* FIXME: reference counting, etc. */ | ||
200 | struct sched_plugin* find_sched_plugin(const char* name) | ||
201 | { | ||
202 | struct list_head *pos; | ||
203 | struct sched_plugin *plugin; | ||
204 | |||
205 | raw_spin_lock(&sched_plugins_lock); | ||
206 | list_for_each(pos, &sched_plugins) { | ||
207 | plugin = list_entry(pos, struct sched_plugin, list); | ||
208 | if (!strcmp(plugin->plugin_name, name)) | ||
209 | goto out_unlock; | ||
210 | } | ||
211 | plugin = NULL; | ||
212 | |||
213 | out_unlock: | ||
214 | raw_spin_unlock(&sched_plugins_lock); | ||
215 | return plugin; | ||
216 | } | ||
217 | |||
218 | int print_sched_plugins(char* buf, int max) | ||
219 | { | ||
220 | int count = 0; | ||
221 | struct list_head *pos; | ||
222 | struct sched_plugin *plugin; | ||
223 | |||
224 | raw_spin_lock(&sched_plugins_lock); | ||
225 | list_for_each(pos, &sched_plugins) { | ||
226 | plugin = list_entry(pos, struct sched_plugin, list); | ||
227 | count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name); | ||
228 | if (max - count <= 0) | ||
229 | break; | ||
230 | } | ||
231 | raw_spin_unlock(&sched_plugins_lock); | ||
232 | return count; | ||
233 | } | ||
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c new file mode 100644 index 000000000000..7b12689ab61a --- /dev/null +++ b/litmus/sched_psn_edf.c | |||
@@ -0,0 +1,917 @@ | |||
1 | /* | ||
2 | * kernel/sched_psn_edf.c | ||
3 | * | ||
4 | * Implementation of the PSN-EDF scheduler plugin. | ||
5 | * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c. | ||
6 | * | ||
7 | * Suspensions and non-preemptable sections are supported. | ||
8 | * Priority inheritance is not supported. | ||
9 | */ | ||
10 | |||
11 | #include <linux/percpu.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/list.h> | ||
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/module.h> | ||
16 | |||
17 | #include <litmus/litmus.h> | ||
18 | #include <litmus/wait.h> | ||
19 | #include <litmus/jobs.h> | ||
20 | #include <litmus/preempt.h> | ||
21 | #include <litmus/sched_plugin.h> | ||
22 | #include <litmus/edf_common.h> | ||
23 | #include <litmus/sched_trace.h> | ||
24 | #include <litmus/trace.h> | ||
25 | |||
26 | typedef struct { | ||
27 | rt_domain_t domain; | ||
28 | int cpu; | ||
29 | struct task_struct* scheduled; /* only RT tasks */ | ||
30 | /* | ||
31 | * scheduling lock slock | ||
32 | * protects the domain and serializes scheduling decisions | ||
33 | */ | ||
34 | #define slock domain.ready_lock | ||
35 | |||
36 | } psnedf_domain_t; | ||
37 | |||
38 | DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains); | ||
39 | |||
40 | #define local_edf (&__get_cpu_var(psnedf_domains).domain) | ||
41 | #define local_pedf (&__get_cpu_var(psnedf_domains)) | ||
42 | #define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain) | ||
43 | #define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu)) | ||
44 | #define task_edf(task) remote_edf(get_partition(task)) | ||
45 | #define task_pedf(task) remote_pedf(get_partition(task)) | ||
46 | |||
47 | |||
48 | static void psnedf_domain_init(psnedf_domain_t* pedf, | ||
49 | check_resched_needed_t check, | ||
50 | release_jobs_t release, | ||
51 | int cpu) | ||
52 | { | ||
53 | edf_domain_init(&pedf->domain, check, release); | ||
54 | pedf->cpu = cpu; | ||
55 | pedf->scheduled = NULL; | ||
56 | } | ||
57 | |||
58 | static void requeue(struct task_struct* t, rt_domain_t *edf) | ||
59 | { | ||
60 | if (t->state != TASK_RUNNING) | ||
61 | TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); | ||
62 | |||
63 | set_rt_flags(t, RT_F_RUNNING); | ||
64 | if (is_released(t, litmus_clock())) | ||
65 | __add_ready(edf, t); | ||
66 | else | ||
67 | add_release(edf, t); /* it has got to wait */ | ||
68 | } | ||
69 | |||
70 | /* we assume the lock is being held */ | ||
71 | static void preempt(psnedf_domain_t *pedf) | ||
72 | { | ||
73 | preempt_if_preemptable(pedf->scheduled, pedf->cpu); | ||
74 | } | ||
75 | |||
76 | #ifdef CONFIG_LITMUS_LOCKING | ||
77 | |||
78 | static void boost_priority(struct task_struct* t) | ||
79 | { | ||
80 | unsigned long flags; | ||
81 | psnedf_domain_t* pedf = task_pedf(t); | ||
82 | lt_t now; | ||
83 | |||
84 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
85 | now = litmus_clock(); | ||
86 | |||
87 | TRACE_TASK(t, "priority boosted at %llu\n", now); | ||
88 | |||
89 | tsk_rt(t)->priority_boosted = 1; | ||
90 | tsk_rt(t)->boost_start_time = now; | ||
91 | |||
92 | if (pedf->scheduled != t) { | ||
93 | /* holder may be queued: first stop queue changes */ | ||
94 | raw_spin_lock(&pedf->domain.release_lock); | ||
95 | if (is_queued(t) && | ||
96 | /* If it is queued, then we need to re-order. */ | ||
97 | bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) && | ||
98 | /* If we bubbled to the top, then we need to check for preemptions. */ | ||
99 | edf_preemption_needed(&pedf->domain, pedf->scheduled)) | ||
100 | preempt(pedf); | ||
101 | raw_spin_unlock(&pedf->domain.release_lock); | ||
102 | } /* else: nothing to do since the job is not queued while scheduled */ | ||
103 | |||
104 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
105 | } | ||
106 | |||
107 | static void unboost_priority(struct task_struct* t) | ||
108 | { | ||
109 | unsigned long flags; | ||
110 | psnedf_domain_t* pedf = task_pedf(t); | ||
111 | lt_t now; | ||
112 | |||
113 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
114 | now = litmus_clock(); | ||
115 | |||
116 | /* assumption: this only happens when the job is scheduled */ | ||
117 | BUG_ON(pedf->scheduled != t); | ||
118 | |||
119 | TRACE_TASK(t, "priority restored at %llu\n", now); | ||
120 | |||
121 | /* priority boosted jobs must be scheduled */ | ||
122 | BUG_ON(pedf->scheduled != t); | ||
123 | |||
124 | tsk_rt(t)->priority_boosted = 0; | ||
125 | tsk_rt(t)->boost_start_time = 0; | ||
126 | |||
127 | /* check if this changes anything */ | ||
128 | if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) | ||
129 | preempt(pedf); | ||
130 | |||
131 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
132 | } | ||
133 | |||
134 | #endif | ||
135 | |||
136 | /* This check is trivial in partioned systems as we only have to consider | ||
137 | * the CPU of the partition. | ||
138 | */ | ||
139 | static int psnedf_check_resched(rt_domain_t *edf) | ||
140 | { | ||
141 | psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain); | ||
142 | |||
143 | /* because this is a callback from rt_domain_t we already hold | ||
144 | * the necessary lock for the ready queue | ||
145 | */ | ||
146 | if (edf_preemption_needed(edf, pedf->scheduled)) { | ||
147 | preempt(pedf); | ||
148 | return 1; | ||
149 | } else | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | static void job_completion(struct task_struct* t, int forced) | ||
154 | { | ||
155 | sched_trace_task_completion(t,forced); | ||
156 | TRACE_TASK(t, "job_completion().\n"); | ||
157 | |||
158 | set_rt_flags(t, RT_F_SLEEP); | ||
159 | prepare_for_next_period(t); | ||
160 | } | ||
161 | |||
162 | static void psnedf_tick(struct task_struct *t) | ||
163 | { | ||
164 | psnedf_domain_t *pedf = local_pedf; | ||
165 | |||
166 | /* Check for inconsistency. We don't need the lock for this since | ||
167 | * ->scheduled is only changed in schedule, which obviously is not | ||
168 | * executing in parallel on this CPU | ||
169 | */ | ||
170 | BUG_ON(is_realtime(t) && t != pedf->scheduled); | ||
171 | |||
172 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
173 | if (!is_np(t)) { | ||
174 | litmus_reschedule_local(); | ||
175 | TRACE("psnedf_scheduler_tick: " | ||
176 | "%d is preemptable " | ||
177 | " => FORCE_RESCHED\n", t->pid); | ||
178 | } else if (is_user_np(t)) { | ||
179 | TRACE("psnedf_scheduler_tick: " | ||
180 | "%d is non-preemptable, " | ||
181 | "preemption delayed.\n", t->pid); | ||
182 | request_exit_np(t); | ||
183 | } | ||
184 | } | ||
185 | } | ||
186 | |||
187 | static struct task_struct* psnedf_schedule(struct task_struct * prev) | ||
188 | { | ||
189 | psnedf_domain_t* pedf = local_pedf; | ||
190 | rt_domain_t* edf = &pedf->domain; | ||
191 | struct task_struct* next; | ||
192 | |||
193 | int out_of_time, sleep, preempt, | ||
194 | np, exists, blocks, resched; | ||
195 | |||
196 | raw_spin_lock(&pedf->slock); | ||
197 | |||
198 | /* sanity checking | ||
199 | * differently from gedf, when a task exits (dead) | ||
200 | * pedf->schedule may be null and prev _is_ realtime | ||
201 | */ | ||
202 | BUG_ON(pedf->scheduled && pedf->scheduled != prev); | ||
203 | BUG_ON(pedf->scheduled && !is_realtime(prev)); | ||
204 | |||
205 | /* (0) Determine state */ | ||
206 | exists = pedf->scheduled != NULL; | ||
207 | blocks = exists && !is_running(pedf->scheduled); | ||
208 | out_of_time = exists && | ||
209 | budget_enforced(pedf->scheduled) && | ||
210 | budget_exhausted(pedf->scheduled); | ||
211 | np = exists && is_np(pedf->scheduled); | ||
212 | sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP; | ||
213 | preempt = edf_preemption_needed(edf, prev); | ||
214 | |||
215 | /* If we need to preempt do so. | ||
216 | * The following checks set resched to 1 in case of special | ||
217 | * circumstances. | ||
218 | */ | ||
219 | resched = preempt; | ||
220 | |||
221 | /* If a task blocks we have no choice but to reschedule. | ||
222 | */ | ||
223 | if (blocks) | ||
224 | resched = 1; | ||
225 | |||
226 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
227 | * Multiple calls to request_exit_np() don't hurt. | ||
228 | */ | ||
229 | if (np && (out_of_time || preempt || sleep)) | ||
230 | request_exit_np(pedf->scheduled); | ||
231 | |||
232 | /* Any task that is preemptable and either exhausts its execution | ||
233 | * budget or wants to sleep completes. We may have to reschedule after | ||
234 | * this. | ||
235 | */ | ||
236 | if (!np && (out_of_time || sleep) && !blocks) { | ||
237 | job_completion(pedf->scheduled, !sleep); | ||
238 | resched = 1; | ||
239 | } | ||
240 | |||
241 | /* The final scheduling decision. Do we need to switch for some reason? | ||
242 | * Switch if we are in RT mode and have no task or if we need to | ||
243 | * resched. | ||
244 | */ | ||
245 | next = NULL; | ||
246 | if ((!np || blocks) && (resched || !exists)) { | ||
247 | /* When preempting a task that does not block, then | ||
248 | * re-insert it into either the ready queue or the | ||
249 | * release queue (if it completed). requeue() picks | ||
250 | * the appropriate queue. | ||
251 | */ | ||
252 | if (pedf->scheduled && !blocks) | ||
253 | requeue(pedf->scheduled, edf); | ||
254 | next = __take_ready(edf); | ||
255 | } else | ||
256 | /* Only override Linux scheduler if we have a real-time task | ||
257 | * scheduled that needs to continue. | ||
258 | */ | ||
259 | if (exists) | ||
260 | next = prev; | ||
261 | |||
262 | if (next) { | ||
263 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
264 | set_rt_flags(next, RT_F_RUNNING); | ||
265 | } else { | ||
266 | TRACE("becoming idle at %llu\n", litmus_clock()); | ||
267 | } | ||
268 | |||
269 | pedf->scheduled = next; | ||
270 | sched_state_task_picked(); | ||
271 | raw_spin_unlock(&pedf->slock); | ||
272 | |||
273 | return next; | ||
274 | } | ||
275 | |||
276 | |||
277 | /* Prepare a task for running in RT mode | ||
278 | */ | ||
279 | static void psnedf_task_new(struct task_struct * t, int on_rq, int running) | ||
280 | { | ||
281 | rt_domain_t* edf = task_edf(t); | ||
282 | psnedf_domain_t* pedf = task_pedf(t); | ||
283 | unsigned long flags; | ||
284 | |||
285 | TRACE_TASK(t, "psn edf: task new, cpu = %d\n", | ||
286 | t->rt_param.task_params.cpu); | ||
287 | |||
288 | /* setup job parameters */ | ||
289 | release_at(t, litmus_clock()); | ||
290 | |||
291 | /* The task should be running in the queue, otherwise signal | ||
292 | * code will try to wake it up with fatal consequences. | ||
293 | */ | ||
294 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
295 | if (running) { | ||
296 | /* there shouldn't be anything else running at the time */ | ||
297 | BUG_ON(pedf->scheduled); | ||
298 | pedf->scheduled = t; | ||
299 | } else { | ||
300 | requeue(t, edf); | ||
301 | /* maybe we have to reschedule */ | ||
302 | preempt(pedf); | ||
303 | } | ||
304 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
305 | } | ||
306 | |||
307 | static void psnedf_task_wake_up(struct task_struct *task) | ||
308 | { | ||
309 | unsigned long flags; | ||
310 | psnedf_domain_t* pedf = task_pedf(task); | ||
311 | rt_domain_t* edf = task_edf(task); | ||
312 | lt_t now; | ||
313 | |||
314 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
315 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
316 | BUG_ON(is_queued(task)); | ||
317 | now = litmus_clock(); | ||
318 | if (is_tardy(task, now) | ||
319 | #ifdef CONFIG_LITMUS_LOCKING | ||
320 | /* We need to take suspensions because of semaphores into | ||
321 | * account! If a job resumes after being suspended due to acquiring | ||
322 | * a semaphore, it should never be treated as a new job release. | ||
323 | */ | ||
324 | && !is_priority_boosted(task) | ||
325 | #endif | ||
326 | ) { | ||
327 | /* new sporadic release */ | ||
328 | release_at(task, now); | ||
329 | sched_trace_task_release(task); | ||
330 | } | ||
331 | |||
332 | /* Only add to ready queue if it is not the currently-scheduled | ||
333 | * task. This could be the case if a task was woken up concurrently | ||
334 | * on a remote CPU before the executing CPU got around to actually | ||
335 | * de-scheduling the task, i.e., wake_up() raced with schedule() | ||
336 | * and won. | ||
337 | */ | ||
338 | if (pedf->scheduled != task) | ||
339 | requeue(task, edf); | ||
340 | |||
341 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
342 | TRACE_TASK(task, "wake up done\n"); | ||
343 | } | ||
344 | |||
345 | static void psnedf_task_block(struct task_struct *t) | ||
346 | { | ||
347 | /* only running tasks can block, thus t is in no queue */ | ||
348 | TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); | ||
349 | |||
350 | BUG_ON(!is_realtime(t)); | ||
351 | BUG_ON(is_queued(t)); | ||
352 | } | ||
353 | |||
354 | static void psnedf_task_exit(struct task_struct * t) | ||
355 | { | ||
356 | unsigned long flags; | ||
357 | psnedf_domain_t* pedf = task_pedf(t); | ||
358 | rt_domain_t* edf; | ||
359 | |||
360 | raw_spin_lock_irqsave(&pedf->slock, flags); | ||
361 | if (is_queued(t)) { | ||
362 | /* dequeue */ | ||
363 | edf = task_edf(t); | ||
364 | remove(edf, t); | ||
365 | } | ||
366 | if (pedf->scheduled == t) | ||
367 | pedf->scheduled = NULL; | ||
368 | |||
369 | TRACE_TASK(t, "RIP, now reschedule\n"); | ||
370 | |||
371 | preempt(pedf); | ||
372 | raw_spin_unlock_irqrestore(&pedf->slock, flags); | ||
373 | } | ||
374 | |||
375 | #ifdef CONFIG_LITMUS_LOCKING | ||
376 | |||
377 | #include <litmus/fdso.h> | ||
378 | #include <litmus/srp.h> | ||
379 | |||
380 | /* ******************** SRP support ************************ */ | ||
381 | |||
382 | static unsigned int psnedf_get_srp_prio(struct task_struct* t) | ||
383 | { | ||
384 | /* assumes implicit deadlines */ | ||
385 | return get_rt_period(t); | ||
386 | } | ||
387 | |||
388 | /* ******************** FMLP support ********************** */ | ||
389 | |||
390 | /* struct for semaphore with priority inheritance */ | ||
391 | struct fmlp_semaphore { | ||
392 | struct litmus_lock litmus_lock; | ||
393 | |||
394 | /* current resource holder */ | ||
395 | struct task_struct *owner; | ||
396 | |||
397 | /* FIFO queue of waiting tasks */ | ||
398 | wait_queue_head_t wait; | ||
399 | }; | ||
400 | |||
401 | static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock) | ||
402 | { | ||
403 | return container_of(lock, struct fmlp_semaphore, litmus_lock); | ||
404 | } | ||
405 | int psnedf_fmlp_lock(struct litmus_lock* l) | ||
406 | { | ||
407 | struct task_struct* t = current; | ||
408 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
409 | wait_queue_t wait; | ||
410 | unsigned long flags; | ||
411 | |||
412 | if (!is_realtime(t)) | ||
413 | return -EPERM; | ||
414 | |||
415 | preempt_disable(); | ||
416 | |||
417 | TRACE_CUR("want FMLP sem %p\n", sem); | ||
418 | |||
419 | boost_priority(t); | ||
420 | |||
421 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
422 | |||
423 | if (sem->owner) { | ||
424 | /* resource is not free => must suspend and wait */ | ||
425 | |||
426 | init_waitqueue_entry(&wait, t); | ||
427 | |||
428 | /* FIXME: interruptible would be nice some day */ | ||
429 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
430 | |||
431 | TRACE_CUR("blocking on FMLP sem %p\n", sem); | ||
432 | __add_wait_queue_tail_exclusive(&sem->wait, &wait); | ||
433 | |||
434 | /* release lock before sleeping */ | ||
435 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
436 | |||
437 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
438 | * when we wake up; we are guaranteed to have the lock since | ||
439 | * there is only one wake up per release. | ||
440 | */ | ||
441 | |||
442 | TS_LOCK_SUSPEND; | ||
443 | |||
444 | preempt_enable_no_resched(); | ||
445 | |||
446 | schedule(); | ||
447 | |||
448 | preempt_disable(); | ||
449 | |||
450 | TS_LOCK_RESUME; | ||
451 | |||
452 | /* Since we hold the lock, no other task will change | ||
453 | * ->owner. We can thus check it without acquiring the spin | ||
454 | * lock. */ | ||
455 | BUG_ON(sem->owner != t); | ||
456 | } else { | ||
457 | /* it's ours now */ | ||
458 | sem->owner = t; | ||
459 | |||
460 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
461 | } | ||
462 | |||
463 | TRACE_CUR("got FMLP sem %p\n", sem); | ||
464 | |||
465 | preempt_enable(); | ||
466 | |||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | int psnedf_fmlp_unlock(struct litmus_lock* l) | ||
471 | { | ||
472 | struct task_struct *t = current, *next; | ||
473 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
474 | unsigned long flags; | ||
475 | int err = 0; | ||
476 | |||
477 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
478 | |||
479 | if (sem->owner != t) { | ||
480 | err = -EINVAL; | ||
481 | goto out; | ||
482 | } | ||
483 | |||
484 | TRACE_CUR("releasing FMLP sem %p\n", sem); | ||
485 | |||
486 | /* we lose the benefit of priority boosting */ | ||
487 | |||
488 | unboost_priority(t); | ||
489 | |||
490 | /* check if there are jobs waiting for this resource */ | ||
491 | next = __waitqueue_remove_first(&sem->wait); | ||
492 | if (next) { | ||
493 | /* next becomes the resouce holder */ | ||
494 | sem->owner = next; | ||
495 | |||
496 | /* wake up next */ | ||
497 | wake_up_process(next); | ||
498 | } else | ||
499 | /* resource becomes available */ | ||
500 | sem->owner = NULL; | ||
501 | |||
502 | out: | ||
503 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
504 | return err; | ||
505 | } | ||
506 | |||
507 | int psnedf_fmlp_close(struct litmus_lock* l) | ||
508 | { | ||
509 | struct task_struct *t = current; | ||
510 | struct fmlp_semaphore *sem = fmlp_from_lock(l); | ||
511 | unsigned long flags; | ||
512 | |||
513 | int owner; | ||
514 | |||
515 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
516 | |||
517 | owner = sem->owner == t; | ||
518 | |||
519 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
520 | |||
521 | if (owner) | ||
522 | psnedf_fmlp_unlock(l); | ||
523 | |||
524 | return 0; | ||
525 | } | ||
526 | |||
527 | void psnedf_fmlp_free(struct litmus_lock* lock) | ||
528 | { | ||
529 | kfree(fmlp_from_lock(lock)); | ||
530 | } | ||
531 | |||
532 | static struct litmus_lock_ops psnedf_fmlp_lock_ops = { | ||
533 | .close = psnedf_fmlp_close, | ||
534 | .lock = psnedf_fmlp_lock, | ||
535 | .unlock = psnedf_fmlp_unlock, | ||
536 | .deallocate = psnedf_fmlp_free, | ||
537 | }; | ||
538 | |||
539 | static struct litmus_lock* psnedf_new_fmlp(void) | ||
540 | { | ||
541 | struct fmlp_semaphore* sem; | ||
542 | |||
543 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
544 | if (!sem) | ||
545 | return NULL; | ||
546 | |||
547 | sem->owner = NULL; | ||
548 | init_waitqueue_head(&sem->wait); | ||
549 | sem->litmus_lock.ops = &psnedf_fmlp_lock_ops; | ||
550 | |||
551 | return &sem->litmus_lock; | ||
552 | } | ||
553 | |||
554 | |||
555 | |||
556 | /* ******************** OMLP support **********************/ | ||
557 | |||
558 | /* Since jobs spin "virtually" while waiting to acquire a lock, | ||
559 | * they first must aquire a local per-cpu resource. | ||
560 | */ | ||
561 | static DEFINE_PER_CPU(wait_queue_head_t, omlp_token_wait); | ||
562 | static DEFINE_PER_CPU(struct task_struct*, omlp_token); | ||
563 | |||
564 | /* called with preemptions off <=> no local modifications */ | ||
565 | static void omlp_grab_token(void) | ||
566 | { | ||
567 | struct task_struct* t = current; | ||
568 | |||
569 | while (1) { | ||
570 | if (__get_cpu_var(omlp_token) == NULL) { | ||
571 | /* take it */ | ||
572 | __get_cpu_var(omlp_token) = t; | ||
573 | break; | ||
574 | } else { | ||
575 | /* some job is spinning => enqueue in request queue */ | ||
576 | prio_wait_queue_t wait; | ||
577 | wait_queue_head_t* token_waiters = &__get_cpu_var(omlp_token_wait); | ||
578 | unsigned long flags; | ||
579 | |||
580 | /* ordered by regular priority; break by lower PID */ | ||
581 | init_prio_waitqueue_entry_tie(&wait, t, get_deadline(t), t->pid); | ||
582 | |||
583 | spin_lock_irqsave(&token_waiters->lock, flags); | ||
584 | |||
585 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
586 | |||
587 | __add_wait_queue_prio_exclusive(token_waiters, &wait); | ||
588 | |||
589 | TRACE_CUR("waiting for OMLP token\n"); | ||
590 | |||
591 | spin_unlock_irqrestore(&token_waiters->lock, flags); | ||
592 | |||
593 | TS_LOCK_SUSPEND; | ||
594 | |||
595 | preempt_enable_no_resched(); | ||
596 | |||
597 | schedule(); | ||
598 | |||
599 | preempt_disable(); | ||
600 | |||
601 | TS_LOCK_RESUME; | ||
602 | /* Recheck if we got it */ | ||
603 | } | ||
604 | } | ||
605 | /* ok, now it is ours */ | ||
606 | TRACE_CUR("got OMLP token\n"); | ||
607 | } | ||
608 | |||
609 | /* called with preemptions off */ | ||
610 | static void omlp_release_token(void) | ||
611 | { | ||
612 | struct task_struct* t = current, *next; | ||
613 | unsigned long flags; | ||
614 | wait_queue_head_t* token_waiters = &__get_cpu_var(omlp_token_wait); | ||
615 | |||
616 | BUG_ON(__get_cpu_var(omlp_token) != t); | ||
617 | |||
618 | __get_cpu_var(omlp_token) = NULL; | ||
619 | |||
620 | TRACE_CUR("released OMLP token\n"); | ||
621 | |||
622 | spin_lock_irqsave(&token_waiters->lock, flags); | ||
623 | next = __waitqueue_remove_first(token_waiters); | ||
624 | |||
625 | if (next) | ||
626 | wake_up_process(next); | ||
627 | |||
628 | spin_unlock_irqrestore(&token_waiters->lock, flags); | ||
629 | } | ||
630 | |||
631 | |||
632 | struct omlp_semaphore { | ||
633 | struct litmus_lock litmus_lock; | ||
634 | |||
635 | /* current resource holder */ | ||
636 | struct task_struct *owner; | ||
637 | |||
638 | /* FIFO queue of waiting tasks */ | ||
639 | wait_queue_head_t wait; | ||
640 | }; | ||
641 | |||
642 | static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock) | ||
643 | { | ||
644 | return container_of(lock, struct omlp_semaphore, litmus_lock); | ||
645 | } | ||
646 | int psnedf_omlp_lock(struct litmus_lock* l) | ||
647 | { | ||
648 | struct task_struct* t = current; | ||
649 | struct omlp_semaphore *sem = omlp_from_lock(l); | ||
650 | wait_queue_t wait; | ||
651 | unsigned long flags; | ||
652 | |||
653 | if (!is_realtime(t)) | ||
654 | return -EPERM; | ||
655 | |||
656 | preempt_disable(); | ||
657 | |||
658 | omlp_grab_token(); | ||
659 | |||
660 | /* Priority-boost ourself *before* we suspend so that | ||
661 | * our priority is boosted when we resume. */ | ||
662 | boost_priority(t); | ||
663 | |||
664 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
665 | |||
666 | if (sem->owner) { | ||
667 | /* resource is not free => must suspend and wait */ | ||
668 | |||
669 | init_waitqueue_entry(&wait, t); | ||
670 | |||
671 | /* FIXME: interruptible would be nice some day */ | ||
672 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
673 | |||
674 | __add_wait_queue_tail_exclusive(&sem->wait, &wait); | ||
675 | |||
676 | /* release lock before sleeping */ | ||
677 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
678 | |||
679 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
680 | * when we wake up; we are guaranteed to have the lock since | ||
681 | * there is only one wake up per release. | ||
682 | */ | ||
683 | TS_LOCK_SUSPEND; | ||
684 | |||
685 | preempt_enable_no_resched(); | ||
686 | |||
687 | schedule(); | ||
688 | |||
689 | preempt_disable(); | ||
690 | |||
691 | TS_LOCK_RESUME; | ||
692 | |||
693 | /* Since we hold the lock, no other task will change | ||
694 | * ->owner. We can thus check it without acquiring the spin | ||
695 | * lock. */ | ||
696 | BUG_ON(sem->owner != t); | ||
697 | } else { | ||
698 | /* it's ours now */ | ||
699 | sem->owner = t; | ||
700 | |||
701 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
702 | } | ||
703 | |||
704 | preempt_enable(); | ||
705 | |||
706 | return 0; | ||
707 | } | ||
708 | |||
/* Release the OMLP semaphore.
 *
 * Ownership is handed directly to the head of the FIFO wait queue (if
 * any); otherwise the semaphore becomes free.  The caller drops its
 * priority boost and finally returns the per-CPU OMLP token.
 *
 * Returns 0 on success, -EINVAL if the caller does not hold the lock.
 */
int psnedf_omlp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	unsigned long flags;
	int err = 0;

	preempt_disable();

	spin_lock_irqsave(&sem->wait.lock, flags);

	if (sem->owner != t) {
		err = -EINVAL;
		spin_unlock_irqrestore(&sem->wait.lock, flags);
		goto out;
	}

	/* we lose the benefit of priority boosting */

	unboost_priority(t);

	/* check if there are jobs waiting for this resource */
	next = __waitqueue_remove_first(&sem->wait);
	if (next) {
		/* next becomes the resource holder */
		sem->owner = next;

		/* Wake up next. The waiting job is already priority-boosted. */
		wake_up_process(next);
	} else
		/* resource becomes available */
		sem->owner = NULL;

	spin_unlock_irqrestore(&sem->wait.lock, flags);

	omlp_release_token();

out:
	preempt_enable();
	return err;
}
750 | |||
/* fdso close callback: if the closing task still holds the semaphore,
 * release it on its behalf so the lock is not leaked on task exit.
 * Always returns 0. */
int psnedf_omlp_close(struct litmus_lock* l)
{
	struct task_struct *t = current;
	struct omlp_semaphore *sem = omlp_from_lock(l);
	unsigned long flags;

	int owner;

	spin_lock_irqsave(&sem->wait.lock, flags);

	/* snapshot ownership under the wait-queue lock */
	owner = sem->owner == t;

	spin_unlock_irqrestore(&sem->wait.lock, flags);

	if (owner)
		psnedf_omlp_unlock(l);

	return 0;
}
770 | |||
/* Deallocation callback: free the semaphore's backing storage. */
void psnedf_omlp_free(struct litmus_lock* lock)
{
	struct omlp_semaphore *sem = omlp_from_lock(lock);

	kfree(sem);
}
775 | |||
/* OMLP operations table hooked into the generic LITMUS^RT lock API. */
static struct litmus_lock_ops psnedf_omlp_lock_ops = {
	.close = psnedf_omlp_close,
	.lock = psnedf_omlp_lock,
	.unlock = psnedf_omlp_unlock,
	.deallocate = psnedf_omlp_free,
};
782 | |||
/* Allocate and initialize a new OMLP semaphore (no owner, empty wait
 * queue).  Returns the embedded litmus_lock, or NULL if the allocation
 * fails. */
static struct litmus_lock* psnedf_new_omlp(void)
{
	struct omlp_semaphore* sem;

	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
	if (!sem)
		return NULL;

	sem->owner = NULL;
	init_waitqueue_head(&sem->wait);
	sem->litmus_lock.ops = &psnedf_omlp_lock_ops;

	return &sem->litmus_lock;
}
797 | |||
798 | |||
799 | /* **** lock constructor **** */ | ||
800 | |||
801 | |||
802 | static long psnedf_allocate_lock(struct litmus_lock **lock, int type, | ||
803 | void* __user unused) | ||
804 | { | ||
805 | int err = -ENXIO; | ||
806 | struct srp_semaphore* srp; | ||
807 | |||
808 | /* PSN-EDF currently supports the SRP for local resources and the FMLP | ||
809 | * for global resources. */ | ||
810 | switch (type) { | ||
811 | case FMLP_SEM: | ||
812 | /* Flexible Multiprocessor Locking Protocol */ | ||
813 | *lock = psnedf_new_fmlp(); | ||
814 | if (*lock) | ||
815 | err = 0; | ||
816 | else | ||
817 | err = -ENOMEM; | ||
818 | break; | ||
819 | |||
820 | case OMLP_SEM: | ||
821 | /* O(m) Locking Protocol */ | ||
822 | *lock = psnedf_new_omlp(); | ||
823 | if (*lock) | ||
824 | err = 0; | ||
825 | else | ||
826 | err = -ENOMEM; | ||
827 | break; | ||
828 | |||
829 | case SRP_SEM: | ||
830 | /* Baker's Stack Resource Policy */ | ||
831 | srp = allocate_srp_semaphore(); | ||
832 | if (srp) { | ||
833 | *lock = &srp->litmus_lock; | ||
834 | err = 0; | ||
835 | } else | ||
836 | err = -ENOMEM; | ||
837 | break; | ||
838 | }; | ||
839 | |||
840 | return err; | ||
841 | } | ||
842 | |||
843 | #endif | ||
844 | |||
845 | |||
/* Plugin activation hook: (re-)initialize per-CPU state on all online
 * CPUs before PSN-EDF takes over scheduling.  Always returns 0. */
static long psnedf_activate_plugin(void)
{

	int cpu;

	for_each_online_cpu(cpu) {
#ifdef CONFIG_RELEASE_MASTER
		/* propagate the globally configured release-master CPU */
		remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
#endif
#ifdef CONFIG_LITMUS_LOCKING
		/* reset OMLP token state: no holder, empty wait queue */
		init_waitqueue_head(&per_cpu(omlp_token_wait, cpu));
		per_cpu(omlp_token, cpu) = NULL;
#endif
	}


#ifdef CONFIG_LITMUS_LOCKING
	/* let the generic SRP layer query PSN-EDF task priorities */
	get_srp_prio = psnedf_get_srp_prio;
#endif

	return 0;
}
868 | |||
/* Admission test: a task may join PSN-EDF only if it is already running
 * on its assigned partition (and, with CONFIG_RELEASE_MASTER, that
 * partition is not the dedicated release-master CPU).
 * Returns 0 if admissible, -EINVAL otherwise. */
static long psnedf_admit_task(struct task_struct* tsk)
{
	if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
#ifdef CONFIG_RELEASE_MASTER
	    /* don't allow tasks on release master CPU */
	    && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
#endif
		)
		return 0;
	else
		return -EINVAL;
}
881 | |||
/* Plugin object: the callback table registered with the LITMUS^RT core;
 * locking support is only compiled in with CONFIG_LITMUS_LOCKING. */
static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
	.plugin_name		= "PSN-EDF",
	.tick			= psnedf_tick,
	.task_new		= psnedf_task_new,
	.complete_job		= complete_job,
	.task_exit		= psnedf_task_exit,
	.schedule		= psnedf_schedule,
	.task_wake_up		= psnedf_task_wake_up,
	.task_block		= psnedf_task_block,
	.admit_task		= psnedf_admit_task,
	.activate_plugin	= psnedf_activate_plugin,
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock		= psnedf_allocate_lock,
#endif
};
898 | |||
899 | |||
/* Module init: set up one EDF domain per CPU, then register the plugin
 * with the LITMUS^RT core. */
static int __init init_psn_edf(void)
{
	int i;

	/* We do not really want to support cpu hotplug, do we? ;)
	 * However, if we are so crazy to do so,
	 * we cannot use num_online_cpu()
	 */
	for (i = 0; i < num_online_cpus(); i++) {
		psnedf_domain_init(remote_pedf(i),
				   psnedf_check_resched,
				   NULL, i);
	}
	return register_sched_plugin(&psn_edf_plugin);
}

module_init(init_psn_edf);
917 | |||
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c new file mode 100644 index 000000000000..5ef8d09ab41f --- /dev/null +++ b/litmus/sched_task_trace.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* | ||
2 | * sched_task_trace.c -- record scheduling events to a byte stream | ||
3 | */ | ||
4 | |||
5 | #define NO_TASK_TRACE_DECLS | ||
6 | |||
7 | #include <linux/module.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/percpu.h> | ||
10 | |||
11 | #include <litmus/ftdev.h> | ||
12 | #include <litmus/litmus.h> | ||
13 | |||
14 | #include <litmus/sched_trace.h> | ||
15 | #include <litmus/feather_trace.h> | ||
16 | #include <litmus/ftdev.h> | ||
17 | |||
18 | |||
19 | #define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) | ||
20 | |||
21 | #define now() litmus_clock() | ||
22 | |||
23 | struct local_buffer { | ||
24 | struct st_event_record record[NO_EVENTS]; | ||
25 | char flag[NO_EVENTS]; | ||
26 | struct ft_buffer ftbuf; | ||
27 | }; | ||
28 | |||
29 | DEFINE_PER_CPU(struct local_buffer, st_event_buffer); | ||
30 | |||
31 | static struct ftdev st_dev; | ||
32 | |||
33 | static int st_dev_can_open(struct ftdev *dev, unsigned int cpu) | ||
34 | { | ||
35 | return cpu_online(cpu) ? 0 : -ENODEV; | ||
36 | } | ||
37 | |||
/* Module init: carve each CPU's static local_buffer into a feather-trace
 * ring buffer and register the resulting per-CPU minor devices.
 * Returns 0 on success or a negative error after tearing down ftdev
 * state. */
static int __init init_sched_task_trace(void)
{
	struct local_buffer* buf;
	int i, ok = 0, err;
	printk("Allocated %u sched_trace_xxx() events per CPU "
	       "(buffer size: %d bytes)\n",
	       NO_EVENTS, (int) sizeof(struct local_buffer));

	err = ftdev_init(&st_dev, THIS_MODULE,
			num_online_cpus(), "sched_trace");
	if (err)
		goto err_out;

	for (i = 0; i < st_dev.minor_cnt; i++) {
		buf = &per_cpu(st_event_buffer, i);
		/* ok counts successfully initialized buffers */
		ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
				     sizeof(struct st_event_record),
				     buf->flag,
				     buf->record);
		st_dev.minor[i].buf = &buf->ftbuf;
	}
	if (ok == st_dev.minor_cnt) {
		st_dev.can_open = st_dev_can_open;
		err = register_ftdev(&st_dev);
		if (err)
			goto err_dealloc;
	} else {
		err = -EINVAL;
		goto err_dealloc;
	}

	return 0;

err_dealloc:
	ftdev_exit(&st_dev);
err_out:
	printk(KERN_WARNING "Could not register sched_trace module\n");
	return err;
}
77 | |||
/* Module exit: unregister and tear down the feather-trace device. */
static void __exit exit_sched_task_trace(void)
{
	ftdev_exit(&st_dev);
}

module_init(init_sched_task_trace);
module_exit(exit_sched_task_trace);
85 | |||
86 | |||
/* Reserve a slot in this CPU's event buffer and fill in the common
 * header (event type, CPU, and pid/job of t, or 0 if t is NULL).
 *
 * NOTE: on success this returns with preemption disabled (via
 * get_cpu_var()); the matching put_record() re-enables it.  On failure
 * preemption is re-enabled here and NULL is returned. */
static inline struct st_event_record* get_record(u8 type, struct task_struct* t)
{
	struct st_event_record* rec = NULL;
	struct local_buffer* buf;

	buf = &get_cpu_var(st_event_buffer);
	if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) {
		rec->hdr.type = type;
		rec->hdr.cpu = smp_processor_id();
		rec->hdr.pid = t ? t->pid : 0;
		rec->hdr.job = t ? t->rt_param.job_params.job_no : 0;
	} else {
		put_cpu_var(st_event_buffer);
	}
	/* rec will be NULL if it failed */
	return rec;
}
104 | |||
/* Commit a record obtained from get_record() and drop the per-CPU
 * reference (re-enables preemption taken in get_record()). */
static inline void put_record(struct st_event_record* rec)
{
	struct local_buffer* buf;
	buf = &__get_cpu_var(st_event_buffer);
	ft_buffer_finish_write(&buf->ftbuf, rec);
	put_cpu_var(st_event_buffer);
}
112 | |||
/* Record an ST_NAME event: copy (a prefix of) the task's comm field
 * into the trace record. */
feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task)
{
	struct task_struct *t = (struct task_struct*) _task;
	struct st_event_record* rec = get_record(ST_NAME, t);
	int i;
	if (rec) {
		/* copy at most what fits in the record's name field */
		for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++)
			rec->data.name.cmd[i] = t->comm[i];
		put_record(rec);
	}
}
124 | |||
/* Record an ST_PARAM event: snapshot the task's real-time parameters
 * (WCET, period, phase, partition, class). */
feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task)
{
	struct task_struct *t = (struct task_struct*) _task;
	struct st_event_record* rec = get_record(ST_PARAM, t);
	if (rec) {
		rec->data.param.wcet      = get_exec_cost(t);
		rec->data.param.period    = get_rt_period(t);
		rec->data.param.phase     = get_rt_phase(t);
		rec->data.param.partition = get_partition(t);
		rec->data.param.class     = get_class(t);
		put_record(rec);
	}
}
138 | |||
/* Record an ST_RELEASE event: the job's release time and absolute
 * deadline. */
feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task)
{
	struct task_struct *t = (struct task_struct*) _task;
	struct st_event_record* rec = get_record(ST_RELEASE, t);
	if (rec) {
		rec->data.release.release  = get_release(t);
		rec->data.release.deadline = get_deadline(t);
		put_record(rec);
	}
}
149 | |||
150 | /* skipped: st_assigned_data, we don't use it atm */ | ||
151 | |||
/* Record an ST_SWITCH_TO event when a real-time task is scheduled in;
 * non-real-time tasks are ignored. */
feather_callback void do_sched_trace_task_switch_to(unsigned long id,
						    unsigned long _task)
{
	struct task_struct *t = (struct task_struct*) _task;
	struct st_event_record* rec;
	if (is_realtime(t)) {
		rec = get_record(ST_SWITCH_TO, t);
		if (rec) {
			rec->data.switch_to.when      = now();
			rec->data.switch_to.exec_time = get_exec_time(t);
			put_record(rec);
		}
	}
}
166 | |||
/* Record an ST_SWITCH_AWAY event when a real-time task is descheduled;
 * non-real-time tasks are ignored. */
feather_callback void do_sched_trace_task_switch_away(unsigned long id,
						      unsigned long _task)
{
	struct task_struct *t = (struct task_struct*) _task;
	struct st_event_record* rec;
	if (is_realtime(t)) {
		rec = get_record(ST_SWITCH_AWAY, t);
		if (rec) {
			rec->data.switch_away.when      = now();
			rec->data.switch_away.exec_time = get_exec_time(t);
			put_record(rec);
		}
	}
}
181 | |||
/* Record an ST_COMPLETION event; 'forced' distinguishes budget-enforced
 * completions from voluntary job completions. */
feather_callback void do_sched_trace_task_completion(unsigned long id,
						     unsigned long _task,
						     unsigned long forced)
{
	struct task_struct *t = (struct task_struct*) _task;
	struct st_event_record* rec = get_record(ST_COMPLETION, t);
	if (rec) {
		rec->data.completion.when   = now();
		rec->data.completion.forced = forced;
		put_record(rec);
	}
}
194 | |||
/* Record an ST_BLOCK event with the time the task blocked. */
feather_callback void do_sched_trace_task_block(unsigned long id,
						unsigned long _task)
{
	struct task_struct *t = (struct task_struct*) _task;
	struct st_event_record* rec = get_record(ST_BLOCK, t);
	if (rec) {
		rec->data.block.when = now();
		put_record(rec);
	}
}
205 | |||
/* Record an ST_RESUME event with the time the task resumed. */
feather_callback void do_sched_trace_task_resume(unsigned long id,
						 unsigned long _task)
{
	struct task_struct *t = (struct task_struct*) _task;
	struct st_event_record* rec = get_record(ST_RESUME, t);
	if (rec) {
		rec->data.resume.when = now();
		put_record(rec);
	}
}
216 | |||
/* Record an ST_SYS_RELEASE event: a synchronous task-system release.
 * _start points at the lt_t release time (no task is associated). */
feather_callback void do_sched_trace_sys_release(unsigned long id,
						 unsigned long _start)
{
	lt_t *start = (lt_t*) _start;
	struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL);
	if (rec) {
		rec->data.sys_release.when    = now();
		rec->data.sys_release.release = *start;
		put_record(rec);
	}
}
228 | |||
/* Record an ST_ACTION event: a generic, plugin-defined action code
 * associated with a task. */
feather_callback void do_sched_trace_action(unsigned long id,
					    unsigned long _task,
					    unsigned long action)
{
	struct task_struct *t = (struct task_struct*) _task;
	struct st_event_record* rec = get_record(ST_ACTION, t);

	if (rec) {
		rec->data.action.when   = now();
		rec->data.action.action = action;
		put_record(rec);
	}
}
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c new file mode 100644 index 000000000000..f4171fddbbb1 --- /dev/null +++ b/litmus/sched_trace.c | |||
@@ -0,0 +1,252 @@ | |||
1 | /* | ||
2 | * sched_trace.c -- record scheduling events to a byte stream. | ||
3 | */ | ||
4 | #include <linux/spinlock.h> | ||
5 | #include <linux/mutex.h> | ||
6 | |||
7 | #include <linux/fs.h> | ||
8 | #include <linux/slab.h> | ||
9 | #include <linux/miscdevice.h> | ||
10 | #include <asm/uaccess.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/sysrq.h> | ||
13 | |||
14 | #include <linux/kfifo.h> | ||
15 | |||
16 | #include <litmus/sched_trace.h> | ||
17 | #include <litmus/litmus.h> | ||
18 | |||
19 | #define SCHED_TRACE_NAME "litmus/log" | ||
20 | |||
21 | /* Compute size of TRACE() buffer */ | ||
22 | #define LITMUS_TRACE_BUF_SIZE (1 << CONFIG_SCHED_DEBUG_TRACE_SHIFT) | ||
23 | |||
24 | /* Max length of one read from the buffer */ | ||
25 | #define MAX_READ_LEN (64 * 1024) | ||
26 | |||
27 | /* Max length for one write --- by TRACE() --- to the buffer. This is used to | ||
28 | * allocate a per-cpu buffer for printf() formatting. */ | ||
29 | #define MSG_SIZE 255 | ||
30 | |||
31 | |||
32 | static DEFINE_MUTEX(reader_mutex); | ||
33 | static atomic_t reader_cnt = ATOMIC_INIT(0); | ||
34 | static DEFINE_KFIFO(debug_buffer, char, LITMUS_TRACE_BUF_SIZE); | ||
35 | |||
36 | |||
37 | static DEFINE_RAW_SPINLOCK(log_buffer_lock); | ||
38 | static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer); | ||
39 | |||
40 | /* | ||
41 | * sched_trace_log_message - Write to the trace buffer (log_buffer) | ||
42 | * | ||
43 | * This is the only function accessing the log_buffer from inside the | ||
44 | * kernel for writing. | ||
45 | * Concurrent access to sched_trace_log_message must be serialized using | ||
46 | * log_buffer_lock | ||
47 | * The maximum length of a formatted message is 255 | ||
48 | */ | ||
/*
 * sched_trace_log_message - Write to the trace buffer (log_buffer)
 *
 * This is the only function accessing the log_buffer from inside the
 * kernel for writing.
 * Concurrent access to sched_trace_log_message must be serialized using
 * log_buffer_lock
 * The maximum length of a formatted message is 255
 */
void sched_trace_log_message(const char* fmt, ...)
{
	unsigned long flags;
	va_list args;
	size_t len;
	char* buf;

	if (!atomic_read(&reader_cnt))
		/* early exit if nobody is listening */
		return;

	va_start(args, fmt);
	/* IRQs stay off while we own this CPU's format buffer */
	local_irq_save(flags);

	/* format message */
	buf = __get_cpu_var(fmt_buffer);
	len = vscnprintf(buf, MSG_SIZE, fmt, args);

	raw_spin_lock(&log_buffer_lock);
	/* Don't copy the trailing null byte, we don't want null bytes in a
	 * text file.
	 */
	kfifo_in(&debug_buffer, buf, len);
	raw_spin_unlock(&log_buffer_lock);

	local_irq_restore(flags);
	va_end(args);
}
77 | |||
78 | |||
79 | /* | ||
80 | * log_read - Read the trace buffer | ||
81 | * | ||
82 | * This function is called as a file operation from userspace. | ||
83 | * Readers can sleep. Access is serialized through reader_mutex | ||
84 | */ | ||
/*
 * log_read - Read the trace buffer
 *
 * This function is called as a file operation from userspace.
 * Readers can sleep. Access is serialized through reader_mutex.
 * While the fifo is empty, the reader polls it roughly every 110
 * jiffies until data arrives or a signal is pending.
 */
static ssize_t log_read(struct file *filp,
			char __user *to, size_t len,
			loff_t *f_pos)
{
	/* we ignore f_pos, this is strictly sequential */

	ssize_t error = -EINVAL;
	char* mem;

	if (mutex_lock_interruptible(&reader_mutex)) {
		error = -ERESTARTSYS;
		goto out;
	}

	if (len > MAX_READ_LEN)
		len = MAX_READ_LEN;

	mem = kmalloc(len, GFP_KERNEL);
	if (!mem) {
		error = -ENOMEM;
		goto out_unlock;
	}

	/* error doubles as the byte count while it is positive */
	error = kfifo_out(&debug_buffer, mem, len);
	while (!error) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(110);
		if (signal_pending(current))
			error = -ERESTARTSYS;
		else
			error = kfifo_out(&debug_buffer, mem, len);
	}

	if (error > 0 && copy_to_user(to, mem, error))
		error = -EFAULT;

	kfree(mem);
 out_unlock:
	mutex_unlock(&reader_mutex);
 out:
	return error;
}
127 | |||
128 | /* | ||
129 | * Enable redirection of printk() messages to the trace buffer. | ||
130 | * Defined in kernel/printk.c | ||
131 | */ | ||
132 | extern int trace_override; | ||
133 | extern int trace_recurse; | ||
134 | |||
135 | /* | ||
136 | * log_open - open the global log message ring buffer. | ||
137 | */ | ||
/*
 * log_open - open the global log message ring buffer.
 * Bumps the reader count (which enables TRACE() logging) and enables
 * printk() redirection into the trace buffer.
 */
static int log_open(struct inode *in, struct file *filp)
{
	int error = -EINVAL;

	if (mutex_lock_interruptible(&reader_mutex)) {
		error = -ERESTARTSYS;
		goto out;
	}

	atomic_inc(&reader_cnt);
	error = 0;

	printk(KERN_DEBUG
	       "sched_trace kfifo with buffer starting at: 0x%p\n",
	       debug_buffer.buf);

	/* override printk() */
	trace_override++;

	mutex_unlock(&reader_mutex);
 out:
	return error;
}
161 | |||
/* Release the log device: undo the reader-count and printk-override
 * increments performed in log_open(). */
static int log_release(struct inode *in, struct file *filp)
{
	int error = -EINVAL;

	if (mutex_lock_interruptible(&reader_mutex)) {
		error = -ERESTARTSYS;
		goto out;
	}

	atomic_dec(&reader_cnt);

	/* release printk() overriding */
	trace_override--;

	printk(KERN_DEBUG "sched_trace kfifo released\n");

	mutex_unlock(&reader_mutex);
 out:
	return error;
}
182 | |||
/*
 * log_fops - The file operations for accessing the global LITMUS log message
 *            buffer.
 *
 * Except for opening the device file it uses the same operations as trace_fops.
 * No write/seek: the log is strictly a sequential read-only stream.
 */
static struct file_operations log_fops = {
	.owner = THIS_MODULE,
	.open = log_open,
	.release = log_release,
	.read = log_read,
};
195 | |||
/* Misc-device descriptor for /dev/litmus/log (dynamic minor). */
static struct miscdevice litmus_log_dev = {
	.name = SCHED_TRACE_NAME,
	.minor = MISC_DYNAMIC_MINOR,
	.fops = &log_fops,
};
201 | |||
202 | #ifdef CONFIG_MAGIC_SYSRQ | ||
203 | void dump_trace_buffer(int max) | ||
204 | { | ||
205 | char line[80]; | ||
206 | int len; | ||
207 | int count = 0; | ||
208 | |||
209 | /* potential, but very unlikely, race... */ | ||
210 | trace_recurse = 1; | ||
211 | while ((max == 0 || count++ < max) && | ||
212 | (len = kfifo_out(&debug_buffer, line, sizeof(line - 1))) > 0) { | ||
213 | line[len] = '\0'; | ||
214 | printk("%s", line); | ||
215 | } | ||
216 | trace_recurse = 0; | ||
217 | } | ||
218 | |||
/* Magic-sysrq callback: dump at most 100 chunks of the TRACE() buffer. */
static void sysrq_dump_trace_buffer(int key)
{
	dump_trace_buffer(100);
}
223 | |||
/* sysrq registration record for the 'y' key (see init_sched_trace()). */
static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
	.handler = sysrq_dump_trace_buffer,
	.help_msg = "dump-trace-buffer(Y)",
	.action_msg = "writing content of TRACE() buffer",
};
229 | #endif | ||
230 | |||
/* Module init: optionally hook the sysrq dump key, then register the
 * misc device that exposes the TRACE() buffer to user space. */
static int __init init_sched_trace(void)
{
	printk("Initializing TRACE() device\n");

#ifdef CONFIG_MAGIC_SYSRQ
	/* offer some debugging help */
	if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op))
		printk("Registered dump-trace-buffer(Y) magic sysrq.\n");
	else
		printk("Could not register dump-trace-buffer(Y) magic sysrq.\n");
#endif

	return misc_register(&litmus_log_dev);
}
245 | |||
/* Module exit: unregister the TRACE() misc device. */
static void __exit exit_sched_trace(void)
{
	misc_deregister(&litmus_log_dev);
}

module_init(init_sched_trace);
module_exit(exit_sched_trace);
diff --git a/litmus/srp.c b/litmus/srp.c new file mode 100644 index 000000000000..2ed4ec12a9d3 --- /dev/null +++ b/litmus/srp.c | |||
@@ -0,0 +1,295 @@ | |||
1 | /* ************************************************************************** */ | ||
2 | /* STACK RESOURCE POLICY */ | ||
3 | /* ************************************************************************** */ | ||
4 | |||
5 | #include <asm/atomic.h> | ||
6 | #include <linux/sched.h> | ||
7 | #include <linux/wait.h> | ||
8 | |||
9 | #include <litmus/litmus.h> | ||
10 | #include <litmus/sched_plugin.h> | ||
11 | #include <litmus/fdso.h> | ||
12 | #include <litmus/trace.h> | ||
13 | |||
14 | |||
15 | #ifdef CONFIG_LITMUS_LOCKING | ||
16 | |||
17 | #include <litmus/srp.h> | ||
18 | |||
19 | srp_prioritization_t get_srp_prio; | ||
20 | |||
21 | struct srp { | ||
22 | struct list_head ceiling; | ||
23 | wait_queue_head_t ceiling_blocked; | ||
24 | }; | ||
25 | #define system_ceiling(srp) list2prio(srp->ceiling.next) | ||
26 | #define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling) | ||
27 | |||
28 | #define UNDEF_SEM -2 | ||
29 | |||
30 | atomic_t srp_objects_in_use = ATOMIC_INIT(0); | ||
31 | |||
32 | DEFINE_PER_CPU(struct srp, srp); | ||
33 | |||
34 | /* Initialize SRP semaphores at boot time. */ | ||
/* Initialize SRP semaphores at boot time.
 * NOTE(review): this iterates over all NR_CPUS slots rather than the
 * possible-CPU mask — presumably safe for static per-CPU storage on
 * this kernel; verify if per_cpu areas ever become dynamic. */
static int __init srp_init(void)
{
	int i;

	printk("Initializing SRP per-CPU ceilings...");
	for (i = 0; i < NR_CPUS; i++) {
		init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
		INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
	}
	printk(" done!\n");

	return 0;
}
module_init(srp_init);
49 | |||
50 | /* SRP task priority comparison function. Smaller numeric values have higher | ||
51 | * priority, tie-break is PID. Special case: priority == 0 <=> no priority | ||
52 | */ | ||
53 | static int srp_higher_prio(struct srp_priority* first, | ||
54 | struct srp_priority* second) | ||
55 | { | ||
56 | if (!first->priority) | ||
57 | return 0; | ||
58 | else | ||
59 | return !second->priority || | ||
60 | first->priority < second->priority || ( | ||
61 | first->priority == second->priority && | ||
62 | first->pid < second->pid); | ||
63 | } | ||
64 | |||
65 | |||
/* Does 'first' exceed this CPU's current system ceiling?  True if the
 * ceiling list is empty, if the task's SRP priority beats the ceiling,
 * or if the task itself owns the ceiling-defining semaphore. */
static int srp_exceeds_ceiling(struct task_struct* first,
			       struct srp* srp)
{
	struct srp_priority prio;

	if (list_empty(&srp->ceiling))
		return 1;
	else {
		prio.pid = first->pid;
		prio.priority = get_srp_prio(first);
		return srp_higher_prio(&prio, system_ceiling(srp)) ||
			ceiling2sem(system_ceiling(srp))->owner == first;
	}
}
80 | |||
/* Insert 'prio' into this CPU's ceiling list, keeping the list sorted
 * by decreasing priority (head == current system ceiling).  A prio that
 * is already linked indicates an SRP protocol violation and is only
 * warned about. */
static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
{
	struct list_head *pos;
	if (in_list(&prio->list)) {
		printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
		       "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
		return;
	}
	list_for_each(pos, &srp->ceiling)
		if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
			__list_add(&prio->list, pos->prev, pos);
			return;
		}

	/* lowest priority so far: append at the tail */
	list_add_tail(&prio->list, &srp->ceiling);
}
97 | |||
98 | |||
/* Acquire an SRP semaphore.  Under SRP a task only runs when every
 * resource it may need is free, so acquisition never blocks: we merely
 * raise the local ceiling and take ownership.
 * Returns 0 on success, -EPERM for non-real-time callers. */
static int lock_srp_semaphore(struct litmus_lock* l)
{
	struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);

	if (!is_realtime(current))
		return -EPERM;

	preempt_disable();

	/* Update ceiling. */
	srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);

	/* SRP invariant: all resources available */
	BUG_ON(sem->owner != NULL);

	sem->owner = current;
	TRACE_CUR("acquired srp 0x%p\n", sem);

	preempt_enable();

	return 0;
}
121 | |||
/* Release an SRP semaphore: drop its ceiling entry, clear ownership,
 * and wake any tasks on this CPU that now exceed the (lowered) ceiling.
 * Returns 0 on success, -EINVAL if the caller is not the owner. */
static int unlock_srp_semaphore(struct litmus_lock* l)
{
	struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
	int err = 0;

	preempt_disable();

	if (sem->owner != current) {
		err = -EINVAL;
	} else {
		/* Determine new system priority ceiling for this CPU. */
		BUG_ON(!in_list(&sem->ceiling.list));

		list_del(&sem->ceiling.list);
		sem->owner = NULL;

		/* Wake tasks on this CPU, if they exceed current ceiling. */
		TRACE_CUR("released srp 0x%p\n", sem);
		wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
	}

	preempt_enable();
	return err;
}
146 | |||
/* fdso open callback: bind the semaphore to the caller's partition (on
 * first open) and raise the semaphore's priority ceiling to cover the
 * caller.
 * Returns 0 on success, -EPERM for non-real-time callers or partition
 * mismatch, -EBUSY if the semaphore is currently held. */
static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
{
	struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
	int err = 0;
	struct task_struct* t = current;
	struct srp_priority t_prio;

	if (!is_realtime(t))
		return -EPERM;

	TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);

	preempt_disable();

	if (sem->owner != NULL)
		err = -EBUSY;

	if (err == 0) {
		/* first opener fixes the semaphore's partition */
		if (sem->cpu == UNDEF_SEM)
			sem->cpu = get_partition(t);
		else if (sem->cpu != get_partition(t))
			err = -EPERM;
	}

	if (err == 0) {
		/* raise the static ceiling if this task outranks it */
		t_prio.priority = get_srp_prio(t);
		t_prio.pid      = t->pid;
		if (srp_higher_prio(&t_prio, &sem->ceiling)) {
			sem->ceiling.priority = t_prio.priority;
			sem->ceiling.pid      = t_prio.pid;
		}
	}

	preempt_enable();

	return err;
}
184 | |||
/* fdso close callback: if the closing task still owns the semaphore,
 * release it on its behalf.  Always returns 0. */
static int close_srp_semaphore(struct litmus_lock* l)
{
	struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
	int err = 0;

	preempt_disable();

	if (sem->owner == current)
		unlock_srp_semaphore(l);

	preempt_enable();

	return err;
}
199 | |||
200 | static void deallocate_srp_semaphore(struct litmus_lock* l) | ||
201 | { | ||
202 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
203 | atomic_dec(&srp_objects_in_use); | ||
204 | kfree(sem); | ||
205 | } | ||
206 | |||
/* SRP operations table hooked into the generic LITMUS^RT lock API. */
static struct litmus_lock_ops srp_lock_ops = {
	.open = open_srp_semaphore,
	.close = close_srp_semaphore,
	.lock = lock_srp_semaphore,
	.unlock = unlock_srp_semaphore,
	.deallocate = deallocate_srp_semaphore,
};
214 | |||
215 | struct srp_semaphore* allocate_srp_semaphore(void) | ||
216 | { | ||
217 | struct srp_semaphore* sem; | ||
218 | |||
219 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
220 | if (!sem) | ||
221 | return NULL; | ||
222 | |||
223 | INIT_LIST_HEAD(&sem->ceiling.list); | ||
224 | sem->ceiling.priority = 0; | ||
225 | sem->cpu = UNDEF_SEM; | ||
226 | sem->owner = NULL; | ||
227 | |||
228 | sem->litmus_lock.ops = &srp_lock_ops; | ||
229 | |||
230 | atomic_inc(&srp_objects_in_use); | ||
231 | return sem; | ||
232 | } | ||
233 | |||
234 | static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync, | ||
235 | void *key) | ||
236 | { | ||
237 | int cpu = smp_processor_id(); | ||
238 | struct task_struct *tsk = wait->private; | ||
239 | if (cpu != get_partition(tsk)) | ||
240 | TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\b", | ||
241 | get_partition(tsk)); | ||
242 | else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) | ||
243 | return default_wake_function(wait, mode, sync, key); | ||
244 | return 0; | ||
245 | } | ||
246 | |||
/* Block 'tsk' on this CPU's ceiling wait queue until srp_wake_up()
 * decides it may run again.  Called with preemption disabled; drops and
 * re-takes the preemption count around the call to schedule().  The
 * srp_non_recurse flag prevents srp_ceiling_block() from re-entering
 * while we are suspending. */
static void do_ceiling_block(struct task_struct *tsk)
{
	wait_queue_t wait = {
		.private   = tsk,
		.func      = srp_wake_up,
		.task_list = {NULL, NULL}
	};

	tsk->state = TASK_UNINTERRUPTIBLE;
	add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
	tsk->rt_param.srp_non_recurse = 1;
	preempt_enable_no_resched();
	schedule();
	preempt_disable();
	tsk->rt_param.srp_non_recurse = 0;
	remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
}
264 | |||
/* Wait for current task priority to exceed system-wide priority ceiling.
 * Early-outs: non-real-time callers, recursive entry from within
 * do_ceiling_block(), and the common case of no SRP objects existing.
 * FIXME: the hotpath should be inline.
 */
void srp_ceiling_block(void)
{
	struct task_struct *tsk = current;

	/* Only applies to real-time tasks, but optimize for RT tasks. */
	if (unlikely(!is_realtime(tsk)))
		return;

	/* Avoid recursive ceiling blocking. */
	if (unlikely(tsk->rt_param.srp_non_recurse))
		return;

	/* Bail out early if there aren't any SRP resources around. */
	if (likely(!atomic_read(&srp_objects_in_use)))
		return;

	preempt_disable();
	if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
		TRACE_CUR("is priority ceiling blocked.\n");
		/* loop: a wakeup only means the ceiling may have changed */
		while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
			do_ceiling_block(tsk);
		TRACE_CUR("finally exceeds system ceiling.\n");
	} else
		TRACE_CUR("is not priority ceiling blocked\n");
	preempt_enable();
}
294 | |||
295 | #endif | ||
diff --git a/litmus/sync.c b/litmus/sync.c new file mode 100644 index 000000000000..bf75fde5450b --- /dev/null +++ b/litmus/sync.c | |||
@@ -0,0 +1,104 @@ | |||
1 | /* litmus/sync.c - Support for synchronous and asynchronous task system releases. | ||
2 | * | ||
3 | * | ||
4 | */ | ||
5 | |||
6 | #include <asm/atomic.h> | ||
7 | #include <asm/uaccess.h> | ||
8 | #include <linux/spinlock.h> | ||
9 | #include <linux/list.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/completion.h> | ||
12 | |||
13 | #include <litmus/litmus.h> | ||
14 | #include <litmus/sched_plugin.h> | ||
15 | #include <litmus/jobs.h> | ||
16 | |||
17 | #include <litmus/sched_trace.h> | ||
18 | |||
19 | static DECLARE_COMPLETION(ts_release); | ||
20 | |||
21 | static long do_wait_for_ts_release(void) | ||
22 | { | ||
23 | long ret = 0; | ||
24 | |||
25 | /* If the interruption races with a release, the completion object | ||
26 | * may have a non-zero counter. To avoid this problem, this should | ||
27 | * be replaced by wait_for_completion(). | ||
28 | * | ||
29 | * For debugging purposes, this is interruptible for now. | ||
30 | */ | ||
31 | ret = wait_for_completion_interruptible(&ts_release); | ||
32 | |||
33 | return ret; | ||
34 | } | ||
35 | |||
36 | int count_tasks_waiting_for_release(void) | ||
37 | { | ||
38 | unsigned long flags; | ||
39 | int task_count = 0; | ||
40 | struct list_head *pos; | ||
41 | |||
42 | spin_lock_irqsave(&ts_release.wait.lock, flags); | ||
43 | list_for_each(pos, &ts_release.wait.task_list) { | ||
44 | task_count++; | ||
45 | } | ||
46 | spin_unlock_irqrestore(&ts_release.wait.lock, flags); | ||
47 | |||
48 | return task_count; | ||
49 | } | ||
50 | |||
/* Perform a synchronous task-system release: set the first release of
 * every task queued on ts_release to 'start' plus that task's phase,
 * then wake them all. Returns the number of tasks released.
 */
static long do_release_ts(lt_t start)
{
	int task_count = 0;
	unsigned long flags;
	struct list_head *pos;
	struct task_struct *t;


	spin_lock_irqsave(&ts_release.wait.lock, flags);
	TRACE("<<<<<< synchronous task system release >>>>>>\n");

	sched_trace_sys_release(&start);
	/* Walk the completion's wait queue directly (under its lock) so
	 * each waiter's release time can be set before it is woken. */
	list_for_each(pos, &ts_release.wait.task_list) {
		t = (struct task_struct*) list_entry(pos,
						     struct __wait_queue,
						     task_list)->private;
		task_count++;
		/* Stagger releases by each task's configured phase. */
		litmus->release_at(t, start + t->rt_param.task_params.phase);
		sched_trace_task_release(t);
	}

	spin_unlock_irqrestore(&ts_release.wait.lock, flags);

	/* Wake exactly as many waiters as were counted above. */
	complete_n(&ts_release, task_count);

	return task_count;
}
78 | |||
79 | |||
80 | asmlinkage long sys_wait_for_ts_release(void) | ||
81 | { | ||
82 | long ret = -EPERM; | ||
83 | struct task_struct *t = current; | ||
84 | |||
85 | if (is_realtime(t)) | ||
86 | ret = do_wait_for_ts_release(); | ||
87 | |||
88 | return ret; | ||
89 | } | ||
90 | |||
91 | |||
92 | asmlinkage long sys_release_ts(lt_t __user *__delay) | ||
93 | { | ||
94 | long ret; | ||
95 | lt_t delay; | ||
96 | |||
97 | /* FIXME: check capabilities... */ | ||
98 | |||
99 | ret = copy_from_user(&delay, __delay, sizeof(delay)); | ||
100 | if (ret == 0) | ||
101 | ret = do_release_ts(litmus_clock() + delay); | ||
102 | |||
103 | return ret; | ||
104 | } | ||
diff --git a/litmus/trace.c b/litmus/trace.c new file mode 100644 index 000000000000..39200c8ff74e --- /dev/null +++ b/litmus/trace.c | |||
@@ -0,0 +1,213 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/uaccess.h> | ||
4 | |||
5 | #include <litmus/ftdev.h> | ||
6 | #include <litmus/litmus.h> | ||
7 | #include <litmus/trace.h> | ||
8 | |||
9 | /******************************************************************************/ | ||
10 | /* Allocation */ | ||
11 | /******************************************************************************/ | ||
12 | |||
/* Feather-Trace overhead device; a single minor holds the timestamp buffer. */
static struct ftdev overhead_dev;

/* Shorthand for the sole minor's trace buffer. */
#define trace_ts_buf overhead_dev.minor[0].buf

/* Global timestamp sequence counter; lets user space detect drops. */
static unsigned int ts_seq_no = 0;
18 | |||
19 | static inline void __save_timestamp_cpu(unsigned long event, | ||
20 | uint8_t type, uint8_t cpu) | ||
21 | { | ||
22 | unsigned int seq_no; | ||
23 | struct timestamp *ts; | ||
24 | seq_no = fetch_and_inc((int *) &ts_seq_no); | ||
25 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | ||
26 | ts->event = event; | ||
27 | ts->timestamp = ft_timestamp(); | ||
28 | ts->seq_no = seq_no; | ||
29 | ts->cpu = cpu; | ||
30 | ts->task_type = type; | ||
31 | ft_buffer_finish_write(trace_ts_buf, ts); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | static void __add_timestamp_user(struct timestamp *pre_recorded) | ||
36 | { | ||
37 | unsigned int seq_no; | ||
38 | struct timestamp *ts; | ||
39 | seq_no = fetch_and_inc((int *) &ts_seq_no); | ||
40 | |||
41 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | ||
42 | *ts = *pre_recorded; | ||
43 | ts->seq_no = seq_no; | ||
44 | ft_buffer_finish_write(trace_ts_buf, ts); | ||
45 | } | ||
46 | } | ||
47 | |||
/* Record a timestamp attributed to the CPU we are currently running on. */
static inline void __save_timestamp(unsigned long event,
				uint8_t type)
{
	__save_timestamp_cpu(event, type, raw_smp_processor_id());
}
53 | |||
/* hack: fake timestamp to user-reported time, and record parts of the PID */
feather_callback void save_timestamp_time(unsigned long event, unsigned long ptr)
{
	uint64_t* time = (uint64_t*) ptr;
	unsigned int seq_no;
	struct timestamp *ts;
	seq_no = fetch_and_inc((int *) &ts_seq_no);
	if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
		ts->event = event;
		/* Caller-supplied time replaces ft_timestamp(). */
		ts->timestamp = *time;
		ts->seq_no = seq_no;
		/* type takes lowest byte of PID */
		ts->task_type = (uint8_t) current->pid;
		/* cpu takes second-lowest byte of PID*/
		ts->cpu = (uint8_t) (current->pid >> 8);

		ft_buffer_finish_write(trace_ts_buf, ts);
	}
}
73 | |||
74 | feather_callback void save_timestamp_pid(unsigned long event) | ||
75 | { | ||
76 | /* Abuse existing fields to partially export PID. */ | ||
77 | __save_timestamp_cpu(event, | ||
78 | /* type takes lowest byte of PID */ | ||
79 | (uint8_t) current->pid, | ||
80 | /* cpu takes second-lowest byte of PID*/ | ||
81 | (uint8_t) (current->pid >> 8)); | ||
82 | } | ||
83 | |||
/* Record a timestamp with no task-type information. */
feather_callback void save_timestamp(unsigned long event)
{
	__save_timestamp(event, TSK_UNKNOWN);
}
88 | |||
/* Record a timestamp with a caller-defined task-type tag. */
feather_callback void save_timestamp_def(unsigned long event,
					 unsigned long type)
{
	__save_timestamp(event, (uint8_t) type);
}
94 | |||
95 | feather_callback void save_timestamp_task(unsigned long event, | ||
96 | unsigned long t_ptr) | ||
97 | { | ||
98 | int rt = is_realtime((struct task_struct *) t_ptr); | ||
99 | __save_timestamp(event, rt ? TSK_RT : TSK_BE); | ||
100 | } | ||
101 | |||
/* Record a timestamp attributed to an explicitly given CPU rather than
 * the one currently executing. */
feather_callback void save_timestamp_cpu(unsigned long event,
					 unsigned long cpu)
{
	__save_timestamp_cpu(event, TSK_UNKNOWN, cpu);
}
107 | |||
108 | feather_callback void save_task_latency(unsigned long event, | ||
109 | unsigned long when_ptr) | ||
110 | { | ||
111 | lt_t now = litmus_clock(); | ||
112 | lt_t *when = (lt_t*) when_ptr; | ||
113 | unsigned int seq_no; | ||
114 | int cpu = raw_smp_processor_id(); | ||
115 | struct timestamp *ts; | ||
116 | |||
117 | seq_no = fetch_and_inc((int *) &ts_seq_no); | ||
118 | if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) { | ||
119 | ts->event = event; | ||
120 | ts->timestamp = now - *when; | ||
121 | ts->seq_no = seq_no; | ||
122 | ts->cpu = cpu; | ||
123 | ts->task_type = TSK_RT; | ||
124 | ft_buffer_finish_write(trace_ts_buf, ts); | ||
125 | } | ||
126 | } | ||
127 | |||
128 | /******************************************************************************/ | ||
129 | /* DEVICE FILE DRIVER */ | ||
130 | /******************************************************************************/ | ||
131 | |||
/*
 * Default number of timestamp slots to request. A single buddy-system
 * allocation is capped (MAX_ORDER), so alloc_timestamp_buffer() halves
 * this count until the allocator can satisfy the request — we may end
 * up with fewer slots than asked for.
 */
136 | #define NO_TIMESTAMPS (2 << 16) | ||
137 | |||
138 | static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) | ||
139 | { | ||
140 | unsigned int count = NO_TIMESTAMPS; | ||
141 | while (count && !trace_ts_buf) { | ||
142 | printk("time stamp buffer: trying to allocate %u time stamps.\n", count); | ||
143 | ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp)); | ||
144 | count /= 2; | ||
145 | } | ||
146 | return ftdev->minor[idx].buf ? 0 : -ENOMEM; | ||
147 | } | ||
148 | |||
149 | static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx) | ||
150 | { | ||
151 | free_ft_buffer(ftdev->minor[idx].buf); | ||
152 | ftdev->minor[idx].buf = NULL; | ||
153 | } | ||
154 | |||
155 | static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len, | ||
156 | const char __user *from) | ||
157 | { | ||
158 | ssize_t consumed = 0; | ||
159 | struct timestamp ts; | ||
160 | |||
161 | /* don't give us partial timestamps */ | ||
162 | if (len % sizeof(ts)) | ||
163 | return -EINVAL; | ||
164 | |||
165 | while (len >= sizeof(ts)) { | ||
166 | if (copy_from_user(&ts, from, sizeof(ts))) { | ||
167 | consumed = -EFAULT; | ||
168 | goto out; | ||
169 | } | ||
170 | len -= sizeof(ts); | ||
171 | from += sizeof(ts); | ||
172 | consumed += sizeof(ts); | ||
173 | |||
174 | __add_timestamp_user(&ts); | ||
175 | } | ||
176 | |||
177 | out: | ||
178 | return consumed; | ||
179 | } | ||
180 | |||
181 | static int __init init_ft_overhead_trace(void) | ||
182 | { | ||
183 | int err; | ||
184 | |||
185 | printk("Initializing Feather-Trace overhead tracing device.\n"); | ||
186 | err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace"); | ||
187 | if (err) | ||
188 | goto err_out; | ||
189 | |||
190 | overhead_dev.alloc = alloc_timestamp_buffer; | ||
191 | overhead_dev.free = free_timestamp_buffer; | ||
192 | overhead_dev.write = write_timestamp_from_user; | ||
193 | |||
194 | err = register_ftdev(&overhead_dev); | ||
195 | if (err) | ||
196 | goto err_dealloc; | ||
197 | |||
198 | return 0; | ||
199 | |||
200 | err_dealloc: | ||
201 | ftdev_exit(&overhead_dev); | ||
202 | err_out: | ||
203 | printk(KERN_WARNING "Could not register ft_trace module.\n"); | ||
204 | return err; | ||
205 | } | ||
206 | |||
/* Module exit: tear down the ftdev registration. */
static void __exit exit_ft_overhead_trace(void)
{
	ftdev_exit(&overhead_dev);
}
211 | |||
/* Register the overhead tracing device at module load/unload. */
module_init(init_ft_overhead_trace);
module_exit(exit_ft_overhead_trace);