author     Jeremy Erickson <jerickso@cs.unc.edu>  2012-08-30 21:01:47 -0400
committer  Jeremy Erickson <jerickso@cs.unc.edu>  2012-08-30 21:01:47 -0400
commit     b1e1fea67bca3796d5f9133a92c300ec4fa93a4f
tree       5cc1336e1fe1d6f93b1067e73e43381dd20db690
parent     f6f94e2ab1b33f0082ac22d71f66385a60d8157f
Bjoern's Dissertation Code with Priority Donation (wip-splitting-omlp-jerickso)
-rw-r--r--  Makefile | 4
-rw-r--r--  arch/arm/Kconfig | 8
-rw-r--r--  arch/arm/include/asm/timex.h | 2
-rw-r--r--  arch/arm/include/asm/unistd.h | 3
-rw-r--r--  arch/arm/kernel/calls.S | 12
-rw-r--r--  arch/arm/kernel/smp.c | 4
-rw-r--r--  arch/arm/mach-realview/include/mach/timex.h | 27
-rw-r--r--  arch/x86/Kconfig | 8
-rw-r--r--  arch/x86/include/asm/entry_arch.h | 1
-rw-r--r--  arch/x86/include/asm/feather_trace.h | 17
-rw-r--r--  arch/x86/include/asm/feather_trace_32.h | 79
-rw-r--r--  arch/x86/include/asm/feather_trace_64.h | 67
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 3
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 5
-rw-r--r--  arch/x86/include/asm/processor.h | 4
-rw-r--r--  arch/x86/include/asm/unistd_32.h | 6
-rw-r--r--  arch/x86/include/asm/unistd_64.h | 4
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 17
-rw-r--r--  arch/x86/kernel/entry_64.S | 2
-rw-r--r--  arch/x86/kernel/ft_event.c | 118
-rw-r--r--  arch/x86/kernel/irqinit.c | 3
-rw-r--r--  arch/x86/kernel/smp.c | 27
-rw-r--r--  arch/x86/kernel/syscall_table_32.S | 12
-rw-r--r--  drivers/tty/vt/consolemap_deftbl.c | 86
-rw-r--r--  drivers/tty/vt/defkeymap.c | 262
-rw-r--r--  fs/exec.c | 13
-rw-r--r--  fs/inode.c | 2
-rw-r--r--  include/linux/completion.h | 1
-rw-r--r--  include/linux/fs.h | 21
-rw-r--r--  include/linux/hrtimer.h | 32
-rw-r--r--  include/linux/sched.h | 19
-rw-r--r--  include/linux/smp.h | 5
-rw-r--r--  include/linux/tick.h | 5
-rw-r--r--  include/litmus/bheap.h | 77
-rw-r--r--  include/litmus/budget.h | 8
-rw-r--r--  include/litmus/clustered.h | 44
-rw-r--r--  include/litmus/debug_trace.h | 37
-rw-r--r--  include/litmus/edf_common.h | 33
-rw-r--r--  include/litmus/fdso.h | 77
-rw-r--r--  include/litmus/feather_buffer.h | 94
-rw-r--r--  include/litmus/feather_trace.h | 65
-rw-r--r--  include/litmus/fp_common.h | 105
-rw-r--r--  include/litmus/ftdev.h | 55
-rw-r--r--  include/litmus/jobs.h | 9
-rw-r--r--  include/litmus/litmus.h | 292
-rw-r--r--  include/litmus/litmus_proc.h | 25
-rw-r--r--  include/litmus/locking.h | 28
-rw-r--r--  include/litmus/preempt.h | 165
-rw-r--r--  include/litmus/rt_domain.h | 182
-rw-r--r--  include/litmus/rt_param.h | 228
-rw-r--r--  include/litmus/sched_plugin.h | 117
-rw-r--r--  include/litmus/sched_plugin.h.rej | 22
-rw-r--r--  include/litmus/sched_trace.h | 200
-rw-r--r--  include/litmus/srp.h | 28
-rw-r--r--  include/litmus/trace.h | 129
-rw-r--r--  include/litmus/unistd_32.h | 21
-rw-r--r--  include/litmus/unistd_64.h | 33
-rw-r--r--  include/litmus/wait.h | 57
-rw-r--r--  kernel/exit.c | 4
-rw-r--r--  kernel/fork.c | 7
-rw-r--r--  kernel/hrtimer.c | 95
-rw-r--r--  kernel/printk.c | 14
-rw-r--r--  kernel/sched.c | 127
-rw-r--r--  kernel/sched_fair.c | 2
-rw-r--r--  kernel/sched_rt.c | 2
-rw-r--r--  kernel/time/tick-sched.c | 47
-rw-r--r--  litmus/Kconfig | 185
-rw-r--r--  litmus/Makefile | 30
-rw-r--r--  litmus/bheap.c | 314
-rw-r--r--  litmus/budget.c | 111
-rw-r--r--  litmus/clustered.c | 111
-rw-r--r--  litmus/ctrldev.c | 150
-rw-r--r--  litmus/edf_common.c | 143
-rw-r--r--  litmus/fdso.c | 297
-rw-r--r--  litmus/fp_common.c | 119
-rw-r--r--  litmus/ft_event.c | 43
-rw-r--r--  litmus/ftdev.c | 446
-rw-r--r--  litmus/jobs.c | 43
-rw-r--r--  litmus/litmus.c | 555
-rw-r--r--  litmus/litmus_proc.c | 347
-rw-r--r--  litmus/locking.c | 186
-rw-r--r--  litmus/preempt.c | 131
-rw-r--r--  litmus/rt_domain.c | 357
-rw-r--r--  litmus/sched_cedf.c | 1526
-rw-r--r--  litmus/sched_cedf.c.rej | 53
-rw-r--r--  litmus/sched_gfl_split_namechange.c | 1149
-rw-r--r--  litmus/sched_gsn_edf.c | 1286
-rw-r--r--  litmus/sched_gsn_edf_split_namechange.c | 1165
-rw-r--r--  litmus/sched_litmus.c | 328
-rw-r--r--  litmus/sched_litmus.c.rej | 11
-rw-r--r--  litmus/sched_pfair.c | 1056
-rw-r--r--  litmus/sched_pfp.c | 1542
-rw-r--r--  litmus/sched_plugin.c | 233
-rw-r--r--  litmus/sched_psn_edf.c | 917
-rw-r--r--  litmus/sched_task_trace.c | 241
-rw-r--r--  litmus/sched_trace.c | 252
-rw-r--r--  litmus/srp.c | 295
-rw-r--r--  litmus/sync.c | 104
-rw-r--r--  litmus/trace.c | 213
100 files changed, 17213 insertions, 36 deletions
diff --git a/Makefile b/Makefile
index 860c26af52c3..8e53f47a311b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 36
-EXTRAVERSION =
+EXTRAVERSION =-litmus2010
 NAME = Flesh-Eating Bats with Fangs
 
 # *DOCUMENTATION*
@@ -659,7 +659,7 @@ export mod_strip_cmd
 
 
 ifeq ($(KBUILD_EXTMOD),)
-core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/
+core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
 
 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9c26ba7244fb..babad6d7681a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1808,3 +1808,11 @@ source "security/Kconfig"
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+config ARCH_HAS_SEND_PULL_TIMERS
+	def_bool n
+
+config ARCH_HAS_FEATHER_TRACE
+	def_bool n
+
+source "litmus/Kconfig"
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
index 3be8de3adaba..8a102a383a36 100644
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -16,9 +16,11 @@
 
 typedef unsigned long cycles_t;
 
+#ifndef get_cycles
 static inline cycles_t get_cycles (void)
 {
 	return 0;
 }
+#endif
 
 #endif
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index c891eb76c0e3..625b30490624 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -397,6 +397,9 @@
 #define __NR_fanotify_mark		(__NR_SYSCALL_BASE+368)
 #define __NR_prlimit64			(__NR_SYSCALL_BASE+369)
 
+#define __NR_LITMUS			(__NR_SYSCALL_BASE+370)
+#include <litmus/unistd_32.h>
+
 /*
  * The following SWIs are ARM private.
  */
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 5c26eccef998..b99087ac85b9 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -379,6 +379,18 @@
 		CALL(sys_fanotify_init)
 		CALL(sys_fanotify_mark)
 		CALL(sys_prlimit64)
+/* 370 */	CALL(sys_set_rt_task_param)
+		CALL(sys_get_rt_task_param)
+		CALL(sys_complete_job)
+		CALL(sys_od_open)
+		CALL(sys_od_close)
+/* 375 */	CALL(sys_litmus_lock)
+		CALL(sys_litmus_unlock)
+		CALL(sys_query_job_no)
+		CALL(sys_wait_for_job_release)
+		CALL(sys_wait_for_ts_release)
+/* 380 */	CALL(sys_release_ts)
+		CALL(sys_null_call)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 40dc74f2b27f..b72fbf3d043c 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -38,6 +38,8 @@
 #include <asm/localtimer.h>
 #include <asm/smp_plat.h>
 
+#include <litmus/preempt.h>
+
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
  * so we need some other way of telling a new secondary core
@@ -533,6 +535,8 @@ asmlinkage void __exception do_IPI(struct pt_regs *regs)
 		 * nothing more to do - eveything is
 		 * done on the interrupt return path
 		 */
+		/* LITMUS^RT: take action based on scheduler state */
+		sched_state_ipi();
 		break;
 
 	case IPI_CALL_FUNC:
diff --git a/arch/arm/mach-realview/include/mach/timex.h b/arch/arm/mach-realview/include/mach/timex.h
index 4eeb069373c2..e8bcc40d1f08 100644
--- a/arch/arm/mach-realview/include/mach/timex.h
+++ b/arch/arm/mach-realview/include/mach/timex.h
@@ -21,3 +21,30 @@
  */
 
 #define CLOCK_TICK_RATE		(50000000 / 16)
+
+#if defined(CONFIG_MACH_REALVIEW_PB11MP) || defined(CONFIG_MACH_REALVIEW_PB1176)
+
+static inline unsigned long realview_get_arm11_cp15_ccnt(void)
+{
+	unsigned long cycles;
+	/* Read CP15 CCNT register. */
+	asm volatile ("mrc p15, 0, %0, c15, c12, 1" : "=r" (cycles));
+	return cycles;
+}
+
+#define get_cycles realview_get_arm11_cp15_ccnt
+
+#elif defined(CONFIG_MACH_REALVIEW_PBA8)
+
+
+static inline unsigned long realview_get_a8_cp15_ccnt(void)
+{
+	unsigned long cycles;
+	/* Read CP15 CCNT register. */
+	asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
+	return cycles;
+}
+
+#define get_cycles realview_get_a8_cp15_ccnt
+
+#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cea0cd9a316f..5181ed3a211a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2142,3 +2142,11 @@ source "crypto/Kconfig"
 source "arch/x86/kvm/Kconfig"
 
 source "lib/Kconfig"
+
+config ARCH_HAS_FEATHER_TRACE
+	def_bool y
+
+config ARCH_HAS_SEND_PULL_TIMERS
+	def_bool y
+
+source "litmus/Kconfig"
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 8e8ec663a98f..5d07dea2ebb8 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -13,6 +13,7 @@
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
+BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
 BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
 
diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h
new file mode 100644
index 000000000000..4fd31633405d
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace.h
@@ -0,0 +1,17 @@
+#ifndef _ARCH_FEATHER_TRACE_H
+#define _ARCH_FEATHER_TRACE_H
+
+#include <asm/msr.h>
+
+static inline unsigned long long ft_timestamp(void)
+{
+	return __native_read_tsc();
+}
+
+#ifdef CONFIG_X86_32
+#include "feather_trace_32.h"
+#else
+#include "feather_trace_64.h"
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h
new file mode 100644
index 000000000000..70202f90f169
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace_32.h
@@ -0,0 +1,79 @@
1/* Do not directly include this file. Include feather_trace.h instead */
2
3#define feather_callback __attribute__((regparm(0)))
4
5/*
6 * make the compiler reload any register that is not saved in
7 * a cdecl function call
8 */
9#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx"
10
11#define ft_event(id, callback) \
12 __asm__ __volatile__( \
13 "1: jmp 2f \n\t" \
14 " call " #callback " \n\t" \
15 ".section __event_table, \"aw\" \n\t" \
16 ".long " #id ", 0, 1b, 2f \n\t" \
17 ".previous \n\t" \
18 "2: \n\t" \
19 : : : CLOBBER_LIST)
20
21#define ft_event0(id, callback) \
22 __asm__ __volatile__( \
23 "1: jmp 2f \n\t" \
24 " subl $4, %%esp \n\t" \
25 " movl $" #id ", (%%esp) \n\t" \
26 " call " #callback " \n\t" \
27 " addl $4, %%esp \n\t" \
28 ".section __event_table, \"aw\" \n\t" \
29 ".long " #id ", 0, 1b, 2f \n\t" \
30 ".previous \n\t" \
31 "2: \n\t" \
32 : : : CLOBBER_LIST)
33
34#define ft_event1(id, callback, param) \
35 __asm__ __volatile__( \
36 "1: jmp 2f \n\t" \
37 " subl $8, %%esp \n\t" \
38 " movl %0, 4(%%esp) \n\t" \
39 " movl $" #id ", (%%esp) \n\t" \
40 " call " #callback " \n\t" \
41 " addl $8, %%esp \n\t" \
42 ".section __event_table, \"aw\" \n\t" \
43 ".long " #id ", 0, 1b, 2f \n\t" \
44 ".previous \n\t" \
45 "2: \n\t" \
46 : : "r" (param) : CLOBBER_LIST)
47
48#define ft_event2(id, callback, param, param2) \
49 __asm__ __volatile__( \
50 "1: jmp 2f \n\t" \
51 " subl $12, %%esp \n\t" \
52 " movl %1, 8(%%esp) \n\t" \
53 " movl %0, 4(%%esp) \n\t" \
54 " movl $" #id ", (%%esp) \n\t" \
55 " call " #callback " \n\t" \
56 " addl $12, %%esp \n\t" \
57 ".section __event_table, \"aw\" \n\t" \
58 ".long " #id ", 0, 1b, 2f \n\t" \
59 ".previous \n\t" \
60 "2: \n\t" \
61 : : "r" (param), "r" (param2) : CLOBBER_LIST)
62
63
64#define ft_event3(id, callback, p, p2, p3) \
65 __asm__ __volatile__( \
66 "1: jmp 2f \n\t" \
67 " subl $16, %%esp \n\t" \
68 " movl %2, 12(%%esp) \n\t" \
69 " movl %1, 8(%%esp) \n\t" \
70 " movl %0, 4(%%esp) \n\t" \
71 " movl $" #id ", (%%esp) \n\t" \
72 " call " #callback " \n\t" \
73 " addl $16, %%esp \n\t" \
74 ".section __event_table, \"aw\" \n\t" \
75 ".long " #id ", 0, 1b, 2f \n\t" \
76 ".previous \n\t" \
77 "2: \n\t" \
78 : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
79
diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h
new file mode 100644
index 000000000000..54ac2aeb3a28
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace_64.h
@@ -0,0 +1,67 @@
1/* Do not directly include this file. Include feather_trace.h instead */
2
3/* regparm is the default on x86_64 */
4#define feather_callback
5
6# define _EVENT_TABLE(id,from,to) \
7 ".section __event_table, \"aw\"\n\t" \
8 ".balign 8\n\t" \
9 ".quad " #id ", 0, " #from ", " #to " \n\t" \
10 ".previous \n\t"
11
12/*
13 * x86_64 callee only owns rbp, rbx, r12 -> r15
14 * the called can freely modify the others
15 */
16#define CLOBBER_LIST "memory", "cc", "rdi", "rsi", "rdx", "rcx", \
17 "r8", "r9", "r10", "r11", "rax"
18
19#define ft_event(id, callback) \
20 __asm__ __volatile__( \
21 "1: jmp 2f \n\t" \
22 " call " #callback " \n\t" \
23 _EVENT_TABLE(id,1b,2f) \
24 "2: \n\t" \
25 : : : CLOBBER_LIST)
26
27#define ft_event0(id, callback) \
28 __asm__ __volatile__( \
29 "1: jmp 2f \n\t" \
30 " movq $" #id ", %%rdi \n\t" \
31 " call " #callback " \n\t" \
32 _EVENT_TABLE(id,1b,2f) \
33 "2: \n\t" \
34 : : : CLOBBER_LIST)
35
36#define ft_event1(id, callback, param) \
37 __asm__ __volatile__( \
38 "1: jmp 2f \n\t" \
39 " movq %0, %%rsi \n\t" \
40 " movq $" #id ", %%rdi \n\t" \
41 " call " #callback " \n\t" \
42 _EVENT_TABLE(id,1b,2f) \
43 "2: \n\t" \
44 : : "r" (param) : CLOBBER_LIST)
45
46#define ft_event2(id, callback, param, param2) \
47 __asm__ __volatile__( \
48 "1: jmp 2f \n\t" \
49 " movq %1, %%rdx \n\t" \
50 " movq %0, %%rsi \n\t" \
51 " movq $" #id ", %%rdi \n\t" \
52 " call " #callback " \n\t" \
53 _EVENT_TABLE(id,1b,2f) \
54 "2: \n\t" \
55 : : "r" (param), "r" (param2) : CLOBBER_LIST)
56
57#define ft_event3(id, callback, p, p2, p3) \
58 __asm__ __volatile__( \
59 "1: jmp 2f \n\t" \
60 " movq %2, %%rcx \n\t" \
61 " movq %1, %%rdx \n\t" \
62 " movq %0, %%rsi \n\t" \
63 " movq $" #id ", %%rdi \n\t" \
64 " call " #callback " \n\t" \
65 _EVENT_TABLE(id,1b,2f) \
66 "2: \n\t" \
67 : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
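The macros above compile each ft_eventN() call site to a two-byte jump over the call; ft_enable_event() (see arch/x86/kernel/ft_event.c later in this patch) rewrites the jump displacement so the site falls through and invokes the callback with the event id and its parameters. A minimal usage sketch, not part of this patch -- the event id 100 and the probe function are hypothetical:

#include <linux/smp.h>
#include <litmus/feather_trace.h>

/* Callback signature for ft_event1(): the event id plus one parameter. */
feather_callback void my_probe(unsigned long id, unsigned long cpu)
{
	/* e.g. store (id, cpu, ft_timestamp()) into a trace buffer */
}

static void traced_hot_path(void)
{
	/* Remains a skipped jump until the event is enabled with
	 * ft_enable_event(100). */
	ft_event1(100, my_probe, smp_processor_id());
}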
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 46c0fe05f230..c17411503f28 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -53,6 +53,8 @@ extern void threshold_interrupt(void);
 extern void call_function_interrupt(void);
 extern void call_function_single_interrupt(void);
 
+extern void pull_timers_interrupt(void);
+
 /* IOAPIC */
 #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
 extern unsigned long io_apic_irqs;
@@ -122,6 +124,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
 extern void smp_call_function_single_interrupt(struct pt_regs *);
+extern void smp_pull_timers_interrupt(struct pt_regs *);
 #ifdef CONFIG_X86_32
 extern void smp_invalidate_interrupt(struct pt_regs *);
 #else
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index e2ca30092557..6143ebeeebfa 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -109,6 +109,11 @@
 #define LOCAL_TIMER_VECTOR		0xef
 
 /*
+ * LITMUS^RT pull timers IRQ vector
+ */
+#define PULL_TIMERS_VECTOR		0xee
+
+/*
  * Generic system vector for platform specific use
  */
 #define X86_PLATFORM_IPI_VECTOR	0xed
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 325b7bdbebaa..ebaa04a8d3af 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -169,6 +169,10 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
+#ifdef CONFIG_SYSFS
+extern int get_shared_cpu_map(cpumask_var_t mask,
+			      unsigned int cpu, int index);
+#endif
 
 extern void detect_extended_topology(struct cpuinfo_x86 *c);
 extern void detect_ht(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0e..b7ba19acd3f8 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -347,9 +347,13 @@
 #define __NR_fanotify_mark	339
 #define __NR_prlimit64		340
 
+#define __NR_LITMUS		341
+
+#include "litmus/unistd_32.h"
+
 #ifdef __KERNEL__
 
-#define NR_syscalls 341
+#define NR_syscalls 341 + NR_litmus_syscalls
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715b..332bf3c9c84c 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -670,6 +670,10 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
 #define __NR_prlimit64				302
 __SYSCALL(__NR_prlimit64, sys_prlimit64)
 
+#define __NR_LITMUS				303
+
+#include "litmus/unistd_64.h"
+
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fedf32a8c3ec..6890dbb9ac15 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -118,6 +118,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
 
 obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
 
+obj-$(CONFIG_FEATHER_TRACE)		+= ft_event.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 898c2f4eab88..3fec7d9bfd62 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -758,6 +758,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 #define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 
+/* returns CPUs that share the index cache with cpu */
+int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
+{
+	int ret = 0;
+	struct _cpuid4_info *this_leaf;
+
+	if (index >= num_cache_leaves) {
+		index = num_cache_leaves - 1;
+		ret = index;
+	}
+
+	this_leaf = CPUID4_INFO_IDX(cpu,index);
+	cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map));
+
+	return ret;
+}
+
 #ifdef CONFIG_SMP
 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 {
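The exported get_shared_cpu_map() above is what the new litmus/clustered.c listed in the diffstat builds on to group CPUs by shared cache level. A hedged sketch of that kind of usage; the helper name below is invented for illustration:

#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <asm/processor.h>

/* Returns non-zero if CPUs a and b share the cache leaf at the given
 * index (index 0 is typically the L1 data cache on Intel parts). */
static int cpus_share_cache_level(unsigned int a, unsigned int b, int index)
{
	cpumask_var_t mask;
	int share;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	/* get_shared_cpu_map() clamps index to the last cache leaf and
	 * copies that leaf's shared_cpu_map into mask. */
	get_shared_cpu_map(mask, a, index);
	share = cpumask_test_cpu(b, mask);
	free_cpumask_var(mask);
	return share;
}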
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 17be5ec7cbba..115e8951e8c8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1016,6 +1016,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \
 	call_function_interrupt smp_call_function_interrupt
 apicinterrupt RESCHEDULE_VECTOR \
 	reschedule_interrupt smp_reschedule_interrupt
+apicinterrupt PULL_TIMERS_VECTOR \
+	pull_timers_interrupt smp_pull_timers_interrupt
 #endif
 
 apicinterrupt ERROR_APIC_VECTOR \
diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c
new file mode 100644
index 000000000000..37cc33252713
--- /dev/null
+++ b/arch/x86/kernel/ft_event.c
@@ -0,0 +1,118 @@
1#include <linux/types.h>
2
3#include <litmus/feather_trace.h>
4
5/* the feather trace management functions assume
6 * exclusive access to the event table
7 */
8
9#ifndef CONFIG_DEBUG_RODATA
10
11#define BYTE_JUMP 0xeb
12#define BYTE_JUMP_LEN 0x02
13
14/* for each event, there is an entry in the event table */
15struct trace_event {
16 long id;
17 long count;
18 long start_addr;
19 long end_addr;
20};
21
22extern struct trace_event __start___event_table[];
23extern struct trace_event __stop___event_table[];
24
25/* Workaround: if no events are defined, then the event_table section does not
26 * exist and the above references cause linker errors. This could probably be
27 * fixed by adjusting the linker script, but it is easier to maintain for us if
28 * we simply create a dummy symbol in the event table section.
29 */
30int __event_table_dummy[0] __attribute__ ((section("__event_table")));
31
32int ft_enable_event(unsigned long id)
33{
34 struct trace_event* te = __start___event_table;
35 int count = 0;
36 char* delta;
37 unsigned char* instr;
38
39 while (te < __stop___event_table) {
40 if (te->id == id && ++te->count == 1) {
41 instr = (unsigned char*) te->start_addr;
42 /* make sure we don't clobber something wrong */
43 if (*instr == BYTE_JUMP) {
44 delta = (((unsigned char*) te->start_addr) + 1);
45 *delta = 0;
46 }
47 }
48 if (te->id == id)
49 count++;
50 te++;
51 }
52
53 printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count);
54 return count;
55}
56
57int ft_disable_event(unsigned long id)
58{
59 struct trace_event* te = __start___event_table;
60 int count = 0;
61 char* delta;
62 unsigned char* instr;
63
64 while (te < __stop___event_table) {
65 if (te->id == id && --te->count == 0) {
66 instr = (unsigned char*) te->start_addr;
67 if (*instr == BYTE_JUMP) {
68 delta = (((unsigned char*) te->start_addr) + 1);
69 *delta = te->end_addr - te->start_addr -
70 BYTE_JUMP_LEN;
71 }
72 }
73 if (te->id == id)
74 count++;
75 te++;
76 }
77
78 printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count);
79 return count;
80}
81
82int ft_disable_all_events(void)
83{
84 struct trace_event* te = __start___event_table;
85 int count = 0;
86 char* delta;
87 unsigned char* instr;
88
89 while (te < __stop___event_table) {
90 if (te->count) {
91 instr = (unsigned char*) te->start_addr;
92 if (*instr == BYTE_JUMP) {
93 delta = (((unsigned char*) te->start_addr)
94 + 1);
95 *delta = te->end_addr - te->start_addr -
96 BYTE_JUMP_LEN;
97 te->count = 0;
98 count++;
99 }
100 }
101 te++;
102 }
103 return count;
104}
105
106int ft_is_event_enabled(unsigned long id)
107{
108 struct trace_event* te = __start___event_table;
109
110 while (te < __stop___event_table) {
111 if (te->id == id)
112 return te->count;
113 te++;
114 }
115 return 0;
116}
117
118#endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 990ae7cfc578..9772b1a0f9a4 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -189,6 +189,9 @@ static void __init smp_intr_init(void)
 	alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
 			call_function_single_interrupt);
 
+	/* IPI for hrtimer pulling on remote cpus */
+	alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt);
+
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d801210945d6..74cca6014c0e 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -23,6 +23,10 @@
 #include <linux/cpu.h>
 #include <linux/gfp.h>
 
+#include <litmus/preempt.h>
+#include <litmus/debug_trace.h>
+#include <litmus/trace.h>
+
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -118,6 +122,7 @@ static void native_smp_send_reschedule(int cpu)
 		WARN_ON(1);
 		return;
 	}
+	TS_SEND_RESCHED_START(cpu);
 	apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
 }
123 128
@@ -147,6 +152,16 @@ void native_send_call_func_ipi(const struct cpumask *mask)
 	free_cpumask_var(allbutself);
 }
 
+/* trigger timers on remote cpu */
+void smp_send_pull_timers(int cpu)
+{
+	if (unlikely(cpu_is_offline(cpu))) {
+		WARN_ON(1);
+		return;
+	}
+	apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR);
+}
+
 /*
  * this function calls the 'stop' function on all other CPUs in the system.
  */
@@ -198,7 +213,10 @@ static void native_smp_send_stop(void)
 void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
+	/* LITMUS^RT: this IPI might need to trigger the sched state machine. */
+	sched_state_ipi();
 	inc_irq_stat(irq_resched_count);
+	TS_SEND_RESCHED_END;
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */
@@ -222,6 +240,15 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
 	irq_exit();
 }
 
+extern void hrtimer_pull(void);
+
+void smp_pull_timers_interrupt(struct pt_regs *regs)
+{
+	ack_APIC_irq();
+	TRACE("pull timer interrupt\n");
+	hrtimer_pull();
+}
+
 struct smp_ops smp_ops = {
 	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
 	.smp_prepare_cpus	= native_smp_prepare_cpus,
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8f..37702905f658 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,15 @@ ENTRY(sys_call_table)
 	.long sys_fanotify_init
 	.long sys_fanotify_mark
 	.long sys_prlimit64		/* 340 */
+	.long sys_set_rt_task_param	/* LITMUS^RT 341 */
+	.long sys_get_rt_task_param
+	.long sys_complete_job
+	.long sys_od_open
+	.long sys_od_close
+	.long sys_litmus_lock
+	.long sys_litmus_unlock
+	.long sys_query_job_no
+	.long sys_wait_for_job_release
+	.long sys_wait_for_ts_release
+	.long sys_release_ts
+	.long sys_null_call
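With the table above, the new LITMUS^RT system calls occupy slots 341-352 on x86-32 (the ARM table earlier in this patch uses 370-381). Purely as an illustration, and assuming sys_null_call tolerates a NULL timestamp argument as in the LITMUS^RT userspace tools, a task could reach one of them by raw syscall number:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#define __NR_LITMUS	341			/* base from unistd_32.h in this patch */
#define __NR_null_call	(__NR_LITMUS + 11)	/* 12th entry in the table above */

int main(void)
{
	/* sys_null_call is the no-op syscall used to measure raw syscall
	 * overhead; real applications would use the liblitmus wrappers,
	 * which are not part of this commit. */
	long ret = syscall(__NR_null_call, NULL);
	printf("null_call returned %ld\n", ret);
	return 0;
}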
diff --git a/drivers/tty/vt/consolemap_deftbl.c b/drivers/tty/vt/consolemap_deftbl.c
new file mode 100644
index 000000000000..5f141383566b
--- /dev/null
+++ b/drivers/tty/vt/consolemap_deftbl.c
@@ -0,0 +1,86 @@
1/*
2 * Do not edit this file; it was automatically generated by
3 *
4 * conmakehash drivers/tty/vt/cp437.uni > [this file]
5 *
6 */
7
8#include <linux/types.h>
9
10u8 dfont_unicount[256] =
11{
12 1, 1, 1, 1, 2, 1, 1, 1,
13 1, 1, 1, 1, 1, 1, 1, 2,
14 2, 2, 1, 1, 1, 1, 1, 1,
15 1, 1, 1, 1, 1, 1, 1, 1,
16 1, 1, 2, 1, 1, 1, 1, 2,
17 1, 1, 1, 1, 2, 2, 1, 1,
18 1, 1, 1, 1, 1, 1, 1, 1,
19 1, 1, 1, 1, 1, 1, 1, 1,
20 1, 5, 1, 2, 2, 4, 1, 1,
21 1, 5, 1, 2, 1, 1, 1, 5,
22 1, 1, 2, 1, 1, 4, 1, 1,
23 1, 2, 1, 1, 1, 1, 1, 3,
24 1, 2, 1, 1, 1, 1, 1, 1,
25 1, 1, 1, 1, 1, 1, 1, 2,
26 1, 1, 1, 1, 1, 1, 1, 1,
27 2, 2, 1, 1, 2, 1, 1, 1,
28 1, 1, 1, 1, 1, 1, 1, 1,
29 1, 1, 1, 1, 1, 1, 1, 2,
30 1, 1, 1, 1, 1, 1, 1, 1,
31 1, 1, 1, 1, 1, 1, 1, 1,
32 1, 1, 1, 1, 1, 1, 1, 1,
33 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 2, 1, 1, 1, 1, 2, 1,
41 2, 1, 2, 2, 1, 2, 2, 1,
42 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 2, 1
44};
45
46u16 dfont_unitable[303] =
47{
48 0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x25c6, 0x2663, 0x2660,
49 0x2022, 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b,
50 0x263c, 0x00a4, 0x25b6, 0x25ba, 0x25c0, 0x25c4, 0x2195, 0x203c,
51 0x00b6, 0x00a7, 0x25ac, 0x21a8, 0x2191, 0x2193, 0x2192, 0x2190,
52 0x221f, 0x2194, 0x25b2, 0x25bc, 0x0020, 0x0021, 0x0022, 0x00a8,
53 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x00b4, 0x0028, 0x0029,
54 0x002a, 0x002b, 0x002c, 0x00b8, 0x002d, 0x00ad, 0x002e, 0x002f,
55 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
56 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
57 0x0040, 0x0041, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x0042, 0x0043,
58 0x00a9, 0x0044, 0x00d0, 0x0045, 0x00c8, 0x00ca, 0x00cb, 0x0046,
59 0x0047, 0x0048, 0x0049, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x004a,
60 0x004b, 0x212a, 0x004c, 0x004d, 0x004e, 0x004f, 0x00d2, 0x00d3,
61 0x00d4, 0x00d5, 0x0050, 0x0051, 0x0052, 0x00ae, 0x0053, 0x0054,
62 0x0055, 0x00d9, 0x00da, 0x00db, 0x0056, 0x0057, 0x0058, 0x0059,
63 0x00dd, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x23bd,
64 0xf804, 0x0060, 0x0061, 0x00e3, 0x0062, 0x0063, 0x0064, 0x0065,
65 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d,
66 0x006e, 0x006f, 0x00f5, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074,
67 0x0075, 0x0076, 0x0077, 0x0078, 0x00d7, 0x0079, 0x00fd, 0x007a,
68 0x007b, 0x007c, 0x00a6, 0x007d, 0x007e, 0x2302, 0x00c7, 0x00fc,
69 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, 0x00ea, 0x00eb,
70 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, 0x212b, 0x00c9,
71 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, 0x00ff,
72 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, 0x00e1,
73 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, 0x00bf,
74 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, 0x2591,
75 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555,
76 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, 0x2514,
77 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, 0x255a,
78 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, 0x2568,
79 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, 0x256a,
80 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, 0x03b1,
81 0x03b2, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03bc,
82 0x03c4, 0x03a6, 0x00d8, 0x0398, 0x03a9, 0x2126, 0x03b4, 0x00f0,
83 0x221e, 0x03c6, 0x00f8, 0x03b5, 0x2208, 0x2229, 0x2261, 0x00b1,
84 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, 0x00b0, 0x2219,
85 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0xfffd, 0x00a0
86};
diff --git a/drivers/tty/vt/defkeymap.c b/drivers/tty/vt/defkeymap.c
new file mode 100644
index 000000000000..d2208dfe3f67
--- /dev/null
+++ b/drivers/tty/vt/defkeymap.c
@@ -0,0 +1,262 @@
1/* Do not edit this file! It was automatically generated by */
2/* loadkeys --mktable defkeymap.map > defkeymap.c */
3
4#include <linux/types.h>
5#include <linux/keyboard.h>
6#include <linux/kd.h>
7
8u_short plain_map[NR_KEYS] = {
9 0xf200, 0xf01b, 0xf031, 0xf032, 0xf033, 0xf034, 0xf035, 0xf036,
10 0xf037, 0xf038, 0xf039, 0xf030, 0xf02d, 0xf03d, 0xf07f, 0xf009,
11 0xfb71, 0xfb77, 0xfb65, 0xfb72, 0xfb74, 0xfb79, 0xfb75, 0xfb69,
12 0xfb6f, 0xfb70, 0xf05b, 0xf05d, 0xf201, 0xf702, 0xfb61, 0xfb73,
13 0xfb64, 0xfb66, 0xfb67, 0xfb68, 0xfb6a, 0xfb6b, 0xfb6c, 0xf03b,
14 0xf027, 0xf060, 0xf700, 0xf05c, 0xfb7a, 0xfb78, 0xfb63, 0xfb76,
15 0xfb62, 0xfb6e, 0xfb6d, 0xf02c, 0xf02e, 0xf02f, 0xf700, 0xf30c,
16 0xf703, 0xf020, 0xf207, 0xf100, 0xf101, 0xf102, 0xf103, 0xf104,
17 0xf105, 0xf106, 0xf107, 0xf108, 0xf109, 0xf208, 0xf209, 0xf307,
18 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301,
19 0xf302, 0xf303, 0xf300, 0xf310, 0xf206, 0xf200, 0xf03c, 0xf10a,
20 0xf10b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
21 0xf30e, 0xf702, 0xf30d, 0xf01c, 0xf701, 0xf205, 0xf114, 0xf603,
22 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116,
23 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d,
24 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
25};
26
27u_short shift_map[NR_KEYS] = {
28 0xf200, 0xf01b, 0xf021, 0xf040, 0xf023, 0xf024, 0xf025, 0xf05e,
29 0xf026, 0xf02a, 0xf028, 0xf029, 0xf05f, 0xf02b, 0xf07f, 0xf009,
30 0xfb51, 0xfb57, 0xfb45, 0xfb52, 0xfb54, 0xfb59, 0xfb55, 0xfb49,
31 0xfb4f, 0xfb50, 0xf07b, 0xf07d, 0xf201, 0xf702, 0xfb41, 0xfb53,
32 0xfb44, 0xfb46, 0xfb47, 0xfb48, 0xfb4a, 0xfb4b, 0xfb4c, 0xf03a,
33 0xf022, 0xf07e, 0xf700, 0xf07c, 0xfb5a, 0xfb58, 0xfb43, 0xfb56,
34 0xfb42, 0xfb4e, 0xfb4d, 0xf03c, 0xf03e, 0xf03f, 0xf700, 0xf30c,
35 0xf703, 0xf020, 0xf207, 0xf10a, 0xf10b, 0xf10c, 0xf10d, 0xf10e,
36 0xf10f, 0xf110, 0xf111, 0xf112, 0xf113, 0xf213, 0xf203, 0xf307,
37 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301,
38 0xf302, 0xf303, 0xf300, 0xf310, 0xf206, 0xf200, 0xf03e, 0xf10a,
39 0xf10b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
40 0xf30e, 0xf702, 0xf30d, 0xf200, 0xf701, 0xf205, 0xf114, 0xf603,
41 0xf20b, 0xf601, 0xf602, 0xf117, 0xf600, 0xf20a, 0xf115, 0xf116,
42 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d,
43 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
44};
45
46u_short altgr_map[NR_KEYS] = {
47 0xf200, 0xf200, 0xf200, 0xf040, 0xf200, 0xf024, 0xf200, 0xf200,
48 0xf07b, 0xf05b, 0xf05d, 0xf07d, 0xf05c, 0xf200, 0xf200, 0xf200,
49 0xfb71, 0xfb77, 0xf918, 0xfb72, 0xfb74, 0xfb79, 0xfb75, 0xfb69,
50 0xfb6f, 0xfb70, 0xf200, 0xf07e, 0xf201, 0xf702, 0xf914, 0xfb73,
51 0xf917, 0xf919, 0xfb67, 0xfb68, 0xfb6a, 0xfb6b, 0xfb6c, 0xf200,
52 0xf200, 0xf200, 0xf700, 0xf200, 0xfb7a, 0xfb78, 0xf916, 0xfb76,
53 0xf915, 0xfb6e, 0xfb6d, 0xf200, 0xf200, 0xf200, 0xf700, 0xf30c,
54 0xf703, 0xf200, 0xf207, 0xf50c, 0xf50d, 0xf50e, 0xf50f, 0xf510,
55 0xf511, 0xf512, 0xf513, 0xf514, 0xf515, 0xf208, 0xf202, 0xf911,
56 0xf912, 0xf913, 0xf30b, 0xf90e, 0xf90f, 0xf910, 0xf30a, 0xf90b,
57 0xf90c, 0xf90d, 0xf90a, 0xf310, 0xf206, 0xf200, 0xf07c, 0xf516,
58 0xf517, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
59 0xf30e, 0xf702, 0xf30d, 0xf200, 0xf701, 0xf205, 0xf114, 0xf603,
60 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116,
61 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d,
62 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
63};
64
65u_short ctrl_map[NR_KEYS] = {
66 0xf200, 0xf200, 0xf200, 0xf000, 0xf01b, 0xf01c, 0xf01d, 0xf01e,
67 0xf01f, 0xf07f, 0xf200, 0xf200, 0xf01f, 0xf200, 0xf008, 0xf200,
68 0xf011, 0xf017, 0xf005, 0xf012, 0xf014, 0xf019, 0xf015, 0xf009,
69 0xf00f, 0xf010, 0xf01b, 0xf01d, 0xf201, 0xf702, 0xf001, 0xf013,
70 0xf004, 0xf006, 0xf007, 0xf008, 0xf00a, 0xf00b, 0xf00c, 0xf200,
71 0xf007, 0xf000, 0xf700, 0xf01c, 0xf01a, 0xf018, 0xf003, 0xf016,
72 0xf002, 0xf00e, 0xf00d, 0xf200, 0xf20e, 0xf07f, 0xf700, 0xf30c,
73 0xf703, 0xf000, 0xf207, 0xf100, 0xf101, 0xf102, 0xf103, 0xf104,
74 0xf105, 0xf106, 0xf107, 0xf108, 0xf109, 0xf208, 0xf204, 0xf307,
75 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301,
76 0xf302, 0xf303, 0xf300, 0xf310, 0xf206, 0xf200, 0xf200, 0xf10a,
77 0xf10b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
78 0xf30e, 0xf702, 0xf30d, 0xf01c, 0xf701, 0xf205, 0xf114, 0xf603,
79 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116,
80 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d,
81 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
82};
83
84u_short shift_ctrl_map[NR_KEYS] = {
85 0xf200, 0xf200, 0xf200, 0xf000, 0xf200, 0xf200, 0xf200, 0xf200,
86 0xf200, 0xf200, 0xf200, 0xf200, 0xf01f, 0xf200, 0xf200, 0xf200,
87 0xf011, 0xf017, 0xf005, 0xf012, 0xf014, 0xf019, 0xf015, 0xf009,
88 0xf00f, 0xf010, 0xf200, 0xf200, 0xf201, 0xf702, 0xf001, 0xf013,
89 0xf004, 0xf006, 0xf007, 0xf008, 0xf00a, 0xf00b, 0xf00c, 0xf200,
90 0xf200, 0xf200, 0xf700, 0xf200, 0xf01a, 0xf018, 0xf003, 0xf016,
91 0xf002, 0xf00e, 0xf00d, 0xf200, 0xf200, 0xf200, 0xf700, 0xf30c,
92 0xf703, 0xf200, 0xf207, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
93 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf208, 0xf200, 0xf307,
94 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301,
95 0xf302, 0xf303, 0xf300, 0xf310, 0xf206, 0xf200, 0xf200, 0xf200,
96 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
97 0xf30e, 0xf702, 0xf30d, 0xf200, 0xf701, 0xf205, 0xf114, 0xf603,
98 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116,
99 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d,
100 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
101};
102
103u_short alt_map[NR_KEYS] = {
104 0xf200, 0xf81b, 0xf831, 0xf832, 0xf833, 0xf834, 0xf835, 0xf836,
105 0xf837, 0xf838, 0xf839, 0xf830, 0xf82d, 0xf83d, 0xf87f, 0xf809,
106 0xf871, 0xf877, 0xf865, 0xf872, 0xf874, 0xf879, 0xf875, 0xf869,
107 0xf86f, 0xf870, 0xf85b, 0xf85d, 0xf80d, 0xf702, 0xf861, 0xf873,
108 0xf864, 0xf866, 0xf867, 0xf868, 0xf86a, 0xf86b, 0xf86c, 0xf83b,
109 0xf827, 0xf860, 0xf700, 0xf85c, 0xf87a, 0xf878, 0xf863, 0xf876,
110 0xf862, 0xf86e, 0xf86d, 0xf82c, 0xf82e, 0xf82f, 0xf700, 0xf30c,
111 0xf703, 0xf820, 0xf207, 0xf500, 0xf501, 0xf502, 0xf503, 0xf504,
112 0xf505, 0xf506, 0xf507, 0xf508, 0xf509, 0xf208, 0xf209, 0xf907,
113 0xf908, 0xf909, 0xf30b, 0xf904, 0xf905, 0xf906, 0xf30a, 0xf901,
114 0xf902, 0xf903, 0xf900, 0xf310, 0xf206, 0xf200, 0xf83c, 0xf50a,
115 0xf50b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
116 0xf30e, 0xf702, 0xf30d, 0xf01c, 0xf701, 0xf205, 0xf114, 0xf603,
117 0xf118, 0xf210, 0xf211, 0xf117, 0xf600, 0xf119, 0xf115, 0xf116,
118 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d,
119 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
120};
121
122u_short ctrl_alt_map[NR_KEYS] = {
123 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
124 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
125 0xf811, 0xf817, 0xf805, 0xf812, 0xf814, 0xf819, 0xf815, 0xf809,
126 0xf80f, 0xf810, 0xf200, 0xf200, 0xf201, 0xf702, 0xf801, 0xf813,
127 0xf804, 0xf806, 0xf807, 0xf808, 0xf80a, 0xf80b, 0xf80c, 0xf200,
128 0xf200, 0xf200, 0xf700, 0xf200, 0xf81a, 0xf818, 0xf803, 0xf816,
129 0xf802, 0xf80e, 0xf80d, 0xf200, 0xf200, 0xf200, 0xf700, 0xf30c,
130 0xf703, 0xf200, 0xf207, 0xf500, 0xf501, 0xf502, 0xf503, 0xf504,
131 0xf505, 0xf506, 0xf507, 0xf508, 0xf509, 0xf208, 0xf200, 0xf307,
132 0xf308, 0xf309, 0xf30b, 0xf304, 0xf305, 0xf306, 0xf30a, 0xf301,
133 0xf302, 0xf303, 0xf300, 0xf20c, 0xf206, 0xf200, 0xf200, 0xf50a,
134 0xf50b, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
135 0xf30e, 0xf702, 0xf30d, 0xf200, 0xf701, 0xf205, 0xf114, 0xf603,
136 0xf118, 0xf601, 0xf602, 0xf117, 0xf600, 0xf119, 0xf115, 0xf20c,
137 0xf11a, 0xf10c, 0xf10d, 0xf11b, 0xf11c, 0xf110, 0xf311, 0xf11d,
138 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200, 0xf200,
139};
140
141ushort *key_maps[MAX_NR_KEYMAPS] = {
142 plain_map, shift_map, altgr_map, NULL,
143 ctrl_map, shift_ctrl_map, NULL, NULL,
144 alt_map, NULL, NULL, NULL,
145 ctrl_alt_map, NULL
146};
147
148unsigned int keymap_count = 7;
149
150/*
151 * Philosophy: most people do not define more strings, but they who do
152 * often want quite a lot of string space. So, we statically allocate
153 * the default and allocate dynamically in chunks of 512 bytes.
154 */
155
156char func_buf[] = {
157 '\033', '[', '[', 'A', 0,
158 '\033', '[', '[', 'B', 0,
159 '\033', '[', '[', 'C', 0,
160 '\033', '[', '[', 'D', 0,
161 '\033', '[', '[', 'E', 0,
162 '\033', '[', '1', '7', '~', 0,
163 '\033', '[', '1', '8', '~', 0,
164 '\033', '[', '1', '9', '~', 0,
165 '\033', '[', '2', '0', '~', 0,
166 '\033', '[', '2', '1', '~', 0,
167 '\033', '[', '2', '3', '~', 0,
168 '\033', '[', '2', '4', '~', 0,
169 '\033', '[', '2', '5', '~', 0,
170 '\033', '[', '2', '6', '~', 0,
171 '\033', '[', '2', '8', '~', 0,
172 '\033', '[', '2', '9', '~', 0,
173 '\033', '[', '3', '1', '~', 0,
174 '\033', '[', '3', '2', '~', 0,
175 '\033', '[', '3', '3', '~', 0,
176 '\033', '[', '3', '4', '~', 0,
177 '\033', '[', '1', '~', 0,
178 '\033', '[', '2', '~', 0,
179 '\033', '[', '3', '~', 0,
180 '\033', '[', '4', '~', 0,
181 '\033', '[', '5', '~', 0,
182 '\033', '[', '6', '~', 0,
183 '\033', '[', 'M', 0,
184 '\033', '[', 'P', 0,
185};
186
187char *funcbufptr = func_buf;
188int funcbufsize = sizeof(func_buf);
189int funcbufleft = 0; /* space left */
190
191char *func_table[MAX_NR_FUNC] = {
192 func_buf + 0,
193 func_buf + 5,
194 func_buf + 10,
195 func_buf + 15,
196 func_buf + 20,
197 func_buf + 25,
198 func_buf + 31,
199 func_buf + 37,
200 func_buf + 43,
201 func_buf + 49,
202 func_buf + 55,
203 func_buf + 61,
204 func_buf + 67,
205 func_buf + 73,
206 func_buf + 79,
207 func_buf + 85,
208 func_buf + 91,
209 func_buf + 97,
210 func_buf + 103,
211 func_buf + 109,
212 func_buf + 115,
213 func_buf + 120,
214 func_buf + 125,
215 func_buf + 130,
216 func_buf + 135,
217 func_buf + 140,
218 func_buf + 145,
219 NULL,
220 NULL,
221 func_buf + 149,
222 NULL,
223};
224
225struct kbdiacruc accent_table[MAX_DIACR] = {
226 {'`', 'A', 0300}, {'`', 'a', 0340},
227 {'\'', 'A', 0301}, {'\'', 'a', 0341},
228 {'^', 'A', 0302}, {'^', 'a', 0342},
229 {'~', 'A', 0303}, {'~', 'a', 0343},
230 {'"', 'A', 0304}, {'"', 'a', 0344},
231 {'O', 'A', 0305}, {'o', 'a', 0345},
232 {'0', 'A', 0305}, {'0', 'a', 0345},
233 {'A', 'A', 0305}, {'a', 'a', 0345},
234 {'A', 'E', 0306}, {'a', 'e', 0346},
235 {',', 'C', 0307}, {',', 'c', 0347},
236 {'`', 'E', 0310}, {'`', 'e', 0350},
237 {'\'', 'E', 0311}, {'\'', 'e', 0351},
238 {'^', 'E', 0312}, {'^', 'e', 0352},
239 {'"', 'E', 0313}, {'"', 'e', 0353},
240 {'`', 'I', 0314}, {'`', 'i', 0354},
241 {'\'', 'I', 0315}, {'\'', 'i', 0355},
242 {'^', 'I', 0316}, {'^', 'i', 0356},
243 {'"', 'I', 0317}, {'"', 'i', 0357},
244 {'-', 'D', 0320}, {'-', 'd', 0360},
245 {'~', 'N', 0321}, {'~', 'n', 0361},
246 {'`', 'O', 0322}, {'`', 'o', 0362},
247 {'\'', 'O', 0323}, {'\'', 'o', 0363},
248 {'^', 'O', 0324}, {'^', 'o', 0364},
249 {'~', 'O', 0325}, {'~', 'o', 0365},
250 {'"', 'O', 0326}, {'"', 'o', 0366},
251 {'/', 'O', 0330}, {'/', 'o', 0370},
252 {'`', 'U', 0331}, {'`', 'u', 0371},
253 {'\'', 'U', 0332}, {'\'', 'u', 0372},
254 {'^', 'U', 0333}, {'^', 'u', 0373},
255 {'"', 'U', 0334}, {'"', 'u', 0374},
256 {'\'', 'Y', 0335}, {'\'', 'y', 0375},
257 {'T', 'H', 0336}, {'t', 'h', 0376},
258 {'s', 's', 0337}, {'"', 'y', 0377},
259 {'s', 'z', 0337}, {'i', 'j', 0377},
260};
261
262unsigned int accent_table_size = 68;
diff --git a/fs/exec.c b/fs/exec.c
index 6d2b6f936858..56536ad0e7cc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -19,7 +19,7 @@
19 * current->executable is only used by the procfs. This allows a dispatch 19 * current->executable is only used by the procfs. This allows a dispatch
20 * table to check for several different types of binary formats. We keep 20 * table to check for several different types of binary formats. We keep
21 * trying until we recognize the file or we run out of supported binary 21 * trying until we recognize the file or we run out of supported binary
22 * formats. 22 * formats.
23 */ 23 */
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
@@ -55,6 +55,8 @@
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
 
+#include <litmus/litmus.h>
+
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
@@ -78,7 +80,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert)
78 insert ? list_add(&fmt->lh, &formats) : 80 insert ? list_add(&fmt->lh, &formats) :
79 list_add_tail(&fmt->lh, &formats); 81 list_add_tail(&fmt->lh, &formats);
80 write_unlock(&binfmt_lock); 82 write_unlock(&binfmt_lock);
81 return 0; 83 return 0;
82} 84}
83 85
84EXPORT_SYMBOL(__register_binfmt); 86EXPORT_SYMBOL(__register_binfmt);
@@ -1064,7 +1066,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1064 group */ 1066 group */
1065 1067
1066 current->self_exec_id++; 1068 current->self_exec_id++;
1067 1069
1068 flush_signal_handlers(current, 0); 1070 flush_signal_handlers(current, 0);
1069 flush_old_files(current->files); 1071 flush_old_files(current->files);
1070} 1072}
@@ -1154,8 +1156,8 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1154 return res; 1156 return res;
1155} 1157}
1156 1158
1157/* 1159/*
1158 * Fill the binprm structure from the inode. 1160 * Fill the binprm structure from the inode.
1159 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes 1161 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
1160 * 1162 *
1161 * This may be called multiple times for binary chains (scripts for example). 1163 * This may be called multiple times for binary chains (scripts for example).
@@ -1367,6 +1369,7 @@ int do_execve(const char * filename,
 		goto out_unmark;
 
 	sched_exec();
+	litmus_exec();
 
 	bprm->file = file;
 	bprm->filename = filename;
diff --git a/fs/inode.c b/fs/inode.c
index 86464332e590..d4fe9c031864 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -266,6 +266,8 @@ void inode_init_once(struct inode *inode)
 #ifdef CONFIG_FSNOTIFY
 	INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
 #endif
+	INIT_LIST_HEAD(&inode->i_obj_list);
+	mutex_init(&inode->i_obj_mutex);
 }
 EXPORT_SYMBOL(inode_init_once);
 
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 51e3145196f6..c63950e8a863 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -90,6 +90,7 @@ extern bool completion_done(struct completion *x);
 
 extern void complete(struct completion *);
 extern void complete_all(struct completion *);
+extern void complete_n(struct completion *, int n);
 
 /**
  * INIT_COMPLETION: - reinitialize a completion structure
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 63d069bd80b7..29a672458d27 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -16,8 +16,8 @@
16 * nr_file rlimit, so it's safe to set up a ridiculously high absolute 16 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
17 * upper limit on files-per-process. 17 * upper limit on files-per-process.
18 * 18 *
19 * Some programs (notably those using select()) may have to be 19 * Some programs (notably those using select()) may have to be
20 * recompiled to take full advantage of the new limits.. 20 * recompiled to take full advantage of the new limits..
21 */ 21 */
22 22
23/* Fixed constants first: */ 23/* Fixed constants first: */
@@ -172,7 +172,7 @@ struct inodes_stat_t {
172#define SEL_EX 4 172#define SEL_EX 4
173 173
174/* public flags for file_system_type */ 174/* public flags for file_system_type */
175#define FS_REQUIRES_DEV 1 175#define FS_REQUIRES_DEV 1
176#define FS_BINARY_MOUNTDATA 2 176#define FS_BINARY_MOUNTDATA 2
177#define FS_HAS_SUBTYPE 4 177#define FS_HAS_SUBTYPE 4
178#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 178#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
@@ -470,7 +470,7 @@ struct iattr {
470 */ 470 */
471#include <linux/quota.h> 471#include <linux/quota.h>
472 472
473/** 473/**
474 * enum positive_aop_returns - aop return codes with specific semantics 474 * enum positive_aop_returns - aop return codes with specific semantics
475 * 475 *
476 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has 476 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
@@ -480,7 +480,7 @@ struct iattr {
480 * be a candidate for writeback again in the near 480 * be a candidate for writeback again in the near
481 * future. Other callers must be careful to unlock 481 * future. Other callers must be careful to unlock
482 * the page if they get this return. Returned by 482 * the page if they get this return. Returned by
483 * writepage(); 483 * writepage();
484 * 484 *
485 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has 485 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
486 * unlocked it and the page might have been truncated. 486 * unlocked it and the page might have been truncated.
@@ -721,6 +721,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
 
 struct posix_acl;
 #define ACL_NOT_CACHED ((void *)(-1))
+struct inode_obj_id_table;
 
 struct inode {
 	struct hlist_node	i_hash;
@@ -784,6 +785,8 @@ struct inode {
 	struct posix_acl	*i_acl;
 	struct posix_acl	*i_default_acl;
 #endif
+	struct list_head	i_obj_list;
+	struct mutex		i_obj_mutex;
 	void			*i_private; /* fs or device private pointer */
 };
 
@@ -997,10 +1000,10 @@ static inline int file_check_writeable(struct file *filp)
997 1000
998#define MAX_NON_LFS ((1UL<<31) - 1) 1001#define MAX_NON_LFS ((1UL<<31) - 1)
999 1002
1000/* Page cache limit. The filesystems should put that into their s_maxbytes 1003/* Page cache limit. The filesystems should put that into their s_maxbytes
1001 limits, otherwise bad things can happen in VM. */ 1004 limits, otherwise bad things can happen in VM. */
1002#if BITS_PER_LONG==32 1005#if BITS_PER_LONG==32
1003#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 1006#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
1004#elif BITS_PER_LONG==64 1007#elif BITS_PER_LONG==64
1005#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL 1008#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL
1006#endif 1009#endif
@@ -2145,7 +2148,7 @@ extern int may_open(struct path *, int, int);
2145 2148
2146extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2149extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2147extern struct file * open_exec(const char *); 2150extern struct file * open_exec(const char *);
2148 2151
2149/* fs/dcache.c -- generic fs support functions */ 2152/* fs/dcache.c -- generic fs support functions */
2150extern int is_subdir(struct dentry *, struct dentry *); 2153extern int is_subdir(struct dentry *, struct dentry *);
2151extern int path_is_under(struct path *, struct path *); 2154extern int path_is_under(struct path *, struct path *);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0c1b857d3d..76da541c1f66 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -167,6 +167,7 @@ struct hrtimer_clock_base {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @to_pull:		LITMUS^RT list of timers to be pulled on this cpu
  */
 struct hrtimer_cpu_base {
 	raw_spinlock_t			lock;
@@ -180,8 +181,32 @@ struct hrtimer_cpu_base {
 	unsigned long			nr_hangs;
 	ktime_t				max_hang_time;
 #endif
+	struct list_head		to_pull;
 };
 
+#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
+
+#define HRTIMER_START_ON_INACTIVE	0
+#define HRTIMER_START_ON_QUEUED		1
+
+/*
+ * struct hrtimer_start_on_info - save timer info on remote cpu
+ * @list:	list of hrtimer_start_on_info on remote cpu (to_pull)
+ * @timer:	timer to be triggered on remote cpu
+ * @time:	time event
+ * @mode:	timer mode
+ * @state:	activity flag
+ */
+struct hrtimer_start_on_info {
+	struct list_head	list;
+	struct hrtimer		*timer;
+	ktime_t			time;
+	enum hrtimer_mode	mode;
+	atomic_t		state;
+};
+
+#endif
+
 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
 {
 	timer->_expires = time;
@@ -348,6 +373,13 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 			 unsigned long delta_ns,
 			 const enum hrtimer_mode mode, int wakeup);
 
+#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
+extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info);
+extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
+			struct hrtimer *timer, ktime_t time,
+			const enum hrtimer_mode mode);
+#endif
+
 extern int hrtimer_cancel(struct hrtimer *timer);
 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
 
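hrtimer_start_on(), declared above, together with the PULL_TIMERS vector added earlier in this patch, lets a scheduler plugin arm a timer that belongs to another CPU. A hedged sketch of how a caller might use it -- the callback and wrapper names are hypothetical, and the exact queuing/IPI behaviour lives in kernel/hrtimer.c, which is not shown here:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer release_timer;
static struct hrtimer_start_on_info release_info;

static enum hrtimer_restart release_fired(struct hrtimer *t)
{
	/* e.g. merge released jobs into the ready queue */
	return HRTIMER_NORESTART;
}

static void arm_release_timer_on(int cpu, u64 when_ns)
{
	hrtimer_init(&release_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	release_timer.function = release_fired;
	hrtimer_start_on_info_init(&release_info);
	/* For a remote cpu the request is presumably queued on that CPU's
	 * to_pull list and completed after a PULL_TIMERS IPI. */
	hrtimer_start_on(cpu, &release_info, &release_timer,
			 ns_to_ktime(when_ns), HRTIMER_MODE_ABS);
}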
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e2a6db2d7dd..c9ac4fc837ba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -38,6 +38,7 @@
 #define SCHED_BATCH		3
 /* SCHED_ISO: reserved but not implemented yet */
 #define SCHED_IDLE		5
+#define SCHED_LITMUS		6
 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
 #define SCHED_RESET_ON_FORK     0x40000000
 
@@ -94,6 +95,9 @@ struct sched_param {
94 95
95#include <asm/processor.h> 96#include <asm/processor.h>
96 97
98#include <litmus/rt_param.h>
99#include <litmus/preempt.h>
100
97struct exec_domain; 101struct exec_domain;
98struct futex_pi_state; 102struct futex_pi_state;
99struct robust_list_head; 103struct robust_list_head;
@@ -1159,6 +1163,7 @@ struct sched_rt_entity {
1159}; 1163};
1160 1164
1161struct rcu_node; 1165struct rcu_node;
1166struct od_table_entry;
1162 1167
1163struct task_struct { 1168struct task_struct {
1164 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 1169 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
@@ -1243,9 +1248,9 @@ struct task_struct {
1243 unsigned long stack_canary; 1248 unsigned long stack_canary;
1244#endif 1249#endif
1245 1250
1246 /* 1251 /*
1247 * pointers to (original) parent process, youngest child, younger sibling, 1252 * pointers to (original) parent process, youngest child, younger sibling,
1248 * older sibling, respectively. (p->father can be replaced with 1253 * older sibling, respectively. (p->father can be replaced with
1249 * p->real_parent->pid) 1254 * p->real_parent->pid)
1250 */ 1255 */
1251 struct task_struct *real_parent; /* real parent process */ 1256 struct task_struct *real_parent; /* real parent process */
@@ -1453,6 +1458,13 @@ struct task_struct {
1453 int make_it_fail; 1458 int make_it_fail;
1454#endif 1459#endif
1455 struct prop_local_single dirties; 1460 struct prop_local_single dirties;
1461
1462 /* LITMUS RT parameters and state */
1463 struct rt_param rt_param;
1464
1465 /* references to PI semaphores, etc. */
1466 struct od_table_entry *od_table;
1467
1456#ifdef CONFIG_LATENCYTOP 1468#ifdef CONFIG_LATENCYTOP
1457 int latency_record_count; 1469 int latency_record_count;
1458 struct latency_record latency_record[LT_SAVECOUNT]; 1470 struct latency_record latency_record[LT_SAVECOUNT];
@@ -2014,7 +2026,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s
2014 spin_unlock_irqrestore(&tsk->sighand->siglock, flags); 2026 spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
2015 2027
2016 return ret; 2028 return ret;
2017} 2029}
2018 2030
2019extern void block_all_signals(int (*notifier)(void *priv), void *priv, 2031extern void block_all_signals(int (*notifier)(void *priv), void *priv,
2020 sigset_t *mask); 2032 sigset_t *mask);
@@ -2290,6 +2302,7 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
2290static inline void set_tsk_need_resched(struct task_struct *tsk) 2302static inline void set_tsk_need_resched(struct task_struct *tsk)
2291{ 2303{
2292 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); 2304 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2305 sched_state_will_schedule(tsk);
2293} 2306}
2294 2307
2295static inline void clear_tsk_need_resched(struct task_struct *tsk) 2308static inline void clear_tsk_need_resched(struct task_struct *tsk)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index cfa2d20e35f1..f86d40768e7f 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -80,6 +80,11 @@ int smp_call_function_any(const struct cpumask *mask,
80 void (*func)(void *info), void *info, int wait); 80 void (*func)(void *info), void *info, int wait);
81 81
82/* 82/*
83 * sends a 'pull timer' event to a remote CPU
84 */
85extern void smp_send_pull_timers(int cpu);
86
87/*
83 * Generic and arch helpers 88 * Generic and arch helpers
84 */ 89 */
85#ifdef CONFIG_USE_GENERIC_SMP_HELPERS 90#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
diff --git a/include/linux/tick.h b/include/linux/tick.h
index b232ccc0ee29..1e29bd5b18af 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -74,6 +74,11 @@ extern int tick_is_oneshot_available(void);
74extern struct tick_device *tick_get_device(int cpu); 74extern struct tick_device *tick_get_device(int cpu);
75 75
76# ifdef CONFIG_HIGH_RES_TIMERS 76# ifdef CONFIG_HIGH_RES_TIMERS
77/* LITMUS^RT tick alignment */
78#define LINUX_DEFAULT_TICKS 0
79#define LITMUS_ALIGNED_TICKS 1
80#define LITMUS_STAGGERED_TICKS 2
81
77extern int tick_init_highres(void); 82extern int tick_init_highres(void);
78extern int tick_program_event(ktime_t expires, int force); 83extern int tick_program_event(ktime_t expires, int force);
79extern void tick_setup_sched_timer(void); 84extern void tick_setup_sched_timer(void);
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
new file mode 100644
index 000000000000..cf4864a498d8
--- /dev/null
+++ b/include/litmus/bheap.h
@@ -0,0 +1,77 @@
1/* bheap.h -- Binomial Heaps
2 *
3 * (c) 2008, 2009 Bjoern Brandenburg
4 */
5
6#ifndef BHEAP_H
7#define BHEAP_H
8
9#define NOT_IN_HEAP UINT_MAX
10
11struct bheap_node {
12 struct bheap_node* parent;
13 struct bheap_node* next;
14 struct bheap_node* child;
15
16 unsigned int degree;
17 void* value;
18 struct bheap_node** ref;
19};
20
21struct bheap {
22 struct bheap_node* head;
23 /* We cache the minimum of the heap.
24 * This speeds up repeated peek operations.
25 */
26 struct bheap_node* min;
27};
28
29typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
30
31void bheap_init(struct bheap* heap);
32void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
33
34static inline int bheap_node_in_heap(struct bheap_node* h)
35{
36 return h->degree != NOT_IN_HEAP;
37}
38
39static inline int bheap_empty(struct bheap* heap)
40{
41 return heap->head == NULL && heap->min == NULL;
42}
43
44/* insert (and reinitialize) a node into the heap */
45void bheap_insert(bheap_prio_t higher_prio,
46 struct bheap* heap,
47 struct bheap_node* node);
48
49/* merge addition into target */
50void bheap_union(bheap_prio_t higher_prio,
51 struct bheap* target,
52 struct bheap* addition);
53
54struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
55 struct bheap* heap);
56
57struct bheap_node* bheap_take(bheap_prio_t higher_prio,
58 struct bheap* heap);
59
60void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
61int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
62
63void bheap_delete(bheap_prio_t higher_prio,
64 struct bheap* heap,
65 struct bheap_node* node);
66
67/* allocate from memcache */
68struct bheap_node* bheap_node_alloc(int gfp_flags);
69void bheap_node_free(struct bheap_node* hn);
70
71/* allocate a heap node for value and insert into the heap */
72int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
73 void* value, int gfp_flags);
74
75void* bheap_take_del(bheap_prio_t higher_prio,
76 struct bheap* heap);
77#endif
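A short, hypothetical usage sketch of the API above; int_higher_prio, bheap_demo, and the values are invented, and only the declarations in this header are relied upon.

#include <linux/kernel.h>	/* UINT_MAX */
#include <linux/gfp.h>
#include <litmus/bheap.h>

/* Order nodes by the int they carry; nonzero means 'a has higher priority'. */
static int int_higher_prio(struct bheap_node *a, struct bheap_node *b)
{
	return *(int *) a->value < *(int *) b->value;
}

static void bheap_demo(void)
{
	struct bheap heap;
	int x = 10, y = 20;
	void *min;

	bheap_init(&heap);
	/* bheap_add() allocates the bheap_node from the memcache. */
	bheap_add(int_higher_prio, &heap, &x, GFP_ATOMIC);
	bheap_add(int_higher_prio, &heap, &y, GFP_ATOMIC);

	/* Removes the highest-priority element: &x first, since 10 < 20. */
	min = bheap_take_del(int_higher_prio, &heap);
	(void) min;
}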
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
new file mode 100644
index 000000000000..732530e63491
--- /dev/null
+++ b/include/litmus/budget.h
@@ -0,0 +1,8 @@
1#ifndef _LITMUS_BUDGET_H_
2#define _LITMUS_BUDGET_H_
3
4/* Update the per-processor enforcement timer (arm/reprogram/cancel) for
5 * the next task. */
6void update_enforcement_timer(struct task_struct* t);
7
8#endif
diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h
new file mode 100644
index 000000000000..0c18dcb15e6c
--- /dev/null
+++ b/include/litmus/clustered.h
@@ -0,0 +1,44 @@
1#ifndef CLUSTERED_H
2#define CLUSTERED_H
3
4/* Which cache level should be used to group CPUs into clusters?
5 * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under
6 * global scheduling).
7 */
8enum cache_level {
9 GLOBAL_CLUSTER = 0,
10 L1_CLUSTER = 1,
11 L2_CLUSTER = 2,
12 L3_CLUSTER = 3
13};
14
15int parse_cache_level(const char *str, enum cache_level *level);
16const char* cache_level_name(enum cache_level level);
17
18/* expose a cache level in a /proc dir */
19struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
20 enum cache_level* level);
21
22
23
24struct scheduling_cluster {
25 unsigned int id;
26 /* list of CPUs that are part of this cluster */
27 struct list_head cpus;
28};
29
30struct cluster_cpu {
31 unsigned int id; /* which CPU is this? */
32 struct list_head cluster_list; /* List of the CPUs in this cluster. */
33 struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */
34};
35
36int get_cluster_size(enum cache_level level);
37
38int assign_cpus_to_clusters(enum cache_level level,
39 struct scheduling_cluster* clusters[],
40 unsigned int num_clusters,
41 struct cluster_cpu* cpus[],
42 unsigned int num_cpus);
43
44#endif
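As a rough illustration of how a clustered plugin might use this interface, a hypothetical helper is sketched below; group_cpus_by_l2 is invented, and the clusters[] and cpus[] arrays are assumed to be allocated by the caller.

#include <linux/errno.h>
#include <linux/list.h>
#include <litmus/clustered.h>

static int group_cpus_by_l2(struct scheduling_cluster *clusters[],
			    unsigned int num_clusters,
			    struct cluster_cpu *cpus[],
			    unsigned int num_cpus)
{
	if (get_cluster_size(L2_CLUSTER) <= 0)
		return -EINVAL;
	/* Links each cluster_cpu into its scheduling_cluster's cpus list. */
	return assign_cpus_to_clusters(L2_CLUSTER, clusters, num_clusters,
				       cpus, num_cpus);
}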
diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h
new file mode 100644
index 000000000000..48d086d5a44c
--- /dev/null
+++ b/include/litmus/debug_trace.h
@@ -0,0 +1,37 @@
1#ifndef LITMUS_DEBUG_TRACE_H
2#define LITMUS_DEBUG_TRACE_H
3
4#ifdef CONFIG_SCHED_DEBUG_TRACE
5void sched_trace_log_message(const char* fmt, ...);
6void dump_trace_buffer(int max);
7#else
8
9#define sched_trace_log_message(fmt, ...)
10
11#endif
12
13extern atomic_t __log_seq_no;
14
15#ifdef CONFIG_SCHED_DEBUG_TRACE_CALLER
16#define TRACE_PREFIX "%d P%d [%s@%s:%d]: "
17#define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \
18 raw_smp_processor_id(), \
19 __FUNCTION__, __FILE__, __LINE__
20#else
21#define TRACE_PREFIX "%d P%d: "
22#define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \
23 raw_smp_processor_id()
24#endif
25
26#define TRACE(fmt, args...) \
27 sched_trace_log_message(TRACE_PREFIX fmt, \
28 TRACE_ARGS, ## args)
29
30#define TRACE_TASK(t, fmt, args...) \
31 TRACE("(%s/%d:%d) " fmt, (t)->comm, (t)->pid, \
32 (t)->rt_param.job_params.job_no, ##args)
33
34#define TRACE_CUR(fmt, args...) \
35 TRACE_TASK(current, fmt, ## args)
36
37#endif
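For illustration, a hypothetical call site using these macros (log_decision is invented); with CONFIG_SCHED_DEBUG_TRACE disabled the calls expand to nothing.

#include <linux/sched.h>
#include <linux/smp.h>
#include <litmus/debug_trace.h>

static void log_decision(struct task_struct *next)
{
	TRACE("invoking scheduler on P%d\n", raw_smp_processor_id());
	if (next)
		TRACE_TASK(next, "selected as next task\n");
	else
		TRACE_CUR("nothing to run, staying idle\n");
}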
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
new file mode 100644
index 000000000000..2c4266f77c03
--- /dev/null
+++ b/include/litmus/edf_common.h
@@ -0,0 +1,33 @@
1/*
2 * EDF common data structures and utility functions shared by all EDF
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_EDF_COMMON_H__
11#define __UNC_EDF_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int edf_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21#ifdef CONFIG_LITMUS_LOCKING
22/* priority comparison without priority inheritance */
23int edf_higher_base_prio(struct task_struct* first,
24 struct task_struct* second);
25
26int edf_pending_order(struct bheap_node* a, struct bheap_node* b);
27#endif
28
29int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
30
31int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
32
33#endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 000000000000..d1ee0d1142d8
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,77 @@
1/* fdso.h - file descriptor attached shared objects
2 *
3 * (c) 2007--2011 B. Brandenburg, LITMUS^RT project
4 */
5
6#ifndef _LINUX_FDSO_H_
7#define _LINUX_FDSO_H_
8
9#include <linux/list.h>
10#include <asm/atomic.h>
11
12#include <linux/fs.h>
13#include <linux/slab.h>
14
15#define MAX_OBJECT_DESCRIPTORS 85
16
17typedef enum {
18 MIN_OBJ_TYPE = 0,
19
20 FMLP_SEM = 0,
21 SRP_SEM = 1,
22
23 MPCP_SEM = 2,
24 MPCP_VS_SEM = 3,
25 DPCP_SEM = 4,
26
27 OMLP_SEM = 5,
28
29 MAX_OBJ_TYPE = 5
30} obj_type_t;
31
32struct inode_obj_id {
33 struct list_head list;
34 atomic_t count;
35 struct inode* inode;
36
37 obj_type_t type;
38 void* obj;
39 unsigned int id;
40};
41
42struct fdso_ops;
43
44struct od_table_entry {
45 unsigned int used;
46
47 struct inode_obj_id* obj;
48 const struct fdso_ops* class;
49};
50
51struct fdso_ops {
52 int (*create)(void** obj_ref, obj_type_t type, void* __user);
53 void (*destroy)(obj_type_t type, void*);
54 int (*open) (struct od_table_entry*, void* __user);
55 int (*close) (struct od_table_entry*);
56};
57
58/* translate a userspace supplied od into the raw table entry
59 * returns NULL if od is invalid
60 */
61struct od_table_entry* get_entry_for_od(int od);
62
63/* translate a userspace supplied od into the associated object
64 * returns NULL if od is invalid
65 */
66static inline void* od_lookup(int od, obj_type_t type)
67{
68 struct od_table_entry* e = get_entry_for_od(od);
69 return e && e->obj->type == type ? e->obj->obj : NULL;
70}
71
72#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
73#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
74#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
75
76
77#endif
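A small, hypothetical example of the descriptor translation; fmlp_sem_from_od is invented, and struct pi_semaphore is the FMLP's protocol-specific type defined elsewhere.

#include <litmus/fdso.h>

struct pi_semaphore;	/* defined by the FMLP implementation */

static struct pi_semaphore *fmlp_sem_from_od(int od)
{
	/* NULL if od is out of range, unused, or refers to another type. */
	return lookup_fmlp_sem(od);
}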
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
new file mode 100644
index 000000000000..6c18277fdfc9
--- /dev/null
+++ b/include/litmus/feather_buffer.h
@@ -0,0 +1,94 @@
1#ifndef _FEATHER_BUFFER_H_
2#define _FEATHER_BUFFER_H_
3
4/* requires UINT_MAX and memcpy */
5
6#define SLOT_FREE 0
7#define SLOT_BUSY 1
8#define SLOT_READY 2
9
10struct ft_buffer {
11 unsigned int slot_count;
12 unsigned int slot_size;
13
14 int free_count;
15 unsigned int write_idx;
16 unsigned int read_idx;
17
18 char* slots;
19 void* buffer_mem;
20 unsigned int failed_writes;
21};
22
23static inline int init_ft_buffer(struct ft_buffer* buf,
24 unsigned int slot_count,
25 unsigned int slot_size,
26 char* slots,
27 void* buffer_mem)
28{
29 int i = 0;
30 if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
31 /* The slot count must divide UINT_MAX + 1 so that when it
32 * wraps around the index correctly points to 0.
33 */
34 return 0;
35 } else {
36 buf->slot_count = slot_count;
37 buf->slot_size = slot_size;
38 buf->slots = slots;
39 buf->buffer_mem = buffer_mem;
40 buf->free_count = slot_count;
41 buf->write_idx = 0;
42 buf->read_idx = 0;
43 buf->failed_writes = 0;
44 for (i = 0; i < slot_count; i++)
45 buf->slots[i] = SLOT_FREE;
46 return 1;
47 }
48}
49
50static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
51{
52 int free = fetch_and_dec(&buf->free_count);
53 unsigned int idx;
54 if (free <= 0) {
55 fetch_and_inc(&buf->free_count);
56 *ptr = 0;
57 fetch_and_inc(&buf->failed_writes);
58 return 0;
59 } else {
60 idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
61 buf->slots[idx] = SLOT_BUSY;
62 *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
63 return 1;
64 }
65}
66
67static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
68{
69 unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
70 buf->slots[idx] = SLOT_READY;
71}
72
73
74/* exclusive reader access is assumed */
75static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
76{
77 unsigned int idx;
78 if (buf->free_count == buf->slot_count)
79 /* nothing available */
80 return 0;
81 idx = buf->read_idx % buf->slot_count;
82 if (buf->slots[idx] == SLOT_READY) {
83 memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
84 buf->slot_size);
85 buf->slots[idx] = SLOT_FREE;
86 buf->read_idx++;
87 fetch_and_inc(&buf->free_count);
88 return 1;
89 } else
90 return 0;
91}
92
93
94#endif
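To make the lock-free protocol concrete, here is a hypothetical single-reader event buffer built on this header; all demo_* names, the slot count, and struct demo_event are invented. Writers may run in scheduler or IRQ context, while reading is assumed to happen from a single context.

#include <linux/errno.h>
#include <linux/kernel.h>		/* UINT_MAX */
#include <linux/string.h>		/* memcpy() */
#include <litmus/feather_trace.h>	/* fetch_and_inc()/fetch_and_dec() */
#include <litmus/feather_buffer.h>

#define DEMO_SLOTS 256	/* must divide UINT_MAX + 1, i.e. be a power of two */

struct demo_event {
	unsigned long long when;
	int cpu;
};

static char demo_flags[DEMO_SLOTS];
static struct demo_event demo_mem[DEMO_SLOTS];
static struct ft_buffer demo_buf;

static int demo_init(void)
{
	return init_ft_buffer(&demo_buf, DEMO_SLOTS, sizeof(struct demo_event),
			      demo_flags, demo_mem) ? 0 : -EINVAL;
}

/* Writer: reserve a slot, fill it, publish it. Failed reservations are
 * counted in demo_buf.failed_writes. */
static void demo_record(unsigned long long when, int cpu)
{
	struct demo_event *ev;
	if (ft_buffer_start_write(&demo_buf, (void **) &ev)) {
		ev->when = when;
		ev->cpu  = cpu;
		ft_buffer_finish_write(&demo_buf, ev);
	}
}

/* Single reader: copy out the oldest completed record, if any. */
static int demo_read_one(struct demo_event *out)
{
	return ft_buffer_read(&demo_buf, out);
}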
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
new file mode 100644
index 000000000000..028dfb206fb0
--- /dev/null
+++ b/include/litmus/feather_trace.h
@@ -0,0 +1,65 @@
1#ifndef _FEATHER_TRACE_H_
2#define _FEATHER_TRACE_H_
3
4#include <asm/atomic.h>
5
6int ft_enable_event(unsigned long id);
7int ft_disable_event(unsigned long id);
8int ft_is_event_enabled(unsigned long id);
9int ft_disable_all_events(void);
10
11/* atomic_* functions are inline anyway */
12static inline int fetch_and_inc(int *val)
13{
14 return atomic_add_return(1, (atomic_t*) val) - 1;
15}
16
17static inline int fetch_and_dec(int *val)
18{
19 return atomic_sub_return(1, (atomic_t*) val) + 1;
20}
21
22/* Don't use rewriting implementation if kernel text pages are read-only.
23 * Ftrace gets around this by using the identity mapping, but that's more
24 * effort than is warranted right now for Feather-Trace.
25 * Eventually, it may make sense to replace Feather-Trace with ftrace.
26 */
27#if defined(CONFIG_ARCH_HAS_FEATHER_TRACE) && !defined(CONFIG_DEBUG_RODATA)
28
29#include <asm/feather_trace.h>
30
31#else /* !CONFIG_ARCH_HAS_FEATHER_TRACE || CONFIG_DEBUG_RODATA */
32
33/* provide default implementation */
34
35#include <asm/timex.h> /* for get_cycles() */
36
37static inline unsigned long long ft_timestamp(void)
38{
39 return get_cycles();
40}
41
42#define feather_callback
43
44#define MAX_EVENTS 1024
45
46extern int ft_events[MAX_EVENTS];
47
48#define ft_event(id, callback) \
49 if (ft_events[id]) callback();
50
51#define ft_event0(id, callback) \
52 if (ft_events[id]) callback(id);
53
54#define ft_event1(id, callback, param) \
55 if (ft_events[id]) callback(id, param);
56
57#define ft_event2(id, callback, param, param2) \
58 if (ft_events[id]) callback(id, param, param2);
59
60#define ft_event3(id, callback, p, p2, p3) \
61 if (ft_events[id]) callback(id, p, p2, p3);
62
63#endif /* CONFIG_ARCH_HAS_FEATHER_TRACE */
64
65#endif
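A hypothetical probe wired up through this API; DEMO_EVENT and demo_probe are invented, and the event fires only after ft_enable_event(DEMO_EVENT) has been called.

#include <litmus/feather_trace.h>

#define DEMO_EVENT 777	/* arbitrary ID, below MAX_EVENTS in the fallback */

/* Callback body; 'feather_callback' expands to the arch-specific calling
 * convention (or to nothing in the generic fallback above). */
feather_callback void demo_probe(unsigned long id)
{
	/* e.g. record ft_timestamp() somewhere */
}

static void demo_hot_path(void)
{
	/* A cheap flag check (or a patched jump with the rewriting
	 * implementation) when the event is disabled. */
	ft_event0(DEMO_EVENT, demo_probe);
}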
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h
new file mode 100644
index 000000000000..dd1f7bf1e347
--- /dev/null
+++ b/include/litmus/fp_common.h
@@ -0,0 +1,105 @@
1/* Fixed-priority scheduler support.
2 */
3
4#ifndef __FP_COMMON_H__
5#define __FP_COMMON_H__
6
7#include <litmus/rt_domain.h>
8
9#include <asm/bitops.h>
10
11
12void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
13 release_jobs_t release);
14
15int fp_higher_prio(struct task_struct* first,
16 struct task_struct* second);
17
18int fp_ready_order(struct bheap_node* a, struct bheap_node* b);
19
20#define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG)
21
22#if (LITMUS_MAX_PRIORITY % BITS_PER_LONG)
23#error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG
24#endif
25
26/* bitmask-indexed priority queue */
27struct fp_prio_queue {
28 unsigned long bitmask[FP_PRIO_BIT_WORDS];
29 struct bheap queue[LITMUS_MAX_PRIORITY];
30};
31
32void fp_prio_queue_init(struct fp_prio_queue* q);
33
34static inline void fpq_set(struct fp_prio_queue* q, unsigned int index)
35{
36 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
37 __set_bit(index % BITS_PER_LONG, word);
38}
39
40static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index)
41{
42 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
43 __clear_bit(index % BITS_PER_LONG, word);
44}
45
46static inline unsigned int fpq_find(struct fp_prio_queue* q)
47{
48 int i;
49
50 /* loop optimizer should unroll this */
51 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
52 if (q->bitmask[i])
53 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
54
55 return LITMUS_MAX_PRIORITY; /* nothing found */
56}
57
58static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
59{
60
61 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
62
63 fpq_set(q, index);
64 bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
65}
66
67static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
68{
69 BUG_ON(!is_queued(t));
70
71 bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
72 if (likely(bheap_empty(&q->queue[index])))
73 fpq_clear(q, index);
74}
75
76static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q)
77{
78 unsigned int idx = fpq_find(q);
79 struct bheap_node* hn;
80
81 if (idx < LITMUS_MAX_PRIORITY) {
82 hn = bheap_peek(fp_ready_order, &q->queue[idx]);
83 return bheap2task(hn);
84 } else
85 return NULL;
86}
87
88static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q)
89{
90 unsigned int idx = fpq_find(q);
91 struct bheap_node* hn;
92
93 if (idx < LITMUS_MAX_PRIORITY) {
94 hn = bheap_take(fp_ready_order, &q->queue[idx]);
95 if (likely(bheap_empty(&q->queue[idx])))
96 fpq_clear(q, idx);
97 return bheap2task(hn);
98 } else
99 return NULL;
100}
101
102int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t);
103
104
105#endif
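As a sketch of intended use, two hypothetical helpers for a partitioned fixed-priority plugin follow; they assume tsk_rt(t)->heap_node was set up when the task was admitted.

#include <litmus/litmus.h>
#include <litmus/fp_common.h>

static void demo_requeue(struct fp_prio_queue *q, struct task_struct *t)
{
	fp_prio_add(q, t, get_priority(t));
}

static struct task_struct *demo_pick_next(struct fp_prio_queue *q,
					  struct task_struct *prev)
{
	/* Take a queued job only if it should preempt the current one. */
	if (!prev || fp_preemption_needed(q, prev))
		return fp_prio_take(q);
	else
		return prev;
}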
diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h
new file mode 100644
index 000000000000..0b959874dd70
--- /dev/null
+++ b/include/litmus/ftdev.h
@@ -0,0 +1,55 @@
1#ifndef _LITMUS_FTDEV_H_
2#define _LITMUS_FTDEV_H_
3
4#include <litmus/feather_trace.h>
5#include <litmus/feather_buffer.h>
6#include <linux/mutex.h>
7#include <linux/cdev.h>
8
9#define FTDEV_ENABLE_CMD 0
10#define FTDEV_DISABLE_CMD 1
11
12struct ftdev;
13
14/* return 0 if buffer can be opened, otherwise -$REASON */
15typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no);
16/* return 0 on success, otherwise -$REASON */
17typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no);
18typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no);
19/* Let devices handle writes from userspace. No synchronization provided. */
20typedef ssize_t (*ftdev_write_t)(struct ft_buffer* buf, size_t len, const char __user *from);
21
22struct ftdev_event;
23
24struct ftdev_minor {
25 struct ft_buffer* buf;
26 unsigned int readers;
27 struct mutex lock;
28 /* FIXME: filter for authorized events */
29 struct ftdev_event* events;
30 struct device* device;
31 struct ftdev* ftdev;
32};
33
34struct ftdev {
35 dev_t major;
36 struct cdev cdev;
37 struct class* class;
38 const char* name;
39 struct ftdev_minor* minor;
40 unsigned int minor_cnt;
41 ftdev_alloc_t alloc;
42 ftdev_free_t free;
43 ftdev_can_open_t can_open;
44 ftdev_write_t write;
45};
46
47struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size);
48void free_ft_buffer(struct ft_buffer* buf);
49
50int ftdev_init( struct ftdev* ftdev, struct module* owner,
51 const int minor_cnt, const char* name);
52void ftdev_exit(struct ftdev* ftdev);
53int register_ftdev(struct ftdev* ftdev);
54
55#endif
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
new file mode 100644
index 000000000000..9bd361ef3943
--- /dev/null
+++ b/include/litmus/jobs.h
@@ -0,0 +1,9 @@
1#ifndef __LITMUS_JOBS_H__
2#define __LITMUS_JOBS_H__
3
4void prepare_for_next_period(struct task_struct *t);
5void release_at(struct task_struct *t, lt_t start);
6long complete_job(void);
7
8#endif
9
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
new file mode 100644
index 000000000000..31ac72eddef7
--- /dev/null
+++ b/include/litmus/litmus.h
@@ -0,0 +1,292 @@
1/*
2 * Constant definitions related to
3 * scheduling policy.
4 */
5
6#ifndef _LINUX_LITMUS_H_
7#define _LINUX_LITMUS_H_
8
9#include <litmus/debug_trace.h>
10
11#ifdef CONFIG_RELEASE_MASTER
12extern atomic_t release_master_cpu;
13#endif
14
15/* in_list - is a given list_head queued on some list?
16 */
17static inline int in_list(struct list_head* list)
18{
19 return !( /* case 1: deleted */
20 (list->next == LIST_POISON1 &&
21 list->prev == LIST_POISON2)
22 ||
23 /* case 2: initialized */
24 (list->next == list &&
25 list->prev == list)
26 );
27}
28
29#define NO_CPU 0xffffffff
30
31void litmus_fork(struct task_struct *tsk);
32void litmus_exec(void);
33/* clean up real-time state of a task */
34void exit_litmus(struct task_struct *dead_tsk);
35
36long litmus_admit_task(struct task_struct *tsk);
37void litmus_exit_task(struct task_struct *tsk);
38
39#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
40#define rt_transition_pending(t) \
41 ((t)->rt_param.transition_pending)
42
43#define tsk_rt(t) (&(t)->rt_param)
44
45/* Realtime utility macros */
46#define get_rt_flags(t) (tsk_rt(t)->flags)
47#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f))
48#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost)
49#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
50#define get_rt_period(t) (tsk_rt(t)->task_params.period)
51#define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
52#define get_partition(t) (tsk_rt(t)->task_params.cpu)
53#define get_priority(t) (tsk_rt(t)->task_params.priority)
54#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
55#define get_release(t) (tsk_rt(t)->job_params.release)
56#define get_class(t) (tsk_rt(t)->task_params.cls)
57
58#define is_priority_boosted(t) (tsk_rt(t)->priority_boosted)
59#define get_boost_start(t) (tsk_rt(t)->boost_start_time)
60
61inline static int budget_exhausted(struct task_struct* t)
62{
63 return get_exec_time(t) >= get_exec_cost(t);
64}
65
66inline static lt_t budget_remaining(struct task_struct* t)
67{
68 if (!budget_exhausted(t))
69 return get_exec_cost(t) - get_exec_time(t);
70 else
71 /* avoid overflow */
72 return 0;
73}
74
75#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
76
77#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
78 == PRECISE_ENFORCEMENT)
79
80#define is_hrt(t) \
81 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
82#define is_srt(t) \
83 (tsk_rt(t)->task_params.cls == RT_CLASS_SOFT)
84#define is_be(t) \
85 (tsk_rt(t)->task_params.cls == RT_CLASS_BEST_EFFORT)
86
87/* Our notion of time within LITMUS: kernel monotonic time. */
88static inline lt_t litmus_clock(void)
89{
90 return ktime_to_ns(ktime_get());
91}
92
93/* A macro to convert from nanoseconds to ktime_t. */
94#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
95
96#define get_domain(t) (tsk_rt(t)->domain)
97
98/* Honor the flag in the preempt_count variable that is set
99 * when scheduling is in progress.
100 */
101#define is_running(t) \
102 ((t)->state == TASK_RUNNING || \
103 task_thread_info(t)->preempt_count & PREEMPT_ACTIVE)
104
105#define is_blocked(t) \
106 (!is_running(t))
107#define is_released(t, now) \
108 (lt_before_eq(get_release(t), now))
109#define is_tardy(t, now) \
110 (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
111
112/* real-time comparison macros */
113#define earlier_deadline(a, b) (lt_before(\
114 (a)->rt_param.job_params.deadline,\
115 (b)->rt_param.job_params.deadline))
116#define earlier_release(a, b) (lt_before(\
117 (a)->rt_param.job_params.release,\
118 (b)->rt_param.job_params.release))
119
120void preempt_if_preemptable(struct task_struct* t, int on_cpu);
121
122#ifdef CONFIG_LITMUS_LOCKING
123void srp_ceiling_block(void);
124#else
125#define srp_ceiling_block() /* nothing */
126#endif
127
128#define bheap2task(hn) ((struct task_struct*) hn->value)
129
130static inline struct control_page* get_control_page(struct task_struct *t)
131{
132 return tsk_rt(t)->ctrl_page;
133}
134
135static inline int has_control_page(struct task_struct* t)
136{
137 return tsk_rt(t)->ctrl_page != NULL;
138}
139
140#ifdef CONFIG_NP_SECTION
141
142static inline int is_kernel_np(struct task_struct *t)
143{
144 return tsk_rt(t)->kernel_np;
145}
146
147static inline int is_user_np(struct task_struct *t)
148{
149 return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0;
150}
151
152static inline void request_exit_np(struct task_struct *t)
153{
154 if (is_user_np(t)) {
155 /* Set the flag that tells user space to call
156 * into the kernel at the end of a critical section. */
157 if (likely(tsk_rt(t)->ctrl_page)) {
158 TRACE_TASK(t, "setting delayed_preemption flag\n");
159 tsk_rt(t)->ctrl_page->sched.np.preempt = 1;
160 }
161 }
162}
163
164static inline void make_np(struct task_struct *t)
165{
166 tsk_rt(t)->kernel_np++;
167}
168
169/* Caller should check if preemption is necessary when
170 * the function returns 0.
171 */
172static inline int take_np(struct task_struct *t)
173{
174 return --tsk_rt(t)->kernel_np;
175}
176
177/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
178static inline int request_exit_np_atomic(struct task_struct *t)
179{
180 union np_flag old, new;
181 int ok;
182
183 if (tsk_rt(t)->ctrl_page) {
184 old.raw = tsk_rt(t)->ctrl_page->sched.raw;
185 if (old.np.flag == 0) {
186 /* no longer non-preemptive */
187 return 0;
188 } else if (old.np.preempt) {
189 /* already set, nothing for us to do */
190 TRACE_TASK(t, "not setting np.preempt flag again\n");
191 return 1;
192 } else {
193 /* non preemptive and flag not set */
194 new.raw = old.raw;
195 new.np.preempt = 1;
196 /* if we get old back, then we atomically set the flag */
197 ok = cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw;
198 /* If we raced with a concurrent change, then so be
199 * it. Deliver it by IPI. We don't want an unbounded
200 * retry loop here since tasks might exploit that to
201 * keep the kernel busy indefinitely. */
202 TRACE_TASK(t, "request_exit_np => %d\n", ok);
203 return ok;
204 }
205 } else
206 return 0;
207}
208
209#else
210
211static inline int is_kernel_np(struct task_struct* t)
212{
213 return 0;
214}
215
216static inline int is_user_np(struct task_struct* t)
217{
218 return 0;
219}
220
221static inline void request_exit_np(struct task_struct *t)
222{
223 /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
224 BUG();
225}
226
227static inline int request_exit_np_atomic(struct task_struct *t)
228{
229 return 0;
230}
231
232#endif
233
234static inline void clear_exit_np(struct task_struct *t)
235{
236 if (likely(tsk_rt(t)->ctrl_page))
237 tsk_rt(t)->ctrl_page->sched.np.preempt = 0;
238}
239
240static inline int is_np(struct task_struct *t)
241{
242#ifdef CONFIG_SCHED_DEBUG_TRACE
243 int kernel, user;
244 kernel = is_kernel_np(t);
245 user = is_user_np(t);
246 if (kernel || user)
247 TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
248
249 kernel, user);
250 return kernel || user;
251#else
252 return unlikely(is_kernel_np(t) || is_user_np(t));
253#endif
254}
255
256static inline int is_present(struct task_struct* t)
257{
258 return t && tsk_rt(t)->present;
259}
260
261
262/* make the unit explicit */
263typedef unsigned long quanta_t;
264
265enum round {
266 FLOOR,
267 CEIL
268};
269
270
271/* Tick period is used to convert ns-specified execution
272 * costs and periods into tick-based equivalents.
273 */
274extern ktime_t tick_period;
275
276static inline quanta_t time2quanta(lt_t time, enum round round)
277{
278 s64 quantum_length = ktime_to_ns(tick_period);
279
280 if (do_div(time, quantum_length) && round == CEIL)
281 time++;
282 return (quanta_t) time;
283}
284
285/* By how much is cpu staggered behind CPU 0? */
286u64 cpu_stagger_offset(int cpu);
287
288#define TS_SYSCALL_IN_START \
289 if (has_control_page(current)) \
290 __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start);
291
292#endif
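Two hypothetical helpers illustrating the budget macros and time2quanta() above; the names are invented.

#include <linux/sched.h>
#include <litmus/litmus.h>

/* Would a plugin's tick handler want to deschedule this job? */
static int demo_out_of_budget(struct task_struct *t)
{
	return is_realtime(t) && budget_enforced(t) && budget_exhausted(t);
}

/* Convert a nanosecond span into scheduling quanta, rounding up. */
static quanta_t demo_ns_to_quanta(lt_t delta)
{
	return time2quanta(delta, CEIL);
}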
diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h
new file mode 100644
index 000000000000..6800e725d48c
--- /dev/null
+++ b/include/litmus/litmus_proc.h
@@ -0,0 +1,25 @@
1#include <litmus/sched_plugin.h>
2#include <linux/proc_fs.h>
3
4int __init init_litmus_proc(void);
5void exit_litmus_proc(void);
6
7/*
8 * On success, returns 0 and sets the pointer to the location of the new
9 * proc dir entry, otherwise returns an error code and sets pde to NULL.
10 */
11long make_plugin_proc_dir(struct sched_plugin* plugin,
12 struct proc_dir_entry** pde);
13
14/*
15 * Plugins should deallocate all child proc directory entries before
16 * calling this, to avoid memory leaks.
17 */
18void remove_plugin_proc_dir(struct sched_plugin* plugin);
19
20
21/* Copy at most ksize-1 bytes from ubuf into kbuf, null-terminate kbuf, and
22 * remove a '\n' if present. Returns the number of bytes that were read or
23 * -EFAULT. */
24int copy_and_chomp(char *kbuf, unsigned long ksize,
25 __user const char* ubuf, unsigned long ulength);
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
new file mode 100644
index 000000000000..4d7b870cb443
--- /dev/null
+++ b/include/litmus/locking.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_LOCKING_H
2#define LITMUS_LOCKING_H
3
4struct litmus_lock_ops;
5
6/* Generic base struct for LITMUS^RT userspace semaphores.
7 * This structure should be embedded in protocol-specific semaphores.
8 */
9struct litmus_lock {
10 struct litmus_lock_ops *ops;
11 int type;
12};
13
14struct litmus_lock_ops {
15 /* Current task tries to obtain / drop a reference to a lock.
16 * Optional methods, allowed by default. */
17 int (*open)(struct litmus_lock*, void* __user);
18 int (*close)(struct litmus_lock*);
19
20 /* Current tries to lock/unlock this lock (mandatory methods). */
21 int (*lock)(struct litmus_lock*);
22 int (*unlock)(struct litmus_lock*);
23
24 /* The lock is no longer being referenced (mandatory method). */
25 void (*deallocate)(struct litmus_lock*);
26};
27
28#endif
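A skeletal protocol implementation showing how the ops table is meant to be wired up; struct demo_semaphore and the empty lock/unlock bodies are placeholders, not a real protocol.

#include <linux/kernel.h>	/* container_of() */
#include <linux/slab.h>
#include <litmus/locking.h>

struct demo_semaphore {
	struct litmus_lock litmus_lock;	/* embedded base object */
	struct task_struct *owner;
};

static int demo_lock(struct litmus_lock *l)
{
	return 0;	/* a real protocol blocks or boosts priority here */
}

static int demo_unlock(struct litmus_lock *l)
{
	return 0;
}

static void demo_deallocate(struct litmus_lock *l)
{
	kfree(container_of(l, struct demo_semaphore, litmus_lock));
}

static struct litmus_lock_ops demo_ops = {
	.lock       = demo_lock,
	.unlock     = demo_unlock,
	.deallocate = demo_deallocate,
};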
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
new file mode 100644
index 000000000000..f3cf29ad87ee
--- /dev/null
+++ b/include/litmus/preempt.h
@@ -0,0 +1,165 @@
1#ifndef LITMUS_PREEMPT_H
2#define LITMUS_PREEMPT_H
3
4#include <linux/types.h>
5#include <linux/cache.h>
6#include <linux/percpu.h>
7#include <asm/atomic.h>
8
9#include <litmus/debug_trace.h>
10
11extern DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
12
13//#ifdef CONFIG_DEBUG_KERNEL
14#if 0
15const char* sched_state_name(int s);
16#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args)
17#else
18#define TRACE_STATE(fmt, args...) /* ignore */
19#endif
20
21#define VERIFY_SCHED_STATE(x) \
22 do { int __s = get_sched_state(); \
23 if ((__s & (x)) == 0) \
24 TRACE_STATE("INVALID s=0x%x (%s) not " \
25 "in 0x%x (%s) [%s]\n", \
26 __s, sched_state_name(__s), \
27 (x), #x, __FUNCTION__); \
28 } while (0);
29
30#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \
31 TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
32 cpu, (x), sched_state_name(x), \
33 (y), sched_state_name(y))
34
35
36typedef enum scheduling_state {
37 TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that
38 * should be scheduled, and the processor does not
39 * plan to invoke schedule(). */
40 SHOULD_SCHEDULE = (1 << 1), /* A remote processor has determined that the
41 * processor should reschedule, but this has not
42 * been communicated yet (IPI still pending). */
43 WILL_SCHEDULE = (1 << 2), /* The processor has noticed that it has to
44 * reschedule and will do so shortly. */
45 TASK_PICKED = (1 << 3), /* The processor is currently executing schedule(),
46 * has selected a new task to schedule, but has not
47 * yet performed the actual context switch. */
48 PICKED_WRONG_TASK = (1 << 4), /* The processor has not yet performed the context
49 * switch, but a remote processor has already
50 * determined that a higher-priority task became
51 * eligible after the task was picked. */
52} sched_state_t;
53
54static inline sched_state_t get_sched_state_on(int cpu)
55{
56 return atomic_read(&per_cpu(resched_state, cpu));
57}
58
59static inline sched_state_t get_sched_state(void)
60{
61 return atomic_read(&__get_cpu_var(resched_state));
62}
63
64static inline int is_in_sched_state(int possible_states)
65{
66 return get_sched_state() & possible_states;
67}
68
69static inline int cpu_is_in_sched_state(int cpu, int possible_states)
70{
71 return get_sched_state_on(cpu) & possible_states;
72}
73
74static inline void set_sched_state(sched_state_t s)
75{
76 TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id());
77 atomic_set(&__get_cpu_var(resched_state), s);
78}
79
80static inline int sched_state_transition(sched_state_t from, sched_state_t to)
81{
82 sched_state_t old_state;
83
84 old_state = atomic_cmpxchg(&__get_cpu_var(resched_state), from, to);
85 if (old_state == from) {
86 TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id());
87 return 1;
88 } else
89 return 0;
90}
91
92static inline int sched_state_transition_on(int cpu,
93 sched_state_t from,
94 sched_state_t to)
95{
96 sched_state_t old_state;
97
98 old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to);
99 if (old_state == from) {
100 TRACE_SCHED_STATE_CHANGE(from, to, cpu);
101 return 1;
102 } else
103 return 0;
104}
105
106/* Plugins must call this function after they have decided which job to
107 * schedule next. IMPORTANT: this function must be called while still holding
108 * the lock that is used to serialize scheduling decisions.
109 *
110 * (Ideally, we would like to use runqueue locks for this purpose, but that
111 * would lead to deadlocks with the migration code.)
112 */
113static inline void sched_state_task_picked(void)
114{
115 VERIFY_SCHED_STATE(WILL_SCHEDULE);
116
117 /* WILL_SCHEDULE has only a local transition => simple store is ok */
118 set_sched_state(TASK_PICKED);
119}
120
121static inline void sched_state_entered_schedule(void)
122{
123 /* Update state for the case that we entered schedule() not due to
124 * set_tsk_need_resched() */
125 set_sched_state(WILL_SCHEDULE);
126}
127
128/* Called by schedule() to check if the scheduling decision is still valid
129 * after a context switch. Returns 1 if the CPU needs to reschedule. */
130static inline int sched_state_validate_switch(void)
131{
132 int left_state_ok = 0;
133
134 VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED);
135
136 if (is_in_sched_state(TASK_PICKED)) {
137 /* Might be good; let's try to transition out of this
138 * state. This must be done atomically since remote processors
139 * may try to change the state, too. */
140 left_state_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED);
141 }
142
143 if (!left_state_ok) {
144 /* We raced with a higher-priority task arrival => not
145 * valid. The CPU needs to reschedule. */
146 set_sched_state(WILL_SCHEDULE);
147 return 1;
148 } else
149 return 0;
150}
151
152/* State transition events. See litmus/preempt.c for details. */
153void sched_state_will_schedule(struct task_struct* tsk);
154void sched_state_ipi(void);
155/* Cause a CPU (remote or local) to reschedule. */
156void litmus_reschedule(int cpu);
157void litmus_reschedule_local(void);
158
159#ifdef CONFIG_DEBUG_KERNEL
160void sched_state_plugin_check(void);
161#else
162#define sched_state_plugin_check() /* no check */
163#endif
164
165#endif
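A minimal sketch of the protocol a plugin's schedule() callback follows, assuming the plugin serializes its decisions with its own raw spinlock; demo_lock, demo_pick_next_job, and demo_preempt are placeholders.

#include <linux/spinlock.h>
#include <linux/sched.h>
#include <litmus/preempt.h>

static DEFINE_RAW_SPINLOCK(demo_lock);		/* placeholder plugin lock */

static struct task_struct *demo_pick_next_job(void)
{
	return NULL;	/* placeholder: consult the plugin's ready queue */
}

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	struct task_struct *next;

	raw_spin_lock(&demo_lock);
	next = demo_pick_next_job();
	/* Must be called while still holding the lock that serializes
	 * scheduling decisions, so that a remote release observing
	 * TASK_PICKED can flag PICKED_WRONG_TASK in time. */
	sched_state_task_picked();
	raw_spin_unlock(&demo_lock);

	return next;
}

/* Remote preemption, e.g. after a higher-priority release on 'cpu'. */
static void demo_preempt(int cpu)
{
	litmus_reschedule(cpu);
}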
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
new file mode 100644
index 000000000000..ac249292e866
--- /dev/null
+++ b/include/litmus/rt_domain.h
@@ -0,0 +1,182 @@
1/* CLEANUP: Add comments and make it less messy.
2 *
3 */
4
5#ifndef __UNC_RT_DOMAIN_H__
6#define __UNC_RT_DOMAIN_H__
7
8#include <litmus/bheap.h>
9
10#define RELEASE_QUEUE_SLOTS 127 /* prime */
11
12struct _rt_domain;
13
14typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
15typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
16
17struct release_queue {
18 /* each slot maintains a list of release heaps sorted
19 * by release time */
20 struct list_head slot[RELEASE_QUEUE_SLOTS];
21};
22
23typedef struct _rt_domain {
24 /* runnable rt tasks are in here */
25 raw_spinlock_t ready_lock;
26 struct bheap ready_queue;
27
28 /* real-time tasks waiting for release are in here */
29 raw_spinlock_t release_lock;
30 struct release_queue release_queue;
31
32#ifdef CONFIG_RELEASE_MASTER
33 int release_master;
34#endif
35
36 /* for moving tasks to the release queue */
37 raw_spinlock_t tobe_lock;
38 struct list_head tobe_released;
39
40 /* how do we check if we need to kick another CPU? */
41 check_resched_needed_t check_resched;
42
43 /* how do we release jobs? */
44 release_jobs_t release_jobs;
45
46 /* how are tasks ordered in the ready queue? */
47 bheap_prio_t order;
48} rt_domain_t;
49
50struct release_heap {
51 /* list_head for per-time-slot list */
52 struct list_head list;
53 lt_t release_time;
54 /* all tasks to be released at release_time */
55 struct bheap heap;
56 /* used to trigger the release */
57 struct hrtimer timer;
58
59#ifdef CONFIG_RELEASE_MASTER
60 /* used to delegate releases */
61 struct hrtimer_start_on_info info;
62#endif
63 /* required for the timer callback */
64 rt_domain_t* dom;
65};
66
67
68static inline struct task_struct* __next_ready(rt_domain_t* rt)
69{
70 struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
71 if (hn)
72 return bheap2task(hn);
73 else
74 return NULL;
75}
76
77void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
78 check_resched_needed_t check,
79 release_jobs_t release);
80
81void __add_ready(rt_domain_t* rt, struct task_struct *new);
82void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
83void __add_release(rt_domain_t* rt, struct task_struct *task);
84
85static inline struct task_struct* __take_ready(rt_domain_t* rt)
86{
87 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
88 if (hn)
89 return bheap2task(hn);
90 else
91 return NULL;
92}
93
94static inline struct task_struct* __peek_ready(rt_domain_t* rt)
95{
96 struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
97 if (hn)
98 return bheap2task(hn);
99 else
100 return NULL;
101}
102
103static inline int is_queued(struct task_struct *t)
104{
105 BUG_ON(!tsk_rt(t)->heap_node);
106 return bheap_node_in_heap(tsk_rt(t)->heap_node);
107}
108
109static inline void remove(rt_domain_t* rt, struct task_struct *t)
110{
111 bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
112}
113
114static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
115{
116 unsigned long flags;
117 /* first we need the write lock for rt_ready_queue */
118 raw_spin_lock_irqsave(&rt->ready_lock, flags);
119 __add_ready(rt, new);
120 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
121}
122
123static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
124{
125 unsigned long flags;
126 raw_spin_lock_irqsave(&rt->ready_lock, flags);
127 __merge_ready(rt, tasks);
128 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
129}
130
131static inline struct task_struct* take_ready(rt_domain_t* rt)
132{
133 unsigned long flags;
134 struct task_struct* ret;
135 /* first we need the write lock for rt_ready_queue */
136 raw_spin_lock_irqsave(&rt->ready_lock, flags);
137 ret = __take_ready(rt);
138 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
139 return ret;
140}
141
142
143static inline void add_release(rt_domain_t* rt, struct task_struct *task)
144{
145 unsigned long flags;
146 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
147 __add_release(rt, task);
148 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
149}
150
151#ifdef CONFIG_RELEASE_MASTER
152void __add_release_on(rt_domain_t* rt, struct task_struct *task,
153 int target_cpu);
154
155static inline void add_release_on(rt_domain_t* rt,
156 struct task_struct *task,
157 int target_cpu)
158{
159 unsigned long flags;
160 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
161 __add_release_on(rt, task, target_cpu);
162 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
163}
164#endif
165
166static inline int __jobs_pending(rt_domain_t* rt)
167{
168 return !bheap_empty(&rt->ready_queue);
169}
170
171static inline int jobs_pending(rt_domain_t* rt)
172{
173 unsigned long flags;
174 int ret;
175 /* first we need the write lock for rt_ready_queue */
176 raw_spin_lock_irqsave(&rt->ready_lock, flags);
177 ret = !bheap_empty(&rt->ready_queue);
178 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
179 return ret;
180}
181
182#endif
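To show how these pieces fit together, here is a hypothetical EDF-ordered domain; all demo_* names are invented, and passing NULL for the release callback is assumed to select the default release behavior.

#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>
#include <litmus/jobs.h>

static rt_domain_t demo_domain;

static int demo_check_resched(rt_domain_t *rt)
{
	/* A real plugin would compare __next_ready(rt) with the running job
	 * and call preempt_if_preemptable() if a preemption is in order. */
	return 0;
}

static void demo_domain_setup(void)
{
	edf_domain_init(&demo_domain, demo_check_resched, NULL);
}

/* Job completion path: set up the next job and queue it according to
 * whether its release time has already passed. */
static void demo_job_completion(struct task_struct *t)
{
	prepare_for_next_period(t);
	if (is_released(t, litmus_clock()))
		add_ready(&demo_domain, t);
	else
		add_release(&demo_domain, t);
}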
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
new file mode 100644
index 000000000000..a23ce1524051
--- /dev/null
+++ b/include/litmus/rt_param.h
@@ -0,0 +1,228 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_RT_PARAM_H_
6#define _LINUX_RT_PARAM_H_
7
8/* Litmus time type. */
9typedef unsigned long long lt_t;
10
11static inline int lt_after(lt_t a, lt_t b)
12{
13 return ((long long) b) - ((long long) a) < 0;
14}
15#define lt_before(a, b) lt_after(b, a)
16
17static inline int lt_after_eq(lt_t a, lt_t b)
18{
19 return ((long long) a) - ((long long) b) >= 0;
20}
21#define lt_before_eq(a, b) lt_after_eq(b, a)
22
23/* different types of clients */
24typedef enum {
25 RT_CLASS_HARD,
26 RT_CLASS_SOFT,
27 RT_CLASS_BEST_EFFORT
28} task_class_t;
29
30typedef enum {
31 NO_ENFORCEMENT, /* job may overrun unhindered */
32 QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
33 PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
34} budget_policy_t;
35
36#define LITMUS_MAX_PRIORITY 512
37
38struct rt_task {
39 lt_t exec_cost;
40 lt_t period;
41 lt_t phase;
42 unsigned int cpu;
43 unsigned int priority;
44 task_class_t cls;
45 budget_policy_t budget_policy; /* ignored by pfair */
46};
47
48union np_flag {
49 uint32_t raw;
50 struct {
51 /* Is the task currently in a non-preemptive section? */
52 uint32_t flag:31;
53 /* Should the task call into the scheduler? */
54 uint32_t preempt:1;
55 } np;
56};
57
58/* The definition of the data that is shared between the kernel and real-time
59 * tasks via a shared page (see litmus/ctrldev.c).
60 *
61 * WARNING: User space can write to this, so don't trust
62 * the correctness of the fields!
63 *
64 * This serves two purposes: to enable efficient signaling
65 * of non-preemptive sections (user->kernel) and
66 * delayed preemptions (kernel->user), and to export
67 * some real-time relevant statistics such as preemption and
68 * migration data to user space. (We can't use a device to export
69 * statistics because we want to avoid system call overhead when
70 * determining preemption/migration overheads).
71 */
72struct control_page {
73 volatile union np_flag sched;
74
75 /* locking overhead tracing: time stamp prior to system call */
76 uint64_t ts_syscall_start; /* Feather-Trace cycles */
77
78 /* to be extended */
79};
80
81/* don't export internal data structures to user space (liblitmus) */
82#ifdef __KERNEL__
83
84struct _rt_domain;
85struct bheap_node;
86struct release_heap;
87
88struct rt_job {
89 /* Time instant at which the job was or will be released. */
90 lt_t release;
91 /* What is the current deadline? */
92 lt_t deadline;
93
94 /* How much service has this job received so far? */
95 lt_t exec_time;
96
97 /* Which job is this. This is used to let user space
98 * specify which job to wait for, which is important if jobs
99 * overrun. If we just call sys_sleep_next_period() then we
100 * will unintentionally miss jobs after an overrun.
101 *
102 * Increase this sequence number when a job is released.
103 */
104 unsigned int job_no;
105};
106
107struct pfair_param;
108
109/* RT task parameters for scheduling extensions
110 * These parameters are inherited during clone and therefore must
111 * be explicitly set up before the task set is launched.
112 */
113struct rt_param {
114 /* is the task sleeping? */
115 unsigned int flags:8;
116
117 /* do we need to check for srp blocking? */
118 unsigned int srp_non_recurse:1;
119
120 /* is the task present? (true if it can be scheduled) */
121 unsigned int present:1;
122
123#ifdef CONFIG_LITMUS_LOCKING
124 /* Is the task being priority-boosted by a locking protocol? */
125 unsigned int priority_boosted:1;
126 /* If so, when did this start? */
127 lt_t boost_start_time;
128#endif
129
130 /* user controlled parameters */
131 struct rt_task task_params;
132
133 /* timing parameters */
134 struct rt_job job_params;
135
136 /* task representing the current "inherited" task
137 * priority, assigned by inherit_priority and
138 * return_priority in the scheduler plugins.
139 * Could point to self if PI does not result in
140 * an increased task priority.
141 */
142 struct task_struct* inh_task;
143
144#ifdef CONFIG_NP_SECTION
145 /* For the FMLP under PSN-EDF, it is required to make the task
146 * non-preemptive from kernel space. In order not to interfere with
147 * user space, this counter indicates the kernel space np setting.
148 * kernel_np > 0 => task is non-preemptive
149 */
150 unsigned int kernel_np;
151#endif
152
153 /* This field can be used by plugins to store where the task
154 * is currently scheduled. It is the responsibility of the
155 * plugin to avoid race conditions.
156 *
157 * This is used by GSN-EDF and PFAIR.
158 */
159 volatile int scheduled_on;
160
161 /* Is the stack of the task currently in use? This is updated by
162 * the LITMUS core.
163 *
164 * Be careful to avoid deadlocks!
165 */
166 volatile int stack_in_use;
167
168 /* This field can be used by plugins to store where the task
169 * is currently linked. It is the responsibility of the plugin
170 * to avoid race conditions.
171 *
172 * Used by GSN-EDF.
173 */
174 volatile int linked_on;
175
176 /* PFAIR/PD^2 state. Allocated on demand. */
177 struct pfair_param* pfair;
178
179 /* Fields saved before BE->RT transition.
180 */
181 int old_policy;
182 int old_prio;
183
184 /* ready queue for this task */
185 struct _rt_domain* domain;
186
187 /* heap element for this task
188 *
189 * Warning: Don't statically allocate this node. The heap
190 * implementation swaps these between tasks, thus after
191 * dequeuing from a heap you may end up with a different node
192 * than the one you had when enqueuing the task. For the same
193 * reason, don't obtain and store references to this node
194 * other than this pointer (which is updated by the heap
195 * implementation).
196 */
197 struct bheap_node* heap_node;
198 struct release_heap* rel_heap;
199
200#ifdef CONFIG_LITMUS_LOCKING
201 /* task in heap of pending jobs -- used by C-EDF for priority donation */
202 struct bheap_node* pending_node;
203 /* is the job in a critical section or a wait queue? */
204 unsigned int request_incomplete;
205 /* is the job currently a donor? */
206 unsigned int is_donor;
207 /* is this job suspended, waiting to become eligible? */
208 unsigned int waiting_eligible;
209
210 int pending_on;
211#endif
212
213 /* Used by rt_domain to queue task in release list.
214 */
215 struct list_head list;
216
217 /* Pointer to the page shared between userspace and kernel. */
218 struct control_page * ctrl_page;
219};
220
221/* Possible RT flags */
222#define RT_F_RUNNING 0x00000000
223#define RT_F_SLEEP 0x00000001
224#define RT_F_EXIT_SEM 0x00000008
225
226#endif
227
228#endif
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
new file mode 100644
index 000000000000..b5d1ae7bc3b6
--- /dev/null
+++ b/include/litmus/sched_plugin.h
@@ -0,0 +1,117 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_SCHED_PLUGIN_H_
6#define _LINUX_SCHED_PLUGIN_H_
7
8#include <linux/sched.h>
9
10#ifdef CONFIG_LITMUS_LOCKING
11#include <litmus/locking.h>
12#endif
13
14/************************ setup/tear down ********************/
15
16typedef long (*activate_plugin_t) (void);
17typedef long (*deactivate_plugin_t) (void);
18
19
20
21/********************* scheduler invocation ******************/
22
23/* Plugin-specific realtime tick handler */
24typedef void (*scheduler_tick_t) (struct task_struct *cur);
25/* Plugin-specific scheduling decision function */
26typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
27/* Clean up after the task switch has occurred.
28 * This function is called after every (even non-rt) task switch.
29 */
30typedef void (*finish_switch_t)(struct task_struct *prev);
31
32
33/********************* task state changes ********************/
34
35/* Called to setup a new real-time task.
36 * Release the first job, enqueue, etc.
37 * Task may already be running.
38 */
39typedef void (*task_new_t) (struct task_struct *task,
40 int on_rq,
41 int running);
42
43/* Called to re-introduce a task after blocking.
44 * Can potentially be called multiple times.
45 */
46typedef void (*task_wake_up_t) (struct task_struct *task);
47/* called to notify the plugin of a blocking real-time task
48 * it will only be called for real-time tasks and before schedule is called */
49typedef void (*task_block_t) (struct task_struct *task);
50/* Called when a real-time task exits or changes to a different scheduling
51 * class.
52 * Free any allocated resources
53 */
54typedef void (*task_exit_t) (struct task_struct *);
55
56/* called early before the caller holds the runqueue lock */
57typedef void (*pre_setsched_t) (struct task_struct *, int policy);
58
59
60/* Called when the current task attempts to create a new lock of a given
61 * protocol type. */
62typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
63 void* __user config);
64
65
66/********************* sys call backends ********************/
67/* This function causes the caller to sleep until the next release */
68typedef long (*complete_job_t) (void);
69
70typedef long (*admit_task_t)(struct task_struct* tsk);
71
72typedef void (*release_at_t)(struct task_struct *t, lt_t start);
73
74struct sched_plugin {
75 struct list_head list;
76 /* basic info */
77 char *plugin_name;
78
79 /* setup */
80 activate_plugin_t activate_plugin;
81 deactivate_plugin_t deactivate_plugin;
82
83 /* scheduler invocation */
84 scheduler_tick_t tick;
85 schedule_t schedule;
86 finish_switch_t finish_switch;
87
88 /* syscall backend */
89 complete_job_t complete_job;
90 release_at_t release_at;
91
92 /* task state changes */
93 admit_task_t admit_task;
94
95 task_new_t task_new;
96 task_wake_up_t task_wake_up;
97 task_block_t task_block;
98 task_exit_t task_exit;
99
100 pre_setsched_t pre_setsched;
101
102#ifdef CONFIG_LITMUS_LOCKING
103 /* locking protocols */
104 allocate_lock_t allocate_lock;
105#endif
106} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
107
108
109extern struct sched_plugin *litmus;
110
111int register_sched_plugin(struct sched_plugin* plugin);
112struct sched_plugin* find_sched_plugin(const char* name);
113int print_sched_plugins(char* buf, int max);
114
115extern struct sched_plugin linux_sched_plugin;
116
117#endif
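Finally, a skeleton showing how a plugin is registered; the DEMO plugin below does nothing useful, and it is assumed that register_sched_plugin() tolerates callbacks left NULL.

#include <linux/module.h>
#include <linux/errno.h>
#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>

static struct task_struct *demo_plugin_schedule(struct task_struct *prev)
{
	sched_state_task_picked();
	return NULL;	/* let the Linux scheduler pick a background task */
}

static long demo_admit_task(struct task_struct *t)
{
	return -EINVAL;	/* accept no real-time tasks */
}

static struct sched_plugin demo_plugin = {
	.plugin_name = "DEMO",
	.schedule    = demo_plugin_schedule,
	.admit_task  = demo_admit_task,
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);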
diff --git a/include/litmus/sched_plugin.h.rej b/include/litmus/sched_plugin.h.rej
new file mode 100644
index 000000000000..47e0c27c5061
--- /dev/null
+++ b/include/litmus/sched_plugin.h.rej
@@ -0,0 +1,22 @@
1--- include/litmus/sched_plugin.h
2+++ include/litmus/sched_plugin.h
3@@ -53,6 +53,10 @@
4 */
5 typedef void (*task_exit_t) (struct task_struct *);
6
7+/* called early before the caller holds the runqueue lock */
8+typedef void (*pre_setsched_t) (struct task_struct *, int policy);
9+
10+
11 /* Called when the current task attempts to create a new lock of a given
12 * protocol type. */
13 typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
14@@ -93,6 +97,8 @@
15 task_block_t task_block;
16 task_exit_t task_exit;
17
18+ pre_setsched_t pre_setsched;
19+
20 #ifdef CONFIG_LITMUS_LOCKING
21 /* locking protocols */
22 allocate_lock_t allocate_lock;
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
new file mode 100644
index 000000000000..7ca34cb13881
--- /dev/null
+++ b/include/litmus/sched_trace.h
@@ -0,0 +1,200 @@
1/*
2 * sched_trace.h -- record scheduler events to a byte stream for offline analysis.
3 */
4#ifndef _LINUX_SCHED_TRACE_H_
5#define _LINUX_SCHED_TRACE_H_
6
7/* all times in nanoseconds */
8
9struct st_trace_header {
10 u8 type; /* Of what type is this record? */
11 u8 cpu; /* On which CPU was it recorded? */
12 u16 pid; /* PID of the task. */
13 u32 job; /* The job sequence number. */
14};
15
16#define ST_NAME_LEN 16
17struct st_name_data {
18 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
19};
20
21struct st_param_data { /* regular params */
22 u32 wcet;
23 u32 period;
24 u32 phase;
25 u8 partition;
26 u8 class;
27 u8 __unused[2];
28};
29
30struct st_release_data { /* A job was/is going to be released. */
31 u64 release; /* What's the release time? */
32 u64 deadline; /* By when must it finish? */
33};
34
35struct st_assigned_data { /* A job was assigned to a CPU. */
36 u64 when;
37 u8 target; /* Where should it execute? */
38 u8 __unused[7];
39};
40
41struct st_switch_to_data { /* A process was switched to on a given CPU. */
42 u64 when; /* When did this occur? */
43 u32 exec_time; /* Time the current job has executed. */
44 u8 __unused[4];
45
46};
47
48struct st_switch_away_data { /* A process was switched away from on a given CPU. */
49 u64 when;
50 u64 exec_time;
51};
52
53struct st_completion_data { /* A job completed. */
54 u64 when;
55 u8 forced:1; /* Set to 1 if job overran and kernel advanced to the
56 * next task automatically; set to 0 otherwise.
57 */
58 u8 __uflags:7;
59 u8 __unused[7];
60};
61
62struct st_block_data { /* A task blocks. */
63 u64 when;
64 u64 __unused;
65};
66
67struct st_resume_data { /* A task resumes. */
68 u64 when;
69 u64 __unused;
70};
71
72struct st_action_data {
73 u64 when;
74 u8 action;
75 u8 __unused[7];
76};
77
78struct st_sys_release_data {
79 u64 when;
80 u64 release;
81};
82
83#define DATA(x) struct st_ ## x ## _data x;
84
85typedef enum {
86 ST_NAME = 1, /* Start at one, so that we can spot
87 * uninitialized records. */
88 ST_PARAM,
89 ST_RELEASE,
90 ST_ASSIGNED,
91 ST_SWITCH_TO,
92 ST_SWITCH_AWAY,
93 ST_COMPLETION,
94 ST_BLOCK,
95 ST_RESUME,
96 ST_ACTION,
97 ST_SYS_RELEASE
98} st_event_record_type_t;
99
100struct st_event_record {
101 struct st_trace_header hdr;
102 union {
103 u64 raw[2];
104
105 DATA(name);
106 DATA(param);
107 DATA(release);
108 DATA(assigned);
109 DATA(switch_to);
110 DATA(switch_away);
111 DATA(completion);
112 DATA(block);
113 DATA(resume);
114 DATA(action);
115 DATA(sys_release);
116 } data;
117};
118
119#undef DATA
120
121#ifdef __KERNEL__
122
123#include <linux/sched.h>
124#include <litmus/feather_trace.h>
125
126#ifdef CONFIG_SCHED_TASK_TRACE
127
128#define SCHED_TRACE(id, callback, task) \
129 ft_event1(id, callback, task)
130#define SCHED_TRACE2(id, callback, task, xtra) \
131 ft_event2(id, callback, task, xtra)
132
133/* provide prototypes; needed on sparc64 */
134#ifndef NO_TASK_TRACE_DECLS
135feather_callback void do_sched_trace_task_name(unsigned long id,
136 struct task_struct* task);
137feather_callback void do_sched_trace_task_param(unsigned long id,
138 struct task_struct* task);
139feather_callback void do_sched_trace_task_release(unsigned long id,
140 struct task_struct* task);
141feather_callback void do_sched_trace_task_switch_to(unsigned long id,
142 struct task_struct* task);
143feather_callback void do_sched_trace_task_switch_away(unsigned long id,
144 struct task_struct* task);
145feather_callback void do_sched_trace_task_completion(unsigned long id,
146 struct task_struct* task,
147 unsigned long forced);
148feather_callback void do_sched_trace_task_block(unsigned long id,
149 struct task_struct* task);
150feather_callback void do_sched_trace_task_resume(unsigned long id,
151 struct task_struct* task);
152feather_callback void do_sched_trace_action(unsigned long id,
153 struct task_struct* task,
154 unsigned long action);
155feather_callback void do_sched_trace_sys_release(unsigned long id,
156 lt_t* start);
157
158#endif
159
160#else
161
162#define SCHED_TRACE(id, callback, task) /* no tracing */
163#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
164
165#endif
166
167
168#define SCHED_TRACE_BASE_ID 500
169
170
171#define sched_trace_task_name(t) \
172 SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t)
173#define sched_trace_task_param(t) \
174 SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t)
175#define sched_trace_task_release(t) \
176 SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t)
177#define sched_trace_task_switch_to(t) \
178 SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t)
179#define sched_trace_task_switch_away(t) \
180 SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t)
181#define sched_trace_task_completion(t, forced) \
182 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \
183 (unsigned long) forced)
184#define sched_trace_task_block(t) \
185 SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, do_sched_trace_task_block, t)
186#define sched_trace_task_resume(t) \
187 SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t)
188#define sched_trace_action(t, action) \
189 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9, do_sched_trace_action, t, \
190 (unsigned long) action)
191/* when is a pointer, it does not need an explicit cast to unsigned long */
192#define sched_trace_sys_release(when) \
193 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when)
194
195
196#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
197
198#endif /* __KERNEL__ */
199
200#endif
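
For illustration only (not part of the diff), a minimal sketch of how a scheduler plugin might emit the task-trace events declared above around a job boundary. The helper name example_job_boundary() and its call site are assumptions; only the sched_trace_*() macros come from sched_trace.h.

    #include <litmus/sched_trace.h>

    /* hypothetical helper: record the end of the current job and the release
     * of the next one; "forced" is nonzero if the kernel ended the job
     * because its budget was exhausted */
    static void example_job_boundary(struct task_struct *t, int forced)
    {
    	sched_trace_task_completion(t, forced);
    	/* ... plugin-specific setup of the next job would go here ... */
    	sched_trace_task_release(t);
    }
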
diff --git a/include/litmus/srp.h b/include/litmus/srp.h
new file mode 100644
index 000000000000..c9a4552b2bf3
--- /dev/null
+++ b/include/litmus/srp.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_SRP_H
2#define LITMUS_SRP_H
3
4struct srp_semaphore;
5
6struct srp_priority {
7 struct list_head list;
8 unsigned int priority;
9 pid_t pid;
10};
11#define list2prio(l) list_entry(l, struct srp_priority, list)
12
13/* struct for uniprocessor SRP "semaphore" */
14struct srp_semaphore {
15 struct litmus_lock litmus_lock;
16 struct srp_priority ceiling;
17 struct task_struct* owner;
18 int cpu; /* cpu associated with this "semaphore" and resource */
19};
20
21/* map a task to its SRP preemption level priority */
22typedef unsigned int (*srp_prioritization_t)(struct task_struct* t);
23/* Must be updated by each plugin that uses SRP.*/
24extern srp_prioritization_t get_srp_prio;
25
26struct srp_semaphore* allocate_srp_semaphore(void);
27
28#endif
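
A minimal sketch (not part of the diff) of what a plugin that supports the SRP has to provide: a mapping from tasks to preemption levels installed in get_srp_prio, plus an allocate_lock handler that hands out SRP semaphores. The function names and the constant preemption level below are placeholders; the actual ordering convention is defined by litmus/srp.c.

    #include <litmus/srp.h>

    /* placeholder mapping from a task to its SRP preemption level; a real
     * plugin derives this from the task's timing parameters */
    static unsigned int example_srp_prio(struct task_struct *t)
    {
    	return 0;
    }

    /* called once when the plugin is activated (hook name is illustrative) */
    static void example_enable_srp(void)
    {
    	get_srp_prio = example_srp_prio;
    }
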
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
new file mode 100644
index 000000000000..d6829c416912
--- /dev/null
+++ b/include/litmus/trace.h
@@ -0,0 +1,129 @@
1#ifndef _SYS_TRACE_H_
2#define _SYS_TRACE_H_
3
4#ifdef CONFIG_SCHED_OVERHEAD_TRACE
5
6#include <litmus/feather_trace.h>
7#include <litmus/feather_buffer.h>
8
9
10/*********************** TIMESTAMPS ************************/
11
12enum task_type_marker {
13 TSK_BE,
14 TSK_RT,
15 TSK_UNKNOWN
16};
17
18struct timestamp {
19 uint64_t timestamp;
20 uint32_t seq_no;
21 uint8_t cpu;
22 uint8_t event;
23 uint8_t task_type;
24};
25
26/* tracing callbacks */
27feather_callback void save_timestamp(unsigned long event);
28feather_callback void save_timestamp_def(unsigned long event, unsigned long type);
29feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr);
30feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu);
31feather_callback void save_task_latency(unsigned long event, unsigned long when_ptr);
32feather_callback void save_timestamp_time(unsigned long event, unsigned long time_ptr);
33
34#define TIMESTAMP(id) ft_event0(id, save_timestamp)
35
36#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def)
37
38#define TTIMESTAMP(id, task) \
39 ft_event1(id, save_timestamp_task, (unsigned long) task)
40
41#define CTIMESTAMP(id, cpu) \
42 ft_event1(id, save_timestamp_cpu, (unsigned long) cpu)
43
44#define LTIMESTAMP(id, task) \
45 ft_event1(id, save_task_latency, (unsigned long) task)
46
47#define TIMESTAMP_TIME(id, time_ptr) \
48 ft_event1(id, save_timestamp_time, (unsigned long) time_ptr)
49
50#define TIMESTAMP_PID(id) ft_event0(id, save_timestamp_pid)
51
52#else /* !CONFIG_SCHED_OVERHEAD_TRACE */
53
54#define TIMESTAMP(id) /* no tracing */
55
56#define DTIMESTAMP(id, def) /* no tracing */
57
58#define TTIMESTAMP(id, task) /* no tracing */
59
60#define CTIMESTAMP(id, cpu) /* no tracing */
61
62#define LTIMESTAMP(id, when_ptr) /* no tracing */
63
64#define TIMESTAMP_TIME(id, time_ptr) /* no tracing */
65
66#define TIMESTAMP_PID(id) /* no tracing */
67
68#endif
69
70
71/* Convention for timestamps
72 * =========================
73 *
74 * In order to process the trace files with a common tool, we use the following
75 * convention to measure execution times: the event id of a code segment's end
76 * timestamp is always the start timestamp's event id plus one.
77 */
78
79#define __TS_SYSCALL_IN_START(p) TIMESTAMP_TIME(10, p)
80#define TS_SYSCALL_IN_END TIMESTAMP_PID(11)
81
82#define TS_SYSCALL_OUT_START TIMESTAMP_PID(20)
83#define TS_SYSCALL_OUT_END TIMESTAMP_PID(21)
84
85#define TS_LOCK_START TIMESTAMP_PID(30)
86#define TS_LOCK_END TIMESTAMP_PID(31)
87
88#define TS_LOCK_SUSPEND TIMESTAMP_PID(38)
89#define TS_LOCK_RESUME TIMESTAMP_PID(39)
90
91#define TS_UNLOCK_START TIMESTAMP_PID(40)
92#define TS_UNLOCK_END TIMESTAMP_PID(41)
93
94#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only
95 * care
96 * about
97 * next */
98#define TS_SCHED_END(t) TTIMESTAMP(101, t)
99#define TS_SCHED2_START(t) TTIMESTAMP(102, t)
100#define TS_SCHED2_END(t) TTIMESTAMP(103, t)
101
102#define TS_CXS_START(t) TTIMESTAMP(104, t)
103#define TS_CXS_END(t) TTIMESTAMP(105, t)
104
105#define TS_RELEASE_START DTIMESTAMP(106, TSK_RT)
106#define TS_RELEASE_END DTIMESTAMP(107, TSK_RT)
107
108#define TS_TICK_START(t) TTIMESTAMP(110, t)
109#define TS_TICK_END(t) TTIMESTAMP(111, t)
110
111
112#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
113#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
114
115#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */
116#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */
117
118#define TS_ENTER_NP_START TIMESTAMP(140)
119#define TS_ENTER_NP_END TIMESTAMP(141)
120
121#define TS_EXIT_NP_START TIMESTAMP(150)
122#define TS_EXIT_NP_END TIMESTAMP(151)
123
124#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c)
125#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
126
127#define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when))
128
129#endif /* !_SYS_TRACE_H_ */
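
To make the start/end pairing convention concrete, here is a sketch (not part of the diff) of how a locking path would be instrumented; do_protocol_work() is a placeholder for the protocol-specific code being measured.

    #include <litmus/trace.h>

    extern long do_protocol_work(void);   /* placeholder */

    static long example_lock(void)
    {
    	long err;

    	TS_LOCK_START;              /* event id 30 */
    	err = do_protocol_work();   /* the code segment being measured */
    	TS_LOCK_END;                /* event id 31 = 30 + 1, per the convention */

    	return err;
    }
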
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
new file mode 100644
index 000000000000..94264c27d9ac
--- /dev/null
+++ b/include/litmus/unistd_32.h
@@ -0,0 +1,21 @@
1/*
2 * included from arch/x86/include/asm/unistd_32.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
6#define __LSC(x) (__NR_LITMUS + x)
7
8#define __NR_set_rt_task_param __LSC(0)
9#define __NR_get_rt_task_param __LSC(1)
10#define __NR_complete_job __LSC(2)
11#define __NR_od_open __LSC(3)
12#define __NR_od_close __LSC(4)
13#define __NR_litmus_lock __LSC(5)
14#define __NR_litmus_unlock __LSC(6)
15#define __NR_query_job_no __LSC(7)
16#define __NR_wait_for_job_release __LSC(8)
17#define __NR_wait_for_ts_release __LSC(9)
18#define __NR_release_ts __LSC(10)
19#define __NR_null_call __LSC(11)
20
21#define NR_litmus_syscalls 12
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
new file mode 100644
index 000000000000..d5ced0d2642c
--- /dev/null
+++ b/include/litmus/unistd_64.h
@@ -0,0 +1,33 @@
1/*
2 * included from arch/x86/include/asm/unistd_64.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
6#define __LSC(x) (__NR_LITMUS + x)
7
8#define __NR_set_rt_task_param __LSC(0)
9__SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param)
10#define __NR_get_rt_task_param __LSC(1)
11__SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param)
12#define __NR_complete_job __LSC(2)
13__SYSCALL(__NR_complete_job, sys_complete_job)
14#define __NR_od_open __LSC(3)
15__SYSCALL(__NR_od_open, sys_od_open)
16#define __NR_od_close __LSC(4)
17__SYSCALL(__NR_od_close, sys_od_close)
18#define __NR_litmus_lock __LSC(5)
19__SYSCALL(__NR_litmus_lock, sys_litmus_lock)
20#define __NR_litmus_unlock __LSC(6)
21__SYSCALL(__NR_litmus_unlock, sys_litmus_unlock)
22#define __NR_query_job_no __LSC(7)
23__SYSCALL(__NR_query_job_no, sys_query_job_no)
24#define __NR_wait_for_job_release __LSC(8)
25__SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release)
26#define __NR_wait_for_ts_release __LSC(9)
27__SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
28#define __NR_release_ts __LSC(10)
29__SYSCALL(__NR_release_ts, sys_release_ts)
30#define __NR_null_call __LSC(11)
31__SYSCALL(__NR_null_call, sys_null_call)
32
33#define NR_litmus_syscalls 12
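
A userspace sketch (not part of the diff) of invoking one of these system calls directly; real applications would normally use the liblitmus wrappers instead. It assumes the patched unistd headers are installed so that __NR_complete_job resolves, and that sys_complete_job() takes no arguments.

    #include <unistd.h>
    #include <sys/syscall.h>

    static int example_complete_job(void)
    {
    	/* signal completion of the current job and sleep until the next release */
    	return syscall(__NR_complete_job);
    }
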
diff --git a/include/litmus/wait.h b/include/litmus/wait.h
new file mode 100644
index 000000000000..ce1347c355f8
--- /dev/null
+++ b/include/litmus/wait.h
@@ -0,0 +1,57 @@
1#ifndef _LITMUS_WAIT_H_
2#define _LITMUS_WAIT_H_
3
4struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
5
6/* wrap regular wait_queue_t head */
7struct __prio_wait_queue {
8 wait_queue_t wq;
9
10 /* some priority point */
11 lt_t priority;
12 /* break ties in priority by lower tie_breaker */
13 unsigned int tie_breaker;
14};
15
16typedef struct __prio_wait_queue prio_wait_queue_t;
17
18static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq,
19 struct task_struct* t,
20 lt_t priority)
21{
22 init_waitqueue_entry(&pwq->wq, t);
23 pwq->priority = priority;
24 pwq->tie_breaker = 0;
25}
26
27static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq,
28 struct task_struct* t,
29 lt_t priority,
30 unsigned int tie_breaker)
31{
32 init_waitqueue_entry(&pwq->wq, t);
33 pwq->priority = priority;
34 pwq->tie_breaker = tie_breaker;
35}
36
37unsigned int __add_wait_queue_prio_exclusive(
38 wait_queue_head_t* head,
39 prio_wait_queue_t *new);
40
41static inline unsigned int add_wait_queue_prio_exclusive(
42 wait_queue_head_t* head,
43 prio_wait_queue_t *new)
44{
45 unsigned long flags;
46 unsigned int passed;
47
48 spin_lock_irqsave(&head->lock, flags);
49 passed = __add_wait_queue_prio_exclusive(head, new);
50
51 spin_unlock_irqrestore(&head->lock, flags);
52
53 return passed;
54}
55
56
57#endif
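
A sketch (not part of the diff) of how a locking protocol might suspend the current task on a priority-ordered wait queue; the surrounding protocol state and its own locking are omitted, and the caller is assumed to hold no locks that schedule() would trip over.

    #include <litmus/wait.h>

    static void example_priority_suspend(wait_queue_head_t *wq, lt_t prio)
    {
    	prio_wait_queue_t wait;

    	init_prio_waitqueue_entry(&wait, current, prio);
    	set_task_state(current, TASK_UNINTERRUPTIBLE);

    	/* queue stays ordered by 'priority'; ties would use the
    	 * init_prio_waitqueue_entry_tie() variant */
    	add_wait_queue_prio_exclusive(wq, &wait);

    	schedule();   /* woken later via __waitqueue_remove_first(wq) */
    }
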
diff --git a/kernel/exit.c b/kernel/exit.c
index 03120229db28..b9d3bc6c21ec 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -56,6 +56,8 @@
56#include <asm/pgtable.h> 56#include <asm/pgtable.h>
57#include <asm/mmu_context.h> 57#include <asm/mmu_context.h>
58 58
59extern void exit_od_table(struct task_struct *t);
60
59static void exit_mm(struct task_struct * tsk); 61static void exit_mm(struct task_struct * tsk);
60 62
61static void __unhash_process(struct task_struct *p, bool group_dead) 63static void __unhash_process(struct task_struct *p, bool group_dead)
@@ -960,6 +962,8 @@ NORET_TYPE void do_exit(long code)
960 if (unlikely(tsk->audit_context)) 962 if (unlikely(tsk->audit_context))
961 audit_free(tsk); 963 audit_free(tsk);
962 964
965 exit_od_table(tsk);
966
963 tsk->exit_code = code; 967 tsk->exit_code = code;
964 taskstats_exit(tsk, group_dead); 968 taskstats_exit(tsk, group_dead);
965 969
diff --git a/kernel/fork.c b/kernel/fork.c
index c445f8cc408d..ab7f29d906c7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -75,6 +75,9 @@
75 75
76#include <trace/events/sched.h> 76#include <trace/events/sched.h>
77 77
78#include <litmus/litmus.h>
79#include <litmus/sched_plugin.h>
80
78/* 81/*
79 * Protected counters by write_lock_irq(&tasklist_lock) 82 * Protected counters by write_lock_irq(&tasklist_lock)
80 */ 83 */
@@ -183,6 +186,7 @@ void __put_task_struct(struct task_struct *tsk)
183 WARN_ON(atomic_read(&tsk->usage)); 186 WARN_ON(atomic_read(&tsk->usage));
184 WARN_ON(tsk == current); 187 WARN_ON(tsk == current);
185 188
189 exit_litmus(tsk);
186 exit_creds(tsk); 190 exit_creds(tsk);
187 delayacct_tsk_free(tsk); 191 delayacct_tsk_free(tsk);
188 put_signal_struct(tsk->signal); 192 put_signal_struct(tsk->signal);
@@ -266,6 +270,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
266 270
267 tsk->stack = ti; 271 tsk->stack = ti;
268 272
273 /* Don't let the new task be a real-time task. */
274 litmus_fork(tsk);
275
269 err = prop_local_init_single(&tsk->dirties); 276 err = prop_local_init_single(&tsk->dirties);
270 if (err) 277 if (err)
271 goto out; 278 goto out;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 72206cf5c6cf..cb49883b64e5 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -46,6 +46,8 @@
46#include <linux/sched.h> 46#include <linux/sched.h>
47#include <linux/timer.h> 47#include <linux/timer.h>
48 48
49#include <litmus/litmus.h>
50
49#include <asm/uaccess.h> 51#include <asm/uaccess.h>
50 52
51#include <trace/events/timer.h> 53#include <trace/events/timer.h>
@@ -1042,6 +1044,98 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1042} 1044}
1043EXPORT_SYMBOL_GPL(hrtimer_start); 1045EXPORT_SYMBOL_GPL(hrtimer_start);
1044 1046
1047#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
1048
1049/**
1050 * hrtimer_start_on_info_init - Initialize hrtimer_start_on_info
1051 */
1052void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info)
1053{
1054 memset(info, 0, sizeof(struct hrtimer_start_on_info));
1055 atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
1056}
1057
1058/**
1059 * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu
1060 */
1061void hrtimer_pull(void)
1062{
1063 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
1064 struct hrtimer_start_on_info *info;
1065 struct list_head *pos, *safe, list;
1066
1067 raw_spin_lock(&base->lock);
1068 list_replace_init(&base->to_pull, &list);
1069 raw_spin_unlock(&base->lock);
1070
1071 list_for_each_safe(pos, safe, &list) {
1072 info = list_entry(pos, struct hrtimer_start_on_info, list);
1073 TRACE("pulled timer %p\n", info->timer);
1074 list_del(pos);
1075 hrtimer_start(info->timer, info->time, info->mode);
1076 }
1077}
1078
1079/**
1080 * hrtimer_start_on - trigger timer arming on remote cpu
1081 * @cpu: remote cpu
1082 * @info: save timer information for enqueuing on remote cpu
1083 * @timer: timer to be pulled
1084 * @time: expire time
1085 * @mode: timer mode
1086 */
1087int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
1088 struct hrtimer *timer, ktime_t time,
1089 const enum hrtimer_mode mode)
1090{
1091 unsigned long flags;
1092 struct hrtimer_cpu_base* base;
1093 int in_use = 0, was_empty;
1094
1095 /* serialize access to info through the timer base */
1096 lock_hrtimer_base(timer, &flags);
1097
1098 in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE);
1099 if (!in_use) {
1100 INIT_LIST_HEAD(&info->list);
1101 info->timer = timer;
1102 info->time = time;
1103 info->mode = mode;
1104 /* mark as in use */
1105 atomic_set(&info->state, HRTIMER_START_ON_QUEUED);
1106 }
1107
1108 unlock_hrtimer_base(timer, &flags);
1109
1110 if (!in_use) {
1111 /* initiate pull */
1112 preempt_disable();
1113 if (cpu == smp_processor_id()) {
1114 /* start timer locally; we may get called
1115 * with rq->lock held, do not wake up anything
1116 */
1117 TRACE("hrtimer_start_on: starting on local CPU\n");
1118 __hrtimer_start_range_ns(info->timer, info->time,
1119 0, info->mode, 0);
1120 } else {
1121 TRACE("hrtimer_start_on: pulling to remote CPU\n");
1122 base = &per_cpu(hrtimer_bases, cpu);
1123 raw_spin_lock_irqsave(&base->lock, flags);
1124 was_empty = list_empty(&base->to_pull);
1125 list_add(&info->list, &base->to_pull);
1126 raw_spin_unlock_irqrestore(&base->lock, flags);
1127 if (was_empty)
1128 /* only send an IPI if no one else
1129 * has done so already
1130 */
1131 smp_send_pull_timers(cpu);
1132 }
1133 preempt_enable();
1134 }
1135 return in_use;
1136}
1137
1138#endif
1045 1139
1046/** 1140/**
1047 * hrtimer_try_to_cancel - try to deactivate a timer 1141 * hrtimer_try_to_cancel - try to deactivate a timer
@@ -1634,6 +1728,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
1634 cpu_base->clock_base[i].cpu_base = cpu_base; 1728 cpu_base->clock_base[i].cpu_base = cpu_base;
1635 1729
1636 hrtimer_init_hres(cpu_base); 1730 hrtimer_init_hres(cpu_base);
1731 INIT_LIST_HEAD(&cpu_base->to_pull);
1637} 1732}
1638 1733
1639#ifdef CONFIG_HOTPLUG_CPU 1734#ifdef CONFIG_HOTPLUG_CPU
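
A sketch (not part of the diff) of how the new interface is used by a plugin or by release-master logic: initialize the info record once, then ask a (possibly remote) CPU to arm the timer. The caller's name and the absolute release time are illustrative.

    static void example_arm_remote_timer(int target_cpu,
    				     struct hrtimer *timer,
    				     struct hrtimer_start_on_info *info,
    				     ktime_t release_time)
    {
    	hrtimer_start_on_info_init(info);
    	/* returns nonzero if info was still in use by an earlier request */
    	hrtimer_start_on(target_cpu, info, timer,
    			 release_time, HRTIMER_MODE_ABS_PINNED);
    }
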
diff --git a/kernel/printk.c b/kernel/printk.c
index 8fe465ac008a..9dc8ea140426 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -74,6 +74,13 @@ int console_printk[4] = {
74}; 74};
75 75
76/* 76/*
77 * divert printk() messages when there is a LITMUS^RT debug listener
78 */
79#include <litmus/litmus.h>
80int trace_override = 0;
81int trace_recurse = 0;
82
83/*
77 * Low level drivers may need that to know if they can schedule in 84 * Low level drivers may need that to know if they can schedule in
78 * their unblank() callback or not. So let's export it. 85 * their unblank() callback or not. So let's export it.
79 */ 86 */
@@ -735,6 +742,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
735 /* Emit the output into the temporary buffer */ 742 /* Emit the output into the temporary buffer */
736 printed_len += vscnprintf(printk_buf + printed_len, 743 printed_len += vscnprintf(printk_buf + printed_len,
737 sizeof(printk_buf) - printed_len, fmt, args); 744 sizeof(printk_buf) - printed_len, fmt, args);
745 /* if LITMUS^RT tracer is active divert printk() msgs */
746 if (trace_override && !trace_recurse)
747 TRACE("%s", printk_buf);
738 748
739 749
740 p = printk_buf; 750 p = printk_buf;
@@ -804,7 +814,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
804 * Try to acquire and then immediately release the 814 * Try to acquire and then immediately release the
805 * console semaphore. The release will do all the 815 * console semaphore. The release will do all the
806 * actual magic (print out buffers, wake up klogd, 816 * actual magic (print out buffers, wake up klogd,
807 * etc). 817 * etc).
808 * 818 *
809 * The acquire_console_semaphore_for_printk() function 819 * The acquire_console_semaphore_for_printk() function
810 * will release 'logbuf_lock' regardless of whether it 820 * will release 'logbuf_lock' regardless of whether it
@@ -1067,7 +1077,7 @@ int printk_needs_cpu(int cpu)
1067 1077
1068void wake_up_klogd(void) 1078void wake_up_klogd(void)
1069{ 1079{
1070 if (waitqueue_active(&log_wait)) 1080 if (!trace_override && waitqueue_active(&log_wait))
1071 __raw_get_cpu_var(printk_pending) = 1; 1081 __raw_get_cpu_var(printk_pending) = 1;
1072} 1082}
1073 1083
diff --git a/kernel/sched.c b/kernel/sched.c
index dc85ceb90832..1f5327f8c012 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -79,6 +79,11 @@
79#include "sched_cpupri.h" 79#include "sched_cpupri.h"
80#include "workqueue_sched.h" 80#include "workqueue_sched.h"
81 81
82#include <litmus/sched_trace.h>
83#include <litmus/trace.h>
84
85static void litmus_tick(struct rq*, struct task_struct*);
86
82#define CREATE_TRACE_POINTS 87#define CREATE_TRACE_POINTS
83#include <trace/events/sched.h> 88#include <trace/events/sched.h>
84 89
@@ -405,6 +410,12 @@ struct rt_rq {
405#endif 410#endif
406}; 411};
407 412
413/* Litmus related fields in a runqueue */
414struct litmus_rq {
415 unsigned long nr_running;
416 struct task_struct *prev;
417};
418
408#ifdef CONFIG_SMP 419#ifdef CONFIG_SMP
409 420
410/* 421/*
@@ -471,6 +482,7 @@ struct rq {
471 482
472 struct cfs_rq cfs; 483 struct cfs_rq cfs;
473 struct rt_rq rt; 484 struct rt_rq rt;
485 struct litmus_rq litmus;
474 486
475#ifdef CONFIG_FAIR_GROUP_SCHED 487#ifdef CONFIG_FAIR_GROUP_SCHED
476 /* list of leaf cfs_rq on this cpu: */ 488 /* list of leaf cfs_rq on this cpu: */
@@ -566,8 +578,14 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
566 * A queue event has occurred, and we're going to schedule. In 578 * A queue event has occurred, and we're going to schedule. In
567 * this case, we can save a useless back to back clock update. 579 * this case, we can save a useless back to back clock update.
568 */ 580 */
581 /* LITMUS^RT: turning off the clock update is buggy in Linux 2.6.36;
582 * the scheduler can "forget" to re-enable the runqueue clock in some
583 * cases. LITMUS^RT amplifies the effects of this problem. Hence, we
584 * turn it off to avoid stalling clocks. */
585 /*
569 if (test_tsk_need_resched(p)) 586 if (test_tsk_need_resched(p))
570 rq->skip_clock_update = 1; 587 rq->skip_clock_update = 1;
588 */
571} 589}
572 590
573static inline int cpu_of(struct rq *rq) 591static inline int cpu_of(struct rq *rq)
@@ -1042,6 +1060,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1042 raw_spin_lock(&rq->lock); 1060 raw_spin_lock(&rq->lock);
1043 update_rq_clock(rq); 1061 update_rq_clock(rq);
1044 rq->curr->sched_class->task_tick(rq, rq->curr, 1); 1062 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
1063 litmus_tick(rq, rq->curr);
1045 raw_spin_unlock(&rq->lock); 1064 raw_spin_unlock(&rq->lock);
1046 1065
1047 return HRTIMER_NORESTART; 1066 return HRTIMER_NORESTART;
@@ -1840,7 +1859,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1840 1859
1841static const struct sched_class rt_sched_class; 1860static const struct sched_class rt_sched_class;
1842 1861
1843#define sched_class_highest (&rt_sched_class) 1862#define sched_class_highest (&litmus_sched_class)
1844#define for_each_class(class) \ 1863#define for_each_class(class) \
1845 for (class = sched_class_highest; class; class = class->next) 1864 for (class = sched_class_highest; class; class = class->next)
1846 1865
@@ -1920,6 +1939,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
1920#include "sched_idletask.c" 1939#include "sched_idletask.c"
1921#include "sched_fair.c" 1940#include "sched_fair.c"
1922#include "sched_rt.c" 1941#include "sched_rt.c"
1942#include "../litmus/sched_litmus.c"
1923#ifdef CONFIG_SCHED_DEBUG 1943#ifdef CONFIG_SCHED_DEBUG
1924# include "sched_debug.c" 1944# include "sched_debug.c"
1925#endif 1945#endif
@@ -2352,6 +2372,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2352 unsigned long en_flags = ENQUEUE_WAKEUP; 2372 unsigned long en_flags = ENQUEUE_WAKEUP;
2353 struct rq *rq; 2373 struct rq *rq;
2354 2374
2375 if (is_realtime(p))
2376 TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state);
2377
2355 this_cpu = get_cpu(); 2378 this_cpu = get_cpu();
2356 2379
2357 smp_wmb(); 2380 smp_wmb();
@@ -2366,7 +2389,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2366 orig_cpu = cpu; 2389 orig_cpu = cpu;
2367 2390
2368#ifdef CONFIG_SMP 2391#ifdef CONFIG_SMP
2369 if (unlikely(task_running(rq, p))) 2392 if (unlikely(task_running(rq, p)) || is_realtime(p))
2370 goto out_activate; 2393 goto out_activate;
2371 2394
2372 /* 2395 /*
@@ -2428,6 +2451,8 @@ out_activate:
2428out_running: 2451out_running:
2429 ttwu_post_activation(p, rq, wake_flags, success); 2452 ttwu_post_activation(p, rq, wake_flags, success);
2430out: 2453out:
2454 if (is_realtime(p))
2455 TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state);
2431 task_rq_unlock(rq, &flags); 2456 task_rq_unlock(rq, &flags);
2432 put_cpu(); 2457 put_cpu();
2433 2458
@@ -2532,7 +2557,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
2532 * Revert to default priority/policy on fork if requested. 2557 * Revert to default priority/policy on fork if requested.
2533 */ 2558 */
2534 if (unlikely(p->sched_reset_on_fork)) { 2559 if (unlikely(p->sched_reset_on_fork)) {
2535 if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { 2560 if (p->policy == SCHED_FIFO || p->policy == SCHED_RR ||
2561 p->policy == SCHED_LITMUS) {
2536 p->policy = SCHED_NORMAL; 2562 p->policy = SCHED_NORMAL;
2537 p->normal_prio = p->static_prio; 2563 p->normal_prio = p->static_prio;
2538 } 2564 }
@@ -2748,6 +2774,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
2748 */ 2774 */
2749 prev_state = prev->state; 2775 prev_state = prev->state;
2750 finish_arch_switch(prev); 2776 finish_arch_switch(prev);
2777 litmus->finish_switch(prev);
2778 prev->rt_param.stack_in_use = NO_CPU;
2751#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 2779#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2752 local_irq_disable(); 2780 local_irq_disable();
2753#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ 2781#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -2777,6 +2805,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
2777{ 2805{
2778 if (prev->sched_class->pre_schedule) 2806 if (prev->sched_class->pre_schedule)
2779 prev->sched_class->pre_schedule(rq, prev); 2807 prev->sched_class->pre_schedule(rq, prev);
2808
2809 /* LITMUS^RT (not a very clean hack): we need to save the prev task
2810 * as our scheduling decisions rely on it (since we drop the rq lock,
2811 * something in prev can change...); there is no way to escape
2812 * this hack apart from modifying pick_next_task(rq, _prev_) or
2813 * falling back on the previous solution of decoupling
2814 * scheduling decisions.
2815 */
2816 rq->litmus.prev = prev;
2780} 2817}
2781 2818
2782/* rq->lock is NOT held, but preemption is disabled */ 2819/* rq->lock is NOT held, but preemption is disabled */
@@ -3578,18 +3615,26 @@ void scheduler_tick(void)
3578 3615
3579 sched_clock_tick(); 3616 sched_clock_tick();
3580 3617
3618 TS_TICK_START(current);
3619
3581 raw_spin_lock(&rq->lock); 3620 raw_spin_lock(&rq->lock);
3582 update_rq_clock(rq); 3621 update_rq_clock(rq);
3583 update_cpu_load_active(rq); 3622 update_cpu_load_active(rq);
3584 curr->sched_class->task_tick(rq, curr, 0); 3623 curr->sched_class->task_tick(rq, curr, 0);
3624
3625 /* litmus_tick may force current to resched */
3626 litmus_tick(rq, curr);
3627
3585 raw_spin_unlock(&rq->lock); 3628 raw_spin_unlock(&rq->lock);
3586 3629
3587 perf_event_task_tick(curr); 3630 perf_event_task_tick(curr);
3588 3631
3589#ifdef CONFIG_SMP 3632#ifdef CONFIG_SMP
3590 rq->idle_at_tick = idle_cpu(cpu); 3633 rq->idle_at_tick = idle_cpu(cpu);
3591 trigger_load_balance(rq, cpu); 3634 if (!is_realtime(current))
3635 trigger_load_balance(rq, cpu);
3592#endif 3636#endif
3637 TS_TICK_END(current);
3593} 3638}
3594 3639
3595notrace unsigned long get_parent_ip(unsigned long addr) 3640notrace unsigned long get_parent_ip(unsigned long addr)
@@ -3716,12 +3761,20 @@ pick_next_task(struct rq *rq)
3716 /* 3761 /*
3717 * Optimization: we know that if all tasks are in 3762 * Optimization: we know that if all tasks are in
3718 * the fair class we can call that function directly: 3763 * the fair class we can call that function directly:
3719 */ 3764
3720 if (likely(rq->nr_running == rq->cfs.nr_running)) { 3765 * NOT IN LITMUS^RT!
3766
3767 * This breaks many assumptions in the plugins.
3768 * Do not uncomment without thinking long and hard
3769 * about how this affects global plugins such as GSN-EDF.
3770
3771 if (rq->nr_running == rq->cfs.nr_running) {
3772 TRACE("taking shortcut in pick_next_task()\n");
3721 p = fair_sched_class.pick_next_task(rq); 3773 p = fair_sched_class.pick_next_task(rq);
3722 if (likely(p)) 3774 if (likely(p))
3723 return p; 3775 return p;
3724 } 3776 }
3777 */
3725 3778
3726 class = sched_class_highest; 3779 class = sched_class_highest;
3727 for ( ; ; ) { 3780 for ( ; ; ) {
@@ -3748,6 +3801,7 @@ asmlinkage void __sched schedule(void)
3748 3801
3749need_resched: 3802need_resched:
3750 preempt_disable(); 3803 preempt_disable();
3804 sched_state_entered_schedule();
3751 cpu = smp_processor_id(); 3805 cpu = smp_processor_id();
3752 rq = cpu_rq(cpu); 3806 rq = cpu_rq(cpu);
3753 rcu_note_context_switch(cpu); 3807 rcu_note_context_switch(cpu);
@@ -3755,6 +3809,8 @@ need_resched:
3755 3809
3756 release_kernel_lock(prev); 3810 release_kernel_lock(prev);
3757need_resched_nonpreemptible: 3811need_resched_nonpreemptible:
3812 TS_SCHED_START;
3813 sched_trace_task_switch_away(prev);
3758 3814
3759 schedule_debug(prev); 3815 schedule_debug(prev);
3760 3816
@@ -3803,7 +3859,10 @@ need_resched_nonpreemptible:
3803 rq->curr = next; 3859 rq->curr = next;
3804 ++*switch_count; 3860 ++*switch_count;
3805 3861
3862 TS_SCHED_END(next);
3863 TS_CXS_START(next);
3806 context_switch(rq, prev, next); /* unlocks the rq */ 3864 context_switch(rq, prev, next); /* unlocks the rq */
3865 TS_CXS_END(current);
3807 /* 3866 /*
3808 * The context switch have flipped the stack from under us 3867 * The context switch have flipped the stack from under us
3809 * and restored the local variables which were saved when 3868 * and restored the local variables which were saved when
@@ -3812,17 +3871,23 @@ need_resched_nonpreemptible:
3812 */ 3871 */
3813 cpu = smp_processor_id(); 3872 cpu = smp_processor_id();
3814 rq = cpu_rq(cpu); 3873 rq = cpu_rq(cpu);
3815 } else 3874 } else {
3875 TS_SCHED_END(prev);
3816 raw_spin_unlock_irq(&rq->lock); 3876 raw_spin_unlock_irq(&rq->lock);
3877 }
3878
3879 sched_trace_task_switch_to(current);
3817 3880
3818 post_schedule(rq); 3881 post_schedule(rq);
3819 3882
3820 if (unlikely(reacquire_kernel_lock(prev))) 3883 if (sched_state_validate_switch() || unlikely(reacquire_kernel_lock(prev)))
3821 goto need_resched_nonpreemptible; 3884 goto need_resched_nonpreemptible;
3822 3885
3823 preempt_enable_no_resched(); 3886 preempt_enable_no_resched();
3824 if (need_resched()) 3887 if (need_resched())
3825 goto need_resched; 3888 goto need_resched;
3889
3890 srp_ceiling_block();
3826} 3891}
3827EXPORT_SYMBOL(schedule); 3892EXPORT_SYMBOL(schedule);
3828 3893
@@ -4108,6 +4173,17 @@ void complete_all(struct completion *x)
4108} 4173}
4109EXPORT_SYMBOL(complete_all); 4174EXPORT_SYMBOL(complete_all);
4110 4175
4176void complete_n(struct completion *x, int n)
4177{
4178 unsigned long flags;
4179
4180 spin_lock_irqsave(&x->wait.lock, flags);
4181 x->done += n;
4182 __wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL);
4183 spin_unlock_irqrestore(&x->wait.lock, flags);
4184}
4185EXPORT_SYMBOL(complete_n);
4186
4111static inline long __sched 4187static inline long __sched
4112do_wait_for_common(struct completion *x, long timeout, int state) 4188do_wait_for_common(struct completion *x, long timeout, int state)
4113{ 4189{
@@ -4550,7 +4626,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
4550 p->normal_prio = normal_prio(p); 4626 p->normal_prio = normal_prio(p);
4551 /* we are holding p->pi_lock already */ 4627 /* we are holding p->pi_lock already */
4552 p->prio = rt_mutex_getprio(p); 4628 p->prio = rt_mutex_getprio(p);
4553 if (rt_prio(p->prio)) 4629 if (p->policy == SCHED_LITMUS)
4630 p->sched_class = &litmus_sched_class;
4631 else if (rt_prio(p->prio))
4554 p->sched_class = &rt_sched_class; 4632 p->sched_class = &rt_sched_class;
4555 else 4633 else
4556 p->sched_class = &fair_sched_class; 4634 p->sched_class = &fair_sched_class;
@@ -4595,7 +4673,7 @@ recheck:
4595 4673
4596 if (policy != SCHED_FIFO && policy != SCHED_RR && 4674 if (policy != SCHED_FIFO && policy != SCHED_RR &&
4597 policy != SCHED_NORMAL && policy != SCHED_BATCH && 4675 policy != SCHED_NORMAL && policy != SCHED_BATCH &&
4598 policy != SCHED_IDLE) 4676 policy != SCHED_IDLE && policy != SCHED_LITMUS)
4599 return -EINVAL; 4677 return -EINVAL;
4600 } 4678 }
4601 4679
@@ -4610,6 +4688,8 @@ recheck:
4610 return -EINVAL; 4688 return -EINVAL;
4611 if (rt_policy(policy) != (param->sched_priority != 0)) 4689 if (rt_policy(policy) != (param->sched_priority != 0))
4612 return -EINVAL; 4690 return -EINVAL;
4691 if (policy == SCHED_LITMUS && policy == p->policy)
4692 return -EINVAL;
4613 4693
4614 /* 4694 /*
4615 * Allow unprivileged RT tasks to decrease priority: 4695 * Allow unprivileged RT tasks to decrease priority:
@@ -4650,6 +4730,12 @@ recheck:
4650 return retval; 4730 return retval;
4651 } 4731 }
4652 4732
4733 if (policy == SCHED_LITMUS) {
4734 retval = litmus_admit_task(p);
4735 if (retval)
4736 return retval;
4737 }
4738
4653 /* 4739 /*
4654 * make sure no PI-waiters arrive (or leave) while we are 4740 * make sure no PI-waiters arrive (or leave) while we are
4655 * changing the priority of the task: 4741 * changing the priority of the task:
@@ -4692,10 +4778,19 @@ recheck:
4692 4778
4693 p->sched_reset_on_fork = reset_on_fork; 4779 p->sched_reset_on_fork = reset_on_fork;
4694 4780
4781 if (p->policy == SCHED_LITMUS)
4782 litmus_exit_task(p);
4783
4695 oldprio = p->prio; 4784 oldprio = p->prio;
4696 prev_class = p->sched_class; 4785 prev_class = p->sched_class;
4697 __setscheduler(rq, p, policy, param->sched_priority); 4786 __setscheduler(rq, p, policy, param->sched_priority);
4698 4787
4788 if (policy == SCHED_LITMUS) {
4789 p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
4790 p->rt_param.present = running;
4791 litmus->task_new(p, on_rq, running);
4792 }
4793
4699 if (running) 4794 if (running)
4700 p->sched_class->set_curr_task(rq); 4795 p->sched_class->set_curr_task(rq);
4701 if (on_rq) { 4796 if (on_rq) {
@@ -4755,6 +4850,13 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4755 if (copy_from_user(&lparam, param, sizeof(struct sched_param))) 4850 if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
4756 return -EFAULT; 4851 return -EFAULT;
4757 4852
4853#ifdef CONFIG_LITMUS_LOCKING
4854 /* Hack to allow plugin to call into schedule
4855 * prio to a setscheduler() call. */
4856 if (is_realtime(current))
4857 litmus->pre_setsched(current, policy);
4858#endif
4859
4758 rcu_read_lock(); 4860 rcu_read_lock();
4759 retval = -ESRCH; 4861 retval = -ESRCH;
4760 p = find_process_by_pid(pid); 4862 p = find_process_by_pid(pid);
@@ -4865,10 +4967,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
4865 rcu_read_lock(); 4967 rcu_read_lock();
4866 4968
4867 p = find_process_by_pid(pid); 4969 p = find_process_by_pid(pid);
4868 if (!p) { 4970 /* Don't set affinity if task not found and for LITMUS tasks */
4971 if (!p || is_realtime(p)) {
4869 rcu_read_unlock(); 4972 rcu_read_unlock();
4870 put_online_cpus(); 4973 put_online_cpus();
4871 return -ESRCH; 4974 return p ? -EPERM : -ESRCH;
4872 } 4975 }
4873 4976
4874 /* Prevent p going away */ 4977 /* Prevent p going away */
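
For reference, a userspace sketch (not part of the diff) of the admission path that the scheduler-core changes above enable: once a task's LITMUS^RT parameters have been installed (via the set_rt_task_param() syscall or a liblitmus wrapper), switching to SCHED_LITMUS goes through sched_setscheduler(), which calls litmus_admit_task(). The SCHED_LITMUS constant is defined elsewhere in the patch; sched_priority must be 0 because SCHED_LITMUS is not an rt_policy().

    #include <sched.h>

    static int example_become_realtime(void)
    {
    	struct sched_param param = { .sched_priority = 0 };

    	/* kernel side: __sched_setscheduler() -> litmus_admit_task() */
    	return sched_setscheduler(0 /* self */, SCHED_LITMUS, &param);
    }
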
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index db3f674ca49d..e0e8d5ca3c98 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1654,7 +1654,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1654 struct cfs_rq *cfs_rq = task_cfs_rq(curr); 1654 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1655 int scale = cfs_rq->nr_running >= sched_nr_latency; 1655 int scale = cfs_rq->nr_running >= sched_nr_latency;
1656 1656
1657 if (unlikely(rt_prio(p->prio))) 1657 if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS)
1658 goto preempt; 1658 goto preempt;
1659 1659
1660 if (unlikely(p->sched_class != &fair_sched_class)) 1660 if (unlikely(p->sched_class != &fair_sched_class))
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d10c80ebb67a..e40e7fe43170 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1013,7 +1013,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1013 */ 1013 */
1014static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) 1014static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1015{ 1015{
1016 if (p->prio < rq->curr->prio) { 1016 if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) {
1017 resched_task(rq->curr); 1017 resched_task(rq->curr);
1018 return; 1018 return;
1019 } 1019 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3e216e01bbd1..bb2d8b7850a3 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -768,12 +768,53 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
768} 768}
769 769
770/** 770/**
771 * tick_set_quanta_type - get the quanta type as a boot option
772 * Default is standard setup with ticks staggered over first
773 * half of tick period.
774 */
775int quanta_type = LINUX_DEFAULT_TICKS;
776static int __init tick_set_quanta_type(char *str)
777{
778 if (strcmp("aligned", str) == 0) {
779 quanta_type = LITMUS_ALIGNED_TICKS;
780 printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n");
781 }
782 else if (strcmp("staggered", str) == 0) {
783 quanta_type = LITMUS_STAGGERED_TICKS;
784 printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n");
785 }
786 return 1;
787}
788__setup("quanta=", tick_set_quanta_type);
789
790u64 cpu_stagger_offset(int cpu)
791{
792 u64 offset = 0;
793 switch (quanta_type) {
794 case LITMUS_ALIGNED_TICKS:
795 offset = 0;
796 break;
797 case LITMUS_STAGGERED_TICKS:
798 offset = ktime_to_ns(tick_period);
799 do_div(offset, num_possible_cpus());
800 offset *= cpu;
801 break;
802 default:
803 offset = ktime_to_ns(tick_period) >> 1;
804 do_div(offset, num_possible_cpus());
805 offset *= cpu;
806 }
807 return offset;
808}
809
810/**
771 * tick_setup_sched_timer - setup the tick emulation timer 811 * tick_setup_sched_timer - setup the tick emulation timer
772 */ 812 */
773void tick_setup_sched_timer(void) 813void tick_setup_sched_timer(void)
774{ 814{
775 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 815 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
776 ktime_t now = ktime_get(); 816 ktime_t now = ktime_get();
817 u64 offset;
777 818
778 /* 819 /*
779 * Emulate tick processing via per-CPU hrtimers: 820 * Emulate tick processing via per-CPU hrtimers:
@@ -784,6 +825,12 @@ void tick_setup_sched_timer(void)
784 /* Get the next period (per cpu) */ 825 /* Get the next period (per cpu) */
785 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); 826 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
786 827
828 /* Offset must be set correctly to achieve desired quanta type. */
829 offset = cpu_stagger_offset(smp_processor_id());
830
831 /* Add the correct offset to expiration time */
832 hrtimer_add_expires_ns(&ts->sched_timer, offset);
833
787 for (;;) { 834 for (;;) {
788 hrtimer_forward(&ts->sched_timer, now, tick_period); 835 hrtimer_forward(&ts->sched_timer, now, tick_period);
789 hrtimer_start_expires(&ts->sched_timer, 836 hrtimer_start_expires(&ts->sched_timer,
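
As a worked example of cpu_stagger_offset(): with a 1 ms tick and 4 possible CPUs, booting with quanta=staggered yields per-CPU offsets of tick_period / num_possible_cpus() * cpu, i.e., 0, 250, 500, and 750 microseconds, while the default spreads the CPUs over only the first half of the period, giving 0, 125, 250, and 375 microseconds; quanta=aligned makes every offset zero.
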
diff --git a/litmus/Kconfig b/litmus/Kconfig
new file mode 100644
index 000000000000..ad8dc8308cf0
--- /dev/null
+++ b/litmus/Kconfig
@@ -0,0 +1,185 @@
1menu "LITMUS^RT"
2
3menu "Scheduling"
4
5config PLUGIN_CEDF
6 bool "Clustered-EDF"
7 depends on X86 && SYSFS
8 default y
9 help
10 Include the Clustered EDF (C-EDF) plugin in the kernel.
11 This is appropriate for large platforms with shared caches.
12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
13 makes little sense since there aren't any shared caches.
14
15config PLUGIN_PFAIR
16 bool "PFAIR"
17 depends on HIGH_RES_TIMERS && !NO_HZ
18 default y
19 help
20 Include the PFAIR plugin (i.e., the PD^2 scheduler) in the kernel.
21 The PFAIR plugin requires high resolution timers (for staggered quanta)
22 and does not support NO_HZ (quanta could be missed when the system is idle).
23
24 If unsure, say Yes.
25
26config RELEASE_MASTER
27 bool "Release-master Support"
28 depends on ARCH_HAS_SEND_PULL_TIMERS
29 default n
30 help
31 Allow one processor to act as a dedicated interrupt processor
32 that services all timer interrupts, but that does not schedule
33 real-time tasks. See RTSS'09 paper for details
34 (http://www.cs.unc.edu/~anderson/papers.html).
35 Currently only supported by GSN-EDF.
36
37endmenu
38
39menu "Real-Time Synchronization"
40
41config NP_SECTION
42 bool "Non-preemptive section support"
43 default n
44 help
45 Allow tasks to become non-preemptable.
46 Note that plugins still need to explicitly support non-preemptivity.
47 Currently, only GSN-EDF and PSN-EDF have such support.
48
49 This is required to support locking protocols such as the FMLP.
50 If disabled, all tasks will be considered preemptable at all times.
51
52config LITMUS_LOCKING
53 bool "Support for real-time locking protocols"
54 depends on NP_SECTION
55 default n
56 help
57 Enable LITMUS^RT's deterministic multiprocessor real-time
58 locking protocols.
59
60 Say Yes if you want to include locking protocols such as the FMLP and
61 Baker's SRP.
62
63endmenu
64
65menu "Tracing"
66
67config FEATHER_TRACE
68 bool "Feather-Trace Infrastructure"
69 default y
70 help
71 Feather-Trace basic tracing infrastructure. Includes device file
72 driver and instrumentation point support.
73
74 There are actually two implementations of Feather-Trace.
75 1) A slower, but portable, default implementation.
76 2) Architecture-specific implementations that rewrite kernel .text at runtime.
77
78 If enabled, Feather-Trace will be based on 2) if available (currently only for x86).
79 However, if DEBUG_RODATA=y, then Feather-Trace will choose option 1) in any case
80 to avoid problems with write-protected .text pages.
81
82 Bottom line: to avoid increased overheads, choose DEBUG_RODATA=n.
83
84 Note that this option only enables the basic Feather-Trace infrastructure;
85 you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to
86 actually enable any events.
87
88config SCHED_TASK_TRACE
89 bool "Trace real-time tasks"
90 depends on FEATHER_TRACE
91 default y
92 help
93 Include support for the sched_trace_XXX() tracing functions. This
94 allows the collection of real-time task events such as job
95 completions, job releases, early completions, etc. This results in a
96 small overhead in the scheduling code. Disable if the overhead is not
97 acceptable (e.g., benchmarking).
98
99 Say Yes for debugging.
100 Say No for overhead tracing.
101
102config SCHED_TASK_TRACE_SHIFT
103 int "Buffer size for sched_trace_xxx() events"
104 depends on SCHED_TASK_TRACE
105 range 8 13
106 default 9
107 help
108
109 Select the buffer size of sched_trace_xxx() events as a power of two.
110 These buffers are statically allocated as per-CPU data. Each event
111 requires 24 bytes storage plus one additional flag byte. Too large
112 buffers can cause issues with the per-cpu allocator (and waste
113 memory). Too small buffers can cause scheduling events to be lost. The
114 "right" size is workload dependent and depends on the number of tasks,
115 each task's period, each task's number of suspensions, and how often
116 the buffer is flushed.
117
118 Examples: 12 => 4k events
119 10 => 1k events
120 8 => 512 events
121
122config SCHED_OVERHEAD_TRACE
123 bool "Record timestamps for overhead measurements"
124 depends on FEATHER_TRACE
125 default n
126 help
127 Export event stream for overhead tracing.
128 Say Yes for overhead tracing.
129
130config SCHED_DEBUG_TRACE
131 bool "TRACE() debugging"
132 default y
133 help
134 Include support for sched_trace_log_message(), which is used to
135 implement TRACE(). If disabled, no TRACE() messages will be included
136 in the kernel, and no overheads due to debugging statements will be
137 incurred by the scheduler. Disable if the overhead is not acceptable
138 (e.g. benchmarking).
139
140 Say Yes for debugging.
141 Say No for overhead tracing.
142
143config SCHED_DEBUG_TRACE_SHIFT
144 int "Buffer size for TRACE() buffer"
145 depends on SCHED_DEBUG_TRACE
146 range 14 22
147 default 18
148 help
149
150 Select the amount of memory needed for the TRACE() buffer, as a
151 power of two. The TRACE() buffer is global and statically allocated. If
152 the buffer is too small, there will be holes in the TRACE() log if the
153 buffer-flushing task is starved.
154
155 The default should be sufficient for most systems. Increase the buffer
156 size if the log contains holes. Reduce the buffer size when running on
157 a memory-constrained system.
158
159 Examples: 14 => 16KB
160 18 => 256KB
161 20 => 1MB
162
163 This buffer is exported to userspace using a misc device as
164 'litmus/log'. On a system with default udev rules, a corresponding
165 character device node should be created at /dev/litmus/log. The buffer
166 can be flushed using cat, e.g., 'cat /dev/litmus/log > my_log_file.txt'.
167
168config SCHED_DEBUG_TRACE_CALLER
169 bool "Include [function@file:line] tag in TRACE() log"
170 depends on SCHED_DEBUG_TRACE
171 default n
172 help
173 With this option enabled, TRACE() prepends
174
175 "[<function name>@<filename>:<line number>]"
176
177 to each message in the debug log. Enable this to aid in figuring out
178 what was called in which order. The downside is that it adds a lot of
179 clutter.
180
181 If unsure, say No.
182
183endmenu
184
185endmenu
diff --git a/litmus/Makefile b/litmus/Makefile
new file mode 100644
index 000000000000..e86fad8c25ec
--- /dev/null
+++ b/litmus/Makefile
@@ -0,0 +1,30 @@
1#
2# Makefile for LITMUS^RT
3#
4
5obj-y = sched_plugin.o litmus.o \
6 preempt.o \
7 litmus_proc.o \
8 budget.o \
9 clustered.o \
10 jobs.o \
11 sync.o \
12 rt_domain.o \
13 edf_common.o \
14 fp_common.o \
15 fdso.o \
16 locking.o \
17 srp.o \
18 bheap.o \
19 ctrldev.o \
20 sched_gsn_edf.o \
21 sched_psn_edf.o \
22 sched_pfp.o
23
24obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
25obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
26
27obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
28obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
29obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
30obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
diff --git a/litmus/bheap.c b/litmus/bheap.c
new file mode 100644
index 000000000000..528af97f18a6
--- /dev/null
+++ b/litmus/bheap.c
@@ -0,0 +1,314 @@
1#include "linux/kernel.h"
2#include "litmus/bheap.h"
3
4void bheap_init(struct bheap* heap)
5{
6 heap->head = NULL;
7 heap->min = NULL;
8}
9
10void bheap_node_init(struct bheap_node** _h, void* value)
11{
12 struct bheap_node* h = *_h;
13 h->parent = NULL;
14 h->next = NULL;
15 h->child = NULL;
16 h->degree = NOT_IN_HEAP;
17 h->value = value;
18 h->ref = _h;
19}
20
21
22/* make child a subtree of root */
23static void __bheap_link(struct bheap_node* root,
24 struct bheap_node* child)
25{
26 child->parent = root;
27 child->next = root->child;
28 root->child = child;
29 root->degree++;
30}
31
32/* merge root lists */
33static struct bheap_node* __bheap_merge(struct bheap_node* a,
34 struct bheap_node* b)
35{
36 struct bheap_node* head = NULL;
37 struct bheap_node** pos = &head;
38
39 while (a && b) {
40 if (a->degree < b->degree) {
41 *pos = a;
42 a = a->next;
43 } else {
44 *pos = b;
45 b = b->next;
46 }
47 pos = &(*pos)->next;
48 }
49 if (a)
50 *pos = a;
51 else
52 *pos = b;
53 return head;
54}
55
56/* reverse a linked list of nodes. also clears parent pointer */
57static struct bheap_node* __bheap_reverse(struct bheap_node* h)
58{
59 struct bheap_node* tail = NULL;
60 struct bheap_node* next;
61
62 if (!h)
63 return h;
64
65 h->parent = NULL;
66 while (h->next) {
67 next = h->next;
68 h->next = tail;
69 tail = h;
70 h = next;
71 h->parent = NULL;
72 }
73 h->next = tail;
74 return h;
75}
76
77static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
78 struct bheap_node** prev, struct bheap_node** node)
79{
80 struct bheap_node *_prev, *cur;
81 *prev = NULL;
82
83 if (!heap->head) {
84 *node = NULL;
85 return;
86 }
87
88 *node = heap->head;
89 _prev = heap->head;
90 cur = heap->head->next;
91 while (cur) {
92 if (higher_prio(cur, *node)) {
93 *node = cur;
94 *prev = _prev;
95 }
96 _prev = cur;
97 cur = cur->next;
98 }
99}
100
101static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
102 struct bheap_node* h2)
103{
104 struct bheap_node* h1;
105 struct bheap_node *prev, *x, *next;
106 if (!h2)
107 return;
108 h1 = heap->head;
109 if (!h1) {
110 heap->head = h2;
111 return;
112 }
113 h1 = __bheap_merge(h1, h2);
114 prev = NULL;
115 x = h1;
116 next = x->next;
117 while (next) {
118 if (x->degree != next->degree ||
119 (next->next && next->next->degree == x->degree)) {
120 /* nothing to do, advance */
121 prev = x;
122 x = next;
123 } else if (higher_prio(x, next)) {
124 /* x becomes the root of next */
125 x->next = next->next;
126 __bheap_link(x, next);
127 } else {
128 /* next becomes the root of x */
129 if (prev)
130 prev->next = next;
131 else
132 h1 = next;
133 __bheap_link(next, x);
134 x = next;
135 }
136 next = x->next;
137 }
138 heap->head = h1;
139}
140
141static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
142 struct bheap* heap)
143{
144 struct bheap_node *prev, *node;
145 __bheap_min(higher_prio, heap, &prev, &node);
146 if (!node)
147 return NULL;
148 if (prev)
149 prev->next = node->next;
150 else
151 heap->head = node->next;
152 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
153 return node;
154}
155
156/* insert (and reinitialize) a node into the heap */
157void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
158 struct bheap_node* node)
159{
160 struct bheap_node *min;
161 node->child = NULL;
162 node->parent = NULL;
163 node->next = NULL;
164 node->degree = 0;
165 if (heap->min && higher_prio(node, heap->min)) {
166 /* swap min cache */
167 min = heap->min;
168 min->child = NULL;
169 min->parent = NULL;
170 min->next = NULL;
171 min->degree = 0;
172 __bheap_union(higher_prio, heap, min);
173 heap->min = node;
174 } else
175 __bheap_union(higher_prio, heap, node);
176}
177
178void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
179{
180 struct bheap_node* min;
181 if (heap->min) {
182 min = heap->min;
183 heap->min = NULL;
184 bheap_insert(higher_prio, heap, min);
185 }
186}
187
188/* merge addition into target */
189void bheap_union(bheap_prio_t higher_prio,
190 struct bheap* target, struct bheap* addition)
191{
192 /* first insert any cached minima, if necessary */
193 bheap_uncache_min(higher_prio, target);
194 bheap_uncache_min(higher_prio, addition);
195 __bheap_union(higher_prio, target, addition->head);
196 /* this is a destructive merge */
197 addition->head = NULL;
198}
199
200struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
201 struct bheap* heap)
202{
203 if (!heap->min)
204 heap->min = __bheap_extract_min(higher_prio, heap);
205 return heap->min;
206}
207
208struct bheap_node* bheap_take(bheap_prio_t higher_prio,
209 struct bheap* heap)
210{
211 struct bheap_node *node;
212 if (!heap->min)
213 heap->min = __bheap_extract_min(higher_prio, heap);
214 node = heap->min;
215 heap->min = NULL;
216 if (node)
217 node->degree = NOT_IN_HEAP;
218 return node;
219}
220
221int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
222{
223 struct bheap_node *parent;
224 struct bheap_node** tmp_ref;
225 void* tmp;
226
227 /* bubble up */
228 parent = node->parent;
229 while (parent && higher_prio(node, parent)) {
230 /* swap parent and node */
231 tmp = parent->value;
232 parent->value = node->value;
233 node->value = tmp;
234 /* swap references */
235 *(parent->ref) = node;
236 *(node->ref) = parent;
237 tmp_ref = parent->ref;
238 parent->ref = node->ref;
239 node->ref = tmp_ref;
240 /* step up */
241 node = parent;
242 parent = node->parent;
243 }
244
245 return parent != NULL;
246}
247
248void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
249 struct bheap_node* node)
250{
251 struct bheap_node *parent, *prev, *pos;
252 struct bheap_node** tmp_ref;
253 void* tmp;
254
255 if (heap->min != node) {
256 /* bubble up */
257 parent = node->parent;
258 while (parent) {
259 /* swap parent and node */
260 tmp = parent->value;
261 parent->value = node->value;
262 node->value = tmp;
263 /* swap references */
264 *(parent->ref) = node;
265 *(node->ref) = parent;
266 tmp_ref = parent->ref;
267 parent->ref = node->ref;
268 node->ref = tmp_ref;
269 /* step up */
270 node = parent;
271 parent = node->parent;
272 }
273 /* now delete:
274 * first find prev */
275 prev = NULL;
276 pos = heap->head;
277 while (pos != node) {
278 prev = pos;
279 pos = pos->next;
280 }
281 /* we have prev, now remove node */
282 if (prev)
283 prev->next = node->next;
284 else
285 heap->head = node->next;
286 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
287 } else
288 heap->min = NULL;
289 node->degree = NOT_IN_HEAP;
290}
291
292/* allocate a heap node for value and insert into the heap */
293int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
294 void* value, int gfp_flags)
295{
296 struct bheap_node* hn = bheap_node_alloc(gfp_flags);
297 if (likely(hn)) {
298 bheap_node_init(&hn, value);
299 bheap_insert(higher_prio, heap, hn);
300 }
301 return hn != NULL;
302}
303
304void* bheap_take_del(bheap_prio_t higher_prio,
305 struct bheap* heap)
306{
307 struct bheap_node* hn = bheap_take(higher_prio, heap);
308 void* ret = NULL;
309 if (hn) {
310 ret = hn->value;
311 bheap_node_free(hn);
312 }
313 return ret;
314}
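
A sketch (not part of the diff) of how this binomial heap is typically used: client code supplies a bheap_prio_t comparator that returns nonzero when its first argument has higher priority, and stores task pointers in node->value. The example_earlier_deadline() comparator and the get_deadline() accessor mirror what the EDF code does, but are assumptions here.

    #include <linux/gfp.h>
    #include <litmus/litmus.h>
    #include <litmus/bheap.h>

    /* "higher priority" = earlier absolute deadline (assumed accessor) */
    static int example_earlier_deadline(struct bheap_node *a, struct bheap_node *b)
    {
    	struct task_struct *ta = a->value, *tb = b->value;
    	return get_deadline(ta) < get_deadline(tb);
    }

    static void example_heap_usage(struct task_struct *t)
    {
    	struct bheap heap;
    	struct task_struct *first;

    	bheap_init(&heap);
    	/* allocates a node for t and inserts it */
    	bheap_add(example_earlier_deadline, &heap, t, GFP_ATOMIC);
    	/* removes and frees the node of the earliest-deadline task */
    	first = bheap_take_del(example_earlier_deadline, &heap);
    	(void) first;   /* unused in this sketch */
    }
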
diff --git a/litmus/budget.c b/litmus/budget.c
new file mode 100644
index 000000000000..310e9a3d4172
--- /dev/null
+++ b/litmus/budget.c
@@ -0,0 +1,111 @@
1#include <linux/sched.h>
2#include <linux/percpu.h>
3#include <linux/hrtimer.h>
4
5#include <litmus/litmus.h>
6#include <litmus/preempt.h>
7
8struct enforcement_timer {
9 /* The enforcement timer is used to accurately police
10 * slice budgets. */
11 struct hrtimer timer;
12 int armed;
13};
14
15DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
16
17static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
18{
19 struct enforcement_timer* et = container_of(timer,
20 struct enforcement_timer,
21 timer);
22 unsigned long flags;
23
24 local_irq_save(flags);
25 TRACE("enforcement timer fired.\n");
26 et->armed = 0;
27 /* activate scheduler */
28 litmus_reschedule_local();
29 local_irq_restore(flags);
30
31 return HRTIMER_NORESTART;
32}
33
34/* assumes called with IRQs off */
35static void cancel_enforcement_timer(struct enforcement_timer* et)
36{
37 int ret;
38
39 TRACE("cancelling enforcement timer.\n");
40
41 /* Since interrupts are disabled and et->armed is only
42 * modified locally, we do not need any locks.
43 */
44
45 if (et->armed) {
46 ret = hrtimer_try_to_cancel(&et->timer);
47 /* Should never be inactive. */
48 BUG_ON(ret == 0);
49 /* Should never be running concurrently. */
50 BUG_ON(ret == -1);
51
52 et->armed = 0;
53 }
54}
55
56/* assumes called with IRQs off */
57static void arm_enforcement_timer(struct enforcement_timer* et,
58 struct task_struct* t)
59{
60 lt_t when_to_fire;
61 TRACE_TASK(t, "arming enforcement timer.\n");
62
63 /* Calling this when there is no budget left for the task
64 * makes no sense, unless the task is non-preemptive. */
65 BUG_ON(budget_exhausted(t) && (!is_np(t)));
66
67 /* __hrtimer_start_range_ns() cancels the timer
68 * anyway, so we don't have to check whether it is still armed */
69
70 if (likely(!is_np(t))) {
71 when_to_fire = litmus_clock() + budget_remaining(t);
72 __hrtimer_start_range_ns(&et->timer,
73 ns_to_ktime(when_to_fire),
74 0 /* delta */,
75 HRTIMER_MODE_ABS_PINNED,
76 0 /* no wakeup */);
77 et->armed = 1;
78 }
79}
80
81
82/* expects to be called with IRQs off */
83void update_enforcement_timer(struct task_struct* t)
84{
85 struct enforcement_timer* et = &__get_cpu_var(budget_timer);
86
87 if (t && budget_precisely_enforced(t)) {
88 /* Make sure we call into the scheduler when this budget
89 * expires. */
90 arm_enforcement_timer(et, t);
91 } else if (et->armed) {
92 /* Make sure we don't cause unnecessary interrupts. */
93 cancel_enforcement_timer(et);
94 }
95}
96
97
98static int __init init_budget_enforcement(void)
99{
100 int cpu;
101 struct enforcement_timer* et;
102
103 for (cpu = 0; cpu < NR_CPUS; cpu++) {
104 et = &per_cpu(budget_timer, cpu);
105 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
106 et->timer.function = on_enforcement_timeout;
107 }
108 return 0;
109}
110
111module_init(init_budget_enforcement);
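For context, a scheduler plugin is expected to call update_enforcement_timer() on its scheduling path with interrupts disabled; a minimal sketch (the hook name is illustrative, not taken from a specific plugin in this patch):

#include <linux/sched.h>
#include <litmus/budget.h>

/* Sketch: called at the end of a plugin's scheduling decision, with IRQs
 * off; 'next' may be NULL when the CPU goes idle. */
static void example_after_schedule(struct task_struct *next)
{
        /* Arms an hrtimer for budget_remaining(next) if the task uses
         * PRECISE_ENFORCEMENT; otherwise cancels any armed timer. */
        update_enforcement_timer(next);
}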
diff --git a/litmus/clustered.c b/litmus/clustered.c
new file mode 100644
index 000000000000..6fe1b512f628
--- /dev/null
+++ b/litmus/clustered.c
@@ -0,0 +1,111 @@
1#include <linux/gfp.h>
2#include <linux/cpumask.h>
3#include <linux/list.h>
4
5#include <litmus/clustered.h>
6
7#ifndef CONFIG_X86
8/* fake get_shared_cpu_map() on non-x86 architectures */
9
10int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
11{
12 if (index != 1)
13 return 1;
14 else {
15 /* Fake L1: CPU is all by itself. */
16 cpumask_clear(mask);
17 cpumask_set_cpu(cpu, mask);
18 return 0;
19 }
20}
21
22#endif
23
24int get_cluster_size(enum cache_level level)
25{
26 cpumask_var_t mask;
27 int ok;
28 int num_cpus;
29
30 if (level == GLOBAL_CLUSTER)
31 return num_online_cpus();
32 else {
33 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
34 return -ENOMEM;
35 /* assumes CPU 0 is representative of all CPUs */
36 ok = get_shared_cpu_map(mask, 0, level);
37 /* ok == 0 means we got the map; otherwise it's an invalid cache level */
38 if (ok == 0)
39 num_cpus = cpumask_weight(mask);
40 free_cpumask_var(mask);
41
42 if (ok == 0)
43 return num_cpus;
44 else
45 return -EINVAL;
46 }
47}
48
49int assign_cpus_to_clusters(enum cache_level level,
50 struct scheduling_cluster* clusters[],
51 unsigned int num_clusters,
52 struct cluster_cpu* cpus[],
53 unsigned int num_cpus)
54{
55 cpumask_var_t mask;
56 unsigned int i, free_cluster = 0, low_cpu;
57 int err = 0;
58
59 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
60 return -ENOMEM;
61
62 /* clear cluster pointers */
63 for (i = 0; i < num_cpus; i++) {
64 cpus[i]->id = i;
65 cpus[i]->cluster = NULL;
66 }
67
68 /* initialize clusters */
69 for (i = 0; i < num_clusters; i++) {
70 clusters[i]->id = i;
71 INIT_LIST_HEAD(&clusters[i]->cpus);
72 }
73
74 /* Assign each CPU. Two assumptions are made:
75 * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
76 * 2) All cpus that belong to some cluster are online.
77 */
78 for_each_online_cpu(i) {
79 /* get lowest-id CPU in cluster */
80 if (level != GLOBAL_CLUSTER) {
81 err = get_shared_cpu_map(mask, cpus[i]->id, level);
82 if (err != 0) {
83 /* ugh... wrong cache level? Either caller screwed up
84 * or the CPU topology is weird. */
85 printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
86 level, err);
87 err = -EINVAL;
88 goto out;
89 }
90 low_cpu = cpumask_first(mask);
91 } else
92 low_cpu = 0;
93 if (low_cpu == i) {
94 /* caller must provide an appropriate number of clusters */
95 BUG_ON(free_cluster >= num_clusters);
96
97 /* create new cluster */
98 cpus[i]->cluster = clusters[free_cluster++];
99 } else {
100 /* low_cpu points to the right cluster
101 * Assumption: low_cpu is actually online and was processed earlier. */
102 cpus[i]->cluster = cpus[low_cpu]->cluster;
103 }
104 /* enqueue in cpus list */
105 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
106 printk(KERN_INFO "Assigning CPU%u to cluster %u.\n", i, cpus[i]->cluster->id);
107 }
108out:
109 free_cpumask_var(mask);
110 return err;
111}
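A sketch of how a clustered plugin might use the two helpers above during activation: query the cluster size for the configured cache level, then partition the online CPUs. The pre-allocated pointer arrays are assumed to be provided by the caller:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <litmus/clustered.h>

static int example_setup_clusters(enum cache_level level,
                                  struct scheduling_cluster *clusters[],
                                  struct cluster_cpu *cpus[])
{
        int cluster_size, num_clusters;

        cluster_size = get_cluster_size(level);
        if (cluster_size <= 0)
                return -EINVAL;

        /* one cluster per 'cluster_size' CPUs (remainders ignored here) */
        num_clusters = num_online_cpus() / cluster_size;

        return assign_cpus_to_clusters(level, clusters, num_clusters,
                                       cpus, num_online_cpus());
}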
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
new file mode 100644
index 000000000000..6677a67cc945
--- /dev/null
+++ b/litmus/ctrldev.c
@@ -0,0 +1,150 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/module.h>
6
7#include <litmus/litmus.h>
8
9/* only one page for now, but we might want to add a RO version at some point */
10
11#define CTRL_NAME "litmus/ctrl"
12
13/* allocate t->rt_param.ctrl_page*/
14static int alloc_ctrl_page(struct task_struct *t)
15{
16 int err = 0;
17
18 /* only allocate if the task doesn't have one yet */
19 if (!tsk_rt(t)->ctrl_page) {
20 tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
21 if (!tsk_rt(t)->ctrl_page)
22 err = -ENOMEM;
23 /* will get de-allocated in task teardown */
24 TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
25 tsk_rt(t)->ctrl_page);
26 }
27 return err;
28}
29
30static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
31{
32 int err;
33 unsigned long pfn;
34
35 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
36
37 /* Increase ref count. Is decreased when vma is destroyed. */
38 get_page(ctrl);
39
40 /* compute page frame number */
41 pfn = page_to_pfn(ctrl);
42
43 TRACE_CUR(CTRL_NAME
44 ": mapping %p (pfn:%lx, %lx) to 0x%lx (prot:%lx)\n",
45 tsk_rt(t)->ctrl_page, pfn, page_to_pfn(ctrl), vma->vm_start,
46 vma->vm_page_prot);
47
48 /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise
49 * userspace actually gets a copy-on-write page. */
50 err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED);
51
52 if (err)
53 TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err);
54
55 return err;
56}
57
58static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
59{
60 TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
61 vma->vm_flags, vma->vm_page_prot);
62
63 TRACE_CUR(CTRL_NAME
64 ": %p:%p vma:%p vma->vm_private_data:%p closed by %s/%d\n",
65 (void*) vma->vm_start, (void*) vma->vm_end, vma,
66 vma->vm_private_data, current->comm,
67 current->pid);
68}
69
70static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
71 struct vm_fault* vmf)
72{
73 /* This function should never be called, since
74 * all pages should have been mapped by mmap()
75 * already. */
76 TRACE_CUR("%s flags=0x%x\n", __FUNCTION__, vma->vm_flags);
77
78 /* nope, you only get one page */
79 return VM_FAULT_SIGBUS;
80}
81
82static struct vm_operations_struct litmus_ctrl_vm_ops = {
83 .close = litmus_ctrl_vm_close,
84 .fault = litmus_ctrl_vm_fault,
85};
86
87static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
88{
89 int err = 0;
90
91 /* first make sure mapper knows what he's doing */
92
93 /* you can only get one page */
94 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
95 return -EINVAL;
96
97 /* you can only map the "first" page */
98 if (vma->vm_pgoff != 0)
99 return -EINVAL;
100
101 /* you can't share it with anyone */
102 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
103 return -EINVAL;
104
105 vma->vm_ops = &litmus_ctrl_vm_ops;
106 /* this mapping should not be kept across forks,
107 * and cannot be expanded */
108 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
109
110 err = alloc_ctrl_page(current);
111 if (!err)
112 err = map_ctrl_page(current, vma);
113
114 TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
115 __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
116
117 return err;
118}
119
120static struct file_operations litmus_ctrl_fops = {
121 .owner = THIS_MODULE,
122 .mmap = litmus_ctrl_mmap,
123};
124
125static struct miscdevice litmus_ctrl_dev = {
126 .name = CTRL_NAME,
127 .minor = MISC_DYNAMIC_MINOR,
128 .fops = &litmus_ctrl_fops,
129};
130
131static int __init init_litmus_ctrl_dev(void)
132{
133 int err;
134
135 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
136
137 printk("Initializing LITMUS^RT control device.\n");
138 err = misc_register(&litmus_ctrl_dev);
139 if (err)
140 printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
141 return err;
142}
143
144static void __exit exit_litmus_ctrl_dev(void)
145{
146 misc_deregister(&litmus_ctrl_dev);
147}
148
149module_init(init_litmus_ctrl_dev);
150module_exit(exit_litmus_ctrl_dev);
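From user space the control page is obtained by mapping exactly one page of the misc device, privately and at offset zero, matching the checks in litmus_ctrl_mmap() above. A sketch assuming the device node appears as /dev/litmus/ctrl:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        void *ctrl;
        int fd = open("/dev/litmus/ctrl", O_RDWR);

        if (fd < 0)
                return 1;

        /* exactly one page, offset 0, not shared -- anything else is -EINVAL */
        ctrl = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ | PROT_WRITE,
                    MAP_PRIVATE, fd, 0);
        if (ctrl == MAP_FAILED)
                return 1;

        /* ctrl now aliases this task's struct control_page */
        printf("control page mapped at %p\n", ctrl);
        return 0;
}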
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 000000000000..c7d02ec2e15b
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,143 @@
1/*
2 * litmus/edf_common.c
3 *
4 * Common functions for EDF based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/edf_common.h>
16
17
18#ifdef CONFIG_LITMUS_LOCKING
19int edf_higher_base_prio(struct task_struct* first,
20 struct task_struct* second)
21{
22 struct task_struct *first_task = first;
23 struct task_struct *second_task = second;
24
25 /* check for NULL tasks */
26 if (!first || !second)
27 return first && !second;
28
29 return !is_realtime(second_task) ||
30 earlier_deadline(first_task, second_task) ||
31 (get_deadline(first_task) == get_deadline(second_task) &&
32 first_task->pid < second_task->pid);
33}
34
35int edf_pending_order(struct bheap_node* a, struct bheap_node* b)
36{
37 return edf_higher_base_prio(bheap2task(a), bheap2task(b));
38}
39
40#endif
41
42/* edf_higher_prio - returns true if first has a higher EDF priority
43 * than second. Deadline ties are broken by PID.
44 *
45 * both first and second may be NULL
46 */
47int edf_higher_prio(struct task_struct* first,
48 struct task_struct* second)
49{
50 struct task_struct *first_task = first;
51 struct task_struct *second_task = second;
52
53 /* There is no point in comparing a task to itself. */
54 if (first && first == second) {
55 TRACE_TASK(first,
56 "WARNING: pointless edf priority comparison.\n");
57 return 0;
58 }
59
60
61 /* check for NULL tasks */
62 if (!first || !second)
63 return first && !second;
64
65#ifdef CONFIG_LITMUS_LOCKING
66
67 /* Check for inherited priorities. Change task
68 * used for comparison in such a case.
69 */
70 if (unlikely(first->rt_param.inh_task))
71 first_task = first->rt_param.inh_task;
72 if (unlikely(second->rt_param.inh_task))
73 second_task = second->rt_param.inh_task;
74
75 /* Check for priority boosting. Tie-break by start of boosting.
76 */
77 if (unlikely(is_priority_boosted(first_task))) {
78 /* first_task is boosted, how about second_task? */
79 if (!is_priority_boosted(second_task) ||
80 lt_before(get_boost_start(first_task),
81 get_boost_start(second_task)))
82 return 1;
83 else
84 return 0;
85 } else if (unlikely(is_priority_boosted(second_task)))
86 /* second_task is boosted, first is not */
87 return 0;
88
89#endif
90
91
92 return !is_realtime(second_task) ||
93
94 /* is the deadline of the first task earlier?
95 * Then it has higher priority.
96 */
97 earlier_deadline(first_task, second_task) ||
98
99 /* Do we have a deadline tie?
100 * Then break by PID.
101 */
102 (get_deadline(first_task) == get_deadline(second_task) &&
103 (first_task->pid < second_task->pid ||
104
105 /* If the PIDs are the same then the task with the inherited
106 * priority wins.
107 */
108 (first_task->pid == second_task->pid &&
109 !second->rt_param.inh_task)));
110}
111
112int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
113{
114 return edf_higher_prio(bheap2task(a), bheap2task(b));
115}
116
117void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
118 release_jobs_t release)
119{
120 rt_domain_init(rt, edf_ready_order, resched, release);
121}
122
123/* need_to_preempt - check whether the task t needs to be preempted
124 * call only with irqs disabled and with ready_lock acquired
125 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
126 */
127int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
128{
129 /* we need the read lock for edf_ready_queue */
130 /* no need to preempt if there is nothing pending */
131 if (!__jobs_pending(rt))
132 return 0;
133 /* we need to reschedule if t doesn't exist */
134 if (!t)
135 return 1;
136
137 /* NOTE: We cannot check for non-preemptibility since we
138 * don't know what address space we're currently in.
139 */
140
141 /* make sure to get non-rt stuff out of the way */
142 return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
143}
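A sketch of how an EDF plugin ties these helpers together: bind its rt_domain to the EDF order with edf_domain_init() and consult edf_preemption_needed() when jobs become ready. The two callbacks are placeholders whose exact signatures come from <litmus/rt_domain.h>:

#include <litmus/edf_common.h>
#include <litmus/preempt.h>
#include <litmus/rt_domain.h>

static rt_domain_t example_edf_domain;

static void example_domain_setup(void)
{
        /* example_check_resched / example_release_jobs are placeholder
         * callbacks of type check_resched_needed_t / release_jobs_t */
        edf_domain_init(&example_edf_domain,
                        example_check_resched, example_release_jobs);
}

/* called with IRQs off and the domain's ready_lock held */
static void example_on_release(struct task_struct *scheduled)
{
        if (edf_preemption_needed(&example_edf_domain, scheduled))
                litmus_reschedule_local();
}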
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 000000000000..2c629598e3c9
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,297 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - objects descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops generic_lock_ops;
22
23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */
26 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* OMLP_SEM */
30};
31
32static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
33{
34 if (fdso_ops[type]->create)
35 return fdso_ops[type]->create(obj_ref, type, config);
36 else
37 return -EINVAL;
38}
39
40static void fdso_destroy(obj_type_t type, void* obj)
41{
42 fdso_ops[type]->destroy(type, obj);
43}
44
45static int fdso_open(struct od_table_entry* entry, void* __user config)
46{
47 if (fdso_ops[entry->obj->type]->open)
48 return fdso_ops[entry->obj->type]->open(entry, config);
49 else
50 return 0;
51}
52
53static int fdso_close(struct od_table_entry* entry)
54{
55 if (fdso_ops[entry->obj->type]->close)
56 return fdso_ops[entry->obj->type]->close(entry);
57 else
58 return 0;
59}
60
61/* inode must be locked already */
62static int alloc_inode_obj(struct inode_obj_id** obj_ref,
63 struct inode* inode,
64 obj_type_t type,
65 unsigned int id,
66 void* __user config)
67{
68 struct inode_obj_id* obj;
69 void* raw_obj;
70 int err;
71
72 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
73 if (!obj) {
74 return -ENOMEM;
75 }
76
77 err = fdso_create(&raw_obj, type, config);
78 if (err != 0) {
79 kfree(obj);
80 return err;
81 }
82
83 INIT_LIST_HEAD(&obj->list);
84 atomic_set(&obj->count, 1);
85 obj->type = type;
86 obj->id = id;
87 obj->obj = raw_obj;
88 obj->inode = inode;
89
90 list_add(&obj->list, &inode->i_obj_list);
91 atomic_inc(&inode->i_count);
92
93 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
94
95 *obj_ref = obj;
96 return 0;
97}
98
99/* inode must be locked already */
100static struct inode_obj_id* get_inode_obj(struct inode* inode,
101 obj_type_t type,
102 unsigned int id)
103{
104 struct list_head* pos;
105 struct inode_obj_id* obj = NULL;
106
107 list_for_each(pos, &inode->i_obj_list) {
108 obj = list_entry(pos, struct inode_obj_id, list);
109 if (obj->id == id && obj->type == type) {
110 atomic_inc(&obj->count);
111 return obj;
112 }
113 }
114 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
115 return NULL;
116}
117
118
119static void put_inode_obj(struct inode_obj_id* obj)
120{
121 struct inode* inode;
122 int let_go = 0;
123
124 inode = obj->inode;
125 if (atomic_dec_and_test(&obj->count)) {
126
127 mutex_lock(&inode->i_obj_mutex);
128 /* no new references can be obtained */
129 if (!atomic_read(&obj->count)) {
130 list_del(&obj->list);
131 fdso_destroy(obj->type, obj->obj);
132 kfree(obj);
133 let_go = 1;
134 }
135 mutex_unlock(&inode->i_obj_mutex);
136 if (let_go)
137 iput(inode);
138 }
139}
140
141static struct od_table_entry* get_od_entry(struct task_struct* t)
142{
143 struct od_table_entry* table;
144 int i;
145
146
147 table = t->od_table;
148 if (!table) {
149 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
150 GFP_KERNEL);
151 t->od_table = table;
152 }
153
154 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
155 if (!table[i].used) {
156 table[i].used = 1;
157 return table + i;
158 }
159 return NULL;
160}
161
162static int put_od_entry(struct od_table_entry* od)
163{
164 put_inode_obj(od->obj);
165 od->used = 0;
166 return 0;
167}
168
169void exit_od_table(struct task_struct* t)
170{
171 int i;
172
173 if (t->od_table) {
174 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
175 if (t->od_table[i].used)
176 put_od_entry(t->od_table + i);
177 kfree(t->od_table);
178 t->od_table = NULL;
179 }
180}
181
182static int do_sys_od_open(struct file* file, obj_type_t type, int id,
183 void* __user config)
184{
185 int idx = 0, err = 0;
186 struct inode* inode;
187 struct inode_obj_id* obj = NULL;
188 struct od_table_entry* entry;
189
190 inode = file->f_dentry->d_inode;
191
192 entry = get_od_entry(current);
193 if (!entry)
194 return -ENOMEM;
195
196 mutex_lock(&inode->i_obj_mutex);
197 obj = get_inode_obj(inode, type, id);
198 if (!obj)
199 err = alloc_inode_obj(&obj, inode, type, id, config);
200 if (err != 0) {
201 obj = NULL;
202 idx = err;
203 entry->used = 0;
204 } else {
205 entry->obj = obj;
206 entry->class = fdso_ops[type];
207 idx = entry - current->od_table;
208 }
209
210 mutex_unlock(&inode->i_obj_mutex);
211
212 /* open only if creation succeeded */
213 if (!err)
214 err = fdso_open(entry, config);
215 if (err < 0) {
216 /* The class rejected the open call.
217 * We need to clean up and tell user space.
218 */
219 if (obj)
220 put_od_entry(entry);
221 idx = err;
222 }
223
224 return idx;
225}
226
227
228struct od_table_entry* get_entry_for_od(int od)
229{
230 struct task_struct *t = current;
231
232 if (!t->od_table)
233 return NULL;
234 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
235 return NULL;
236 if (!t->od_table[od].used)
237 return NULL;
238 return t->od_table + od;
239}
240
241
242asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
243{
244 int ret = 0;
245 struct file* file;
246
247 /*
248 1) get file from fd, get inode from file
249 2) lock inode
250 3) try to lookup object
251 4) if not present create and enqueue object, inc inode refcnt
252 5) increment refcnt of object
253 6) alloc od_table_entry, setup ptrs
254 7) unlock inode
255 8) return offset in od_table as OD
256 */
257
258 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
259 ret = -EINVAL;
260 goto out;
261 }
262
263 file = fget(fd);
264 if (!file) {
265 ret = -EBADF;
266 goto out;
267 }
268
269 ret = do_sys_od_open(file, type, obj_id, config);
270
271 fput(file);
272
273out:
274 return ret;
275}
276
277
278asmlinkage long sys_od_close(int od)
279{
280 int ret = -EINVAL;
281 struct task_struct *t = current;
282
283 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
284 return ret;
285
286 if (!t->od_table || !t->od_table[od].used)
287 return ret;
288
289
290 /* give the class a chance to reject the close
291 */
292 ret = fdso_close(t->od_table + od);
293 if (ret == 0)
294 ret = put_od_entry(t->od_table + od);
295
296 return ret;
297}
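The object classes dispatched through fdso_ops[] all follow the same shape; generic_lock_ops in litmus/locking.c (further below) is the real instance. This sketch only shows the callback layout, with placeholder functions:

#include <litmus/fdso.h>

/* placeholder callbacks; signatures mirror the calls in fdso_create(),
 * fdso_open(), fdso_close() and fdso_destroy() above */
static int example_create(void **obj_ref, obj_type_t type, void* __user config);
static int example_open(struct od_table_entry *entry, void* __user config);
static int example_close(struct od_table_entry *entry);
static void example_destroy(obj_type_t type, void *obj);

struct fdso_ops example_ops = {
        .create  = example_create,   /* once per (inode, type, id) */
        .open    = example_open,     /* per successful od_open() */
        .close   = example_close,    /* may veto sys_od_close() */
        .destroy = example_destroy,  /* when the last reference is dropped */
};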
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 000000000000..31fc2db20adf
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,119 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fp_common.h>
16
17/* fp_higher_prio - returns true if first has a higher static priority
18 * than second. Priority ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int fp_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (unlikely(first && first == second)) {
30 TRACE_TASK(first,
31 "WARNING: pointless FP priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40#ifdef CONFIG_LITMUS_LOCKING
41
42 /* Check for inherited priorities. Change task
43 * used for comparison in such a case.
44 */
45 if (unlikely(first->rt_param.inh_task))
46 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task))
48 second_task = second->rt_param.inh_task;
49
50 /* Check for priority boosting. Tie-break by start of boosting.
51 */
52 if (unlikely(is_priority_boosted(first_task))) {
53 /* first_task is boosted, how about second_task? */
54 if (!is_priority_boosted(second_task) ||
55 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task)))
57 return 1;
58 else
59 return 0;
60 } else if (unlikely(is_priority_boosted(second_task)))
61 /* second_task is boosted, first is not */
62 return 0;
63
64#endif
65
66
67 return !is_realtime(second_task) ||
68
69 get_priority(first_task) < get_priority(second_task) ||
70
71 /* Break by PID.
72 */
73 (get_priority(first_task) == get_priority(second_task) &&
74 (first_task->pid < second_task->pid ||
75
76 /* If the PIDs are the same then the task with the inherited
77 * priority wins.
78 */
79 (first_task->pid == second_task->pid &&
80 !second->rt_param.inh_task)));
81}
82
83int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
84{
85 return fp_higher_prio(bheap2task(a), bheap2task(b));
86}
87
88void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
89 release_jobs_t release)
90{
91 rt_domain_init(rt, fp_ready_order, resched, release);
92}
93
94/* need_to_preempt - check whether the task t needs to be preempted
95 */
96int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
97{
98 struct task_struct *pending;
99
100 pending = fp_prio_peek(q);
101
102 if (!pending)
103 return 0;
104 if (!t)
105 return 1;
106
107 /* make sure to get non-rt stuff out of the way */
108 return !is_realtime(t) || fp_higher_prio(pending, t);
109}
110
111void fp_prio_queue_init(struct fp_prio_queue* q)
112{
113 int i;
114
115 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
116 q->bitmask[i] = 0;
117 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
118 bheap_init(&q->queue[i]);
119}
diff --git a/litmus/ft_event.c b/litmus/ft_event.c
new file mode 100644
index 000000000000..399a07becca5
--- /dev/null
+++ b/litmus/ft_event.c
@@ -0,0 +1,43 @@
1#include <linux/types.h>
2
3#include <litmus/feather_trace.h>
4
5#if !defined(CONFIG_ARCH_HAS_FEATHER_TRACE) || defined(CONFIG_DEBUG_RODATA)
6/* provide dummy implementation */
7
8int ft_events[MAX_EVENTS];
9
10int ft_enable_event(unsigned long id)
11{
12 if (id < MAX_EVENTS) {
13 ft_events[id]++;
14 return 1;
15 } else
16 return 0;
17}
18
19int ft_disable_event(unsigned long id)
20{
21 if (id < MAX_EVENTS && ft_events[id]) {
22 ft_events[id]--;
23 return 1;
24 } else
25 return 0;
26}
27
28int ft_disable_all_events(void)
29{
30 int i;
31
32 for (i = 0; i < MAX_EVENTS; i++)
33 ft_events[i] = 0;
34
35 return MAX_EVENTS;
36}
37
38int ft_is_event_enabled(unsigned long id)
39{
40 return id < MAX_EVENTS && ft_events[id];
41}
42
43#endif
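The fallback above implements the same reference-counted semantics as the real Feather-Trace hooks: each enable increments a per-event count, each disable decrements it. A short sketch (the event ID is a placeholder and must be below MAX_EVENTS):

#include <litmus/feather_trace.h>

#define EXAMPLE_EVENT_ID 101    /* placeholder; real IDs come from the trace headers */

static void example_trace_toggle(void)
{
        ft_enable_event(EXAMPLE_EVENT_ID);              /* count 0 -> 1 */
        ft_enable_event(EXAMPLE_EVENT_ID);              /* count 1 -> 2 */

        if (ft_is_event_enabled(EXAMPLE_EVENT_ID))
                ft_disable_event(EXAMPLE_EVENT_ID);     /* count 2 -> 1 */

        ft_disable_all_events();                        /* force all back to 0 */
}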
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
new file mode 100644
index 000000000000..99bc39ffbcef
--- /dev/null
+++ b/litmus/ftdev.c
@@ -0,0 +1,446 @@
1#include <linux/sched.h>
2#include <linux/fs.h>
3#include <linux/slab.h>
4#include <linux/cdev.h>
5#include <asm/uaccess.h>
6#include <linux/module.h>
7#include <linux/device.h>
8
9#include <litmus/litmus.h>
10#include <litmus/feather_trace.h>
11#include <litmus/ftdev.h>
12
13struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
14{
15 struct ft_buffer* buf;
16 size_t total = (size + 1) * count;
17 char* mem;
18 int order = 0, pages = 1;
19
20 buf = kmalloc(sizeof(*buf), GFP_KERNEL);
21 if (!buf)
22 return NULL;
23
24 total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
25 while (pages < total) {
26 order++;
27 pages *= 2;
28 }
29
30 mem = (char*) __get_free_pages(GFP_KERNEL, order);
31 if (!mem) {
32 kfree(buf);
33 return NULL;
34 }
35
36 if (!init_ft_buffer(buf, count, size,
37 mem + (count * size), /* markers at the end */
38 mem)) { /* buffer objects */
39 free_pages((unsigned long) mem, order);
40 kfree(buf);
41 return NULL;
42 }
43 return buf;
44}
45
46void free_ft_buffer(struct ft_buffer* buf)
47{
48 int order = 0, pages = 1;
49 size_t total;
50
51 if (buf) {
52 total = (buf->slot_size + 1) * buf->slot_count;
53 total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
54 while (pages < total) {
55 order++;
56 pages *= 2;
57 }
58 free_pages((unsigned long) buf->buffer_mem, order);
59 kfree(buf);
60 }
61}
62
63struct ftdev_event {
64 int id;
65 struct ftdev_event* next;
66};
67
68static int activate(struct ftdev_event** chain, int id)
69{
70 struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL);
71 if (ev) {
72 printk(KERN_INFO
73 "Enabling feather-trace event %d.\n", (int) id);
74 ft_enable_event(id);
75 ev->id = id;
76 ev->next = *chain;
77 *chain = ev;
78 }
79 return ev ? 0 : -ENOMEM;
80}
81
82static void deactivate(struct ftdev_event** chain, int id)
83{
84 struct ftdev_event **cur = chain;
85 struct ftdev_event *nxt;
86 while (*cur) {
87 if ((*cur)->id == id) {
88 nxt = (*cur)->next;
89 kfree(*cur);
90 *cur = nxt;
91 printk(KERN_INFO
92 "Disabling feather-trace event %d.\n", (int) id);
93 ft_disable_event(id);
94 break;
95 }
96 cur = &(*cur)->next;
97 }
98}
99
100static int ftdev_open(struct inode *in, struct file *filp)
101{
102 struct ftdev* ftdev;
103 struct ftdev_minor* ftdm;
104 unsigned int buf_idx = iminor(in);
105 int err = 0;
106
107 ftdev = container_of(in->i_cdev, struct ftdev, cdev);
108
109 if (buf_idx >= ftdev->minor_cnt) {
110 err = -ENODEV;
111 goto out;
112 }
113 if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx)))
114 goto out;
115
116 ftdm = ftdev->minor + buf_idx;
117 ftdm->ftdev = ftdev;
118 filp->private_data = ftdm;
119
120 if (mutex_lock_interruptible(&ftdm->lock)) {
121 err = -ERESTARTSYS;
122 goto out;
123 }
124
125 if (!ftdm->readers && ftdev->alloc)
126 err = ftdev->alloc(ftdev, buf_idx);
127 if (0 == err)
128 ftdm->readers++;
129
130 mutex_unlock(&ftdm->lock);
131out:
132 return err;
133}
134
135static int ftdev_release(struct inode *in, struct file *filp)
136{
137 struct ftdev* ftdev;
138 struct ftdev_minor* ftdm;
139 unsigned int buf_idx = iminor(in);
140 int err = 0;
141
142 ftdev = container_of(in->i_cdev, struct ftdev, cdev);
143
144 if (buf_idx >= ftdev->minor_cnt) {
145 err = -ENODEV;
146 goto out;
147 }
148 ftdm = ftdev->minor + buf_idx;
149
150 if (mutex_lock_interruptible(&ftdm->lock)) {
151 err = -ERESTARTSYS;
152 goto out;
153 }
154
155 if (ftdm->readers == 1) {
156 while (ftdm->events)
157 deactivate(&ftdm->events, ftdm->events->id);
158
159 /* wait for any pending events to complete */
160 set_current_state(TASK_UNINTERRUPTIBLE);
161 schedule_timeout(HZ);
162
163 printk(KERN_ALERT "Failed trace writes: %u\n",
164 ftdm->buf->failed_writes);
165
166 if (ftdev->free)
167 ftdev->free(ftdev, buf_idx);
168 }
169
170 ftdm->readers--;
171 mutex_unlock(&ftdm->lock);
172out:
173 return err;
174}
175
176/* based on ft_buffer_read
177 * @returns < 0 : page fault
178 * = 0 : no data available
179 * = 1 : one slot copied
180 */
181static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest)
182{
183 unsigned int idx;
184 int err = 0;
185 if (buf->free_count != buf->slot_count) {
186 /* data available */
187 idx = buf->read_idx % buf->slot_count;
188 if (buf->slots[idx] == SLOT_READY) {
189 err = copy_to_user(dest, ((char*) buf->buffer_mem) +
190 idx * buf->slot_size,
191 buf->slot_size);
192 if (err == 0) {
193 /* copy ok */
194 buf->slots[idx] = SLOT_FREE;
195 buf->read_idx++;
196 fetch_and_inc(&buf->free_count);
197 err = 1;
198 }
199 }
200 }
201 return err;
202}
203
204static ssize_t ftdev_read(struct file *filp,
205 char __user *to, size_t len, loff_t *f_pos)
206{
207 /* we ignore f_pos, this is strictly sequential */
208
209 ssize_t err = 0;
210 size_t chunk;
211 int copied;
212 struct ftdev_minor* ftdm = filp->private_data;
213
214 if (mutex_lock_interruptible(&ftdm->lock)) {
215 err = -ERESTARTSYS;
216 goto out;
217 }
218
219
220 chunk = ftdm->buf->slot_size;
221 while (len >= chunk) {
222 copied = ft_buffer_copy_to_user(ftdm->buf, to);
223 if (copied == 1) {
224 len -= chunk;
225 to += chunk;
226 err += chunk;
227 } else if (err == 0 && copied == 0 && ftdm->events) {
228 /* Only wait if there are any events enabled and only
229 * if we haven't copied some data yet. We cannot wait
230 * here with copied data because that data would get
231 * lost if the task is interrupted (e.g., killed).
232 */
233 mutex_unlock(&ftdm->lock);
234 set_current_state(TASK_INTERRUPTIBLE);
235
236 schedule_timeout(50);
237
238 if (signal_pending(current)) {
239 if (err == 0)
240 /* nothing read yet, signal problem */
241 err = -ERESTARTSYS;
242 goto out;
243 }
244 if (mutex_lock_interruptible(&ftdm->lock)) {
245 err = -ERESTARTSYS;
246 goto out;
247 }
248 } else if (copied < 0) {
249 /* page fault */
250 err = copied;
251 break;
252 } else
253 /* nothing left to get, return to user space */
254 break;
255 }
256 mutex_unlock(&ftdm->lock);
257out:
258 return err;
259}
260
261static long ftdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
262{
263 long err = -ENOIOCTLCMD;
264 struct ftdev_minor* ftdm = filp->private_data;
265
266 if (mutex_lock_interruptible(&ftdm->lock)) {
267 err = -ERESTARTSYS;
268 goto out;
269 }
270
271 /* FIXME: check id against list of acceptable events */
272
273 switch (cmd) {
274 case FTDEV_ENABLE_CMD:
275 if (activate(&ftdm->events, arg))
276 err = -ENOMEM;
277 else
278 err = 0;
279 break;
280
281 case FTDEV_DISABLE_CMD:
282 deactivate(&ftdm->events, arg);
283 err = 0;
284 break;
285
286 default:
287 printk(KERN_DEBUG "ftdev: strange ioctl (%u, %lu)\n", cmd, arg);
288 };
289
290 mutex_unlock(&ftdm->lock);
291out:
292 return err;
293}
294
295static ssize_t ftdev_write(struct file *filp, const char __user *from,
296 size_t len, loff_t *f_pos)
297{
298 struct ftdev_minor* ftdm = filp->private_data;
299 ssize_t err = -EINVAL;
300 struct ftdev* ftdev = ftdm->ftdev;
301
302 /* dispatch write to buffer-specific code, if available */
303 if (ftdev->write)
304 err = ftdev->write(ftdm->buf, len, from);
305
306 return err;
307}
308
309struct file_operations ftdev_fops = {
310 .owner = THIS_MODULE,
311 .open = ftdev_open,
312 .release = ftdev_release,
313 .write = ftdev_write,
314 .read = ftdev_read,
315 .unlocked_ioctl = ftdev_ioctl,
316};
317
318int ftdev_init( struct ftdev* ftdev, struct module* owner,
319 const int minor_cnt, const char* name)
320{
321 int i, err;
322
323 BUG_ON(minor_cnt < 1);
324
325 cdev_init(&ftdev->cdev, &ftdev_fops);
326 ftdev->name = name;
327 ftdev->minor_cnt = minor_cnt;
328 ftdev->cdev.owner = owner;
329 ftdev->cdev.ops = &ftdev_fops;
330 ftdev->alloc = NULL;
331 ftdev->free = NULL;
332 ftdev->can_open = NULL;
333 ftdev->write = NULL;
334
335 ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor),
336 GFP_KERNEL);
337 if (!ftdev->minor) {
338 printk(KERN_WARNING "ftdev(%s): Could not allocate memory\n",
339 ftdev->name);
340 err = -ENOMEM;
341 goto err_out;
342 }
343
344 for (i = 0; i < ftdev->minor_cnt; i++) {
345 mutex_init(&ftdev->minor[i].lock);
346 ftdev->minor[i].readers = 0;
347 ftdev->minor[i].buf = NULL;
348 ftdev->minor[i].events = NULL;
349 }
350
351 ftdev->class = class_create(owner, ftdev->name);
352 if (IS_ERR(ftdev->class)) {
353 err = PTR_ERR(ftdev->class);
354 printk(KERN_WARNING "ftdev(%s): "
355 "Could not create device class.\n", ftdev->name);
356 goto err_dealloc;
357 }
358
359 return 0;
360
361err_dealloc:
362 kfree(ftdev->minor);
363err_out:
364 return err;
365}
366
367/*
368 * Destroy minor devices up to, but not including, up_to.
369 */
370static void ftdev_device_destroy(struct ftdev* ftdev, unsigned int up_to)
371{
372 dev_t minor_cntr;
373
374 if (up_to < 1)
375 up_to = (ftdev->minor_cnt < 1) ? 0 : ftdev->minor_cnt;
376
377 for (minor_cntr = 0; minor_cntr < up_to; ++minor_cntr)
378 device_destroy(ftdev->class, MKDEV(ftdev->major, minor_cntr));
379}
380
381void ftdev_exit(struct ftdev* ftdev)
382{
383 printk("ftdev(%s): Exiting\n", ftdev->name);
384 ftdev_device_destroy(ftdev, -1);
385 cdev_del(&ftdev->cdev);
386 unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
387 class_destroy(ftdev->class);
388 kfree(ftdev->minor);
389}
390
391int register_ftdev(struct ftdev* ftdev)
392{
393 struct device **device;
394 dev_t trace_dev_tmp, minor_cntr;
395 int err;
396
397 err = alloc_chrdev_region(&trace_dev_tmp, 0, ftdev->minor_cnt,
398 ftdev->name);
399 if (err) {
400 printk(KERN_WARNING "ftdev(%s): "
401 "Could not allocate char. device region (%d minors)\n",
402 ftdev->name, ftdev->minor_cnt);
403 goto err_out;
404 }
405
406 ftdev->major = MAJOR(trace_dev_tmp);
407
408 err = cdev_add(&ftdev->cdev, trace_dev_tmp, ftdev->minor_cnt);
409 if (err) {
410 printk(KERN_WARNING "ftdev(%s): "
411 "Could not add cdev for major %u with %u minor(s).\n",
412 ftdev->name, ftdev->major, ftdev->minor_cnt);
413 goto err_unregister;
414 }
415
416 /* create the minor device(s) */
417 for (minor_cntr = 0; minor_cntr < ftdev->minor_cnt; ++minor_cntr)
418 {
419 trace_dev_tmp = MKDEV(ftdev->major, minor_cntr);
420 device = &ftdev->minor[minor_cntr].device;
421
422 *device = device_create(ftdev->class, NULL, trace_dev_tmp, NULL,
423 "litmus/%s%d", ftdev->name, minor_cntr);
424 if (IS_ERR(*device)) {
425 err = PTR_ERR(*device);
426 printk(KERN_WARNING "ftdev(%s): "
427 "Could not create device major/minor number "
428 "%u/%u\n", ftdev->name, ftdev->major,
429 minor_cntr);
430 printk(KERN_WARNING "ftdev(%s): "
431 "will attempt deletion of allocated devices.\n",
432 ftdev->name);
433 goto err_minors;
434 }
435 }
436
437 return 0;
438
439err_minors:
440 ftdev_device_destroy(ftdev, minor_cntr);
441 cdev_del(&ftdev->cdev);
442err_unregister:
443 unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
444err_out:
445 return err;
446}
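A sketch of registering a Feather-Trace device with a single minor: initialize the ftdev, install alloc/free callbacks that manage the per-minor ft_buffer, and register it. The callback signatures are inferred from the calls in ftdev_open()/ftdev_release() above; the buffer dimensions and device name are illustrative:

#include <linux/module.h>
#include <litmus/ftdev.h>

static struct ftdev example_ftdev;

static int example_alloc(struct ftdev *dev, unsigned int idx)
{
        /* 4096 slots of 64 bytes each -- purely illustrative sizes */
        dev->minor[idx].buf = alloc_ft_buffer(4096, 64);
        return dev->minor[idx].buf ? 0 : -ENOMEM;
}

static void example_free(struct ftdev *dev, unsigned int idx)
{
        free_ft_buffer(dev->minor[idx].buf);
        dev->minor[idx].buf = NULL;
}

static int __init example_ftdev_module_init(void)
{
        int err = ftdev_init(&example_ftdev, THIS_MODULE, 1, "example_trace");
        if (err)
                return err;
        example_ftdev.alloc = example_alloc;
        example_ftdev.free  = example_free;
        return register_ftdev(&example_ftdev);
}
module_init(example_ftdev_module_init);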
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 000000000000..36e314625d86
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,43 @@
1/* litmus/jobs.c - common job control code
2 */
3
4#include <linux/sched.h>
5
6#include <litmus/litmus.h>
7#include <litmus/jobs.h>
8
9void prepare_for_next_period(struct task_struct *t)
10{
11 BUG_ON(!t);
12 /* prepare next release */
13 t->rt_param.job_params.release = t->rt_param.job_params.deadline;
14 t->rt_param.job_params.deadline += get_rt_period(t);
15 t->rt_param.job_params.exec_time = 0;
16 /* update job sequence number */
17 t->rt_param.job_params.job_no++;
18
19 /* don't confuse Linux */
20 t->rt.time_slice = 1;
21}
22
23void release_at(struct task_struct *t, lt_t start)
24{
25 t->rt_param.job_params.deadline = start;
26 prepare_for_next_period(t);
27 set_rt_flags(t, RT_F_RUNNING);
28}
29
30
31/*
32 * Deactivate current task until the beginning of the next period.
33 */
34long complete_job(void)
35{
36 /* Mark that we do not execute anymore */
37 set_rt_flags(current, RT_F_SLEEP);
38 /* call schedule, this will return when a new job arrives
39 * it also takes care of preparing for the next release
40 */
41 schedule();
42 return 0;
43}
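A sketch of how a plugin typically uses these helpers: release_at() to (re)start a task at a chosen time, and prepare_for_next_period() when a job completes. Locking and IRQ context are plugin-specific and omitted:

#include <litmus/jobs.h>
#include <litmus/litmus.h>
#include <litmus/sched_trace.h>

static void example_start_task(struct task_struct *t, lt_t delay)
{
        /* first job is released 'delay' ns from now; deadline and
         * job_no are derived by prepare_for_next_period() inside */
        release_at(t, litmus_clock() + delay);
}

static void example_job_completion(struct task_struct *t)
{
        sched_trace_task_completion(t, 0);      /* 0 = not forced */
        /* advance release, deadline, and job_no to the next period */
        prepare_for_next_period(t);
}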
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..b22f84a02010
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,555 @@
1/*
2 * litmus.c -- Implementation of the LITMUS syscalls,
3 * the LITMUS initialization code,
4 * and the procfs interface.
5 */
6#include <asm/uaccess.h>
7#include <linux/uaccess.h>
8#include <linux/sysrq.h>
9#include <linux/sched.h>
10#include <linux/module.h>
11#include <linux/slab.h>
12
13#include <litmus/litmus.h>
14#include <litmus/bheap.h>
15#include <litmus/trace.h>
16#include <litmus/rt_domain.h>
17#include <litmus/litmus_proc.h>
18#include <litmus/sched_trace.h>
19
20/* Number of RT tasks that exist in the system */
21atomic_t rt_task_count = ATOMIC_INIT(0);
22static DEFINE_RAW_SPINLOCK(task_transition_lock);
23/* synchronize plugin switching */
24atomic_t cannot_use_plugin = ATOMIC_INIT(0);
25
26/* Give log messages sequential IDs. */
27atomic_t __log_seq_no = ATOMIC_INIT(0);
28
29#ifdef CONFIG_RELEASE_MASTER
30/* current master CPU for handling timer IRQs */
31atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
32#endif
33
34static struct kmem_cache * bheap_node_cache;
35extern struct kmem_cache * release_heap_cache;
36
37struct bheap_node* bheap_node_alloc(int gfp_flags)
38{
39 return kmem_cache_alloc(bheap_node_cache, gfp_flags);
40}
41
42void bheap_node_free(struct bheap_node* hn)
43{
44 kmem_cache_free(bheap_node_cache, hn);
45}
46
47struct release_heap* release_heap_alloc(int gfp_flags);
48void release_heap_free(struct release_heap* rh);
49
50/*
51 * sys_set_task_rt_param
52 * @pid: Pid of the task which scheduling parameters must be changed
53 * @param: New real-time extension parameters such as the execution cost and
54 * period
55 * Syscall for manipulating task rt extension params
56 * Returns EFAULT if param is NULL.
57 * ESRCH if pid does not correspond
58 * to a valid task.
59 * EINVAL if either period or execution cost is <=0
60 * EPERM if pid is a real-time task
61 * 0 if success
62 *
63 * Only non-real-time tasks may be configured with this system call
64 * to avoid races with the scheduler. In practice, this means that a
65 * task's parameters must be set _before_ calling sys_prepare_rt_task()
66 *
67 * find_task_by_vpid() assumes that we are in the same namespace of the
68 * target.
69 */
70asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
71{
72 struct rt_task tp;
73 struct task_struct *target;
74 int retval = -EINVAL;
75
76 printk("Setting up rt task parameters for process %d.\n", pid);
77
78 if (pid < 0 || param == 0) {
79 goto out;
80 }
81 if (copy_from_user(&tp, param, sizeof(tp))) {
82 retval = -EFAULT;
83 goto out;
84 }
85
86 /* Task search and manipulation must be protected */
87 read_lock_irq(&tasklist_lock);
88 if (!(target = find_task_by_vpid(pid))) {
89 retval = -ESRCH;
90 goto out_unlock;
91 }
92
93 if (is_realtime(target)) {
94 /* The task is already a real-time task.
95 * We cannot allow parameter changes at this point.
96 */
97 retval = -EBUSY;
98 goto out_unlock;
99 }
100
101 if (tp.exec_cost <= 0)
102 goto out_unlock;
103 if (tp.period <= 0)
104 goto out_unlock;
105 if (!cpu_online(tp.cpu))
106 goto out_unlock;
107 if (tp.period < tp.exec_cost)
108 {
109 printk(KERN_INFO "litmus: real-time task %d rejected "
110 "because wcet > period\n", pid);
111 goto out_unlock;
112 }
113 if (tp.budget_policy != NO_ENFORCEMENT &&
114 tp.budget_policy != QUANTUM_ENFORCEMENT &&
115 tp.budget_policy != PRECISE_ENFORCEMENT)
116 {
117 printk(KERN_INFO "litmus: real-time task %d rejected "
118 "because unsupported budget enforcement policy "
119 "specified (%d)\n",
120 pid, tp.budget_policy);
121 goto out_unlock;
122 }
123
124 if (tp.priority >= LITMUS_MAX_PRIORITY) {
125 printk(KERN_INFO "litmus: invalid priority (%u); "
126 "task %s/%d rejected\n",
127 tp.priority, target->comm, target->pid);
128 goto out_unlock;
129 }
130
131 target->rt_param.task_params = tp;
132
133 retval = 0;
134 out_unlock:
135 read_unlock_irq(&tasklist_lock);
136 out:
137 return retval;
138}
139
140/*
141 * Getter of task's RT params
142 * returns EINVAL if param or pid is NULL
143 * returns ESRCH if pid does not correspond to a valid task
144 * returns EFAULT if copying of parameters has failed.
145 *
146 * find_task_by_vpid() assumes that we are in the same namespace of the
147 * target.
148 */
149asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
150{
151 int retval = -EINVAL;
152 struct task_struct *source;
153 struct rt_task lp;
154 if (param == 0 || pid < 0)
155 goto out;
156 read_lock(&tasklist_lock);
157 if (!(source = find_task_by_vpid(pid))) {
158 retval = -ESRCH;
159 goto out_unlock;
160 }
161 lp = source->rt_param.task_params;
162 read_unlock(&tasklist_lock);
163 /* Do copying outside the lock */
164 retval =
165 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
166 return retval;
167 out_unlock:
168 read_unlock(&tasklist_lock);
169 out:
170 return retval;
171
172}
173
174/*
175 * This is the crucial function for periodic task implementation,
176 * It checks if a task is periodic, checks if such kind of sleep
177 * is permitted and calls plugin-specific sleep, which puts the
178 * task into a wait array.
179 * returns 0 on successful wakeup
180 * returns EPERM if current conditions do not permit such sleep
181 * returns EINVAL if current task is not able to go to sleep
182 */
183asmlinkage long sys_complete_job(void)
184{
185 int retval = -EPERM;
186 if (!is_realtime(current)) {
187 retval = -EINVAL;
188 goto out;
189 }
190 /* Task with negative or zero period cannot sleep */
191 if (get_rt_period(current) <= 0) {
192 retval = -EINVAL;
193 goto out;
194 }
195 /* The plugin has to put the task into an
196 * appropriate queue and call schedule
197 */
198 retval = litmus->complete_job();
199 out:
200 return retval;
201}
202
203/* This is an "improved" version of sys_complete_job that
204 * addresses the problem of unintentionally missing a job after
205 * an overrun.
206 *
207 * returns 0 on successful wakeup
208 * returns EPERM if current conditions do not permit such sleep
209 * returns EINVAL if current task is not able to go to sleep
210 */
211asmlinkage long sys_wait_for_job_release(unsigned int job)
212{
213 int retval = -EPERM;
214 if (!is_realtime(current)) {
215 retval = -EINVAL;
216 goto out;
217 }
218
219 /* Task with negative or zero period cannot sleep */
220 if (get_rt_period(current) <= 0) {
221 retval = -EINVAL;
222 goto out;
223 }
224
225 retval = 0;
226
227 /* first wait until we have "reached" the desired job
228 *
229 * This implementation has at least two problems:
230 *
231 * 1) It doesn't gracefully handle the wrap around of
232 * job_no. Since LITMUS is a prototype, this is not much
233 * of a problem right now.
234 *
235 * 2) It is theoretically racy if a job release occurs
236 * between checking job_no and calling sleep_next_period().
237 * A proper solution would require adding another callback
238 * in the plugin structure and testing the condition with
239 * interrupts disabled.
240 *
241 * FIXME: At least problem 2 should be taken care of eventually.
242 */
243 while (!retval && job > current->rt_param.job_params.job_no)
244 /* If the last job overran then job <= job_no and we
245 * don't send the task to sleep.
246 */
247 retval = litmus->complete_job();
248 out:
249 return retval;
250}
251
252/* This is a helper syscall to query the current job sequence number.
253 *
254 * returns 0 on successful query
255 * returns EPERM if task is not a real-time task.
256 * returns EFAULT if &job is not a valid pointer.
257 */
258asmlinkage long sys_query_job_no(unsigned int __user *job)
259{
260 int retval = -EPERM;
261 if (is_realtime(current))
262 retval = put_user(current->rt_param.job_params.job_no, job);
263
264 return retval;
265}
266
267/* sys_null_call() is only used for determining raw system call
268 * overheads (kernel entry, kernel exit). It has no useful side effects.
269 * If ts is non-NULL, then the current Feather-Trace time is recorded.
270 */
271asmlinkage long sys_null_call(cycles_t __user *ts)
272{
273 long ret = 0;
274 cycles_t now;
275
276 if (ts) {
277 now = get_cycles();
278 ret = put_user(now, ts);
279 }
280
281 return ret;
282}
283
284/* p is a real-time task. Re-init its state as a best-effort task. */
285static void reinit_litmus_state(struct task_struct* p, int restore)
286{
287 struct rt_task user_config = {};
288 void* ctrl_page = NULL;
289
290 if (restore) {
291 /* Save the user-space provided configuration data
292 * and the allocated control page. */
293 user_config = p->rt_param.task_params;
294 ctrl_page = p->rt_param.ctrl_page;
295 }
296
297 /* We probably should not be inheriting any task's priority
298 * at this point in time.
299 */
300 WARN_ON(p->rt_param.inh_task);
301
302 /* Cleanup everything else. */
303 memset(&p->rt_param, 0, sizeof(p->rt_param));
304
305 /* Restore preserved fields. */
306 if (restore) {
307 p->rt_param.task_params = user_config;
308 p->rt_param.ctrl_page = ctrl_page;
309 }
310}
311
312long litmus_admit_task(struct task_struct* tsk)
313{
314 long retval = 0;
315 unsigned long flags;
316
317 BUG_ON(is_realtime(tsk));
318
319 if (get_rt_period(tsk) == 0 ||
320 get_exec_cost(tsk) > get_rt_period(tsk)) {
321 TRACE_TASK(tsk, "litmus admit: invalid task parameters "
322 "(%lu, %lu)\n",
323 get_exec_cost(tsk), get_rt_period(tsk));
324 retval = -EINVAL;
325 goto out;
326 }
327
328 if (!cpu_online(get_partition(tsk))) {
329 TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
330 get_partition(tsk));
331 retval = -EINVAL;
332 goto out;
333 }
334
335 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
336
337 /* avoid scheduler plugin changing underneath us */
338 raw_spin_lock_irqsave(&task_transition_lock, flags);
339
340 /* allocate heap node for this task */
341 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
342 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
343
344 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
345 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
346
347 bheap_node_free(tsk_rt(tsk)->heap_node);
348 release_heap_free(tsk_rt(tsk)->rel_heap);
349
350 retval = -ENOMEM;
351 goto out_unlock;
352 } else {
353 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
354 }
355
356 retval = litmus->admit_task(tsk);
357
358 if (!retval) {
359 sched_trace_task_name(tsk);
360 sched_trace_task_param(tsk);
361 atomic_inc(&rt_task_count);
362 }
363
364out_unlock:
365 raw_spin_unlock_irqrestore(&task_transition_lock, flags);
366out:
367 return retval;
368}
369
370void litmus_exit_task(struct task_struct* tsk)
371{
372 if (is_realtime(tsk)) {
373 sched_trace_task_completion(tsk, 1);
374
375 litmus->task_exit(tsk);
376
377 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
378 bheap_node_free(tsk_rt(tsk)->heap_node);
379 release_heap_free(tsk_rt(tsk)->rel_heap);
380
381 atomic_dec(&rt_task_count);
382 reinit_litmus_state(tsk, 1);
383 }
384}
385
386/* IPI callback to synchronize plugin switching */
387static void synch_on_plugin_switch(void* info)
388{
389 atomic_inc(&cannot_use_plugin);
390 while (atomic_read(&cannot_use_plugin) > 0)
391 cpu_relax();
392}
393
394/* Switching a plugin in use is tricky.
395 * We must make sure that no real-time tasks exist
396 * (and that none is created in parallel) and that the plugin is not
397 * currently in use on any processor (in theory).
398 */
399int switch_sched_plugin(struct sched_plugin* plugin)
400{
401 unsigned long flags;
402 int ret = 0;
403
404 BUG_ON(!plugin);
405
406 /* forbid other cpus to use the plugin */
407 atomic_set(&cannot_use_plugin, 1);
408 /* send IPI to force other CPUs to synch with us */
409 smp_call_function(synch_on_plugin_switch, NULL, 0);
410
411 /* wait until all other CPUs have started synch */
412 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
413 cpu_relax();
414
415 /* stop task transitions */
416 raw_spin_lock_irqsave(&task_transition_lock, flags);
417
418 /* don't switch if there are active real-time tasks */
419 if (atomic_read(&rt_task_count) == 0) {
420 ret = litmus->deactivate_plugin();
421 if (0 != ret)
422 goto out;
423 ret = plugin->activate_plugin();
424 if (0 != ret) {
425 printk(KERN_INFO "Can't activate %s (%d).\n",
426 plugin->plugin_name, ret);
427 plugin = &linux_sched_plugin;
428 }
429 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
430 litmus = plugin;
431 } else
432 ret = -EBUSY;
433out:
434 raw_spin_unlock_irqrestore(&task_transition_lock, flags);
435 atomic_set(&cannot_use_plugin, 0);
436 return ret;
437}
438
439/* Called upon fork.
440 * p is the newly forked task.
441 */
442void litmus_fork(struct task_struct* p)
443{
444 if (is_realtime(p)) {
445 /* clean out any litmus related state, don't preserve anything */
446 reinit_litmus_state(p, 0);
447 /* Don't let the child be a real-time task. */
448 p->sched_reset_on_fork = 1;
449 } else
450 /* non-rt tasks might have ctrl_page set */
451 tsk_rt(p)->ctrl_page = NULL;
452
453 /* od tables are never inherited across a fork */
454 p->od_table = NULL;
455}
456
457/* Called upon execve().
458 * current is doing the exec.
459 * Don't let address space specific stuff leak.
460 */
461void litmus_exec(void)
462{
463 struct task_struct* p = current;
464
465 if (is_realtime(p)) {
466 WARN_ON(p->rt_param.inh_task);
467 if (tsk_rt(p)->ctrl_page) {
468 free_page((unsigned long) tsk_rt(p)->ctrl_page);
469 tsk_rt(p)->ctrl_page = NULL;
470 }
471 }
472}
473
474void exit_litmus(struct task_struct *dead_tsk)
475{
476 /* We also allow non-RT tasks to
477 * allocate control pages to allow
478 * measurements with non-RT tasks.
479 * So check if we need to free the page
480 * in any case.
481 */
482 if (tsk_rt(dead_tsk)->ctrl_page) {
483 TRACE_TASK(dead_tsk,
484 "freeing ctrl_page %p\n",
485 tsk_rt(dead_tsk)->ctrl_page);
486 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
487 }
488
489 /* main cleanup only for RT tasks */
490 if (is_realtime(dead_tsk))
491 litmus_exit_task(dead_tsk);
492}
493
494
495#ifdef CONFIG_MAGIC_SYSRQ
496int sys_kill(int pid, int sig);
497
498static void sysrq_handle_kill_rt_tasks(int key)
499{
500 struct task_struct *t;
501 read_lock(&tasklist_lock);
502 for_each_process(t) {
503 if (is_realtime(t)) {
504 sys_kill(t->pid, SIGKILL);
505 }
506 }
507 read_unlock(&tasklist_lock);
508}
509
510static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
511 .handler = sysrq_handle_kill_rt_tasks,
512 .help_msg = "quit-rt-tasks(X)",
513 .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
514};
515#endif
516
517extern struct sched_plugin linux_sched_plugin;
518
519static int __init _init_litmus(void)
520{
521 /* Common initializers,
522 * mode change lock is used to enforce single mode change
523 * operation.
524 */
525 printk("Starting LITMUS^RT kernel\n");
526
527 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
528
529 register_sched_plugin(&linux_sched_plugin);
530
531 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
532 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
533
534#ifdef CONFIG_MAGIC_SYSRQ
535 /* offer some debugging help */
536 if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
537 printk("Registered kill rt tasks magic sysrq.\n");
538 else
539 printk("Could not register kill rt tasks magic sysrq.\n");
540#endif
541
542 init_litmus_proc();
543
544 return 0;
545}
546
547static void _exit_litmus(void)
548{
549 exit_litmus_proc();
550 kmem_cache_destroy(bheap_node_cache);
551 kmem_cache_destroy(release_heap_cache);
552}
553
554module_init(_init_litmus);
555module_exit(_exit_litmus);
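For reference, the parameter block validated by sys_set_rt_task_param() above looks as follows from user space. The field names are taken from the checks in that function, the values are illustrative, and the block is normally handed to the kernel through liblitmus's set_rt_task_param() wrapper, which is not part of this patch:

/* user-space fragment, not a complete program; times are in nanoseconds */
struct rt_task params = {
        .exec_cost     = 10000000ULL,           /* 10 ms worst-case execution */
        .period        = 100000000ULL,          /* 100 ms; must be >= exec_cost */
        .cpu           = 0,                     /* must be online */
        .budget_policy = PRECISE_ENFORCEMENT,   /* or NO_/QUANTUM_ENFORCEMENT */
        .priority      = 0,                     /* < LITMUS_MAX_PRIORITY */
};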
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
new file mode 100644
index 000000000000..4bf725a36c9c
--- /dev/null
+++ b/litmus/litmus_proc.c
@@ -0,0 +1,347 @@
1/*
2 * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
3 */
4
5#include <linux/sched.h>
6#include <linux/uaccess.h>
7
8#include <litmus/litmus.h>
9#include <litmus/litmus_proc.h>
10
11#include <litmus/clustered.h>
12
13/* in litmus/litmus.c */
14extern atomic_t rt_task_count;
15
16static struct proc_dir_entry *litmus_dir = NULL,
17 *curr_file = NULL,
18 *stat_file = NULL,
19 *plugs_dir = NULL,
20#ifdef CONFIG_RELEASE_MASTER
21 *release_master_file = NULL,
22#endif
23 *plugs_file = NULL;
24
25/* in litmus/sync.c */
26int count_tasks_waiting_for_release(void);
27
28static int proc_read_stats(char *page, char **start,
29 off_t off, int count,
30 int *eof, void *data)
31{
32 int len;
33
34 len = snprintf(page, PAGE_SIZE,
35 "real-time tasks = %d\n"
36 "ready for release = %d\n",
37 atomic_read(&rt_task_count),
38 count_tasks_waiting_for_release());
39 return len;
40}
41
42static int proc_read_plugins(char *page, char **start,
43 off_t off, int count,
44 int *eof, void *data)
45{
46 int len;
47
48 len = print_sched_plugins(page, PAGE_SIZE);
49 return len;
50}
51
52static int proc_read_curr(char *page, char **start,
53 off_t off, int count,
54 int *eof, void *data)
55{
56 int len;
57
58 len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name);
59 return len;
60}
61
62/* in litmus/litmus.c */
63int switch_sched_plugin(struct sched_plugin*);
64
65static int proc_write_curr(struct file *file,
66 const char *buffer,
67 unsigned long count,
68 void *data)
69{
70 int len, ret;
71 char name[65];
72 struct sched_plugin* found;
73
74 len = copy_and_chomp(name, sizeof(name), buffer, count);
75 if (len < 0)
76 return len;
77
78 found = find_sched_plugin(name);
79
80 if (found) {
81 ret = switch_sched_plugin(found);
82 if (ret != 0)
83 printk(KERN_INFO "Could not switch plugin: %d\n", ret);
84 } else
85 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
86
87 return len;
88}
89
90#ifdef CONFIG_RELEASE_MASTER
91static int proc_read_release_master(char *page, char **start,
92 off_t off, int count,
93 int *eof, void *data)
94{
95 int len, master;
96 master = atomic_read(&release_master_cpu);
97 if (master == NO_CPU)
98 len = snprintf(page, PAGE_SIZE, "NO_CPU\n");
99 else
100 len = snprintf(page, PAGE_SIZE, "%d\n", master);
101 return len;
102}
103
104static int proc_write_release_master(struct file *file,
105 const char *buffer,
106 unsigned long count,
107 void *data)
108{
109 int cpu, err, len, online = 0;
110 char msg[64];
111
112 len = copy_and_chomp(msg, sizeof(msg), buffer, count);
113
114 if (len < 0)
115 return len;
116
117 if (strcmp(msg, "NO_CPU") == 0)
118 atomic_set(&release_master_cpu, NO_CPU);
119 else {
120 err = sscanf(msg, "%d", &cpu);
121 if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
122 atomic_set(&release_master_cpu, cpu);
123 } else {
124 TRACE("invalid release master: '%s' "
125 "(err:%d cpu:%d online:%d)\n",
126 msg, err, cpu, online);
127 len = -EINVAL;
128 }
129 }
130 return len;
131}
132#endif
133
134int __init init_litmus_proc(void)
135{
136 litmus_dir = proc_mkdir("litmus", NULL);
137 if (!litmus_dir) {
138 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
139 return -ENOMEM;
140 }
141
142 curr_file = create_proc_entry("active_plugin",
143 0644, litmus_dir);
144 if (!curr_file) {
145 printk(KERN_ERR "Could not allocate active_plugin "
146 "procfs entry.\n");
147 return -ENOMEM;
148 }
149 curr_file->read_proc = proc_read_curr;
150 curr_file->write_proc = proc_write_curr;
151
152#ifdef CONFIG_RELEASE_MASTER
153 release_master_file = create_proc_entry("release_master",
154 0644, litmus_dir);
155 if (!release_master_file) {
156 printk(KERN_ERR "Could not allocate release_master "
157 "procfs entry.\n");
158 return -ENOMEM;
159 }
160 release_master_file->read_proc = proc_read_release_master;
161 release_master_file->write_proc = proc_write_release_master;
162#endif
163
164 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
165 proc_read_stats, NULL);
166
167 plugs_dir = proc_mkdir("plugins", litmus_dir);
168 if (!plugs_dir){
169 printk(KERN_ERR "Could not allocate plugins directory "
170 "procfs entry.\n");
171 return -ENOMEM;
172 }
173
174 plugs_file = create_proc_read_entry("loaded", 0444, plugs_dir,
175 proc_read_plugins, NULL);
176
177 return 0;
178}
179
180void exit_litmus_proc(void)
181{
182 if (plugs_file)
183 remove_proc_entry("loaded", plugs_dir);
184 if (plugs_dir)
185 remove_proc_entry("plugins", litmus_dir);
186 if (stat_file)
187 remove_proc_entry("stats", litmus_dir);
188 if (curr_file)
189 remove_proc_entry("active_plugin", litmus_dir);
190#ifdef CONFIG_RELEASE_MASTER
191 if (release_master_file)
192 remove_proc_entry("release_master", litmus_dir);
193#endif
194 if (litmus_dir)
195 remove_proc_entry("litmus", NULL);
196}
197
198long make_plugin_proc_dir(struct sched_plugin* plugin,
199 struct proc_dir_entry** pde_in)
200{
201 struct proc_dir_entry *pde_new = NULL;
202 long rv;
203
204 if (!plugin || !plugin->plugin_name){
205 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
206 __func__);
207 rv = -EINVAL;
208 goto out_no_pde;
209 }
210
211 if (!plugs_dir){
212 printk(KERN_ERR "Could not make plugin sub-directory, because "
213 "/proc/litmus/plugins does not exist.\n");
214 rv = -ENOENT;
215 goto out_no_pde;
216 }
217
218 pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
219 if (!pde_new){
220 printk(KERN_ERR "Could not make plugin sub-directory: "
221 "out of memory?\n");
222 rv = -ENOMEM;
223 goto out_no_pde;
224 }
225
226 rv = 0;
227 *pde_in = pde_new;
228 goto out_ok;
229
230out_no_pde:
231 *pde_in = NULL;
232out_ok:
233 return rv;
234}
235
236void remove_plugin_proc_dir(struct sched_plugin* plugin)
237{
238 if (!plugin || !plugin->plugin_name){
239 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
240 __func__);
241 return;
242 }
243 remove_proc_entry(plugin->plugin_name, plugs_dir);
244}
245
246
247
248/* misc. I/O helper functions */
249
250int copy_and_chomp(char *kbuf, unsigned long ksize,
251 __user const char* ubuf, unsigned long ulength)
252{
253 /* caller must provide buffer space */
254 BUG_ON(!ksize);
255
256 ksize--; /* leave space for null byte */
257
258 if (ksize > ulength)
259 ksize = ulength;
260
261 if(copy_from_user(kbuf, ubuf, ksize))
262 return -EFAULT;
263
264 kbuf[ksize] = '\0';
265
266 /* chomp kbuf */
267 if (ksize > 0 && kbuf[ksize - 1] == '\n')
268 kbuf[ksize - 1] = '\0';
269
270 return ksize;
271}
272
273/* helper functions for clustered plugins */
274static const char* cache_level_names[] = {
275 "ALL",
276 "L1",
277 "L2",
278 "L3",
279};
280
281int parse_cache_level(const char *cache_name, enum cache_level *level)
282{
283 int err = -EINVAL;
284 int i;
285 /* do a quick and dirty comparison to find the cluster size */
286 for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
287 if (!strcmp(cache_name, cache_level_names[i])) {
288 *level = (enum cache_level) i;
289 err = 0;
290 break;
291 }
292 return err;
293}
294
295const char* cache_level_name(enum cache_level level)
296{
297 int idx = level;
298
299 if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
300 return cache_level_names[idx];
301 else
302 return "INVALID";
303}
304
305
306/* proc file interface to configure the cluster size */
307static int proc_read_cluster_size(char *page, char **start,
308 off_t off, int count,
309 int *eof, void *data)
310{
311 return snprintf(page, PAGE_SIZE, "%s\n",
312			cache_level_name(*((enum cache_level*) data)));
313}
314
315static int proc_write_cluster_size(struct file *file,
316 const char *buffer,
317 unsigned long count,
318 void *data)
319{
320 int len;
321 char cache_name[8];
322
323 len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
324
325 if (len > 0 && parse_cache_level(cache_name, (enum cache_level*) data))
326 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
327
328 return len;
329}
330
331struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
332 enum cache_level* level)
333{
334 struct proc_dir_entry* cluster_file;
335
336 cluster_file = create_proc_entry("cluster", 0644, parent);
337 if (!cluster_file) {
338 printk(KERN_ERR "Could not allocate %s/cluster "
339 "procfs entry.\n", parent->name);
340 } else {
341 cluster_file->read_proc = proc_read_cluster_size;
342 cluster_file->write_proc = proc_write_cluster_size;
343 cluster_file->data = level;
344 }
345 return cluster_file;
346}
347
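/* A minimal sketch (not a definitive implementation) of how a clustered
 * plugin could combine the helpers above during activation. The example_*
 * names and the activate/deactivate hooks are hypothetical; error handling
 * is reduced to propagating return values. The resulting file would show up
 * as /proc/litmus/plugins/<plugin>/cluster. */
static struct proc_dir_entry *example_dir;
static struct proc_dir_entry *example_cluster_file;
static enum cache_level example_cluster_config = GLOBAL_CLUSTER;

static long example_activate_plugin(struct sched_plugin *plugin)
{
	long err = make_plugin_proc_dir(plugin, &example_dir);
	if (!err) {
		example_cluster_file = create_cluster_file(example_dir,
							   &example_cluster_config);
		if (!example_cluster_file)
			err = -ENOMEM;
	}
	return err;
}

static void example_deactivate_plugin(struct sched_plugin *plugin)
{
	if (example_cluster_file)
		remove_proc_entry("cluster", example_dir);
	remove_plugin_proc_dir(plugin);
}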
diff --git a/litmus/locking.c b/litmus/locking.c
new file mode 100644
index 000000000000..84a1d8309699
--- /dev/null
+++ b/litmus/locking.c
@@ -0,0 +1,186 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
3#include <litmus/fdso.h>
4
5#ifdef CONFIG_LITMUS_LOCKING
6
7#include <litmus/sched_plugin.h>
8#include <litmus/trace.h>
9#include <litmus/wait.h>
10
11static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
12static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
13static int close_generic_lock(struct od_table_entry* entry);
14static void destroy_generic_lock(obj_type_t type, void* sem);
15
16struct fdso_ops generic_lock_ops = {
17 .create = create_generic_lock,
18 .open = open_generic_lock,
19 .close = close_generic_lock,
20 .destroy = destroy_generic_lock
21};
22
23static inline bool is_lock(struct od_table_entry* entry)
24{
25 return entry->class == &generic_lock_ops;
26}
27
28static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
29{
30 BUG_ON(!is_lock(entry));
31 return (struct litmus_lock*) entry->obj->obj;
32}
33
34static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
35{
36 struct litmus_lock* lock;
37 int err;
38
39 err = litmus->allocate_lock(&lock, type, arg);
40 if (err == 0)
41 *obj_ref = lock;
42 return err;
43}
44
45static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
46{
47 struct litmus_lock* lock = get_lock(entry);
48 if (lock->ops->open)
49 return lock->ops->open(lock, arg);
50 else
51 return 0; /* default: any task can open it */
52}
53
54static int close_generic_lock(struct od_table_entry* entry)
55{
56 struct litmus_lock* lock = get_lock(entry);
57 if (lock->ops->close)
58 return lock->ops->close(lock);
59 else
60 return 0; /* default: closing succeeds */
61}
62
63static void destroy_generic_lock(obj_type_t type, void* obj)
64{
65 struct litmus_lock* lock = (struct litmus_lock*) obj;
66 lock->ops->deallocate(lock);
67}
68
69asmlinkage long sys_litmus_lock(int lock_od)
70{
71 long err = -EINVAL;
72 struct od_table_entry* entry;
73 struct litmus_lock* l;
74
75 TS_SYSCALL_IN_START;
76
77 TS_SYSCALL_IN_END;
78
79 TS_LOCK_START;
80
81 entry = get_entry_for_od(lock_od);
82 if (entry && is_lock(entry)) {
83 l = get_lock(entry);
84 TRACE_CUR("attempts to lock 0x%p\n", l);
85 err = l->ops->lock(l);
86 }
87
88	/* Note: task may have been suspended or preempted in between! Take
89 * this into account when computing overheads. */
90 TS_LOCK_END;
91
92 TS_SYSCALL_OUT_START;
93
94 return err;
95}
96
97asmlinkage long sys_litmus_unlock(int lock_od)
98{
99 long err = -EINVAL;
100 struct od_table_entry* entry;
101 struct litmus_lock* l;
102
103 TS_SYSCALL_IN_START;
104
105 TS_SYSCALL_IN_END;
106
107 TS_UNLOCK_START;
108
109 entry = get_entry_for_od(lock_od);
110 if (entry && is_lock(entry)) {
111 l = get_lock(entry);
112 TRACE_CUR("attempts to unlock 0x%p\n", l);
113 err = l->ops->unlock(l);
114 }
115
116	/* Note: task may have been preempted in between! Take this into
117 * account when computing overheads. */
118 TS_UNLOCK_END;
119
120 TS_SYSCALL_OUT_START;
121
122 return err;
123}
124
125struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
126{
127 wait_queue_t* q;
128 struct task_struct* t = NULL;
129
130 if (waitqueue_active(wq)) {
131 q = list_entry(wq->task_list.next,
132 wait_queue_t, task_list);
133 t = (struct task_struct*) q->private;
134 __remove_wait_queue(wq, q);
135 }
136 return(t);
137}
138
139unsigned int __add_wait_queue_prio_exclusive(
140 wait_queue_head_t* head,
141 prio_wait_queue_t *new)
142{
143 struct list_head *pos;
144 unsigned int passed = 0;
145
146 new->wq.flags |= WQ_FLAG_EXCLUSIVE;
147
148 /* find a spot where the new entry is less than the next */
149 list_for_each(pos, &head->task_list) {
150 prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
151 wq.task_list);
152
153 if (unlikely(lt_before(new->priority, queued->priority) ||
154 (new->priority == queued->priority &&
155 new->tie_breaker < queued->tie_breaker))) {
156 /* pos is not less than new, thus insert here */
157 __list_add(&new->wq.task_list, pos->prev, pos);
158 goto out;
159 }
160 passed++;
161 }
162
163	/* If we get to this point, either the list is empty or every queued
164	 * element is less than new.
165 * Let's add new to the end. */
166 list_add_tail(&new->wq.task_list, &head->task_list);
167out:
168 return passed;
169}
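/* Illustrative caller-side sketch: enqueue the current task as a
 * priority-ordered, exclusive waiter. The prio_wait_queue_t field names
 * (wq, priority, tie_breaker) are taken from the function above; the
 * locking convention (the wait-queue head's spinlock, IRQs off) and the
 * chosen priority/tie-breaker values are assumptions for illustration. */
static void example_wait_with_priority(wait_queue_head_t *head,
				       lt_t prio, unsigned int tie_breaker)
{
	prio_wait_queue_t waiter;
	unsigned long flags;

	init_waitqueue_entry(&waiter.wq, current);
	waiter.priority = prio;
	waiter.tie_breaker = tie_breaker;

	spin_lock_irqsave(&head->lock, flags);
	set_task_state(current, TASK_UNINTERRUPTIBLE);
	__add_wait_queue_prio_exclusive(head, &waiter);
	spin_unlock_irqrestore(&head->lock, flags);

	schedule();	/* resumed by whoever wakes the first waiter */
}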
170
171
172#else
173
174struct fdso_ops generic_lock_ops = {};
175
176asmlinkage long sys_litmus_lock(int sem_od)
177{
178 return -ENOSYS;
179}
180
181asmlinkage long sys_litmus_unlock(int sem_od)
182{
183 return -ENOSYS;
184}
185
186#endif
diff --git a/litmus/preempt.c b/litmus/preempt.c
new file mode 100644
index 000000000000..90e09d091e30
--- /dev/null
+++ b/litmus/preempt.c
@@ -0,0 +1,131 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/preempt.h>
5
6/* The rescheduling state of each processor.
7 */
8DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
9
10void sched_state_will_schedule(struct task_struct* tsk)
11{
12 /* Litmus hack: we only care about processor-local invocations of
13 * set_tsk_need_resched(). We can't reliably set the flag remotely
14 * since it might race with other updates to the scheduling state. We
15 * can't rely on the runqueue lock protecting updates to the sched
16 * state since processors do not acquire the runqueue locks for all
17 * updates to the sched state (to avoid acquiring two runqueue locks at
18 * the same time). Further, if tsk is residing on a remote processor,
19 * then that processor doesn't actually know yet that it is going to
20 * reschedule; it still must receive an IPI (unless a local invocation
21 * races).
22 */
23 if (likely(task_cpu(tsk) == smp_processor_id())) {
24 VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
25 if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
26 set_sched_state(PICKED_WRONG_TASK);
27 else
28 set_sched_state(WILL_SCHEDULE);
29 } else
30 /* Litmus tasks should never be subject to a remote
31 * set_tsk_need_resched(). */
32 BUG_ON(is_realtime(tsk));
33// TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
34// __builtin_return_address(0));
35}
36
37/* Called by the IPI handler after another CPU called smp_send_resched(). */
38void sched_state_ipi(void)
39{
40 /* If the IPI was slow, we might be in any state right now. The IPI is
41 * only meaningful if we are in SHOULD_SCHEDULE. */
42 if (is_in_sched_state(SHOULD_SCHEDULE)) {
43 /* Cause scheduler to be invoked.
44 * This will cause a transition to WILL_SCHEDULE. */
45 set_tsk_need_resched(current);
46 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
47 current->comm, current->pid);
48 } else {
49 /* ignore */
50 TRACE_STATE("ignoring IPI in state %x (%s)\n",
51 get_sched_state(),
52 sched_state_name(get_sched_state()));
53 }
54}
55
56/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
57 * hold the lock that is used to serialize scheduling decisions. */
58void litmus_reschedule(int cpu)
59{
60 int picked_transition_ok = 0;
61 int scheduled_transition_ok = 0;
62
63 /* The (remote) CPU could be in any state. */
64
65 /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
66 * is not aware of the need to reschedule at this point. */
67
68 /* is a context switch in progress? */
69 if (cpu_is_in_sched_state(cpu, TASK_PICKED))
70 picked_transition_ok = sched_state_transition_on(
71 cpu, TASK_PICKED, PICKED_WRONG_TASK);
72
73 if (!picked_transition_ok &&
74 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
75 /* We either raced with the end of the context switch, or the
76 * CPU was in TASK_SCHEDULED anyway. */
77 scheduled_transition_ok = sched_state_transition_on(
78 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
79 }
80
81 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
82 * scheduler to be invoked. */
83 if (scheduled_transition_ok) {
84 if (smp_processor_id() == cpu)
85 set_tsk_need_resched(current);
86 else
87 smp_send_reschedule(cpu);
88 }
89
90 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
91 __FUNCTION__,
92 picked_transition_ok,
93 scheduled_transition_ok);
94}
95
96void litmus_reschedule_local(void)
97{
98 if (is_in_sched_state(TASK_PICKED))
99 set_sched_state(PICKED_WRONG_TASK);
100 else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) {
101 set_sched_state(WILL_SCHEDULE);
102 set_tsk_need_resched(current);
103 }
104}
105
106#ifdef CONFIG_DEBUG_KERNEL
107
108void sched_state_plugin_check(void)
109{
110 if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
111 TRACE("!!!! plugin did not call sched_state_task_picked()!"
112 "Calling sched_state_task_picked() is mandatory---fix this.\n");
113 set_sched_state(TASK_PICKED);
114 }
115}
116
117#define NAME_CHECK(x) case x: return #x
118const char* sched_state_name(int s)
119{
120 switch (s) {
121 NAME_CHECK(TASK_SCHEDULED);
122 NAME_CHECK(SHOULD_SCHEDULE);
123 NAME_CHECK(WILL_SCHEDULE);
124 NAME_CHECK(TASK_PICKED);
125 NAME_CHECK(PICKED_WRONG_TASK);
126 default:
127 return "UNKNOWN";
128 };
129}
130
131#endif
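/* Illustrative sketch of the plugin-side protocol implied above: while
 * holding the lock that serializes its scheduling decisions, a plugin
 * updates its bookkeeping and then asks the affected CPU to reschedule via
 * litmus_reschedule(); the plugin's schedule() callback must in turn end
 * with sched_state_task_picked(). The sched_lock parameter stands in for
 * whatever lock the plugin actually uses. */
static void example_trigger_preemption(raw_spinlock_t *sched_lock, int cpu)
{
	unsigned long flags;

	raw_spin_lock_irqsave(sched_lock, flags);
	/* ... update the plugin's linked/scheduled bookkeeping here ... */
	litmus_reschedule(cpu);	/* remote CPU gets an IPI, local CPU a resched flag */
	raw_spin_unlock_irqrestore(sched_lock, flags);
}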
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 000000000000..d405854cd39c
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,357 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h>
17
18#include <litmus/rt_domain.h>
19
20#include <litmus/trace.h>
21
22#include <litmus/bheap.h>
23
24/* Uncomment when debugging timer races... */
25#if 0
26#define VTRACE_TASK TRACE_TASK
27#define VTRACE TRACE
28#else
29#define VTRACE_TASK(t, fmt, args...) /* shut up */
30#define VTRACE(fmt, args...) /* be quiet already */
31#endif
32
33static int dummy_resched(rt_domain_t *rt)
34{
35 return 0;
36}
37
38static int dummy_order(struct bheap_node* a, struct bheap_node* b)
39{
40 return 0;
41}
42
43/* default implementation: use default lock */
44static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
45{
46 merge_ready(rt, tasks);
47}
48
49static unsigned int time2slot(lt_t time)
50{
51 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
52}
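/* Worked example (assuming a 1 ms scheduling quantum): a release at
 * t = 5,200,000 ns lies in quantum floor(5.2) = 5 and thus hashes to slot
 * 5 % RELEASE_QUEUE_SLOTS. Releases that are an exact multiple of
 * RELEASE_QUEUE_SLOTS quanta apart collide, which is why each slot holds a
 * list of release_heaps keyed by exact release time (see below). */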
53
54static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
55{
56 unsigned long flags;
57 struct release_heap* rh;
58 rh = container_of(timer, struct release_heap, timer);
59
60 TS_RELEASE_LATENCY(rh->release_time);
61
62 VTRACE("on_release_timer(0x%p) starts.\n", timer);
63
64 TS_RELEASE_START;
65
66
67 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
68 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
69 /* remove from release queue */
70 list_del(&rh->list);
71 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
72 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
73
74 /* call release callback */
75 rh->dom->release_jobs(rh->dom, &rh->heap);
76 /* WARNING: rh can be referenced from other CPUs from now on. */
77
78 TS_RELEASE_END;
79
80 VTRACE("on_release_timer(0x%p) ends.\n", timer);
81
82 return HRTIMER_NORESTART;
83}
84
85/* allocated in litmus.c */
86struct kmem_cache * release_heap_cache;
87
88struct release_heap* release_heap_alloc(int gfp_flags)
89{
90 struct release_heap* rh;
91 rh= kmem_cache_alloc(release_heap_cache, gfp_flags);
92 if (rh) {
93 /* initialize timer */
94 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
95 rh->timer.function = on_release_timer;
96 }
97 return rh;
98}
99
100void release_heap_free(struct release_heap* rh)
101{
102 /* make sure timer is no longer in use */
103 hrtimer_cancel(&rh->timer);
104 kmem_cache_free(release_heap_cache, rh);
105}
106
107/* Caller must hold release lock.
108 * Will return heap for given time. If no such heap exists prior to
109 * the invocation it will be created.
110 */
111static struct release_heap* get_release_heap(rt_domain_t *rt,
112 struct task_struct* t,
113 int use_task_heap)
114{
115 struct list_head* pos;
116 struct release_heap* heap = NULL;
117 struct release_heap* rh;
118 lt_t release_time = get_release(t);
119 unsigned int slot = time2slot(release_time);
120
121 /* initialize pos for the case that the list is empty */
122 pos = rt->release_queue.slot[slot].next;
123 list_for_each(pos, &rt->release_queue.slot[slot]) {
124 rh = list_entry(pos, struct release_heap, list);
125 if (release_time == rh->release_time) {
126 /* perfect match -- this happens on hyperperiod
127 * boundaries
128 */
129 heap = rh;
130 break;
131 } else if (lt_before(release_time, rh->release_time)) {
132 /* we need to insert a new node since rh is
133 * already in the future
134 */
135 break;
136 }
137 }
138 if (!heap && use_task_heap) {
139 /* use pre-allocated release heap */
140 rh = tsk_rt(t)->rel_heap;
141
142 rh->dom = rt;
143 rh->release_time = release_time;
144
145 /* add to release queue */
146 list_add(&rh->list, pos->prev);
147 heap = rh;
148 }
149 return heap;
150}
151
152static void reinit_release_heap(struct task_struct* t)
153{
154 struct release_heap* rh;
155
156 /* use pre-allocated release heap */
157 rh = tsk_rt(t)->rel_heap;
158
159 /* Make sure it is safe to use. The timer callback could still
160 * be executing on another CPU; hrtimer_cancel() will wait
161 * until the timer callback has completed. However, under no
162 * circumstances should the timer be active (= yet to be
163 * triggered).
164 *
165 * WARNING: If the CPU still holds the release_lock at this point,
166 * deadlock may occur!
167 */
168 BUG_ON(hrtimer_cancel(&rh->timer));
169
170 /* initialize */
171 bheap_init(&rh->heap);
172#ifdef CONFIG_RELEASE_MASTER
173 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
174#endif
175}
176/* arm_release_timer() - start local release timer or trigger
177 * remote timer (pull timer)
178 *
179 * Called by add_release() with:
180 * - tobe_lock taken
181 * - IRQ disabled
182 */
183#ifdef CONFIG_RELEASE_MASTER
184#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
185static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
186#else
187static void arm_release_timer(rt_domain_t *_rt)
188#endif
189{
190 rt_domain_t *rt = _rt;
191 struct list_head list;
192 struct list_head *pos, *safe;
193 struct task_struct* t;
194 struct release_heap* rh;
195
196 VTRACE("arm_release_timer() at %llu\n", litmus_clock());
197 list_replace_init(&rt->tobe_released, &list);
198
199 list_for_each_safe(pos, safe, &list) {
200 /* pick task of work list */
201 t = list_entry(pos, struct task_struct, rt_param.list);
202 sched_trace_task_release(t);
203 list_del(pos);
204
205 /* put into release heap while holding release_lock */
206 raw_spin_lock(&rt->release_lock);
207 VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
208
209 rh = get_release_heap(rt, t, 0);
210 if (!rh) {
211 /* need to use our own, but drop lock first */
212 raw_spin_unlock(&rt->release_lock);
213 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
214 &rt->release_lock);
215
216 reinit_release_heap(t);
217 VTRACE_TASK(t, "release_heap ready\n");
218
219 raw_spin_lock(&rt->release_lock);
220 VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
221 &rt->release_lock);
222
223 rh = get_release_heap(rt, t, 1);
224 }
225 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
226 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
227
228 raw_spin_unlock(&rt->release_lock);
229 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
230
231 /* To avoid arming the timer multiple times, we only let the
232 * owner do the arming (which is the "first" task to reference
233 * this release_heap anyway).
234 */
235 if (rh == tsk_rt(t)->rel_heap) {
236 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
237 /* we cannot arm the timer using hrtimer_start()
238 * as it may deadlock on rq->lock
239 *
240 * PINNED mode is ok on both local and remote CPU
241 */
242#ifdef CONFIG_RELEASE_MASTER
243 if (rt->release_master == NO_CPU &&
244 target_cpu == NO_CPU)
245#endif
246 __hrtimer_start_range_ns(&rh->timer,
247 ns_to_ktime(rh->release_time),
248 0, HRTIMER_MODE_ABS_PINNED, 0);
249#ifdef CONFIG_RELEASE_MASTER
250 else
251 hrtimer_start_on(
252 /* target_cpu overrides release master */
253 (target_cpu != NO_CPU ?
254 target_cpu : rt->release_master),
255 &rh->info, &rh->timer,
256 ns_to_ktime(rh->release_time),
257 HRTIMER_MODE_ABS_PINNED);
258#endif
259 } else
260 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
261 }
262}
263
264void rt_domain_init(rt_domain_t *rt,
265 bheap_prio_t order,
266 check_resched_needed_t check,
267 release_jobs_t release
268 )
269{
270 int i;
271
272 BUG_ON(!rt);
273 if (!check)
274 check = dummy_resched;
275 if (!release)
276 release = default_release_jobs;
277 if (!order)
278 order = dummy_order;
279
280#ifdef CONFIG_RELEASE_MASTER
281 rt->release_master = NO_CPU;
282#endif
283
284 bheap_init(&rt->ready_queue);
285 INIT_LIST_HEAD(&rt->tobe_released);
286 for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
287 INIT_LIST_HEAD(&rt->release_queue.slot[i]);
288
289 raw_spin_lock_init(&rt->ready_lock);
290 raw_spin_lock_init(&rt->release_lock);
291 raw_spin_lock_init(&rt->tobe_lock);
292
293 rt->check_resched = check;
294 rt->release_jobs = release;
295 rt->order = order;
296}
297
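/* A minimal initialization sketch, assuming an EDF-ordered domain: the
 * edf_ready_order comparator is the one used elsewhere in this patch
 * (declared in litmus/edf_common.h), while my_check_resched() is a
 * hypothetical callback a plugin would use to trigger preemption checks.
 * Passing NULL for the release callback selects default_release_jobs(). */
static int my_check_resched(rt_domain_t *rt)
{
	/* e.g., compare the head of rt->ready_queue against running jobs */
	return 0;
}

static void example_domain_setup(rt_domain_t *dom)
{
	rt_domain_init(dom, edf_ready_order, my_check_resched, NULL);
}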
298/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
299 * @new: the newly released task
300 */
301void __add_ready(rt_domain_t* rt, struct task_struct *new)
302{
303 TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
304 new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
305 get_release(new), litmus_clock());
306
307 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
308
309 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
310 rt->check_resched(rt);
311}
312
313/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
314 * @tasks - the newly released tasks
315 */
316void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
317{
318 bheap_union(rt->order, &rt->ready_queue, tasks);
319 rt->check_resched(rt);
320}
321
322
323#ifdef CONFIG_RELEASE_MASTER
324void __add_release_on(rt_domain_t* rt, struct task_struct *task,
325 int target_cpu)
326{
327 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
328 get_release(task), target_cpu);
329 list_add(&tsk_rt(task)->list, &rt->tobe_released);
330 task->rt_param.domain = rt;
331
332 /* start release timer */
333 TS_SCHED2_START(task);
334
335 arm_release_timer_on(rt, target_cpu);
336
337 TS_SCHED2_END(task);
338}
339#endif
340
341/* add_release - add a real-time task to the rt release queue.
342 * @task: the sleeping task
343 */
344void __add_release(rt_domain_t* rt, struct task_struct *task)
345{
346 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
347 list_add(&tsk_rt(task)->list, &rt->tobe_released);
348 task->rt_param.domain = rt;
349
350 /* start release timer */
351 TS_SCHED2_START(task);
352
353 arm_release_timer(rt);
354
355 TS_SCHED2_END(task);
356}
357
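/* Caller-side locking sketch (illustration only): __add_ready() expects the
 * domain's ready_lock and __add_release() the tobe_lock, both taken with
 * IRQs disabled (compare the arm_release_timer() comment above). A typical
 * wrapper might look like this: */
static void example_add_release(rt_domain_t *rt, struct task_struct *t)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&rt->tobe_lock, flags);
	__add_release(rt, t);
	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
}

static void example_add_ready(rt_domain_t *rt, struct task_struct *t)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&rt->ready_lock, flags);
	__add_ready(rt, t);
	raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
}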
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
new file mode 100644
index 000000000000..4f5bb26b339b
--- /dev/null
+++ b/litmus/sched_cedf.c
@@ -0,0 +1,1526 @@
1/*
2 * litmus/sched_cedf.c
3 *
4 * Implementation of the C-EDF scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
8 *   - The cluster topology is automatically detected (this is arch-dependent
9 *     and currently works only on x86 --- and only with modern
10 *     CPUs that export cpuid4 information).
11 *   - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
12 *     the programmer needs to be aware of the topology to place tasks
13 *     in the desired cluster.
14 *   - Default clustering is around the L2 cache (cache index = 2);
15 *     supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
16 *     online_cpus are placed in a single cluster).
17 *
18 * For details on functions, take a look at sched_gsn_edf.c
19 *
20 * Currently, we do not support changes in the number of online cpus.
21 * If the num_online_cpus() dynamically changes, the plugin is broken.
22 *
23 * This version uses the simple approach and serializes all scheduling
24 * decisions by the use of a queue lock. This is probably not the
25 * best way to do it, but it should suffice for now.
26 */
27
28#include <linux/spinlock.h>
29#include <linux/percpu.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32
33#include <linux/module.h>
34
35#include <litmus/litmus.h>
36#include <litmus/wait.h>
37#include <litmus/jobs.h>
38#include <litmus/preempt.h>
39#include <litmus/sched_plugin.h>
40#include <litmus/edf_common.h>
41#include <litmus/sched_trace.h>
42#include <litmus/trace.h>
43
44#include <litmus/clustered.h>
45
46#include <litmus/bheap.h>
47
48/* to configure the cluster size */
49#include <litmus/litmus_proc.h>
50#include <linux/uaccess.h>
51
52/* Reference configuration variable. Determines which cache level is used to
53 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
54 * all CPUs form a single cluster (just like GSN-EDF).
55 */
56static enum cache_level cluster_config = GLOBAL_CLUSTER;
57
58struct clusterdomain;
59
60/* cpu_entry_t - maintain the linked and scheduled state
61 *
62 * A cpu also contains a pointer to the cedf_domain_t cluster
63 * that owns it (struct clusterdomain*)
64 */
65typedef struct {
66 int cpu;
67 struct clusterdomain* cluster; /* owning cluster */
68 struct task_struct* linked; /* only RT tasks */
69 struct task_struct* scheduled; /* only RT tasks */
70 atomic_t will_schedule; /* prevent unneeded IPIs */
71 struct bheap_node* hn;
72#ifdef CONFIG_LITMUS_LOCKING
73 struct bheap_node* pending_hn;
74 struct task_struct* pending;
75#endif
76} cpu_entry_t;
77
78/* one cpu_entry_t per CPU */
79DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
80
81
82static struct bheap_node cpu_nodes[NR_CPUS];
83#ifdef CONFIG_LITMUS_LOCKING
84static struct bheap_node pending_nodes[NR_CPUS];
85#endif
86
87/*
88 * In C-EDF there is a cedf domain _per_ cluster
89 * The number of clusters is dynamically determined accordingly to the
90 * total cpu number and the cluster size
91 */
92typedef struct clusterdomain {
93 /* rt_domain for this cluster */
94 rt_domain_t domain;
95 /* map of this cluster cpus */
96 cpumask_var_t cpu_map;
97 unsigned int num_cpus;
98 /* the cpus queue themselves according to priority in here */
99 struct bheap cpu_heap;
100#ifdef CONFIG_LITMUS_LOCKING
101 struct bheap pending_jobs;
102 struct bheap pending_cpus;
103#endif
104 /* lock for this cluster */
105#define cluster_lock domain.ready_lock
106} cedf_domain_t;
107
108/* a cedf_domain per cluster; allocation is done at init/activation time */
109cedf_domain_t *cedf;
110
111#define remote_cpu(cpu) (&per_cpu(cedf_cpu_entries, cpu))
112#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
113#define task_cpu_cluster(task) remote_cluster(get_partition(task))
114
115/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
116 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
117 * information during the initialization of the plugin (e.g., topology)
118#define WANT_ALL_SCHED_EVENTS
119 */
120#define VERBOSE_INIT
121
122static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
123{
124 cpu_entry_t *a, *b;
125 a = _a->value;
126 b = _b->value;
127 /* Note that a and b are inverted: we want the lowest-priority CPU at
128 * the top of the heap.
129 */
130 return edf_higher_prio(b->linked, a->linked);
131}
132
133/* update_cpu_position - Move the cpu entry to the correct place to maintain
134 * order in the cpu queue. Caller must hold cedf lock.
135 */
136static void update_cpu_position(cpu_entry_t *entry)
137{
138 cedf_domain_t *cluster = entry->cluster;
139
140 if (likely(bheap_node_in_heap(entry->hn)))
141 bheap_delete(cpu_lower_prio,
142 &cluster->cpu_heap,
143 entry->hn);
144
145 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
146}
147
148/* caller must hold cedf lock */
149static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
150{
151 struct bheap_node* hn;
152 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
153 return hn->value;
154}
155
156
157/* link_task_to_cpu - Update the link of a CPU.
158 * Handles the case where the to-be-linked task is already
159 * scheduled on a different CPU.
160 */
161static noinline void link_task_to_cpu(struct task_struct* linked,
162 cpu_entry_t *entry)
163{
164 cpu_entry_t *sched;
165 struct task_struct* tmp;
166 int on_cpu;
167
168 BUG_ON(linked && !is_realtime(linked));
169
170 /* Currently linked task is set to be unlinked. */
171 if (entry->linked) {
172 entry->linked->rt_param.linked_on = NO_CPU;
173 }
174
175 /* Link new task to CPU. */
176 if (linked) {
177 /* handle task is already scheduled somewhere! */
178 on_cpu = linked->rt_param.scheduled_on;
179 if (on_cpu != NO_CPU) {
180 sched = &per_cpu(cedf_cpu_entries, on_cpu);
181 /* this should only happen if not linked already */
182 BUG_ON(sched->linked == linked);
183
184 /* If we are already scheduled on the CPU to which we
185 * wanted to link, we don't need to do the swap --
186 * we just link ourselves to the CPU and depend on
187 * the caller to get things right.
188 */
189 if (entry != sched) {
190 TRACE_TASK(linked,
191 "already scheduled on %d, updating link.\n",
192 sched->cpu);
193 tmp = sched->linked;
194 linked->rt_param.linked_on = sched->cpu;
195 sched->linked = linked;
196 update_cpu_position(sched);
197 linked = tmp;
198 }
199 }
200 if (linked) /* might be NULL due to swap */
201 linked->rt_param.linked_on = entry->cpu;
202 }
203 entry->linked = linked;
204#ifdef WANT_ALL_SCHED_EVENTS
205 if (linked)
206 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
207 else
208 TRACE("NULL linked to %d.\n", entry->cpu);
209#endif
210 update_cpu_position(entry);
211}
212
213/* unlink - Make sure a task is not linked any longer to an entry
214 * where it was linked before. Must hold cedf_lock.
215 */
216static noinline void unlink(struct task_struct* t)
217{
218 cpu_entry_t *entry;
219
220 if (t->rt_param.linked_on != NO_CPU) {
221 /* unlink */
222 entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
223 t->rt_param.linked_on = NO_CPU;
224 link_task_to_cpu(NULL, entry);
225 } else if (is_queued(t)) {
226 /* This is an interesting situation: t is scheduled,
227 * but was just recently unlinked. It cannot be
228 * linked anywhere else (because then it would have
229 * been relinked to this CPU), thus it must be in some
230 * queue. We must remove it from the list in this
231 * case.
232 *
233		 * In the C-EDF case it should be somewhere in the queue of
234		 * its domain, so we can get the domain using
235		 * task_cpu_cluster().
236 */
237 remove(&(task_cpu_cluster(t))->domain, t);
238 }
239}
240
241
242/* preempt - force a CPU to reschedule
243 */
244static void preempt(cpu_entry_t *entry)
245{
246 preempt_if_preemptable(entry->scheduled, entry->cpu);
247}
248
249#ifdef CONFIG_LITMUS_LOCKING
250static int update_pending_job(cedf_domain_t* cluster, struct task_struct* t);
251static void priodon_become_eligible(void);
252static void priodon_complete_request(void);
253
254static inline int in_pending_heap(struct task_struct* t)
255{
256 return bheap_node_in_heap(tsk_rt(t)->pending_node);
257}
258
259/* has this task already been processed for pending */
260static inline int is_pending(struct task_struct* t)
261{
262 return tsk_rt(t)->pending_on != NO_CPU ||
263 in_pending_heap(t);
264}
265
266#endif
267
268/* requeue - Put an unlinked task into gsn-edf domain.
269 * Caller must hold cedf_lock.
270 */
271static noinline void requeue(struct task_struct* task)
272{
273 cedf_domain_t *cluster = task_cpu_cluster(task);
274 BUG_ON(!task);
275 /* sanity check before insertion */
276 BUG_ON(is_queued(task));
277
278 if (is_released(task, litmus_clock())) {
279#ifdef CONFIG_LITMUS_LOCKING
280 if (!is_pending(task))
281 update_pending_job(cluster, task);
282#endif
283 __add_ready(&cluster->domain, task);
284 } else {
285 /* it has got to wait */
286 add_release(&cluster->domain, task);
287 }
288}
289
290/* check for any necessary preemptions */
291static void check_for_preemptions(cedf_domain_t *cluster)
292{
293 struct task_struct *task;
294 cpu_entry_t* last;
295
296 for(last = lowest_prio_cpu(cluster);
297 edf_preemption_needed(&cluster->domain, last->linked);
298 last = lowest_prio_cpu(cluster)) {
299 /* preemption necessary */
300
301#ifdef CONFIG_LITMUS_LOCKING
302 task = __peek_ready(&cluster->domain);
303 if (update_pending_job(cluster, task)) {
304			/* Something changed, re-evaluate priorities to
305			 * see if we still need to preempt.
306			 */
307 TRACE_TASK(task, "hitting continue\n");
308 continue;
309 }
310#endif
311 task = __take_ready(&cluster->domain);
312 TRACE_TASK(task, "attempting to link task to P%d\n",
313 last->cpu);
314 if (last->linked)
315 requeue(last->linked);
316 link_task_to_cpu(task, last);
317 preempt(last);
318 }
319}
320
321#ifdef CONFIG_LITMUS_LOCKING
322
323static int pending_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
324{
325 cpu_entry_t *a, *b;
326 a = _a->value;
327 b = _b->value;
328 /* Note that a and b are inverted: we want the lowest-priority CPU at
329 * the top of the heap.
330 */
331 return edf_higher_base_prio(b->pending, a->pending);
332}
333
334/* update_cpu_position - Move the cpu entry to the correct place to maintain
335 * order in the cpu queue. Caller must hold cedf lock.
336 */
337static void update_pending_position(cpu_entry_t *entry)
338{
339 cedf_domain_t *cluster = entry->cluster;
340
341 if (likely(bheap_node_in_heap(entry->pending_hn)))
342 bheap_delete(pending_lower_prio,
343 &cluster->pending_cpus,
344 entry->pending_hn);
345
346 bheap_insert(pending_lower_prio, &cluster->pending_cpus, entry->pending_hn);
347}
348
349/* caller must hold cedf lock */
350static cpu_entry_t* lowest_pending_cpu(cedf_domain_t *cluster)
351{
352 struct bheap_node* hn;
353 hn = bheap_peek(pending_lower_prio, &cluster->pending_cpus);
354 return hn->value;
355}
356
357static void priority_raised(struct task_struct* t)
358{
359 cedf_domain_t *cluster = task_cpu_cluster(t);
360 int linked_on;
361
362 linked_on = tsk_rt(t)->linked_on;
363
364 /* If it is scheduled, then we need to reorder the CPU heap. */
365 if (linked_on != NO_CPU) {
366 TRACE_TASK(t, "%s: linked on %d\n",
367 __FUNCTION__, linked_on);
368 /* Holder is scheduled; need to re-order CPUs.
369 * We can't use heap_decrease() here since
370 * the cpu_heap is ordered in reverse direction, so
371 * it is actually an increase. */
372 bheap_delete(cpu_lower_prio, &cluster->cpu_heap,
373 remote_cpu(linked_on)->hn);
374 bheap_insert(cpu_lower_prio, &cluster->cpu_heap,
375 remote_cpu(linked_on)->hn);
376 } else {
377 /* holder may be queued: first stop queue changes */
378 raw_spin_lock(&cluster->domain.release_lock);
379 if (is_queued(t)) {
380 TRACE_TASK(t, "%s: is queued\n",
381 __FUNCTION__);
382 bheap_decrease(edf_ready_order,
383 tsk_rt(t)->heap_node);
384 } else {
385 /* Nothing to do: if it is not queued and not linked
386 * then it is either sleeping or currently being moved
387 * by other code (e.g., a timer interrupt handler) that
388 * will use the correct priority when enqueuing the
389 * task. */
390 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
391 __FUNCTION__);
392 }
393 raw_spin_unlock(&cluster->domain.release_lock);
394 }
395}
396
397static void priority_lowered(struct task_struct* t)
398{
399 /* assumption: t is not in a release heap */
400 if (is_queued(t) || tsk_rt(t)->linked_on != NO_CPU) {
401 unlink(t);
402 requeue(t);
403 }
404}
405
406static void donate_priority(struct task_struct* recipient, struct task_struct* donor)
407{
408 cedf_domain_t *cluster = task_cpu_cluster(donor);
409
410 BUG_ON(task_cpu_cluster(recipient) != task_cpu_cluster(donor));
411 BUG_ON(tsk_rt(donor)->is_donor);
412 BUG_ON(tsk_rt(recipient)->is_donor);
413 BUG_ON(tsk_rt(donor)->inh_task);
414 BUG_ON(tsk_rt(recipient)->inh_task);
415
416 TRACE_TASK(donor, "priodon: becomes priority donor for %s/%d\n",
417 recipient->comm, recipient->pid);
418
419 /* swap priorities */
420 tsk_rt(recipient)->inh_task = donor;
421 tsk_rt(donor)->inh_task = recipient;
422 tsk_rt(donor)->is_donor = 1;
423
424 priority_lowered(donor);
425 priority_raised(recipient);
426
427 bheap_uncache_min(edf_ready_order,
428 &cluster->domain.ready_queue);
429}
430
431/* assumption: new_donor has a higher priority than old_donor */
432static void switch_donor(struct task_struct* recipient,
433 struct task_struct* old_donor,
434 struct task_struct* new_donor)
435{
436 TRACE_TASK(new_donor, "becomes donor for %s/%d instead of %s/%d\n",
437 recipient->comm, recipient->pid, old_donor->comm, old_donor->pid);
438
439 BUG_ON(tsk_rt(recipient)->inh_task != old_donor);
440 BUG_ON(tsk_rt(old_donor)->inh_task != recipient);
441 BUG_ON(tsk_rt(new_donor)->inh_task != NULL);
442 BUG_ON(tsk_rt(new_donor)->is_donor);
443
444 tsk_rt(old_donor)->inh_task = NULL;
445 tsk_rt(old_donor)->is_donor = 0;
446
447 tsk_rt(recipient)->inh_task = new_donor;
448 tsk_rt(new_donor)->inh_task = recipient;
449 tsk_rt(new_donor)->is_donor = 1;
450
451 priority_raised(recipient);
452 priority_raised(old_donor);
453 priority_lowered(new_donor);
454}
455
456static void undonate_priority(struct task_struct* recipient, struct task_struct* donor)
457{
458 cedf_domain_t *cluster = task_cpu_cluster(donor);
459
460 BUG_ON(tsk_rt(recipient)->inh_task != donor);
461 BUG_ON(tsk_rt(donor)->inh_task != recipient);
462
463 TRACE_TASK(donor, "priodon: is no longer priority donor of %s/%d\n",
464 recipient->comm, recipient->pid);
465
466 tsk_rt(recipient)->inh_task = NULL;
467 tsk_rt(donor)->inh_task = NULL;
468 tsk_rt(donor)->is_donor = 0;
469
470 priority_lowered(recipient);
471 priority_raised(donor);
472
473 bheap_uncache_min(edf_ready_order,
474 &cluster->domain.ready_queue);
475}
476
477static inline void add_to_pending(cedf_domain_t* cluster, struct task_struct* t)
478{
479 TRACE_TASK(t, "priodon: adding to pending heap wait:%u donor:%u req:%u pend:%d\n",
480 tsk_rt(t)->waiting_eligible,
481 tsk_rt(t)->is_donor, tsk_rt(t)->request_incomplete,
482 tsk_rt(t)->pending_on);
483 bheap_insert(edf_pending_order,
484 &cluster->pending_jobs,
485 tsk_rt(t)->pending_node);
486}
487
488static inline struct task_struct* take_pending(cedf_domain_t* cluster)
489{
490 struct bheap_node* node;
491 node = bheap_take(edf_pending_order, &cluster->pending_jobs);
492 return node ? (struct task_struct*) node->value : NULL;
493}
494
495static inline struct task_struct* peek_pending(cedf_domain_t* cluster)
496{
497 struct bheap_node* node;
498 node = bheap_peek(edf_pending_order, &cluster->pending_jobs);
499 return node ? (struct task_struct*) node->value : NULL;
500}
501
502static inline int fake_resume(struct task_struct* t)
503{
504 TRACE_TASK(t, "priodon: fake resume wait:%u donor:%u\n",
505 tsk_rt(t)->waiting_eligible, tsk_rt(t)->is_donor);
506 /* Fake suspended. Let's resume it. */
507 if (tsk_rt(t)->waiting_eligible) {
508 tsk_rt(t)->waiting_eligible = 0;
509 if (tsk_rt(t)->scheduled_on == NO_CPU) {
510 /* it was removed from the queue */
511 requeue(t);
512 return 1;
513 }
514 }
515 return 0;
516}
517
518
519/* Lazily update set of highest-priority pending jobs.
520 * Returns 1 if priority recheck is required.
521 */
522static int update_pending_job(cedf_domain_t* cluster,
523 struct task_struct* to_be_linked)
524{
525 cpu_entry_t* entry;
526 struct task_struct* lowest_hp; /* lowest-priority high-priority task */
527 struct task_struct* highest_lp; /* highest-priority low-priority task */
528 int reeval = 0;
529
530 entry = lowest_pending_cpu(cluster);
531 lowest_hp = entry->pending;
532
533 if (to_be_linked && !is_pending(to_be_linked))
534 /* not yet accounted for, stick in heap */
535 add_to_pending(cluster, to_be_linked);
536
537 highest_lp = peek_pending(cluster);
538 if (edf_higher_base_prio(highest_lp, lowest_hp)) {
539		/* yes, it should become one of the c highest-priority pending jobs */
540
541 TRACE_TASK(highest_lp,
542 "priodon: became one of the %u highest-prio tasks (P%d, req:%u) X\n",
543 cluster->num_cpus,
544 entry->cpu,
545 tsk_rt(highest_lp)->request_incomplete);
546
547 /* get it out of the heap */
548 highest_lp = take_pending(cluster);
549
550 BUG_ON(highest_lp == lowest_hp);
551
552 /* it should never be a priority donor at this point */
553 BUG_ON(tsk_rt(highest_lp)->is_donor);
554
555 entry->pending = highest_lp;
556 update_pending_position(entry);
557 tsk_rt(highest_lp)->pending_on = entry->cpu;
558
559 /* things that could happen:
560 *
561 * 1) lowest_hp has no donor, but is in a request => highest_lp becomes donor
562 * 2) lowest_hp is donor => highest_lp becomes new donor, old donor is resumed if suspended
563 * 3) lowest_hp is not in a request, and highest_lp is waiting => highest_lp is resumed
564 * 4) lowest_hp is not in a request, and highest_lp is not waiting => nothing to do
565 * 5) highest_lp has a priority donor => resume its donor
566 */
567
568 /* do we need to put it back? */
569 if (lowest_hp) {
570 TRACE_TASK(lowest_hp,
571 "priodon: no longer among %u highest-prio tasks req:%u\n",
572 cluster->num_cpus,
573 tsk_rt(lowest_hp)->request_incomplete);
574 tsk_rt(lowest_hp)->pending_on = NO_CPU;
575 add_to_pending(cluster, lowest_hp);
576
577
578 if (tsk_rt(lowest_hp)->request_incomplete) {
579 /* case 1) */
580 donate_priority(lowest_hp, highest_lp);
581 reeval = 1;
582 } else if (tsk_rt(lowest_hp)->inh_task) {
583 /* case 2) */
584 switch_donor(tsk_rt(lowest_hp)->inh_task,
585 lowest_hp, highest_lp);
586 fake_resume(lowest_hp);
587 reeval = 1;
588 }
589 }
590
591
592 if (!tsk_rt(highest_lp)->is_donor) {
593 if (tsk_rt(highest_lp)->waiting_eligible) {
594 /* case 3) */
595 reeval = fake_resume(highest_lp);
596 BUG_ON(tsk_rt(highest_lp)->inh_task);
597 } else if (tsk_rt(highest_lp)->inh_task) {
598 /* case 5 */
599 struct task_struct* donor = tsk_rt(highest_lp)->inh_task;
600 undonate_priority(highest_lp, donor);
601 reeval = fake_resume(donor);
602 }
603 }
604 }
605
606 return reeval;
607}
608
609/* job has exited => no longer pending */
610
611static void job_pending_exit(struct task_struct* t)
612{
613 cedf_domain_t *cluster;
614 cpu_entry_t* entry;
615
616 TRACE_TASK(t, "priodon: is no longer pending (pending_on:%d, queued:%d)\n",
617 tsk_rt(t)->pending_on, in_pending_heap(t));
618
619 cluster = task_cpu_cluster(t);
620
621 if (tsk_rt(t)->pending_on != NO_CPU) {
622 entry = &per_cpu(cedf_cpu_entries, tsk_rt(t)->pending_on);
623 tsk_rt(t)->pending_on = NO_CPU;
624 entry->pending = NULL;
625 update_pending_position(entry);
626
627 /* let's see if anything changed */
628 update_pending_job(cluster, NULL);
629 } else if (in_pending_heap(t)) {
630 bheap_delete(edf_pending_order, &cluster->pending_jobs,
631 tsk_rt(t)->pending_node);
632 }
633}
634
635#endif
636
637
638/* cedf_job_arrival: task is either resumed or released */
639static noinline void cedf_job_arrival(struct task_struct* task)
640{
641 cedf_domain_t *cluster = task_cpu_cluster(task);
642 BUG_ON(!task);
643
644 requeue(task);
645 check_for_preemptions(cluster);
646}
647
648
649static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
650{
651 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
652 unsigned long flags;
653
654 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
655
656 __merge_ready(&cluster->domain, tasks);
657 check_for_preemptions(cluster);
658
659 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
660}
661
662/* caller holds cedf_lock */
663static noinline void job_completion(struct task_struct *t, int forced)
664{
665 BUG_ON(!t);
666
667 sched_trace_task_completion(t, forced);
668
669 TRACE_TASK(t, "job_completion().\n");
670
671#ifdef CONFIG_LITMUS_LOCKING
672 job_pending_exit(t);
673#endif
674
675 /* prepare for next period */
676 prepare_for_next_period(t);
677 if (is_released(t, litmus_clock()))
678 sched_trace_task_release(t);
679 /* unlink */
680 unlink(t);
681 /* requeue
682 * But don't requeue a blocking task. */
683 set_rt_flags(t, RT_F_RUNNING);
684 if (is_running(t))
685 cedf_job_arrival(t);
686}
687
688/* cedf_tick - this function is called for every local timer
689 * interrupt.
690 *
691 * checks whether the current task has expired and checks
692 * whether we need to preempt it if it has not expired
693 */
694static void cedf_tick(struct task_struct* t)
695{
696 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
697 if (!is_np(t)) {
698 /* np tasks will be preempted when they become
699 * preemptable again
700 */
701 litmus_reschedule_local();
702 TRACE("cedf_scheduler_tick: "
703 "%d is preemptable "
704 " => FORCE_RESCHED\n", t->pid);
705 } else if (is_user_np(t)) {
706 TRACE("cedf_scheduler_tick: "
707 "%d is non-preemptable, "
708 "preemption delayed.\n", t->pid);
709 request_exit_np(t);
710 }
711 }
712}
713
714/* Getting schedule() right is a bit tricky. schedule() may not make any
715 * assumptions on the state of the current task since it may be called for a
716 * number of reasons. The reasons include that scheduler_tick() determined it
717 * was necessary, that sys_exit_np() was called, that some Linux
718 * subsystem determined so, or even (in the worst case) that there is a bug
719 * hidden somewhere. Thus, we must take extreme care to determine what the
720 * current state is.
721 *
722 * The CPU could currently be scheduling a task (or not), be linked (or not).
723 *
724 * The following assertions for the scheduled task could hold:
725 *
726 * - !is_running(scheduled) // the job blocks
727 * - scheduled->timeslice == 0 // the job completed (forcefully)
728 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
729 * - linked != scheduled // we need to reschedule (for any reason)
730 * - is_np(scheduled) // rescheduling must be delayed,
731 * sys_exit_np must be requested
732 *
733 * Any of these can occur together.
734 */
735static struct task_struct* cedf_schedule(struct task_struct * prev)
736{
737 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
738 cedf_domain_t *cluster = entry->cluster;
739 int out_of_time, sleep, preempt, np, exists, blocks;
740 struct task_struct* next = NULL;
741
742#ifdef CONFIG_LITMUS_LOCKING
743 int priodon;
744#else
745#define priodon 0
746#endif
747
748#ifdef CONFIG_RELEASE_MASTER
749 /* Bail out early if we are the release master.
750 * The release master never schedules any real-time tasks.
751 */
752 if (cluster->domain.release_master == entry->cpu) {
753 sched_state_task_picked();
754 return NULL;
755 }
756#endif
757
758 raw_spin_lock(&cluster->cluster_lock);
759
760 /* sanity checking */
761 BUG_ON(entry->scheduled && entry->scheduled != prev);
762 BUG_ON(entry->scheduled && !is_realtime(prev));
763 BUG_ON(is_realtime(prev) && !entry->scheduled);
764
765 /* (0) Determine state */
766 exists = entry->scheduled != NULL;
767 blocks = exists && !is_running(entry->scheduled);
768 out_of_time = exists &&
769 budget_enforced(entry->scheduled) &&
770 budget_exhausted(entry->scheduled);
771 np = exists && is_np(entry->scheduled);
772 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
773 preempt = entry->scheduled != entry->linked;
774
775#ifdef CONFIG_LITMUS_LOCKING
776 priodon = exists && (tsk_rt(entry->scheduled)->waiting_eligible ||
777 /* can't allow job to exit until request is over */
778 (tsk_rt(entry->scheduled)->is_donor && sleep));
779
780 /* this should never happend together (at least we don't handle it atm) */
781 BUG_ON(priodon && blocks);
782#endif
783
784#ifdef WANT_ALL_SCHED_EVENTS
785 TRACE_TASK(prev, "invoked cedf_schedule.\n");
786#endif
787
788 if (exists)
789 TRACE_TASK(prev,
790 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
791 "state:%d sig:%d priodon:%d\n",
792 blocks, out_of_time, np, sleep, preempt,
793 prev->state, signal_pending(prev), priodon);
794 if (entry->linked && preempt)
795 TRACE_TASK(prev, "will be preempted by %s/%d\n",
796 entry->linked->comm, entry->linked->pid);
797
798
799 /* If a task blocks we have no choice but to reschedule.
800 */
801 if (blocks || priodon)
802 unlink(entry->scheduled);
803
804 /* Request a sys_exit_np() call if we would like to preempt but cannot.
805 * Do not unlink since entry->scheduled is currently in the ready queue.
806	 * We don't process out_of_time and sleep until the job is preemptable again.
807 */
808 if (np && (out_of_time || preempt || sleep)) {
809 request_exit_np(entry->scheduled);
810 }
811
812 /* Any task that is preemptable and either exhausts its execution
813 * budget or wants to sleep completes. We may have to reschedule after
814 * this. Don't do a job completion if we block (can't have timers running
815	 * for blocked jobs). Preemptions go first for the same reason.
816 */
817 if (!np && (out_of_time || sleep) && !blocks && !preempt
818 && !priodon)
819 /* note: priority donation prevents job completion */
820 job_completion(entry->scheduled, !sleep);
821
822 /* Link pending task if we became unlinked.
823 */
824
825 if (!entry->linked) {
826#ifdef CONFIG_LITMUS_LOCKING
827 struct task_struct *pulled;
828 int reeval;
829 do {
830 pulled = __take_ready(&cluster->domain);
831 reeval = 0;
832 if (pulled && !is_pending(pulled)) {
833 /* Pulled an un-processed task from the ready queue. */
834 TRACE_TASK(pulled, "pulled unprocessed\n");
835 reeval = update_pending_job(cluster, pulled);
836 if (reeval)
837 /* priority may have changed --- try again */
838 requeue(pulled);
839 }
840 } while (reeval);
841 link_task_to_cpu(pulled, entry);
842#else
843 link_task_to_cpu(__take_ready(&cluster->domain), entry);
844#endif
845 }
846
847 /* The final scheduling decision. Do we need to switch for some reason?
848 * If linked is different from scheduled, then select linked as next.
849 */
850 if ((!np || blocks || priodon) &&
851 entry->linked != entry->scheduled) {
852 /* Schedule a linked job? */
853 if (entry->linked) {
854 entry->linked->rt_param.scheduled_on = entry->cpu;
855 next = entry->linked;
856 }
857 if (entry->scheduled) {
858 /* not gonna be scheduled soon */
859 entry->scheduled->rt_param.scheduled_on = NO_CPU;
860 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
861 }
862 } else
863 /* Only override Linux scheduler if we have a real-time task
864 * scheduled that needs to continue.
865 */
866 if (exists)
867 next = prev;
868
869 sched_state_task_picked();
870 raw_spin_unlock(&cluster->cluster_lock);
871
872#ifdef WANT_ALL_SCHED_EVENTS
873 TRACE("cedf_lock released, next=0x%p\n", next);
874
875 if (next)
876 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
877 else if (exists && !next)
878 TRACE("becomes idle at %llu.\n", litmus_clock());
879#endif
880
881
882 return next;
883}
884
885
886/* _finish_switch - we just finished the switch away from prev
887 */
888static void cedf_finish_switch(struct task_struct *prev)
889{
890 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
891
892 entry->scheduled = is_realtime(current) ? current : NULL;
893#ifdef WANT_ALL_SCHED_EVENTS
894 TRACE_TASK(prev, "switched away from\n");
895#endif
896}
897
898
899/* Prepare a task for running in RT mode
900 */
901static void cedf_task_new(struct task_struct * t, int on_rq, int running)
902{
903 unsigned long flags;
904 cpu_entry_t* entry;
905 cedf_domain_t* cluster;
906
907	TRACE("cedf: task new %d\n", t->pid);
908
909 /* the cluster doesn't change even if t is running */
910 cluster = task_cpu_cluster(t);
911
912 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
913
914 /* setup job params */
915 release_at(t, litmus_clock());
916
917#ifdef CONFIG_LITMUS_LOCKING
918 tsk_rt(t)->pending_node = bheap_node_alloc(GFP_ATOMIC | __GFP_NOFAIL);
919 bheap_node_init(&tsk_rt(t)->pending_node, t);
920 tsk_rt(t)->pending_on = NO_CPU;
921 add_to_pending(cluster, t);
922#endif
923
924 if (running) {
925 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
926 BUG_ON(entry->scheduled);
927
928#ifdef CONFIG_RELEASE_MASTER
929 if (entry->cpu != cluster->domain.release_master) {
930#endif
931 entry->scheduled = t;
932 tsk_rt(t)->scheduled_on = task_cpu(t);
933#ifdef CONFIG_RELEASE_MASTER
934 } else {
935 /* do not schedule on release master */
936 preempt(entry); /* force resched */
937 tsk_rt(t)->scheduled_on = NO_CPU;
938 }
939#endif
940 } else {
941 t->rt_param.scheduled_on = NO_CPU;
942 }
943 t->rt_param.linked_on = NO_CPU;
944
945 cedf_job_arrival(t);
946 raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
947}
948
949static void cedf_task_wake_up(struct task_struct *task)
950{
951 unsigned long flags;
952 lt_t now;
953 cedf_domain_t *cluster;
954
955 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
956
957 cluster = task_cpu_cluster(task);
958
959 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
960 /* We need to take suspensions because of semaphores into
961 * account! If a job resumes after being suspended due to acquiring
962 * a semaphore, it should never be treated as a new job release.
963 */
964 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
965 set_rt_flags(task, RT_F_RUNNING);
966 } else {
967 now = litmus_clock();
968 if (is_tardy(task, now)) {
969 /* new sporadic release */
970 release_at(task, now);
971 sched_trace_task_release(task);
972 }
973 else {
974 if (task->rt.time_slice) {
975 /* came back in time before deadline
976 */
977 set_rt_flags(task, RT_F_RUNNING);
978 }
979 }
980 }
981 cedf_job_arrival(task);
982 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
983}
984
985static void cedf_task_block(struct task_struct *t)
986{
987 unsigned long flags;
988 cedf_domain_t *cluster;
989
990 TRACE_TASK(t, "block at %llu\n", litmus_clock());
991
992 cluster = task_cpu_cluster(t);
993
994 /* unlink if necessary */
995 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
996 unlink(t);
997 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
998
999 BUG_ON(!is_realtime(t));
1000}
1001
1002#ifdef CONFIG_LITMUS_LOCKING
1003static void cedf_pre_setsched(struct task_struct *t, int policy)
1004{
1005
1006 unsigned long flags;
1007 cedf_domain_t *cluster = task_cpu_cluster(t);
1008
1009 int delay_donor_exit = 0;
1010
1011 while (1) {
1012 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1013
1014 TRACE_CUR("cedf_pre_setsched wait:%u pend:%d donor:%u req:%u\n",
1015 tsk_rt(t)->waiting_eligible,
1016 tsk_rt(t)->pending_on, tsk_rt(t)->is_donor,
1017 tsk_rt(t)->request_incomplete);
1018
1019 delay_donor_exit = tsk_rt(current)->is_donor;
1020
1021 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1022
1023 if (!delay_donor_exit)
1024 break;
1025
1026 TRACE_CUR("donor exit delay\n");
1027 set_current_state(TASK_INTERRUPTIBLE);
1028 schedule_timeout(HZ);
1029 }
1030}
1031#endif
1032
1033static void cedf_task_exit(struct task_struct * t)
1034{
1035 unsigned long flags;
1036 cedf_domain_t *cluster = task_cpu_cluster(t);
1037
1038 /* unlink if necessary */
1039 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1040
1041 unlink(t);
1042
1043#ifdef CONFIG_LITMUS_LOCKING
1044 /* make sure it's not pending anymore */
1045 job_pending_exit(t);
1046 bheap_node_free(tsk_rt(t)->pending_node);
1047#endif
1048
1049 if (tsk_rt(t)->scheduled_on != NO_CPU) {
1050 cpu_entry_t *cpu;
1051 cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
1052 cpu->scheduled = NULL;
1053 tsk_rt(t)->scheduled_on = NO_CPU;
1054 }
1055 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1056
1057
1058 BUG_ON(!is_realtime(t));
1059 TRACE_TASK(t, "RIP\n");
1060}
1061
1062#ifdef CONFIG_LITMUS_LOCKING
1063
1064#include <litmus/fdso.h>
1065#include <litmus/locking.h>
1066
1067/* NOTE: we use fake suspensions because we must wake the task from within the
1068 * scheduler */
1069
1070/* suspend until the current task becomes eligible to issue a lock request */
1071static void priodon_become_eligible(void)
1072{
1073 struct task_struct* t = current;
1074 unsigned long flags;
1075 cedf_domain_t *cluster;
1076
1077 cluster = task_cpu_cluster(t);
1078
1079 do {
1080 TRACE_CUR("priodon: checking whether request may be issued\n");
1081 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1082
1083 if (tsk_rt(t)->pending_on == NO_CPU ||
1084 tsk_rt(t)->is_donor) {
1085 /* nope, gotta wait */
1086 tsk_rt(t)->waiting_eligible = 1;
1087 TRACE_CUR("priodon: not eligible pend:%u donor:%u\n",
1088 tsk_rt(t)->pending_on, tsk_rt(t)->is_donor);
1089 } else {
1090 /* alright! we are good to go! */
1091 tsk_rt(t)->request_incomplete = 1;
1092 TRACE_CUR("priodon: request issued\n");
1093 }
1094
1095 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1096
1097 if (tsk_rt(t)->waiting_eligible) {
1098 TRACE_CUR("priodon: fake suspending\n");
1099 TS_LOCK_SUSPEND;
1100 schedule();
1101 TS_LOCK_RESUME;
1102 }
1103
1104 } while (!tsk_rt(t)->request_incomplete);
1105}
1106
1107/* current task has completed its request */
1108static void priodon_complete_request(void)
1109{
1110 struct task_struct* t = current;
1111 struct task_struct* donor;
1112 unsigned long flags;
1113 cedf_domain_t *cluster;
1114
1115 cluster = task_cpu_cluster(t);
1116
1117 preempt_disable();
1118
1119 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1120
1121 TRACE_CUR("priodon: completing request\n");
1122
1123 if (tsk_rt(t)->inh_task) {
1124 /* we have a donor job --- see if we need to wake it */
1125 donor = tsk_rt(t)->inh_task;
1126 undonate_priority(t, donor);
1127
1128 if (fake_resume(donor))
1129 check_for_preemptions(cluster);
1130 }
1131
1132 tsk_rt(t)->request_incomplete = 0;
1133
1134 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
1135
1136 preempt_enable();
1137}
1138
1139/* struct for semaphore with priority inheritance */
1140struct omlp_semaphore {
1141 struct litmus_lock litmus_lock;
1142
1143 /* current resource holder */
1144 struct task_struct *owner;
1145
1146 /* FIFO queue of waiting tasks */
1147 wait_queue_head_t fifo_wait;
1148};
1149
1150static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock)
1151{
1152 return container_of(lock, struct omlp_semaphore, litmus_lock);
1153}
1154
1155static int cedf_omlp_lock(struct litmus_lock* l)
1156{
1157 struct task_struct* t = current;
1158 struct omlp_semaphore *sem = omlp_from_lock(l);
1159 wait_queue_t wait;
1160 unsigned long flags;
1161
1162 if (!is_realtime(t))
1163 return -EPERM;
1164
1165 priodon_become_eligible();
1166
1167 spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1168
1169 if (sem->owner) {
1170 /* resource is not free => must suspend and wait */
1171
1172 init_waitqueue_entry(&wait, t);
1173
1174 set_task_state(t, TASK_UNINTERRUPTIBLE);
1175
1176 __add_wait_queue_tail_exclusive(&sem->fifo_wait, &wait);
1177
1178 TS_LOCK_SUSPEND;
1179
1180 spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1181
1182 schedule();
1183
1184 TS_LOCK_RESUME;
1185
1186 BUG_ON(sem->owner != t);
1187 } else {
1188 /* it's ours now */
1189 sem->owner = t;
1190
1191 spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1192 }
1193
1194 return 0;
1195}
1196
1197static int cedf_omlp_unlock(struct litmus_lock* l)
1198{
1199 struct task_struct *t = current, *next;
1200 struct omlp_semaphore *sem = omlp_from_lock(l);
1201 unsigned long flags;
1202 int err = 0;
1203
1204 spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1205
1206 if (sem->owner != t) {
1207 err = -EINVAL;
1208 spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1209 goto out;
1210 }
1211
1212 /* check if there are jobs waiting for this resource */
1213 next = __waitqueue_remove_first(&sem->fifo_wait);
1214 if (next) {
1215 /* next becomes the resource holder */
1216 sem->owner = next;
1217 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
1218
1219 /* wake up next */
1220 wake_up_process(next);
1221 } else
1222 /* becomes available */
1223 sem->owner = NULL;
1224
1225 spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1226
1227 priodon_complete_request();
1228
1229out:
1230 return err;
1231}
1232
1233static int cedf_omlp_close(struct litmus_lock* l)
1234{
1235 struct task_struct *t = current;
1236 struct omlp_semaphore *sem = omlp_from_lock(l);
1237 unsigned long flags;
1238
1239 int owner;
1240
1241 spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1242
1243 owner = sem->owner == t;
1244
1245 spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1246
1247 if (owner)
1248 cedf_omlp_unlock(l);
1249
1250 return 0;
1251}
1252
1253static void cedf_omlp_free(struct litmus_lock* lock)
1254{
1255 kfree(omlp_from_lock(lock));
1256}
1257
1258static struct litmus_lock_ops cedf_omlp_lock_ops = {
1259 .close = cedf_omlp_close,
1260 .lock = cedf_omlp_lock,
1261 .unlock = cedf_omlp_unlock,
1262 .deallocate = cedf_omlp_free,
1263};
1264
1265static struct litmus_lock* cedf_new_omlp(void)
1266{
1267 struct omlp_semaphore* sem;
1268
1269 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1270 if (!sem)
1271 return NULL;
1272
1273 sem->owner = NULL;
1274 init_waitqueue_head(&sem->fifo_wait);
1275 sem->litmus_lock.ops = &cedf_omlp_lock_ops;
1276
1277 return &sem->litmus_lock;
1278}
1279
1280static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1281 void* __user unused)
1282{
1283 int err = -ENXIO;
1284
1285 switch (type) {
1286
1287 case OMLP_SEM:
1288 /* O(m) Multiprocessor Locking Protocol */
1289 *lock = cedf_new_omlp();
1290 if (*lock)
1291 err = 0;
1292 else
1293 err = -ENOMEM;
1294 break;
1295
1296 };
1297
1298 return err;
1299}
1300
1301
1302#endif
1303
1304static long cedf_admit_task(struct task_struct* tsk)
1305{
1306 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu) {
1307#ifdef CONFIG_LITMUS_LOCKING
1308
1309#endif
1310 return 0;
1311 }
1312 else
1313 return -EINVAL;
1314}
1315
1316/* total number of clusters */
1317static int num_clusters;
1318/* we do not support clusters of different sizes */
1319static unsigned int cluster_size;
1320
1321#ifdef VERBOSE_INIT
1322static void print_cluster_topology(cpumask_var_t mask, int cpu)
1323{
1324 int chk;
1325 char buf[255];
1326
1327 chk = cpulist_scnprintf(buf, 254, mask);
1328 buf[chk] = '\0';
1329 printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
1330
1331}
1332#endif
1333
1334static int clusters_allocated = 0;
1335
1336static void cleanup_cedf(void)
1337{
1338 int i;
1339
1340 if (clusters_allocated) {
1341 for (i = 0; i < num_clusters; i++) {
1342 free_cpumask_var(cedf[i].cpu_map);
1343 }
1344
1345 kfree(cedf);
1346 }
1347}
1348
1349static long cedf_activate_plugin(void)
1350{
1351 int i, j, cpu, ccpu, cpu_count;
1352 cpu_entry_t *entry;
1353
1354 cpumask_var_t mask;
1355 int chk = 0;
1356
1357 /* de-allocate old clusters, if any */
1358 cleanup_cedf();
1359
1360 printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n",
1361 cluster_config);
1362
1363 /* need to get cluster_size first */
1364 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
1365 return -ENOMEM;
1366
1367 if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
1368 cluster_size = num_online_cpus();
1369 } else {
1370 chk = get_shared_cpu_map(mask, 0, cluster_config);
1371 if (chk) {
1372 /* if chk != 0 then it is the max allowed index */
1373 printk(KERN_INFO "C-EDF: Cluster configuration = %d "
1374 "is not supported on this hardware.\n",
1375 cluster_config);
1376 /* User should notice that the configuration failed, so
1377 * let's bail out. */
1378 return -EINVAL;
1379 }
1380
1381 cluster_size = cpumask_weight(mask);
1382 }
1383
1384 if ((num_online_cpus() % cluster_size) != 0) {
1385 /* this can't be right, some cpus are left out */
1386 printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
1387 num_online_cpus(), cluster_size);
1388 return -1;
1389 }
1390
1391 num_clusters = num_online_cpus() / cluster_size;
1392 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
1393 num_clusters, cluster_size);
1394
1395 /* initialize clusters */
1396 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
1397 for (i = 0; i < num_clusters; i++) {
1398 bheap_init(&(cedf[i].cpu_heap));
1399#ifdef CONFIG_LITMUS_LOCKING
1400 bheap_init(&(cedf[i].pending_jobs));
1401 bheap_init(&(cedf[i].pending_cpus));
1402#endif
1403 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
1404
1405 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
1406 return -ENOMEM;
1407#ifdef CONFIG_RELEASE_MASTER
1408 cedf[i].domain.release_master = atomic_read(&release_master_cpu);
1409#endif
1410 }
1411
1412 /* cycle through clusters and add cpus to them */
1413 for (i = 0; i < num_clusters; i++) {
1414
1415 for_each_online_cpu(cpu) {
1416 /* check if the cpu is already in a cluster */
1417 for (j = 0; j < num_clusters; j++)
1418 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
1419 break;
1420 /* if it is in a cluster go to next cpu */
1421 if (j < num_clusters &&
1422 cpumask_test_cpu(cpu, cedf[j].cpu_map))
1423 continue;
1424
1425 /* this cpu isn't in any cluster */
1426 /* get the shared cpus */
1427 if (unlikely(cluster_config == GLOBAL_CLUSTER))
1428 cpumask_copy(mask, cpu_online_mask);
1429 else
1430 get_shared_cpu_map(mask, cpu, cluster_config);
1431
1432 cpumask_copy(cedf[i].cpu_map, mask);
1433#ifdef VERBOSE_INIT
1434 print_cluster_topology(mask, cpu);
1435#endif
1436 /* add cpus to current cluster and init cpu_entry_t */
1437 cpu_count = 0;
1438 cedf[i].num_cpus = 0;
1439 for_each_cpu(ccpu, cedf[i].cpu_map) {
1440
1441 entry = &per_cpu(cedf_cpu_entries, ccpu);
1442 atomic_set(&entry->will_schedule, 0);
1443 entry->cpu = ccpu;
1444 entry->cluster = &cedf[i];
1445 entry->hn = cpu_nodes + ccpu;
1446 bheap_node_init(&entry->hn, entry);
1447
1448#ifdef CONFIG_LITMUS_LOCKING
1449 entry->pending_hn = pending_nodes + ccpu;
1450 bheap_node_init(&entry->pending_hn, entry);
1451 entry->pending = NULL;
1452#endif
1453
1454 cpu_count++;
1455
1456 entry->linked = NULL;
1457 entry->scheduled = NULL;
1458#ifdef CONFIG_RELEASE_MASTER
1459 /* only add CPUs that should schedule jobs */
1460 if (entry->cpu != entry->cluster->domain.release_master)
1461#endif
1462 {
1463 cedf[i].num_cpus++;
1464 update_cpu_position(entry);
1465#ifdef CONFIG_LITMUS_LOCKING
1466 update_pending_position(entry);
1467#endif
1468 }
1469 }
1470 /* done with this cluster */
1471 break;
1472 }
1473 }
1474
1475 free_cpumask_var(mask);
1476 clusters_allocated = 1;
1477 return 0;
1478}
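/* Worked example (illustrative numbers): with 8 online CPUs and a cluster
 * configuration whose shared-cache level spans 2 CPUs, cluster_size = 2 and
 * num_clusters = 8 / 2 = 4; with GLOBAL_CLUSTER, cluster_size becomes 8 and
 * the plugin degenerates to a single global cluster. A CPU count that is not
 * a multiple of cluster_size (e.g., 6 CPUs with cluster_size = 4) is rejected
 * by the modulo check above.
 */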
1479
1480/* Plugin object */
1481static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
1482 .plugin_name = "C-EDF",
1483 .finish_switch = cedf_finish_switch,
1484 .tick = cedf_tick,
1485 .task_new = cedf_task_new,
1486 .complete_job = complete_job,
1487 .task_exit = cedf_task_exit,
1488 .schedule = cedf_schedule,
1489 .task_wake_up = cedf_task_wake_up,
1490 .task_block = cedf_task_block,
1491 .admit_task = cedf_admit_task,
1492 .activate_plugin = cedf_activate_plugin,
1493#ifdef CONFIG_LITMUS_LOCKING
1494 .allocate_lock = cedf_allocate_lock,
1495 .pre_setsched = cedf_pre_setsched,
1496#endif
1497};
1498
1499static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
1500
1501static int __init init_cedf(void)
1502{
1503 int err, fs;
1504
1505 err = register_sched_plugin(&cedf_plugin);
1506 if (!err) {
1507 fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir);
1508 if (!fs)
1509 cluster_file = create_cluster_file(cedf_dir, &cluster_config);
1510 else
1511 printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n");
1512 }
1513 return err;
1514}
1515
1516static void clean_cedf(void)
1517{
1518 cleanup_cedf();
1519 if (cluster_file)
1520 remove_proc_entry("cluster", cedf_dir);
1521 if (cedf_dir)
1522 remove_plugin_proc_dir(&cedf_plugin);
1523}
1524
1525module_init(init_cedf);
1526module_exit(clean_cedf);
diff --git a/litmus/sched_cedf.c.rej b/litmus/sched_cedf.c.rej
new file mode 100644
index 000000000000..ec74da6c4a64
--- /dev/null
+++ b/litmus/sched_cedf.c.rej
@@ -0,0 +1,53 @@
1--- litmus/sched_cedf.c
2+++ litmus/sched_cedf.c
3@@ -739,6 +1100,12 @@
4 int out_of_time, sleep, preempt, np, exists, blocks;
5 struct task_struct* next = NULL;
6
7+#ifdef CONFIG_LITMUS_LOCKING
8+ int priodon;
9+#else
10+#define priodon 0
11+#endif
12+
13 #ifdef CONFIG_RELEASE_MASTER
14 /* Bail out early if we are the release master.
15 * The release master never schedules any real-time tasks.
16@@ -750,7 +1117,6 @@
17 #endif
18
19 raw_spin_lock(&cluster->cluster_lock);
20- clear_will_schedule();
21
22 /* sanity checking */
23 BUG_ON(entry->scheduled && entry->scheduled != prev);
24@@ -1032,7 +1466,15 @@
25
26 /* unlink if necessary */
27 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
28+
29 unlink(t);
30+
31+#ifdef CONFIG_LITMUS_LOCKING
32+ /* make sure it's not pending anymore */
33+ job_pending_exit(t);
34+ bheap_node_free(tsk_rt(t)->pending_node);
35+#endif
36+
37 if (tsk_rt(t)->scheduled_on != NO_CPU) {
38 cpu_entry_t *cpu;
39 cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
40@@ -1446,7 +2140,13 @@
41 /* only add CPUs that should schedule jobs */
42 if (entry->cpu != entry->cluster->domain.release_master)
43 #endif
44+ {
45+ cedf[i].num_cpus++;
46 update_cpu_position(entry);
47+#ifdef CONFIG_LITMUS_LOCKING
48+ update_pending_position(entry);
49+#endif
50+ }
51 }
52 /* done with this cluster */
53 break;
diff --git a/litmus/sched_gfl_split_namechange.c b/litmus/sched_gfl_split_namechange.c
new file mode 100644
index 000000000000..c154b115a00e
--- /dev/null
+++ b/litmus/sched_gfl_split_namechange.c
@@ -0,0 +1,1149 @@
1/*
2 * litmus/sched_gfl_split.c
3 *
4 * Implementation of the G-FL with job splitting. See the Erickson/Anderson
5 * paper at ECRTS 2012 for a description of G-FL.
6 *
7 * This plugin is a modified version of the prior GSN-EDF-split plugin in
8 * litmus/sched_gsn_edf_split.c. Job splitting works the same way as in that
9 * plugin. The subjob "deadlines" (really priorities) are computed according
10 * to G-FL with respect to the post-split (smaller) jobs.
11 *
12 */
13
14#include <linux/spinlock.h>
15#include <linux/percpu.h>
16#include <linux/sched.h>
17#include <linux/slab.h>
18
19#include <litmus/litmus.h>
20#include <litmus/jobs.h>
21#include <litmus/sched_plugin.h>
22#include <litmus/edf_split_common.h>
23#include <litmus/sched_trace.h>
24#include <litmus/trace.h>
25
26#include <litmus/preempt.h>
27
28#include <litmus/bheap.h>
29
30#ifdef CONFIG_SCHED_CPU_AFFINITY
31#include <litmus/affinity.h>
32#endif
33
34#include <linux/module.h>
35
36/* cpu_entry_t - maintain the linked and scheduled state
37 */
38typedef struct {
39 int cpu;
40 struct task_struct* linked; /* only RT tasks */
41 struct task_struct* scheduled; /* only RT tasks */
42 struct bheap_node* hn;
43 struct hrtimer split_timer;
44 int timer_armed;
45} cpu_entry_t;
46DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
47
48cpu_entry_t* gsnedf_cpus[NR_CPUS];
49
50/* the cpus queue themselves according to priority in here */
51static struct bheap_node gsnedf_heap_node[NR_CPUS];
52static struct bheap gsnedf_cpu_heap;
53
54static rt_domain_t gsnedf;
55#define gsnedf_lock (gsnedf.ready_lock)
56
57inline static int get_slice_num(struct task_struct* t)
58{
59 int basic = ((t->rt_param.job_params.exec_time *
60 t->rt_param.task_params.split) /
61 t->rt_param.task_params.exec_cost) + 1;
62 if (basic <= t->rt_param.task_params.split){
63 return basic;
64 }
65 else{
 66 /* Since we don't police budget, just leave it where it is. */
67 return t->rt_param.task_params.split;
68 }
69}
70
71/* Returns the appropriate subjob deadline.*/
72inline static lt_t get_proper_deadline(struct task_struct* t)
73{
74 unsigned int num_cpus = num_online_cpus();
75 return t->rt_param.job_params.release +
76 ((t->rt_param.task_params.period * get_slice_num(t))
77 / t->rt_param.task_params.split)
78 /* G-FL correction */
79 - (((num_cpus - 1) * t->rt_param.task_params.exec_cost)
80 / (num_cpus * t->rt_param.task_params.split));
81}
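/* Worked example (illustrative parameters; all lt_t values are in ns):
 * exec_cost C = 6ms, period = 12ms, split = 2, m = num_cpus = 4, release = 0.
 * The per-subjob G-FL correction is ((m - 1) * C) / (m * split)
 * = (3 * 6ms) / 8 = 2.25ms, so subjob 1 gets deadline
 * 12ms * 1 / 2 - 2.25ms = 3.75ms and subjob 2 gets
 * 12ms * 2 / 2 - 2.25ms = 9.75ms (both exact under integer division here).
 */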
82
83/* Tells us if the current deadline is too small.*/
84inline static int needs_deadline_move(struct task_struct* t)
85{
86 BUG_ON(get_proper_deadline(t) < t->rt_param.job_params.subjob_deadline);
87#ifdef CONFIG_LITMUS_LOCKING
88 return !is_in_crit_section(t) &&
89 (get_proper_deadline(t) !=
90 tsk_rt(t)->job_params.subjob_deadline);
91#else
92 return get_proper_deadline(t) != tsk_rt(t)->job_params.subjob_deadline;
93#endif
94}
95
96/*Returns execution time until the next deadline move.
97 * 0 means the task has no more deadline moves
98 */
99inline static lt_t time_to_next_move(struct task_struct* t)
100{
101 if (get_slice_num(t) == t->rt_param.task_params.split){
102 return 0;
103 }
104 /* +1 upper bounds ceiling, since integer division is floor*/
105 return ((get_slice_num(t) * t->rt_param.task_params.exec_cost)
106 / t->rt_param.task_params.split) + 1
107 - t->rt_param.job_params.exec_time;
108}
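/* Continuing the example above (C = 6ms, split = 2): with exec_time = 1ms the
 * task is still in subjob 1, so the next deadline move is due after
 * (1 * 6ms) / 2 + 1 - 1ms = 2ms + 1ns of further execution; once the last
 * subjob is reached, time_to_next_move() returns 0 and no split timer is
 * armed (see arm_split_timer() below).
 */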
109
110/* Timer stuff - similar to budget.c. */
111static enum hrtimer_restart on_split_timeout(struct hrtimer *timer)
112{
113 cpu_entry_t* st = container_of(timer,
114 cpu_entry_t,
115 split_timer);
116
117 unsigned long flags;
118
119 local_irq_save(flags);
120 TRACE("split timer fired.\n");
121 st->timer_armed = 0;
122 /* Activate scheduler */
123 litmus_reschedule_local();
124 local_irq_restore(flags);
125
126 return HRTIMER_NORESTART;
127}
128
129static void cancel_split_timer(cpu_entry_t* ce)
130{
131 int ret;
132
133 TRACE("cancelling split time.\n");
134
 135 /* Since interrupts are disabled and ce->timer_armed is only
136 * modified locally, we do not need any locks.
137 */
138
139 if (ce->timer_armed) {
140 ret = hrtimer_try_to_cancel(&ce->split_timer);
141 /* Should never be inactive. */
142 BUG_ON(ret == 0);
143 /* Should never be running concurrently.*/
144 BUG_ON(ret == -1);
145
146 ce->timer_armed = 0;
147 }
148}
149
150/* assumes called with IRQs off */
151static void arm_split_timer(cpu_entry_t *ce,
152 struct task_struct* t)
153{
154 lt_t when_to_fire;
155 lt_t time_to_move;
156 TRACE_TASK(t, "arming split timer.\n");
157
158 /* __hrtimer_start_range_ns() cancels the timer
159 * anyway, so we don't have to check whether it is still armed */
160
 161 /* We won't do any new deadline moves if the budget has been exhausted. */
162 if (likely(!is_np(t) && (time_to_move = time_to_next_move(t)))) {
163 when_to_fire = litmus_clock() + time_to_move;
164 TRACE_TASK(t, "actually arming for %llu into the future\n",
165 time_to_move);
166 __hrtimer_start_range_ns(&ce->split_timer,
167 ns_to_ktime(when_to_fire),
168 0 /* delta */,
169 HRTIMER_MODE_ABS_PINNED,
170 0 /* no wakeup */);
171 ce->timer_armed = 1;
172 }
173}
174
175/* Uncomment this if you want to see all scheduling decisions in the
176 * TRACE() log.
177#define WANT_ALL_SCHED_EVENTS
178 */
179
180static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
181{
182 cpu_entry_t *a, *b;
183 a = _a->value;
184 b = _b->value;
185 /* Note that a and b are inverted: we want the lowest-priority CPU at
186 * the top of the heap.
187 */
188 return edf_split_higher_prio(b->linked, a->linked);
189}
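/* Example (illustrative): if CPU 0 is idle (linked == NULL) while CPU 1 is
 * linked to a real-time task, cpu_lower_prio() orders CPU 0 above CPU 1, so
 * lowest_prio_cpu() below yields the idle CPU first as a preemption target.
 */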
190
191/* update_cpu_position - Move the cpu entry to the correct place to maintain
192 * order in the cpu queue. Caller must hold gsnedf lock.
193 */
194static void update_cpu_position(cpu_entry_t *entry)
195{
196 if (likely(bheap_node_in_heap(entry->hn)))
197 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
198 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
199}
200
201/* caller must hold gsnedf lock */
202static cpu_entry_t* lowest_prio_cpu(void)
203{
204 struct bheap_node* hn;
205 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
206 return hn->value;
207}
208
209
210/* link_task_to_cpu - Update the link of a CPU.
211 * Handles the case where the to-be-linked task is already
212 * scheduled on a different CPU.
213 */
214static noinline void link_task_to_cpu(struct task_struct* linked,
215 cpu_entry_t *entry)
216{
217 cpu_entry_t *sched;
218 struct task_struct* tmp;
219 int on_cpu;
220
221 BUG_ON(linked && !is_realtime(linked));
222
223 /* Currently linked task is set to be unlinked. */
224 if (entry->linked) {
225 entry->linked->rt_param.linked_on = NO_CPU;
226 }
227
228 /* Link new task to CPU. */
229 if (linked) {
230 set_rt_flags(linked, RT_F_RUNNING);
 231 /* handle the case where the task is already scheduled somewhere! */
232 on_cpu = linked->rt_param.scheduled_on;
233 if (on_cpu != NO_CPU) {
234 sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
235 /* this should only happen if not linked already */
236 BUG_ON(sched->linked == linked);
237
238 /* If we are already scheduled on the CPU to which we
239 * wanted to link, we don't need to do the swap --
240 * we just link ourselves to the CPU and depend on
241 * the caller to get things right.
242 */
243 if (entry != sched) {
244 TRACE_TASK(linked,
245 "already scheduled on %d, updating link.\n",
246 sched->cpu);
247 tmp = sched->linked;
248 linked->rt_param.linked_on = sched->cpu;
249 sched->linked = linked;
250 update_cpu_position(sched);
251 linked = tmp;
252 }
253 }
254 if (linked) /* might be NULL due to swap */
255 linked->rt_param.linked_on = entry->cpu;
256 }
257 entry->linked = linked;
258#ifdef WANT_ALL_SCHED_EVENTS
259 if (linked)
260 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
261 else
262 TRACE("NULL linked to %d.\n", entry->cpu);
263#endif
264 update_cpu_position(entry);
265}
266
267/* unlink - Make sure a task is not linked any longer to an entry
268 * where it was linked before. Must hold gsnedf_lock.
269 */
270static noinline void unlink(struct task_struct* t)
271{
272 cpu_entry_t *entry;
273
274 if (t->rt_param.linked_on != NO_CPU) {
275 /* unlink */
276 entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
277 t->rt_param.linked_on = NO_CPU;
278 link_task_to_cpu(NULL, entry);
279 } else if (is_queued(t)) {
280 /* This is an interesting situation: t is scheduled,
281 * but was just recently unlinked. It cannot be
282 * linked anywhere else (because then it would have
283 * been relinked to this CPU), thus it must be in some
284 * queue. We must remove it from the list in this
285 * case.
286 */
287 remove(&gsnedf, t);
288 }
289}
290
291
292/* preempt - force a CPU to reschedule
293 */
294static void preempt(cpu_entry_t *entry)
295{
296 preempt_if_preemptable(entry->scheduled, entry->cpu);
297}
298
299/* requeue - Put an unlinked task into gsn-edf domain.
300 * Caller must hold gsnedf_lock.
301 */
302static noinline void requeue(struct task_struct* task)
303{
304 BUG_ON(!task);
305 /* sanity check before insertion */
306 BUG_ON(is_queued(task));
307
308 if (is_released(task, litmus_clock()))
309 __add_ready(&gsnedf, task);
310 else {
311 /* it has got to wait */
312 add_release(&gsnedf, task);
313 }
314}
315
316#ifdef CONFIG_SCHED_CPU_AFFINITY
317static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
318{
319 cpu_entry_t *affinity;
320
321 get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
322#ifdef CONFIG_RELEASE_MASTER
323 gsnedf.release_master
324#else
325 NO_CPU
326#endif
327 );
328
329 return(affinity);
330}
331#endif
332
333/* check for any necessary preemptions */
334static void check_for_preemptions(void)
335{
336 struct task_struct *task;
337 cpu_entry_t *last;
338
339 for (last = lowest_prio_cpu();
340 edf_split_preemption_needed(&gsnedf, last->linked);
341 last = lowest_prio_cpu()) {
342 /* preemption necessary */
343 task = __take_ready(&gsnedf);
344 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
345 task->pid, last->cpu);
346
347#ifdef CONFIG_SCHED_CPU_AFFINITY
348 {
349 cpu_entry_t *affinity =
350 gsnedf_get_nearest_available_cpu(
351 &per_cpu(gsnedf_cpu_entries,
352 task_cpu(task)));
353 if (affinity)
354 last = affinity;
355 else if (last->linked)
356 requeue(last->linked);
357 }
358#else
359 if (last->linked)
360 requeue(last->linked);
361#endif
362
363 link_task_to_cpu(task, last);
364 preempt(last);
365 }
366}
367
368/* gsnedf_job_arrival: task is either resumed or released */
369static noinline void gsnedf_job_arrival(struct task_struct* task)
370{
371 BUG_ON(!task);
372
373 requeue(task);
374 check_for_preemptions();
375}
376
377static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
378{
379 unsigned long flags;
380
381 raw_spin_lock_irqsave(&gsnedf_lock, flags);
382
383 __merge_ready(rt, tasks);
384 check_for_preemptions();
385
386 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
387}
388
389/* caller holds gsnedf_lock */
390static noinline void job_completion(struct task_struct *t, int forced)
391{
392 BUG_ON(!t);
393
394 sched_trace_task_completion(t, forced);
395
396 TRACE_TASK(t, "job_completion().\n");
397
398 /* set flags */
399 set_rt_flags(t, RT_F_SLEEP);
400 /* prepare for next period */
401 /* prepare_for_next_period assumes implicit deadlines and no splitting,
402 * so we call it with the job deadline it expects.
403 */
404 t->rt_param.job_params.deadline = t->rt_param.job_params.release +
405 t->rt_param.task_params.period;
406 prepare_for_next_period(t);
407 /* We now set the subjob deadline to what it should be for scheduling
408 * priority.
409 */
410 t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
411 if (is_released(t, litmus_clock()))
412 sched_trace_task_release(t);
413 /* unlink */
414 unlink(t);
415 /* requeue
416 * But don't requeue a blocking task. */
417 if (is_running(t))
418 gsnedf_job_arrival(t);
419}
420
421static void move_deadline(struct task_struct *t)
422{
423 tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);
424 /* Check if rescheduling needed with lower priority. */
425 unlink(t);
426 gsnedf_job_arrival(t);
427}
428
429/* gsnedf_tick - this function is called for every local timer
430 * interrupt.
431 *
432 * checks whether the current task has expired and checks
433 * whether we need to preempt it if it has not expired
434 */
435static void gsnedf_tick(struct task_struct* t)
436{
437 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
438 if (!is_np(t)) {
439 /* np tasks will be preempted when they become
440 * preemptable again
441 */
442 litmus_reschedule_local();
443 TRACE("gsnedf_scheduler_tick: "
444 "%d is preemptable "
445 " => FORCE_RESCHED\n", t->pid);
446 } else if (is_user_np(t)) {
447 TRACE("gsnedf_scheduler_tick: "
448 "%d is non-preemptable, "
449 "preemption delayed.\n", t->pid);
450 request_exit_np(t);
451 }
452 }
453}
454
455/* Getting schedule() right is a bit tricky. schedule() may not make any
456 * assumptions on the state of the current task since it may be called for a
457 * number of reasons. The reasons include a scheduler_tick() determined that it
458 * was necessary, because sys_exit_np() was called, because some Linux
459 * subsystem determined so, or even (in the worst case) because there is a bug
460 * hidden somewhere. Thus, we must take extreme care to determine what the
461 * current state is.
462 *
463 * The CPU could currently be scheduling a task (or not), be linked (or not).
464 *
465 * The following assertions for the scheduled task could hold:
466 *
467 * - !is_running(scheduled) // the job blocks
468 * - scheduled->timeslice == 0 // the job completed (forcefully)
469 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
470 * - linked != scheduled // we need to reschedule (for any reason)
471 * - is_np(scheduled) // rescheduling must be delayed,
472 * sys_exit_np must be requested
473 *
474 * Any of these can occur together.
475 */
476static struct task_struct* gsnedf_schedule(struct task_struct * prev)
477{
478 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
479 int out_of_time, sleep, preempt, np, exists, blocks, needs_move;
480 struct task_struct* next = NULL;
481
482#ifdef CONFIG_RELEASE_MASTER
483 /* Bail out early if we are the release master.
484 * The release master never schedules any real-time tasks.
485 */
486 if (unlikely(gsnedf.release_master == entry->cpu)) {
487 sched_state_task_picked();
488 return NULL;
489 }
490#endif
491
492 raw_spin_lock(&gsnedf_lock);
493
494 /* sanity checking */
495 BUG_ON(entry->scheduled && entry->scheduled != prev);
496 BUG_ON(entry->scheduled && !is_realtime(prev));
497 BUG_ON(is_realtime(prev) && !entry->scheduled);
498
499 /* (0) Determine state */
500 exists = entry->scheduled != NULL;
501 blocks = exists && !is_running(entry->scheduled);
502 out_of_time = exists &&
503 budget_enforced(entry->scheduled) &&
504 budget_exhausted(entry->scheduled);
505 needs_move = exists && needs_deadline_move(entry->scheduled);
506 np = exists && is_np(entry->scheduled);
507 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
508 preempt = entry->scheduled != entry->linked;
509
510#ifdef WANT_ALL_SCHED_EVENTS
511 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
512#endif
513
514 if (exists)
515 TRACE_TASK(prev,
516 "blocks:%d out_of_time:%d needs_move:%d np:%d"
517 " sleep:%d preempt:%d state:%d sig:%d\n",
518 blocks, out_of_time, needs_move, np, sleep, preempt,
519 prev->state, signal_pending(prev));
520 if (entry->linked && preempt)
521 TRACE_TASK(prev, "will be preempted by %s/%d\n",
522 entry->linked->comm, entry->linked->pid);
523
524
525 /* If a task blocks we have no choice but to reschedule.
526 */
527 if (blocks)
528 unlink(entry->scheduled);
529
530 /* Request a sys_exit_np() call if we would like to preempt but cannot.
531 * We need to make sure to update the link structure anyway in case
532 * that we are still linked. Multiple calls to request_exit_np() don't
533 * hurt.
534 *
535 * Job deadline moves handled similarly
536 */
537 if (np && (out_of_time || preempt || sleep)) {
538 unlink(entry->scheduled);
539 request_exit_np(entry->scheduled);
540 }
541 else if (np && needs_move) {
542 move_deadline(entry->scheduled);
543 }
544
545 /* Any task that is preemptable and either exhausts its execution
546 * budget or wants to sleep completes. We may have to reschedule after
547 * this. Don't do a job completion if we block (can't have timers running
 548 * for blocked jobs). Preemptions go first for the same reason.
549 */
550 if (!np && (out_of_time || sleep) && !blocks && !preempt)
551 job_completion(entry->scheduled, !sleep);
552 else if (!np && needs_move && !blocks && !preempt) {
553 move_deadline(entry->scheduled);
554 }
555
556 /* Link pending task if we became unlinked.
557 */
558 if (!entry->linked)
559 link_task_to_cpu(__take_ready(&gsnedf), entry);
560
561 /* The final scheduling decision. Do we need to switch for some reason?
562 * If linked is different from scheduled, then select linked as next.
563 */
564 if ((!np || blocks) &&
565 entry->linked != entry->scheduled) {
566 /* Schedule a linked job? */
567 if (entry->linked) {
568 entry->linked->rt_param.scheduled_on = entry->cpu;
569 next = entry->linked;
570 TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
571 }
572 if (entry->scheduled) {
573 /* not gonna be scheduled soon */
574 entry->scheduled->rt_param.scheduled_on = NO_CPU;
575 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
576 }
577 } else
578 /* Only override Linux scheduler if we have a real-time task
579 * scheduled that needs to continue.
580 */
581 if (exists)
582 next = prev;
583
584 sched_state_task_picked();
585
586 raw_spin_unlock(&gsnedf_lock);
587
588 if (next) {
589 arm_split_timer(entry, next);
590 }
591 else if (entry->timer_armed) {
592 cancel_split_timer(entry);
593 }
594
595#ifdef WANT_ALL_SCHED_EVENTS
596 TRACE("gsnedf_lock released, next=0x%p\n", next);
597
598 if (next)
599 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
600 else if (exists && !next)
601 TRACE("becomes idle at %llu.\n", litmus_clock());
602#endif
603
604
605 return next;
606}
607
608
609/* _finish_switch - we just finished the switch away from prev
610 */
611static void gsnedf_finish_switch(struct task_struct *prev)
612{
613 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
614
615 entry->scheduled = is_realtime(current) ? current : NULL;
616#ifdef WANT_ALL_SCHED_EVENTS
617 TRACE_TASK(prev, "switched away from\n");
618#endif
619}
620
621static void gsnedf_release_at(struct task_struct *t, lt_t start)
622{
623 t->rt_param.job_params.deadline = start;
624 prepare_for_next_period(t);
625 t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
626 set_rt_flags(t, RT_F_RUNNING);
627}
628
629/* Prepare a task for running in RT mode
630 */
631static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
632{
633 unsigned long flags;
634 cpu_entry_t* entry;
635
636 TRACE("gsn edf: task new %d\n", t->pid);
637
638 raw_spin_lock_irqsave(&gsnedf_lock, flags);
639
640 /* setup job params */
641 gsnedf_release_at(t, litmus_clock());
642
643 if (running) {
644 entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
645 BUG_ON(entry->scheduled);
646
647#ifdef CONFIG_RELEASE_MASTER
648 if (entry->cpu != gsnedf.release_master) {
649#endif
650 entry->scheduled = t;
651 tsk_rt(t)->scheduled_on = task_cpu(t);
652#ifdef CONFIG_RELEASE_MASTER
653 } else {
654 /* do not schedule on release master */
655 preempt(entry); /* force resched */
656 tsk_rt(t)->scheduled_on = NO_CPU;
657 }
658#endif
659 } else {
660 t->rt_param.scheduled_on = NO_CPU;
661 }
662 t->rt_param.linked_on = NO_CPU;
663
664 gsnedf_job_arrival(t);
665 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
666}
667
668static void gsnedf_task_wake_up(struct task_struct *task)
669{
670 unsigned long flags;
671 lt_t now;
672
673 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
674
675 raw_spin_lock_irqsave(&gsnedf_lock, flags);
676 /* We need to take suspensions because of semaphores into
677 * account! If a job resumes after being suspended due to acquiring
678 * a semaphore, it should never be treated as a new job release.
679 */
680 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
681 set_rt_flags(task, RT_F_RUNNING);
682 } else {
683 now = litmus_clock();
684 if (is_tardy(task, now)) {
685 /* new sporadic release */
686 gsnedf_release_at(task, now);
687 sched_trace_task_release(task);
688 }
689 else {
690 if (task->rt.time_slice) {
691 /* came back in time before deadline
692 */
693 set_rt_flags(task, RT_F_RUNNING);
694 }
695 }
696 }
697 gsnedf_job_arrival(task);
698 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
699}
700
701static void gsnedf_task_block(struct task_struct *t)
702{
703 unsigned long flags;
704
705 TRACE_TASK(t, "block at %llu\n", litmus_clock());
706
707 /* unlink if necessary */
708 raw_spin_lock_irqsave(&gsnedf_lock, flags);
709 unlink(t);
710 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
711
712 BUG_ON(!is_realtime(t));
713}
714
715
716static void gsnedf_task_exit(struct task_struct * t)
717{
718 unsigned long flags;
719
720 /* unlink if necessary */
721 raw_spin_lock_irqsave(&gsnedf_lock, flags);
722 unlink(t);
723 if (tsk_rt(t)->scheduled_on != NO_CPU) {
724 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
725 tsk_rt(t)->scheduled_on = NO_CPU;
726 }
727 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
728
729 BUG_ON(!is_realtime(t));
730 TRACE_TASK(t, "RIP\n");
731}
732
733
734static long gsnedf_admit_task(struct task_struct* tsk)
735{
736 return 0;
737}
738
739#ifdef CONFIG_LITMUS_LOCKING
740
741#include <litmus/fdso.h>
742
743/* called with IRQs off */
744static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
745{
746 int linked_on;
747 int check_preempt = 0;
748
749 raw_spin_lock(&gsnedf_lock);
750
751 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
752 tsk_rt(t)->inh_task = prio_inh;
753
754 linked_on = tsk_rt(t)->linked_on;
755
756 /* If it is scheduled, then we need to reorder the CPU heap. */
757 if (linked_on != NO_CPU) {
758 TRACE_TASK(t, "%s: linked on %d\n",
759 __FUNCTION__, linked_on);
760 /* Holder is scheduled; need to re-order CPUs.
761 * We can't use heap_decrease() here since
762 * the cpu_heap is ordered in reverse direction, so
763 * it is actually an increase. */
764 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
765 gsnedf_cpus[linked_on]->hn);
766 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
767 gsnedf_cpus[linked_on]->hn);
768 } else {
769 /* holder may be queued: first stop queue changes */
770 raw_spin_lock(&gsnedf.release_lock);
771 if (is_queued(t)) {
772 TRACE_TASK(t, "%s: is queued\n",
773 __FUNCTION__);
774 /* We need to update the position of holder in some
 775 * heap. Note that this could be a release heap if
776 * budget enforcement is used and this job overran. */
777 check_preempt =
778 !bheap_decrease(edf_split_ready_order,
779 tsk_rt(t)->heap_node);
780 } else {
781 /* Nothing to do: if it is not queued and not linked
782 * then it is either sleeping or currently being moved
783 * by other code (e.g., a timer interrupt handler) that
784 * will use the correct priority when enqueuing the
785 * task. */
786 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
787 __FUNCTION__);
788 }
789 raw_spin_unlock(&gsnedf.release_lock);
790
791 /* If holder was enqueued in a release heap, then the following
792 * preemption check is pointless, but we can't easily detect
793 * that case. If you want to fix this, then consider that
794 * simply adding a state flag requires O(n) time to update when
795 * releasing n tasks, which conflicts with the goal to have
796 * O(log n) merges. */
797 if (check_preempt) {
798 /* heap_decrease() hit the top level of the heap: make
799 * sure preemption checks get the right task, not the
800 * potentially stale cache. */
801 bheap_uncache_min(edf_split_ready_order,
802 &gsnedf.ready_queue);
803 check_for_preemptions();
804 }
805 }
806
807 raw_spin_unlock(&gsnedf_lock);
808}
809
810/* called with IRQs off */
811static void update_unlocked_priority(struct task_struct* t)
812{
813 raw_spin_lock(&gsnedf_lock);
814
815 /* A job only stops inheriting a priority when it releases a
816 * resource. Thus we can make the following assumption.*/
817 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
818
819 /* Clear priority inheritance */
820 TRACE_TASK(t, "priority restored\n");
821 tsk_rt(t)->inh_task = NULL;
822
823 /* Update splitting deadline */
824 tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);
825
826 /* Check if rescheduling is necessary. We can't use heap_decrease()
827 * since the priority was effectively lowered. */
828 unlink(t);
829 gsnedf_job_arrival(t);
830
831 raw_spin_unlock(&gsnedf_lock);
832}
833
834
835/* ******************** FMLP support ********************** */
836
837/* struct for semaphore with priority inheritance */
838struct fmlp_semaphore {
839 struct litmus_lock litmus_lock;
840
841 /* current resource holder */
842 struct task_struct *owner;
843
844 /* highest-priority waiter */
845 struct task_struct *hp_waiter;
846
847 /* FIFO queue of waiting tasks */
848 wait_queue_head_t wait;
849};
850
851static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
852{
853 return container_of(lock, struct fmlp_semaphore, litmus_lock);
854}
855
856/* caller is responsible for locking */
857static struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem,
858 struct task_struct* skip)
859{
860 struct list_head *pos;
861 struct task_struct *queued, *found = NULL;
862
863 list_for_each(pos, &sem->wait.task_list) {
864 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
865 task_list)->private;
866
867 /* Compare task prios, find high prio task. */
868 if (queued != skip && edf_split_higher_prio(queued, found))
869 found = queued;
870 }
871 return found;
872}
873
874int gsnedf_fmlp_lock(struct litmus_lock* l)
875{
876 struct task_struct* t = current;
877 struct fmlp_semaphore *sem = fmlp_from_lock(l);
878 cpu_entry_t* entry;
879 wait_queue_t wait;
880 unsigned long flags;
881
882 if (!is_realtime(t))
883 return -EPERM;
884
885 spin_lock_irqsave(&sem->wait.lock, flags);
886 entry = &__get_cpu_var(gsnedf_cpu_entries);
887
888 tsk_rt(t)->in_crit_section = 1;
889 if (entry->timer_armed) {
890 cancel_split_timer(entry);
891 }
892
893 if (sem->owner) {
894 /* resource is not free => must suspend and wait */
895
896 init_waitqueue_entry(&wait, t);
897
898 /* FIXME: interruptible would be nice some day */
899 set_task_state(t, TASK_UNINTERRUPTIBLE);
900
901 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
902
903 /* check if we need to activate priority inheritance */
904 if (edf_split_higher_prio(t, sem->hp_waiter)) {
905 sem->hp_waiter = t;
906 if (edf_split_higher_prio(t, sem->owner))
907 set_priority_inheritance(sem->owner, sem->hp_waiter);
908 }
909
910 TS_LOCK_SUSPEND;
911
912 /* release lock before sleeping */
913 spin_unlock_irqrestore(&sem->wait.lock, flags);
914
915 /* We depend on the FIFO order. Thus, we don't need to recheck
916 * when we wake up; we are guaranteed to have the lock since
917 * there is only one wake up per release.
918 */
919
920 schedule();
921
922 TS_LOCK_RESUME;
923
924 /* Since we hold the lock, no other task will change
925 * ->owner. We can thus check it without acquiring the spin
926 * lock. */
927 BUG_ON(sem->owner != t);
928 } else {
929 /* it's ours now */
930 sem->owner = t;
931
932 spin_unlock_irqrestore(&sem->wait.lock, flags);
933 }
934
935 return 0;
936}
937
938int gsnedf_fmlp_unlock(struct litmus_lock* l)
939{
940 struct task_struct *t = current, *next;
941 struct fmlp_semaphore *sem = fmlp_from_lock(l);
942 unsigned long flags;
943 int err = 0;
944
945 spin_lock_irqsave(&sem->wait.lock, flags);
946
947 if (sem->owner != t) {
948 err = -EINVAL;
949 goto out;
950 }
951
952 /* check if there are jobs waiting for this resource */
953 next = __waitqueue_remove_first(&sem->wait);
954 if (next) {
 955 /* next becomes the resource holder */
956 sem->owner = next;
957 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
958
959 /* determine new hp_waiter if necessary */
960 if (next == sem->hp_waiter) {
961 TRACE_TASK(next, "was highest-prio waiter\n");
962 /* next has the highest priority --- it doesn't need to
963 * inherit. However, we need to make sure that the
964 * next-highest priority in the queue is reflected in
965 * hp_waiter. */
966 sem->hp_waiter = find_hp_waiter(sem, next);
967 if (sem->hp_waiter)
968 TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
969 else
970 TRACE("no further waiters\n");
971 } else {
972 /* Well, if next is not the highest-priority waiter,
973 * then it ought to inherit the highest-priority
974 * waiter's priority. */
975 set_priority_inheritance(next, sem->hp_waiter);
976 }
977
978 /* wake up next */
979 wake_up_process(next);
980 } else
981 /* becomes available */
982 sem->owner = NULL;
983
984 /* We are no longer in the critical section */
985 tsk_rt(t)->in_crit_section = 0;
986
987 /* we lose the benefit of priority inheritance (if any) and may need
 988 * to move the deadline. In either case, we may need to reschedule
989 * due to reduced priority. */
990 if (tsk_rt(t)->inh_task || needs_deadline_move(t))
991 update_unlocked_priority(t);
992 /* TODO: Check that schedule() gets called - it needs to arm the
993 * enforcement timer. Otherwise we should do it here or in
994 * update_unlocked_priority. */
995
996out:
997 spin_unlock_irqrestore(&sem->wait.lock, flags);
998
999 return err;
1000}
1001
1002int gsnedf_fmlp_close(struct litmus_lock* l)
1003{
1004 struct task_struct *t = current;
1005 struct fmlp_semaphore *sem = fmlp_from_lock(l);
1006 unsigned long flags;
1007
1008 int owner;
1009
1010 spin_lock_irqsave(&sem->wait.lock, flags);
1011
1012 owner = sem->owner == t;
1013
1014 spin_unlock_irqrestore(&sem->wait.lock, flags);
1015
1016 if (owner)
1017 gsnedf_fmlp_unlock(l);
1018
1019 return 0;
1020}
1021
1022void gsnedf_fmlp_free(struct litmus_lock* lock)
1023{
1024 kfree(fmlp_from_lock(lock));
1025}
1026
1027static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
1028 .close = gsnedf_fmlp_close,
1029 .lock = gsnedf_fmlp_lock,
1030 .unlock = gsnedf_fmlp_unlock,
1031 .deallocate = gsnedf_fmlp_free,
1032};
1033
1034static struct litmus_lock* gsnedf_new_fmlp(void)
1035{
1036 struct fmlp_semaphore* sem;
1037
1038 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1039 if (!sem)
1040 return NULL;
1041
1042 sem->owner = NULL;
1043 sem->hp_waiter = NULL;
1044 init_waitqueue_head(&sem->wait);
1045 sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops;
1046
1047 return &sem->litmus_lock;
1048}
1049
1050/* **** lock constructor **** */
1051
1052
1053static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
1054 void* __user unused)
1055{
1056 int err = -ENXIO;
1057
1058 /* GSN-EDF currently only supports the FMLP for global resources. */
1059 switch (type) {
1060
1061 case FMLP_SEM:
1062 /* Flexible Multiprocessor Locking Protocol */
1063 *lock = gsnedf_new_fmlp();
1064 if (*lock)
1065 err = 0;
1066 else
1067 err = -ENOMEM;
1068 break;
1069
1070 };
1071
1072 return err;
1073}
1074
1075#endif
1076
1077
1078static long gsnedf_activate_plugin(void)
1079{
1080 int cpu;
1081 cpu_entry_t *entry;
1082
1083 bheap_init(&gsnedf_cpu_heap);
1084#ifdef CONFIG_RELEASE_MASTER
1085 gsnedf.release_master = atomic_read(&release_master_cpu);
1086#endif
1087
1088 for_each_online_cpu(cpu) {
1089 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1090 bheap_node_init(&entry->hn, entry);
1091 entry->linked = NULL;
1092 entry->scheduled = NULL;
1093#ifdef CONFIG_RELEASE_MASTER
1094 if (cpu != gsnedf.release_master) {
1095#endif
1096 TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
1097 update_cpu_position(entry);
1098#ifdef CONFIG_RELEASE_MASTER
1099 } else {
1100 TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
1101 }
1102#endif
1103 }
1104 return 0;
1105}
1106
1107/* Plugin object */
1108static struct sched_plugin gfl_plugin __cacheline_aligned_in_smp = {
1109 .plugin_name = "GSN-EDF",
1110 .finish_switch = gsnedf_finish_switch,
1111 .tick = gsnedf_tick,
1112 .task_new = gsnedf_task_new,
1113 .complete_job = complete_job,
1114 .task_exit = gsnedf_task_exit,
1115 .schedule = gsnedf_schedule,
1116 .release_at = gsnedf_release_at,
1117 .task_wake_up = gsnedf_task_wake_up,
1118 .task_block = gsnedf_task_block,
1119 .admit_task = gsnedf_admit_task,
1120 .activate_plugin = gsnedf_activate_plugin,
1121#ifdef CONFIG_LITMUS_LOCKING
1122 .allocate_lock = gsnedf_allocate_lock,
1123#endif
1124};
1125
1126
1127static int __init init_gfl(void)
1128{
1129 int cpu;
1130 cpu_entry_t *entry;
1131
1132 bheap_init(&gsnedf_cpu_heap);
1133 /* initialize CPU state */
1134 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1135 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1136 gsnedf_cpus[cpu] = entry;
1137 entry->cpu = cpu;
1138 entry->hn = &gsnedf_heap_node[cpu];
1139 hrtimer_init(&entry->split_timer,
1140 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1141 entry->split_timer.function = on_split_timeout;
1142 bheap_node_init(&entry->hn, entry);
1143 }
1144 edf_split_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
1145 return register_sched_plugin(&gfl_plugin);
1146}
1147
1148
1149module_init(init_gfl);
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
new file mode 100644
index 000000000000..9debea981419
--- /dev/null
+++ b/litmus/sched_gsn_edf.c
@@ -0,0 +1,1286 @@
1/*
2 * litmus/sched_gsn_edf.c
3 *
4 * Implementation of the GSN-EDF scheduling algorithm.
5 *
6 * This version uses the simple approach and serializes all scheduling
7 * decisions by the use of a queue lock. This is probably not the
8 * best way to do it, but it should suffice for now.
9 */
10
11#include <linux/spinlock.h>
12#include <linux/percpu.h>
13#include <linux/sched.h>
14#include <linux/slab.h>
15
16#include <litmus/litmus.h>
17#include <litmus/wait.h>
18#include <litmus/jobs.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/edf_common.h>
21#include <litmus/sched_trace.h>
22#include <litmus/trace.h>
23
24#include <litmus/preempt.h>
25
26#include <litmus/bheap.h>
27
28#include <linux/module.h>
29
30/* Overview of GSN-EDF operations.
31 *
32 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
33 * description only covers how the individual operations are implemented in
34 * LITMUS.
35 *
36 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
37 * structure (NOT the actually scheduled
38 * task). If there is another linked task To
39 * already it will set To->linked_on = NO_CPU
40 * (thereby removing its association with this
41 * CPU). However, it will not requeue the
42 * previously linked task (if any). It will set
43 * T's state to RT_F_RUNNING and check whether
44 * it is already running somewhere else. If T
45 * is scheduled somewhere else it will link
46 * it to that CPU instead (and pull the linked
47 * task to cpu). T may be NULL.
48 *
49 * unlink(T) - Unlink removes T from all scheduler data
50 * structures. If it is linked to some CPU it
51 * will link NULL to that CPU. If it is
52 * currently queued in the gsnedf queue it will
53 * be removed from the rt_domain. It is safe to
54 * call unlink(T) if T is not linked. T may not
55 * be NULL.
56 *
57 * requeue(T) - Requeue will insert T into the appropriate
58 * queue. If the system is in real-time mode and
 59 * T is released already, it will go into the
 60 * ready queue. If the system is not in
 61 * real-time mode, T will go into the
 62 * release queue. If T's release time is in the
 63 * future, it will go into the release
 64 * queue. That means that T's release time/job
 65 * no/etc. has to be updated before requeue(T) is
66 * called. It is not safe to call requeue(T)
67 * when T is already queued. T may not be NULL.
68 *
69 * gsnedf_job_arrival(T) - This is the catch all function when T enters
70 * the system after either a suspension or at a
71 * job release. It will queue T (which means it
72 * is not safe to call gsnedf_job_arrival(T) if
73 * T is already queued) and then check whether a
74 * preemption is necessary. If a preemption is
75 * necessary it will update the linkage
76 * accordingly and cause scheduled to be called
77 * (either with an IPI or need_resched). It is
78 * safe to call gsnedf_job_arrival(T) if T's
79 * next job has not been actually released yet
 80 * (release time in the future). T will be put
81 * on the release queue in that case.
82 *
83 * job_completion(T) - Take care of everything that needs to be done
84 * to prepare T for its next release and place
85 * it in the right queue with
86 * gsnedf_job_arrival().
87 *
88 *
 89 * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
 90 * equivalent to unlink(T). Note that if you unlink a task from a CPU, none of
 91 * the functions will automatically propagate a pending task from the ready queue
 92 * to a linked task. This is the job of the calling function (by means of
93 * __take_ready).
94 */
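/* A minimal sketch (example_resume_path is a hypothetical helper, shown only
 * for illustration) of how the primitives described above compose for a task
 * t that resumes and is currently neither queued nor linked:
 */
#if 0
static void example_resume_path(struct task_struct *t)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&gsnedf_lock, flags);
	unlink(t);		/* safe even if t is not currently linked */
	gsnedf_job_arrival(t);	/* requeue(t) + check_for_preemptions() */
	raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
}
#endif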
95
96
97/* cpu_entry_t - maintain the linked and scheduled state
98 */
99typedef struct {
100 int cpu;
101 struct task_struct* linked; /* only RT tasks */
102 struct task_struct* scheduled; /* only RT tasks */
103 struct bheap_node* hn;
104} cpu_entry_t;
105DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
106
107cpu_entry_t* gsnedf_cpus[NR_CPUS];
108
109/* the cpus queue themselves according to priority in here */
110static struct bheap_node gsnedf_heap_node[NR_CPUS];
111static struct bheap gsnedf_cpu_heap;
112
113static rt_domain_t gsnedf;
114#define gsnedf_lock (gsnedf.ready_lock)
115
116
117/* Uncomment this if you want to see all scheduling decisions in the
118 * TRACE() log.
119#define WANT_ALL_SCHED_EVENTS
120 */
121
122static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
123{
124 cpu_entry_t *a, *b;
125 a = _a->value;
126 b = _b->value;
127 /* Note that a and b are inverted: we want the lowest-priority CPU at
128 * the top of the heap.
129 */
130 return edf_higher_prio(b->linked, a->linked);
131}
132
133/* update_cpu_position - Move the cpu entry to the correct place to maintain
134 * order in the cpu queue. Caller must hold gsnedf lock.
135 */
136static void update_cpu_position(cpu_entry_t *entry)
137{
138 if (likely(bheap_node_in_heap(entry->hn)))
139 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
140 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
141}
142
143/* caller must hold gsnedf lock */
144static cpu_entry_t* lowest_prio_cpu(void)
145{
146 struct bheap_node* hn;
147 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
148 return hn->value;
149}
150
151
152/* link_task_to_cpu - Update the link of a CPU.
153 * Handles the case where the to-be-linked task is already
154 * scheduled on a different CPU.
155 */
156static noinline void link_task_to_cpu(struct task_struct* linked,
157 cpu_entry_t *entry)
158{
159 cpu_entry_t *sched;
160 struct task_struct* tmp;
161 int on_cpu;
162
163 BUG_ON(linked && !is_realtime(linked));
164
165 /* Currently linked task is set to be unlinked. */
166 if (entry->linked) {
167 entry->linked->rt_param.linked_on = NO_CPU;
168 }
169
170 /* Link new task to CPU. */
171 if (linked) {
172 set_rt_flags(linked, RT_F_RUNNING);
 173 /* handle the case where the task is already scheduled somewhere! */
174 on_cpu = linked->rt_param.scheduled_on;
175 if (on_cpu != NO_CPU) {
176 sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
177 /* this should only happen if not linked already */
178 BUG_ON(sched->linked == linked);
179
180 /* If we are already scheduled on the CPU to which we
181 * wanted to link, we don't need to do the swap --
182 * we just link ourselves to the CPU and depend on
183 * the caller to get things right.
184 */
185 if (entry != sched) {
186 TRACE_TASK(linked,
187 "already scheduled on %d, updating link.\n",
188 sched->cpu);
189 tmp = sched->linked;
190 linked->rt_param.linked_on = sched->cpu;
191 sched->linked = linked;
192 update_cpu_position(sched);
193 linked = tmp;
194 }
195 }
196 if (linked) /* might be NULL due to swap */
197 linked->rt_param.linked_on = entry->cpu;
198 }
199 entry->linked = linked;
200#ifdef WANT_ALL_SCHED_EVENTS
201 if (linked)
202 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
203 else
204 TRACE("NULL linked to %d.\n", entry->cpu);
205#endif
206 update_cpu_position(entry);
207}
208
209/* unlink - Make sure a task is not linked any longer to an entry
210 * where it was linked before. Must hold gsnedf_lock.
211 */
212static noinline void unlink(struct task_struct* t)
213{
214 cpu_entry_t *entry;
215
216 if (t->rt_param.linked_on != NO_CPU) {
217 /* unlink */
218 entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
219 t->rt_param.linked_on = NO_CPU;
220 link_task_to_cpu(NULL, entry);
221 } else if (is_queued(t)) {
222 /* This is an interesting situation: t is scheduled,
223 * but was just recently unlinked. It cannot be
224 * linked anywhere else (because then it would have
225 * been relinked to this CPU), thus it must be in some
226 * queue. We must remove it from the list in this
227 * case.
228 */
229 remove(&gsnedf, t);
230 }
231}
232
233
234/* preempt - force a CPU to reschedule
235 */
236static void preempt(cpu_entry_t *entry)
237{
238 preempt_if_preemptable(entry->scheduled, entry->cpu);
239}
240
241/* requeue - Put an unlinked task into gsn-edf domain.
242 * Caller must hold gsnedf_lock.
243 */
244static noinline void requeue(struct task_struct* task)
245{
246 BUG_ON(!task);
247 /* sanity check before insertion */
248 BUG_ON(is_queued(task));
249
250 if (is_released(task, litmus_clock()))
251 __add_ready(&gsnedf, task);
252 else {
253 /* it has got to wait */
254 add_release(&gsnedf, task);
255 }
256}
257
258/* check for any necessary preemptions */
259static void check_for_preemptions(void)
260{
261 struct task_struct *task;
262 cpu_entry_t* last;
263
264 for(last = lowest_prio_cpu();
265 edf_preemption_needed(&gsnedf, last->linked);
266 last = lowest_prio_cpu()) {
267 /* preemption necessary */
268 task = __take_ready(&gsnedf);
269 TRACE_TASK(task, "attempting to link to P%d\n",
270 last->cpu);
271 if (last->linked)
272 requeue(last->linked);
273 link_task_to_cpu(task, last);
274 preempt(last);
275 }
276}
277
278/* gsnedf_job_arrival: task is either resumed or released */
279static noinline void gsnedf_job_arrival(struct task_struct* task)
280{
281 BUG_ON(!task);
282
283 requeue(task);
284 check_for_preemptions();
285}
286
287static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
288{
289 unsigned long flags;
290
291 raw_spin_lock_irqsave(&gsnedf_lock, flags);
292
293 __merge_ready(rt, tasks);
294 check_for_preemptions();
295
296 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
297}
298
299/* caller holds gsnedf_lock */
300static noinline void job_completion(struct task_struct *t, int forced)
301{
302 BUG_ON(!t);
303
304 sched_trace_task_completion(t, forced);
305
306 TRACE_TASK(t, "job_completion().\n");
307
308 /* set flags */
309 set_rt_flags(t, RT_F_SLEEP);
310 /* prepare for next period */
311 prepare_for_next_period(t);
312 if (is_released(t, litmus_clock()))
313 sched_trace_task_release(t);
314 /* unlink */
315 unlink(t);
316 /* requeue
317 * But don't requeue a blocking task. */
318 if (is_running(t))
319 gsnedf_job_arrival(t);
320}
321
322/* gsnedf_tick - this function is called for every local timer
323 * interrupt.
324 *
325 * checks whether the current task has expired and checks
326 * whether we need to preempt it if it has not expired
327 */
328static void gsnedf_tick(struct task_struct* t)
329{
330 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
331 if (!is_np(t)) {
332 /* np tasks will be preempted when they become
333 * preemptable again
334 */
335 litmus_reschedule_local();
336 TRACE("gsnedf_scheduler_tick: "
337 "%d is preemptable "
338 " => FORCE_RESCHED\n", t->pid);
339 } else if (is_user_np(t)) {
340 TRACE("gsnedf_scheduler_tick: "
341 "%d is non-preemptable, "
342 "preemption delayed.\n", t->pid);
343 request_exit_np(t);
344 }
345 }
346}
347
348/* Getting schedule() right is a bit tricky. schedule() may not make any
349 * assumptions on the state of the current task since it may be called for a
350 * number of reasons. The reasons include a scheduler_tick() determined that it
351 * was necessary, because sys_exit_np() was called, because some Linux
352 * subsystem determined so, or even (in the worst case) because there is a bug
353 * hidden somewhere. Thus, we must take extreme care to determine what the
354 * current state is.
355 *
356 * The CPU could currently be scheduling a task (or not), be linked (or not).
357 *
358 * The following assertions for the scheduled task could hold:
359 *
360 * - !is_running(scheduled) // the job blocks
361 * - scheduled->timeslice == 0 // the job completed (forcefully)
362 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
363 * - linked != scheduled // we need to reschedule (for any reason)
364 * - is_np(scheduled) // rescheduling must be delayed,
365 * sys_exit_np must be requested
366 *
367 * Any of these can occur together.
368 */
369static struct task_struct* gsnedf_schedule(struct task_struct * prev)
370{
371 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
372 int out_of_time, sleep, preempt, np, exists, blocks;
373 struct task_struct* next = NULL;
374
375#ifdef CONFIG_RELEASE_MASTER
376 /* Bail out early if we are the release master.
377 * The release master never schedules any real-time tasks.
378 */
379 if (gsnedf.release_master == entry->cpu) {
380 sched_state_task_picked();
381 return NULL;
382 }
383#endif
384
385 raw_spin_lock(&gsnedf_lock);
386
387 /* sanity checking */
388 BUG_ON(entry->scheduled && entry->scheduled != prev);
389 BUG_ON(entry->scheduled && !is_realtime(prev));
390 BUG_ON(is_realtime(prev) && !entry->scheduled);
391
392 /* (0) Determine state */
393 exists = entry->scheduled != NULL;
394 blocks = exists && !is_running(entry->scheduled);
395 out_of_time = exists &&
396 budget_enforced(entry->scheduled) &&
397 budget_exhausted(entry->scheduled);
398 np = exists && is_np(entry->scheduled);
399 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
400 preempt = entry->scheduled != entry->linked;
401
402#ifdef WANT_ALL_SCHED_EVENTS
403 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
404#endif
405
406 if (exists)
407 TRACE_TASK(prev,
408 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
409 "state:%d sig:%d\n",
410 blocks, out_of_time, np, sleep, preempt,
411 prev->state, signal_pending(prev));
412 if (entry->linked && preempt && !np)
413 TRACE_TASK(prev, "will be preempted by %s/%d\n",
414 entry->linked->comm, entry->linked->pid);
415
416
417 /* If a task blocks we have no choice but to reschedule.
418 */
419 if (blocks)
420 unlink(entry->scheduled);
421
422 /* Request a sys_exit_np() call if we would like to preempt but cannot.
423 * Do not unlink since entry->scheduled is currently in the ready queue.
424	 * We don't process out_of_time and sleep until the job is preemptable again.
425 */
426 if (np && (out_of_time || preempt || sleep)) {
427 request_exit_np(entry->scheduled);
428 }
429
430 /* Any task that is preemptable and either exhausts its execution
431 * budget or wants to sleep completes. We may have to reschedule after
432 * this. Don't do a job completion if we block (can't have timers running
433	 * for blocked jobs). Preemptions go first for the same reason.
434 */
435 if (!np && (out_of_time || sleep) && !blocks && !preempt)
436 job_completion(entry->scheduled, !sleep);
437
438 /* Link pending task if we became unlinked.
439 */
440 if (!entry->linked)
441 link_task_to_cpu(__take_ready(&gsnedf), entry);
442
443 /* The final scheduling decision. Do we need to switch for some reason?
444 * If linked is different from scheduled, then select linked as next.
445 */
446 if ((!np || blocks) &&
447 entry->linked != entry->scheduled) {
448 /* Schedule a linked job? */
449 if (entry->linked) {
450 entry->linked->rt_param.scheduled_on = entry->cpu;
451 next = entry->linked;
452 TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
453 }
454 if (entry->scheduled) {
455 /* not gonna be scheduled soon */
456 entry->scheduled->rt_param.scheduled_on = NO_CPU;
457 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
458 }
459 } else
460 /* Only override Linux scheduler if we have a real-time task
461 * scheduled that needs to continue.
462 */
463 if (exists)
464 next = prev;
465
466 sched_state_task_picked();
467
468 raw_spin_unlock(&gsnedf_lock);
469
470#ifdef WANT_ALL_SCHED_EVENTS
471 TRACE("gsnedf_lock released, next=0x%p\n", next);
472
473 if (next)
474 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
475 else if (exists && !next)
476 TRACE("becomes idle at %llu.\n", litmus_clock());
477#endif
478
479
480 return next;
481}
482
483
484/* _finish_switch - we just finished the switch away from prev
485 */
486static void gsnedf_finish_switch(struct task_struct *prev)
487{
488 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
489
490 entry->scheduled = is_realtime(current) ? current : NULL;
491#ifdef WANT_ALL_SCHED_EVENTS
492 TRACE_TASK(prev, "switched away from\n");
493#endif
494}
495
496
497/* Prepare a task for running in RT mode
498 */
499static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
500{
501 unsigned long flags;
502 cpu_entry_t* entry;
503
504 TRACE("gsn edf: task new %d\n", t->pid);
505
506 raw_spin_lock_irqsave(&gsnedf_lock, flags);
507
508 /* setup job params */
509 release_at(t, litmus_clock());
510
511 if (running) {
512 entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
513 BUG_ON(entry->scheduled);
514
515#ifdef CONFIG_RELEASE_MASTER
516 if (entry->cpu != gsnedf.release_master) {
517#endif
518 entry->scheduled = t;
519 tsk_rt(t)->scheduled_on = task_cpu(t);
520#ifdef CONFIG_RELEASE_MASTER
521 } else {
522 /* do not schedule on release master */
523 preempt(entry); /* force resched */
524 tsk_rt(t)->scheduled_on = NO_CPU;
525 }
526#endif
527 } else {
528 t->rt_param.scheduled_on = NO_CPU;
529 }
530 t->rt_param.linked_on = NO_CPU;
531
532 gsnedf_job_arrival(t);
533 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
534}
535
536static void gsnedf_task_wake_up(struct task_struct *task)
537{
538 unsigned long flags;
539 lt_t now;
540
541 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
542
543 raw_spin_lock_irqsave(&gsnedf_lock, flags);
544 /* We need to take suspensions because of semaphores into
545 * account! If a job resumes after being suspended due to acquiring
546 * a semaphore, it should never be treated as a new job release.
547 */
548 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
549 set_rt_flags(task, RT_F_RUNNING);
550 } else {
551 now = litmus_clock();
552 if (is_tardy(task, now)) {
553 /* new sporadic release */
554 release_at(task, now);
555 sched_trace_task_release(task);
556 }
557 else {
558 if (task->rt.time_slice) {
559 /* came back in time before deadline
560 */
561 set_rt_flags(task, RT_F_RUNNING);
562 }
563 }
564 }
565 gsnedf_job_arrival(task);
566 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
567}
568
569static void gsnedf_task_block(struct task_struct *t)
570{
571 unsigned long flags;
572
573 TRACE_TASK(t, "block at %llu\n", litmus_clock());
574
575 /* unlink if necessary */
576 raw_spin_lock_irqsave(&gsnedf_lock, flags);
577 unlink(t);
578 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
579
580 BUG_ON(!is_realtime(t));
581}
582
583
584static void gsnedf_task_exit(struct task_struct * t)
585{
586 unsigned long flags;
587
588 /* unlink if necessary */
589 raw_spin_lock_irqsave(&gsnedf_lock, flags);
590 unlink(t);
591 if (tsk_rt(t)->scheduled_on != NO_CPU) {
592 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
593 tsk_rt(t)->scheduled_on = NO_CPU;
594 }
595 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
596
597 BUG_ON(!is_realtime(t));
598 TRACE_TASK(t, "RIP\n");
599}
600
601
602static long gsnedf_admit_task(struct task_struct* tsk)
603{
604 return 0;
605}
606
607#ifdef CONFIG_LITMUS_LOCKING
608
609#include <litmus/fdso.h>
610
611
612
613/* called with IRQs off */
614static void __set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
615{
616 int linked_on;
617 int check_preempt = 0;
618
619 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
620 tsk_rt(t)->inh_task = prio_inh;
621
622 linked_on = tsk_rt(t)->linked_on;
623
624 /* If it is scheduled, then we need to reorder the CPU heap. */
625 if (linked_on != NO_CPU) {
626 TRACE_TASK(t, "%s: linked on %d\n",
627 __FUNCTION__, linked_on);
628 /* Holder is scheduled; need to re-order CPUs.
629 * We can't use heap_decrease() here since
630 * the cpu_heap is ordered in reverse direction, so
631 * it is actually an increase. */
632 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
633 gsnedf_cpus[linked_on]->hn);
634 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
635 gsnedf_cpus[linked_on]->hn);
636 } else {
637 /* holder may be queued: first stop queue changes */
638 raw_spin_lock(&gsnedf.release_lock);
639 if (is_queued(t)) {
640 TRACE_TASK(t, "%s: is queued\n",
641 __FUNCTION__);
642 /* We need to update the position of holder in some
643 * heap. Note that this could be a release heap if
644 * budget enforcement is used and this job overran. */
645 check_preempt =
646 !bheap_decrease(edf_ready_order,
647 tsk_rt(t)->heap_node);
648 } else {
649 /* Nothing to do: if it is not queued and not linked
650 * then it is either sleeping or currently being moved
651 * by other code (e.g., a timer interrupt handler) that
652 * will use the correct priority when enqueuing the
653 * task. */
654 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
655 __FUNCTION__);
656 }
657 raw_spin_unlock(&gsnedf.release_lock);
658
659 /* If holder was enqueued in a release heap, then the following
660 * preemption check is pointless, but we can't easily detect
661 * that case. If you want to fix this, then consider that
662 * simply adding a state flag requires O(n) time to update when
663 * releasing n tasks, which conflicts with the goal to have
664 * O(log n) merges. */
665 if (check_preempt) {
666 /* heap_decrease() hit the top level of the heap: make
667 * sure preemption checks get the right task, not the
668 * potentially stale cache. */
669 bheap_uncache_min(edf_ready_order,
670 &gsnedf.ready_queue);
671 check_for_preemptions();
672 }
673 }
674}
675
676static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
677{
678 raw_spin_lock(&gsnedf_lock);
679 __set_priority_inheritance(t, prio_inh);
680 raw_spin_unlock(&gsnedf_lock);
681}
682
683static void __clear_priority_inheritance(struct task_struct* t)
684{
685 /* A job only stops inheriting a priority when it releases a
686 * resource. Thus we can make the following assumption.*/
687 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
688
689 TRACE_TASK(t, "priority restored\n");
690 tsk_rt(t)->inh_task = NULL;
691
692 /* Check if rescheduling is necessary. We can't use heap_decrease()
693 * since the priority was effectively lowered. */
694 unlink(t);
695 gsnedf_job_arrival(t);
696}
697
698/* Set and clear priority inheritance at the same time to avoid having to
699 * acquire gsnedf_lock twice. */
700static void update_priority_inheritance(
701 struct task_struct* deprived,
702 struct task_struct* blocker,
703 struct task_struct* blocked)
704{
705 /* things to do:
706 * 1) deprived no longer inherits anything.
707 * 2) blocker gets blocked's priority.
708 */
709
710 raw_spin_lock(&gsnedf_lock);
711
712 if (tsk_rt(deprived)->inh_task)
713 __clear_priority_inheritance(deprived);
714
715 if (blocked)
716 __set_priority_inheritance(blocker, blocked);
717
718 raw_spin_unlock(&gsnedf_lock);
719}
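/*
 * Typical use (see gsnedf_fmlp_unlock() below): when a lock holder
 * releases, the former holder ("deprived") drops any inherited priority,
 * while the new owner ("blocker") inherits from the remaining
 * highest-priority waiter ("blocked"), all under a single acquisition of
 * gsnedf_lock.
 */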
720
721
722/* ******************** FMLP support ********************** */
723
724/* struct for semaphore with priority inheritance */
725struct fmlp_semaphore {
726 struct litmus_lock litmus_lock;
727
728 /* current resource holder */
729 struct task_struct *owner;
730
731 /* highest-priority waiter */
732 struct task_struct *hp_waiter;
733
734 /* FIFO queue of waiting tasks */
735 wait_queue_head_t wait;
736};
737
738static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
739{
740 return container_of(lock, struct fmlp_semaphore, litmus_lock);
741}
742
743/* caller is responsible for locking */
744static struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem,
745 struct task_struct* skip)
746{
747 struct list_head *pos;
748 struct task_struct *queued, *found = NULL;
749
750 list_for_each(pos, &sem->wait.task_list) {
751 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
752 task_list)->private;
753
754 /* Compare task prios, find high prio task. */
755 if (queued != skip && edf_higher_prio(queued, found))
756 found = queued;
757 }
758 return found;
759}
760
761int gsnedf_fmlp_lock(struct litmus_lock* l)
762{
763 struct task_struct* t = current;
764 struct fmlp_semaphore *sem = fmlp_from_lock(l);
765 wait_queue_t wait;
766 unsigned long flags;
767
768 if (!is_realtime(t))
769 return -EPERM;
770
771 spin_lock_irqsave(&sem->wait.lock, flags);
772
773 if (sem->owner) {
774 /* resource is not free => must suspend and wait */
775
776 init_waitqueue_entry(&wait, t);
777
778 /* FIXME: interruptible would be nice some day */
779 set_task_state(t, TASK_UNINTERRUPTIBLE);
780
781 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
782
783 /* check if we need to activate priority inheritance */
784 if (edf_higher_prio(t, sem->hp_waiter)) {
785 sem->hp_waiter = t;
786 if (edf_higher_prio(t, sem->owner))
787 set_priority_inheritance(sem->owner, sem->hp_waiter);
788 }
789
790 TS_LOCK_SUSPEND;
791
792 /* release lock before sleeping */
793 spin_unlock_irqrestore(&sem->wait.lock, flags);
794
795 /* We depend on the FIFO order. Thus, we don't need to recheck
796 * when we wake up; we are guaranteed to have the lock since
797 * there is only one wake up per release.
798 */
799
800 schedule();
801
802 TS_LOCK_RESUME;
803
804 /* Since we hold the lock, no other task will change
805 * ->owner. We can thus check it without acquiring the spin
806 * lock. */
807 BUG_ON(sem->owner != t);
808 } else {
809 /* it's ours now */
810 sem->owner = t;
811
812 spin_unlock_irqrestore(&sem->wait.lock, flags);
813 }
814
815 return 0;
816}
817
818int gsnedf_fmlp_unlock(struct litmus_lock* l)
819{
820 struct task_struct *t = current, *next, *blocked = NULL;
821 struct fmlp_semaphore *sem = fmlp_from_lock(l);
822 unsigned long flags;
823 int err = 0;
824
825 spin_lock_irqsave(&sem->wait.lock, flags);
826
827 if (sem->owner != t) {
828 err = -EINVAL;
829 goto out;
830 }
831
832 /* check if there are jobs waiting for this resource */
833 next = __waitqueue_remove_first(&sem->wait);
834 if (next) {
835		/* next becomes the resource holder */
836 sem->owner = next;
837 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
838
839 /* determine new hp_waiter if necessary */
840 if (next == sem->hp_waiter) {
841 TRACE_TASK(next, "was highest-prio waiter\n");
842 /* next has the highest priority --- it doesn't need to
843 * inherit. However, we need to make sure that the
844 * next-highest priority in the queue is reflected in
845 * hp_waiter. */
846 sem->hp_waiter = find_hp_waiter(sem, next);
847 if (sem->hp_waiter)
848 TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
849 else
850 TRACE("no further waiters\n");
851 } else {
852 /* Well, if next is not the highest-priority waiter,
853 * then it ought to inherit the highest-priority
854 * waiter's priority. */
855 blocked = sem->hp_waiter;
856 }
857
858 /* wake up next */
859 wake_up_process(next);
860 } else
861 /* becomes available */
862 sem->owner = NULL;
863
864 /* we lose the benefit of priority inheritance (if any) */
865 if (tsk_rt(t)->inh_task || blocked)
866 update_priority_inheritance(t, next, blocked);
867
868out:
869 spin_unlock_irqrestore(&sem->wait.lock, flags);
870
871 return err;
872}
873
874int gsnedf_fmlp_close(struct litmus_lock* l)
875{
876 struct task_struct *t = current;
877 struct fmlp_semaphore *sem = fmlp_from_lock(l);
878 unsigned long flags;
879
880 int owner;
881
882 spin_lock_irqsave(&sem->wait.lock, flags);
883
884 owner = sem->owner == t;
885
886 spin_unlock_irqrestore(&sem->wait.lock, flags);
887
888 if (owner)
889 gsnedf_fmlp_unlock(l);
890
891 return 0;
892}
893
894void gsnedf_fmlp_free(struct litmus_lock* lock)
895{
896 kfree(fmlp_from_lock(lock));
897}
898
899static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
900 .close = gsnedf_fmlp_close,
901 .lock = gsnedf_fmlp_lock,
902 .unlock = gsnedf_fmlp_unlock,
903 .deallocate = gsnedf_fmlp_free,
904};
905
906static struct litmus_lock* gsnedf_new_fmlp(void)
907{
908 struct fmlp_semaphore* sem;
909
910 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
911 if (!sem)
912 return NULL;
913
914 sem->owner = NULL;
915 sem->hp_waiter = NULL;
916 init_waitqueue_head(&sem->wait);
917 sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops;
918
919 return &sem->litmus_lock;
920}
921
922
923/* ******************** OMLP support ********************** */
924
925/* struct for semaphore with priority inheritance */
926struct omlp_semaphore {
927 struct litmus_lock litmus_lock;
928
929 /* current resource holder */
930 struct task_struct *owner;
931
932 /* highest-priority waiter */
933 struct task_struct *hp_waiter;
934
935 /* FIFO queue of waiting tasks */
936 wait_queue_head_t fifo_wait;
937 /* Priority queue of waiting tasks */
938 wait_queue_head_t prio_wait;
939
940 /* How many slots remaining in FIFO queue? */
941 unsigned int num_free;
942};
943
944static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock)
945{
946 return container_of(lock, struct omlp_semaphore, litmus_lock);
947}
948
949/* already locked */
950static void omlp_enqueue(struct omlp_semaphore *sem, prio_wait_queue_t* wait)
951{
952 if (sem->num_free) {
953 /* there is space in the FIFO queue */
954 sem->num_free--;
955 __add_wait_queue_tail_exclusive(&sem->fifo_wait, &wait->wq);
956 } else {
957 /* nope, gotta go to the priority queue */
958 __add_wait_queue_prio_exclusive(&sem->prio_wait, wait);
959 }
960}
961
962/* already locked */
963static int omlp_move(struct omlp_semaphore *sem)
964{
965 struct list_head* first;
966
967 if (waitqueue_active(&sem->prio_wait)) {
968 first = sem->prio_wait.task_list.next;
969 list_move_tail(first, &sem->fifo_wait.task_list);
970 return 1;
971 }
972 else
973 return 0;
974}
975
976static struct task_struct* omlp_dequeue(struct omlp_semaphore *sem)
977{
978 struct task_struct* first = __waitqueue_remove_first(&sem->fifo_wait);
979
980 if (first && !omlp_move(sem))
981 sem->num_free++;
982
983 return first;
984}
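/*
 * Behaviour sketch (follows from omlp_move() above): when the owner
 * releases the lock, the head of the FIFO queue becomes the new owner.
 * If the priority queue is non-empty, its highest-priority waiter is
 * promoted into the freed FIFO slot and num_free stays unchanged; only
 * when the priority queue is empty does num_free grow back by one.
 */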
985
986/* caller is responsible for locking */
987static struct task_struct* omlp_find_hp_waiter(struct omlp_semaphore *sem,
988 struct task_struct* skip)
989{
990 struct list_head *pos;
991 struct task_struct *queued, *found = NULL;
992
993 /* check FIFO queue first */
994 list_for_each(pos, &sem->fifo_wait.task_list) {
995 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
996 task_list)->private;
997
998 /* Compare task prios, find high prio task. */
999 if (queued != skip && edf_higher_prio(queued, found))
1000 found = queued;
1001 }
1002
1003 /* check priority queue next */
1004 if (waitqueue_active(&sem->prio_wait)) {
1005 /* first has highest priority */
1006 pos = sem->prio_wait.task_list.next;
1007 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
1008 task_list)->private;
1009 if (edf_higher_prio(queued, found))
1010 found = queued;
1011 }
1012
1013 return found;
1014}
1015
1016static int gsnedf_omlp_lock(struct litmus_lock* l)
1017{
1018 struct task_struct* t = current;
1019 struct omlp_semaphore *sem = omlp_from_lock(l);
1020 prio_wait_queue_t wait;
1021 unsigned long flags;
1022
1023 if (!is_realtime(t))
1024 return -EPERM;
1025
1026 spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1027
1028 if (sem->owner) {
1029 /* resource is not free => must suspend and wait */
1030
1031 init_prio_waitqueue_entry(&wait, t, get_deadline(t));
1032
1033 set_task_state(t, TASK_UNINTERRUPTIBLE);
1034
1035 omlp_enqueue(sem, &wait);
1036
1037 /* check if we need to activate priority inheritance */
1038 if (edf_higher_prio(t, sem->hp_waiter)) {
1039 sem->hp_waiter = t;
1040 if (edf_higher_prio(t, sem->owner))
1041 set_priority_inheritance(sem->owner, sem->hp_waiter);
1042 }
1043
1044 TS_LOCK_SUSPEND;
1045
1046 /* release lock before sleeping */
1047 spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1048
1049 schedule();
1050
1051 TS_LOCK_RESUME;
1052
1053 /* Since we hold the lock, no other task will change
1054 * ->owner. We can thus check it without acquiring the spin
1055 * lock. */
1056 BUG_ON(sem->owner != t);
1057 } else {
1058 /* it's ours now */
1059 sem->owner = t;
1060
1061 spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1062 }
1063
1064 return 0;
1065}
1066
1067static int gsnedf_omlp_unlock(struct litmus_lock* l)
1068{
1069 struct task_struct *t = current, *next, *blocked = NULL;
1070 struct omlp_semaphore *sem = omlp_from_lock(l);
1071 unsigned long flags;
1072 int err = 0;
1073
1074 spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1075
1076 if (sem->owner != t) {
1077 err = -EINVAL;
1078 goto out;
1079 }
1080
1081 /* check if there are jobs waiting for this resource */
1082 next = omlp_dequeue(sem);
1083 if (next) {
1084		/* next becomes the resource holder */
1085 sem->owner = next;
1086 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
1087
1088 /* determine new hp_waiter if necessary */
1089 if (next == sem->hp_waiter) {
1090 TRACE_TASK(next, "was highest-prio waiter\n");
1091 /* next has the highest priority --- it doesn't need to
1092 * inherit. However, we need to make sure that the
1093 * next-highest priority in the queue is reflected in
1094 * hp_waiter. */
1095 sem->hp_waiter = omlp_find_hp_waiter(sem, next);
1096 if (sem->hp_waiter)
1097 TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
1098 else
1099 TRACE("no further waiters\n");
1100 } else {
1101 /* Well, if next is not the highest-priority waiter,
1102 * then it ought to inherit the highest-priority
1103 * waiter's priority. */
1104 blocked = sem->hp_waiter;
1105 }
1106
1107 /* wake up next */
1108 wake_up_process(next);
1109 } else
1110 /* becomes available */
1111 sem->owner = NULL;
1112
1113 /* we lose the benefit of priority inheritance (if any) */
1114 if (tsk_rt(t)->inh_task || blocked)
1115 update_priority_inheritance(t, next, blocked);
1116
1117out:
1118 spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1119
1120 return err;
1121}
1122
1123static int gsnedf_omlp_close(struct litmus_lock* l)
1124{
1125 struct task_struct *t = current;
1126 struct omlp_semaphore *sem = omlp_from_lock(l);
1127 unsigned long flags;
1128
1129 int owner;
1130
1131 spin_lock_irqsave(&sem->fifo_wait.lock, flags);
1132
1133 owner = sem->owner == t;
1134
1135 spin_unlock_irqrestore(&sem->fifo_wait.lock, flags);
1136
1137 if (owner)
1138 gsnedf_omlp_unlock(l);
1139
1140 return 0;
1141}
1142
1143static void gsnedf_omlp_free(struct litmus_lock* lock)
1144{
1145 kfree(omlp_from_lock(lock));
1146}
1147
1148static struct litmus_lock_ops gsnedf_omlp_lock_ops = {
1149 .close = gsnedf_omlp_close,
1150 .lock = gsnedf_omlp_lock,
1151 .unlock = gsnedf_omlp_unlock,
1152 .deallocate = gsnedf_omlp_free,
1153};
1154
1155static struct litmus_lock* gsnedf_new_omlp(void)
1156{
1157 struct omlp_semaphore* sem;
1158
1159 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1160 if (!sem)
1161 return NULL;
1162
1163 sem->owner = NULL;
1164 sem->hp_waiter = NULL;
1165 init_waitqueue_head(&sem->fifo_wait);
1166 init_waitqueue_head(&sem->prio_wait);
1167 sem->litmus_lock.ops = &gsnedf_omlp_lock_ops;
1168	/* num_free = #cpus - 1, since the lock owner also occupies one slot */
1169 sem->num_free = num_online_cpus() - 1;
1170
1171#ifdef CONFIG_RELEASE_MASTER
1172 /* If we use dedicated interrupt handling, then there are actually
1173 * only m - 1 CPUs around. */
1174 if (gsnedf.release_master != NO_CPU)
1175 sem->num_free -= 1;
1176#endif
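	/*
	 * Example (illustrative): on a 4-CPU system num_free starts at 3,
	 * so at most three jobs wait in the FIFO queue while a fourth holds
	 * the lock; any further contenders wait in the priority queue.
	 * With a dedicated release master only three CPUs schedule
	 * real-time tasks, so num_free drops to 2 accordingly.
	 */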
1177
1178 return &sem->litmus_lock;
1179}
1180
1181
1182/* **** lock constructor **** */
1183
1184
1185static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
1186 void* __user unused)
1187{
1188 int err = -ENXIO;
1189
1190	/* GSN-EDF supports the FMLP and the OMLP for global resources. */
1191 switch (type) {
1192
1193 case FMLP_SEM:
1194 /* Flexible Multiprocessor Locking Protocol */
1195 *lock = gsnedf_new_fmlp();
1196 if (*lock)
1197 err = 0;
1198 else
1199 err = -ENOMEM;
1200 break;
1201
1202 case OMLP_SEM:
1203 /* O(m) Multiprocessor Locking Protocol */
1204 *lock = gsnedf_new_omlp();
1205 if (*lock)
1206 err = 0;
1207 else
1208 err = -ENOMEM;
1209 break;
1210
1211 };
1212
1213 return err;
1214}
1215
1216#endif
1217
1218
1219static long gsnedf_activate_plugin(void)
1220{
1221 int cpu;
1222 cpu_entry_t *entry;
1223
1224 bheap_init(&gsnedf_cpu_heap);
1225#ifdef CONFIG_RELEASE_MASTER
1226 gsnedf.release_master = atomic_read(&release_master_cpu);
1227#endif
1228
1229 for_each_online_cpu(cpu) {
1230 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1231 bheap_node_init(&entry->hn, entry);
1232 entry->linked = NULL;
1233 entry->scheduled = NULL;
1234#ifdef CONFIG_RELEASE_MASTER
1235 if (cpu != gsnedf.release_master) {
1236#endif
1237 TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
1238 update_cpu_position(entry);
1239#ifdef CONFIG_RELEASE_MASTER
1240 } else {
1241 TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
1242 }
1243#endif
1244 }
1245 return 0;
1246}
1247
1248/* Plugin object */
1249static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
1250 .plugin_name = "GSN-EDF",
1251 .finish_switch = gsnedf_finish_switch,
1252 .tick = gsnedf_tick,
1253 .task_new = gsnedf_task_new,
1254 .complete_job = complete_job,
1255 .task_exit = gsnedf_task_exit,
1256 .schedule = gsnedf_schedule,
1257 .task_wake_up = gsnedf_task_wake_up,
1258 .task_block = gsnedf_task_block,
1259 .admit_task = gsnedf_admit_task,
1260 .activate_plugin = gsnedf_activate_plugin,
1261#ifdef CONFIG_LITMUS_LOCKING
1262 .allocate_lock = gsnedf_allocate_lock,
1263#endif
1264};
1265
1266
1267static int __init init_gsn_edf(void)
1268{
1269 int cpu;
1270 cpu_entry_t *entry;
1271
1272 bheap_init(&gsnedf_cpu_heap);
1273 /* initialize CPU state */
1274 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1275 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1276 gsnedf_cpus[cpu] = entry;
1277 entry->cpu = cpu;
1278 entry->hn = &gsnedf_heap_node[cpu];
1279 bheap_node_init(&entry->hn, entry);
1280 }
1281 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
1282 return register_sched_plugin(&gsn_edf_plugin);
1283}
1284
1285
1286module_init(init_gsn_edf);
diff --git a/litmus/sched_gsn_edf_split_namechange.c b/litmus/sched_gsn_edf_split_namechange.c
new file mode 100644
index 000000000000..6839ae642b3a
--- /dev/null
+++ b/litmus/sched_gsn_edf_split_namechange.c
@@ -0,0 +1,1165 @@
1/*
2 * litmus/sched_gsn_edf_split_namechange.c
3 *
4 * Implementation of the GSN-EDF scheduling algorithm with job splitting,
5 * i.e., GSN-EDF in which each job is split into subjobs.
6 *
7 * This plugin is a modified version of the prior GSN-EDF plugin in
8 * litmus/sched_gsn_edf.c
9 *
10 * Splitting an implicit-deadline job simply means splitting each job into an
11 * integral number of subjobs. For example, a task with a period of 10 ms and
12 * a runtime of 4 ms could be re-organized as a task with a period of 5 ms and
13 * a runtime of 2 ms, with analytical benefit for bounded tardiness (ignoring
14 * overheads and assuming no critical sections). This would have a "splitting
15 * factor" of 2.
16 *
17 * Because our analysis works with early releasing, we actually only release
18 * each job once, but move the subjob deadline back when the appropriate amount
19 * of execution has been completed. (In the example above, a job released at
20 * time 0 would initially have a subjob deadline at time 5, but this deadline
21 * would be moved to time 10 as soon as 2 ms of execution had completed.)
22 */
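/*
 * Illustrative timeline, derived from the example above and the helper
 * functions below (the concrete numbers are for illustration only):
 *
 *   period = 10 ms, exec_cost = 4 ms, split = 2, job released at time 0
 *
 *     completed execution | subjob (slice) | subjob deadline
 *     --------------------+----------------+--------------------------
 *     less than 2 ms      |       1        | 0 + (10 * 1) / 2 =  5 ms
 *     2 ms up to 4 ms     |       2        | 0 + (10 * 2) / 2 = 10 ms
 *
 * The split timer armed below fires once enough execution has completed
 * to trigger the next deadline move.
 */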
23
24#include <linux/spinlock.h>
25#include <linux/percpu.h>
26#include <linux/sched.h>
27#include <linux/slab.h>
28
29#include <litmus/litmus.h>
30#include <litmus/jobs.h>
31#include <litmus/sched_plugin.h>
32#include <litmus/edf_common.h>
33#include <litmus/sched_trace.h>
34#include <litmus/trace.h>
35
36#include <litmus/preempt.h>
37
38#include <litmus/bheap.h>
39
40#ifdef CONFIG_SCHED_CPU_AFFINITY
41#include <litmus/affinity.h>
42#endif
43
44#include <linux/module.h>
45
46/* cpu_entry_t - maintain the linked and scheduled state
47 */
48typedef struct {
49 int cpu;
50 struct task_struct* linked; /* only RT tasks */
51 struct task_struct* scheduled; /* only RT tasks */
52 struct bheap_node* hn;
53 struct hrtimer split_timer;
54 int timer_armed;
55} cpu_entry_t;
56DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
57
58cpu_entry_t* gsnedf_cpus[NR_CPUS];
59
60/* the cpus queue themselves according to priority in here */
61static struct bheap_node gsnedf_heap_node[NR_CPUS];
62static struct bheap gsnedf_cpu_heap;
63
64static rt_domain_t gsnedf;
65#define gsnedf_lock (gsnedf.ready_lock)
66
67static inline int get_slice_num(struct task_struct* t)
68{
69 int basic = ((t->rt_param.job_params.exec_time *
70 t->rt_param.task_params.split) /
71 t->rt_param.task_params.exec_cost) + 1;
72 if (basic <= t->rt_param.task_params.split){
73 return basic;
74 }
75 else{
76		/* Since we don't police the budget, just leave it where it is. */
77 return t->rt_param.task_params.split;
78 }
79}
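/*
 * Worked example (illustrative values): with exec_cost = 4 ms, split = 2
 * and exec_time = 3 ms of completed execution,
 *	basic = (3 ms * 2) / 4 ms + 1 = 1 + 1 = 2,
 * which does not exceed split, so the job is in its second (final)
 * subjob. If the job overran to exec_time = 5 ms, basic would evaluate
 * to 3 and the result would be clamped to split = 2.
 */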
80
81/* Returns the appropriate subjob deadline. */
82static inline lt_t get_proper_deadline(struct task_struct* t)
83{
84 return t->rt_param.job_params.release +
85 ((t->rt_param.task_params.period * get_slice_num(t))
86 / t->rt_param.task_params.split);
87}
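/*
 * Worked example (illustrative values): with release = 0, period = 10 ms
 * and split = 2, subjob 1 has deadline 0 + (10 ms * 1) / 2 = 5 ms, and
 * subjob 2 has deadline 0 + (10 ms * 2) / 2 = 10 ms, i.e., the job-level
 * deadline.
 */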
88
89/* Tells us if the current deadline is too small. */
90static inline int needs_deadline_move(struct task_struct* t)
91{
92 BUG_ON(get_proper_deadline(t) < t->rt_param.job_params.subjob_deadline);
93#ifdef CONFIG_LITMUS_LOCKING
94 return !is_in_crit_section(t) &&
95 (get_proper_deadline(t) !=
96 tsk_rt(t)->job_params.subjob_deadline);
97#else
98 return get_proper_deadline(t) != tsk_rt(t)->job_params.subjob_deadline;
99#endif
100}
101
102/* Returns the execution time remaining until the next deadline move.
103 * A return value of 0 means the task has no more deadline moves.
104 */
105static inline lt_t time_to_next_move(struct task_struct* t)
106{
107 if (get_slice_num(t) == t->rt_param.task_params.split){
108 return 0;
109 }
110	/* +1 upper-bounds the ceiling, since integer division rounds down */
111 return ((get_slice_num(t) * t->rt_param.task_params.exec_cost)
112 / t->rt_param.task_params.split) + 1
113 - t->rt_param.job_params.exec_time;
114}
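/*
 * Worked example (illustrative values): with exec_cost = 4 ms, split = 2
 * and 1 ms of execution completed in subjob 1,
 *	(1 * 4 ms) / 2 + 1 - 1 ms = 1 ms + 1,
 * i.e., roughly 1 ms of further execution (plus one time unit of rounding
 * slack) remains before the deadline moves. Once the final subjob is
 * reached, get_slice_num(t) == split and the function returns 0.
 */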
115
116/* Timer stuff - similar to budget.c. */
117static enum hrtimer_restart on_split_timeout(struct hrtimer *timer)
118{
119 cpu_entry_t* st = container_of(timer,
120 cpu_entry_t,
121 split_timer);
122
123 unsigned long flags;
124
125 local_irq_save(flags);
126 TRACE("split timer fired.\n");
127 st->timer_armed = 0;
128 /* Activate scheduler */
129 litmus_reschedule_local();
130 local_irq_restore(flags);
131
132 return HRTIMER_NORESTART;
133}
134
135static void cancel_split_timer(cpu_entry_t* ce)
136{
137 int ret;
138
139	TRACE("cancelling split timer.\n");
140
141	/* Since interrupts are disabled and ce->timer_armed is only
142 * modified locally, we do not need any locks.
143 */
144
145 if (ce->timer_armed) {
146 ret = hrtimer_try_to_cancel(&ce->split_timer);
147 /* Should never be inactive. */
148 BUG_ON(ret == 0);
149 /* Should never be running concurrently.*/
150 BUG_ON(ret == -1);
151
152 ce->timer_armed = 0;
153 }
154}
155
156/* assumes called with IRQs off */
157static void arm_split_timer(cpu_entry_t *ce,
158 struct task_struct* t)
159{
160 lt_t when_to_fire;
161 lt_t time_to_move;
162 TRACE_TASK(t, "arming split timer.\n");
163
164 /* __hrtimer_start_range_ns() cancels the timer
165 * anyway, so we don't have to check whether it is still armed */
166
167	/* We won't do any new deadline moves if the budget has been exhausted. */
168 if (likely(!is_np(t) && (time_to_move = time_to_next_move(t)))) {
169 when_to_fire = litmus_clock() + time_to_move;
170 TRACE_TASK(t, "actually arming for %llu into the future\n",
171 time_to_move);
172 __hrtimer_start_range_ns(&ce->split_timer,
173 ns_to_ktime(when_to_fire),
174 0 /* delta */,
175 HRTIMER_MODE_ABS_PINNED,
176 0 /* no wakeup */);
177 ce->timer_armed = 1;
178 }
179}
180
181/* Uncomment this if you want to see all scheduling decisions in the
182 * TRACE() log.
183#define WANT_ALL_SCHED_EVENTS
184 */
185
186static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
187{
188 cpu_entry_t *a, *b;
189 a = _a->value;
190 b = _b->value;
191 /* Note that a and b are inverted: we want the lowest-priority CPU at
192 * the top of the heap.
193 */
194 return edf_higher_prio(b->linked, a->linked);
195}
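/*
 * Example (illustrative): if CPU A's linked job has deadline 20 and CPU
 * B's linked job has deadline 10, cpu_lower_prio(A, B) evaluates
 * edf_higher_prio(B->linked, A->linked), which is true, so A -- the CPU
 * running the later-deadline, lower-priority job -- is ordered toward the
 * top of gsnedf_cpu_heap, where lowest_prio_cpu() will find it.
 */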
196
197/* update_cpu_position - Move the cpu entry to the correct place to maintain
198 * order in the cpu queue. Caller must hold gsnedf lock.
199 */
200static void update_cpu_position(cpu_entry_t *entry)
201{
202 if (likely(bheap_node_in_heap(entry->hn)))
203 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
204 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
205}
206
207/* caller must hold gsnedf lock */
208static cpu_entry_t* lowest_prio_cpu(void)
209{
210 struct bheap_node* hn;
211 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
212 return hn->value;
213}
214
215
216/* link_task_to_cpu - Update the link of a CPU.
217 * Handles the case where the to-be-linked task is already
218 * scheduled on a different CPU.
219 */
220static noinline void link_task_to_cpu(struct task_struct* linked,
221 cpu_entry_t *entry)
222{
223 cpu_entry_t *sched;
224 struct task_struct* tmp;
225 int on_cpu;
226
227 BUG_ON(linked && !is_realtime(linked));
228
229 /* Currently linked task is set to be unlinked. */
230 if (entry->linked) {
231 entry->linked->rt_param.linked_on = NO_CPU;
232 }
233
234 /* Link new task to CPU. */
235 if (linked) {
236 set_rt_flags(linked, RT_F_RUNNING);
237 /* handle task is already scheduled somewhere! */
238 on_cpu = linked->rt_param.scheduled_on;
239 if (on_cpu != NO_CPU) {
240 sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
241 /* this should only happen if not linked already */
242 BUG_ON(sched->linked == linked);
243
244 /* If we are already scheduled on the CPU to which we
245 * wanted to link, we don't need to do the swap --
246 * we just link ourselves to the CPU and depend on
247 * the caller to get things right.
248 */
249 if (entry != sched) {
250 TRACE_TASK(linked,
251 "already scheduled on %d, updating link.\n",
252 sched->cpu);
253 tmp = sched->linked;
254 linked->rt_param.linked_on = sched->cpu;
255 sched->linked = linked;
256 update_cpu_position(sched);
257 linked = tmp;
258 }
259 }
260 if (linked) /* might be NULL due to swap */
261 linked->rt_param.linked_on = entry->cpu;
262 }
263 entry->linked = linked;
264#ifdef WANT_ALL_SCHED_EVENTS
265 if (linked)
266 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
267 else
268 TRACE("NULL linked to %d.\n", entry->cpu);
269#endif
270 update_cpu_position(entry);
271}
272
273/* unlink - Make sure a task is not linked any longer to an entry
274 * where it was linked before. Must hold gsnedf_lock.
275 */
276static noinline void unlink(struct task_struct* t)
277{
278 cpu_entry_t *entry;
279
280 if (t->rt_param.linked_on != NO_CPU) {
281 /* unlink */
282 entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
283 t->rt_param.linked_on = NO_CPU;
284 link_task_to_cpu(NULL, entry);
285 } else if (is_queued(t)) {
286 /* This is an interesting situation: t is scheduled,
287 * but was just recently unlinked. It cannot be
288 * linked anywhere else (because then it would have
289 * been relinked to this CPU), thus it must be in some
290 * queue. We must remove it from the list in this
291 * case.
292 */
293 remove(&gsnedf, t);
294 }
295}
296
297
298/* preempt - force a CPU to reschedule
299 */
300static void preempt(cpu_entry_t *entry)
301{
302 preempt_if_preemptable(entry->scheduled, entry->cpu);
303}
304
305/* requeue - Put an unlinked task into gsn-edf domain.
306 * Caller must hold gsnedf_lock.
307 */
308static noinline void requeue(struct task_struct* task)
309{
310 BUG_ON(!task);
311 /* sanity check before insertion */
312 BUG_ON(is_queued(task));
313
314 if (is_released(task, litmus_clock()))
315 __add_ready(&gsnedf, task);
316 else {
317 /* it has got to wait */
318 add_release(&gsnedf, task);
319 }
320}
321
322#ifdef CONFIG_SCHED_CPU_AFFINITY
323static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
324{
325 cpu_entry_t *affinity;
326
327 get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
328#ifdef CONFIG_RELEASE_MASTER
329 gsnedf.release_master
330#else
331 NO_CPU
332#endif
333 );
334
335 return(affinity);
336}
337#endif
338
339/* check for any necessary preemptions */
340static void check_for_preemptions(void)
341{
342 struct task_struct *task;
343 cpu_entry_t *last;
344
345 for (last = lowest_prio_cpu();
346 edf_preemption_needed(&gsnedf, last->linked);
347 last = lowest_prio_cpu()) {
348 /* preemption necessary */
349 task = __take_ready(&gsnedf);
350 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
351 task->pid, last->cpu);
352
353#ifdef CONFIG_SCHED_CPU_AFFINITY
354 {
355 cpu_entry_t *affinity =
356 gsnedf_get_nearest_available_cpu(
357 &per_cpu(gsnedf_cpu_entries,
358 task_cpu(task)));
359 if (affinity)
360 last = affinity;
361 else if (last->linked)
362 requeue(last->linked);
363 }
364#else
365 if (last->linked)
366 requeue(last->linked);
367#endif
368
369 link_task_to_cpu(task, last);
370 preempt(last);
371 }
372}
373
374/* gsnedf_job_arrival: task is either resumed or released */
375static noinline void gsnedf_job_arrival(struct task_struct* task)
376{
377 BUG_ON(!task);
378
379 requeue(task);
380 check_for_preemptions();
381}
382
383static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
384{
385 unsigned long flags;
386
387 raw_spin_lock_irqsave(&gsnedf_lock, flags);
388
389 __merge_ready(rt, tasks);
390 check_for_preemptions();
391
392 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
393}
394
395/* caller holds gsnedf_lock */
396static noinline void job_completion(struct task_struct *t, int forced)
397{
398 BUG_ON(!t);
399
400 sched_trace_task_completion(t, forced);
401
402 TRACE_TASK(t, "job_completion().\n");
403
404 /* set flags */
405 set_rt_flags(t, RT_F_SLEEP);
406 /* prepare for next period */
407 /* prepare_for_next_period assumes implicit deadlines and no splitting,
408 * so we call it with the job deadline it expects.
409 */
410 t->rt_param.job_params.deadline = t->rt_param.job_params.release +
411 t->rt_param.task_params.period;
412 prepare_for_next_period(t);
413 /* We now set the subjob deadline to what it should be for scheduling
414 * priority.
415 */
416 t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
417 if (is_released(t, litmus_clock()))
418 sched_trace_task_release(t);
419 /* unlink */
420 unlink(t);
421 /* requeue
422 * But don't requeue a blocking task. */
423 if (is_running(t))
424 gsnedf_job_arrival(t);
425}
426
427static void move_deadline(struct task_struct *t)
428{
429 tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);
430 TRACE_TASK(t, "move_deadline called\nRelease: %llu\nPeriod: %llu"
431 "\nRelease + Period: %llu\nDeadline: %llu"
432 "\nDeadline - Release: %llu\n",
433 t->rt_param.job_params.release,
434 t->rt_param.task_params.period,
435 t->rt_param.job_params.release
436 + t->rt_param.task_params.period,
437 t->rt_param.job_params.subjob_deadline,
438 t->rt_param.job_params.subjob_deadline
439 - t->rt_param.job_params.release);
440 /* Check if rescheduling needed with lower priority. */
441 unlink(t);
442 gsnedf_job_arrival(t);
443}
444
445/* gsnedf_tick - this function is called for every local timer
446 * interrupt.
447 *
448 * checks whether the current task has expired and checks
449 * whether we need to preempt it if it has not expired
450 */
451static void gsnedf_tick(struct task_struct* t)
452{
453 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
454 if (!is_np(t)) {
455 /* np tasks will be preempted when they become
456 * preemptable again
457 */
458 litmus_reschedule_local();
459 TRACE("gsnedf_scheduler_tick: "
460 "%d is preemptable "
461 " => FORCE_RESCHED\n", t->pid);
462 } else if (is_user_np(t)) {
463 TRACE("gsnedf_scheduler_tick: "
464 "%d is non-preemptable, "
465 "preemption delayed.\n", t->pid);
466 request_exit_np(t);
467 }
468 }
469}
470
471/* Getting schedule() right is a bit tricky. schedule() may not make any
472 * assumptions on the state of the current task since it may be called for a
473 * number of reasons. The reasons include a scheduler_tick() determined that it
474 * was necessary, because sys_exit_np() was called, because some Linux
475 * subsystem determined so, or even (in the worst case) because there is a bug
476 * hidden somewhere. Thus, we must take extreme care to determine what the
477 * current state is.
478 *
479 * The CPU could currently be scheduling a task (or not), be linked (or not).
480 *
481 * The following assertions for the scheduled task could hold:
482 *
483 * - !is_running(scheduled) // the job blocks
484 * - scheduled->timeslice == 0 // the job completed (forcefully)
485 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
486 * - linked != scheduled // we need to reschedule (for any reason)
487 * - is_np(scheduled) // rescheduling must be delayed,
488 * sys_exit_np must be requested
489 *
490 * Any of these can occur together.
491 */
492static struct task_struct* gsnedf_schedule(struct task_struct * prev)
493{
494 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
495 int out_of_time, sleep, preempt, np, exists, blocks, needs_move;
496 struct task_struct* next = NULL;
497
498#ifdef CONFIG_RELEASE_MASTER
499 /* Bail out early if we are the release master.
500 * The release master never schedules any real-time tasks.
501 */
502 if (unlikely(gsnedf.release_master == entry->cpu)) {
503 sched_state_task_picked();
504 return NULL;
505 }
506#endif
507
508 raw_spin_lock(&gsnedf_lock);
509
510 /* sanity checking */
511 BUG_ON(entry->scheduled && entry->scheduled != prev);
512 BUG_ON(entry->scheduled && !is_realtime(prev));
513 BUG_ON(is_realtime(prev) && !entry->scheduled);
514
515 /* (0) Determine state */
516 exists = entry->scheduled != NULL;
517 blocks = exists && !is_running(entry->scheduled);
518 out_of_time = exists &&
519 budget_enforced(entry->scheduled) &&
520 budget_exhausted(entry->scheduled);
521 needs_move = exists && needs_deadline_move(entry->scheduled);
522 np = exists && is_np(entry->scheduled);
523 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
524 preempt = entry->scheduled != entry->linked;
525
526#ifdef WANT_ALL_SCHED_EVENTS
527 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
528#endif
529
530 if (exists)
531 TRACE_TASK(prev,
532 "blocks:%d out_of_time:%d needs_move:%d np:%d"
533 " sleep:%d preempt:%d state:%d sig:%d\n",
534 blocks, out_of_time, needs_move, np, sleep, preempt,
535 prev->state, signal_pending(prev));
536 if (entry->linked && preempt)
537 TRACE_TASK(prev, "will be preempted by %s/%d\n",
538 entry->linked->comm, entry->linked->pid);
539
540
541 /* If a task blocks we have no choice but to reschedule.
542 */
543 if (blocks)
544 unlink(entry->scheduled);
545
546 /* Request a sys_exit_np() call if we would like to preempt but cannot.
547 * We need to make sure to update the link structure anyway in case
548	 * we are still linked. Multiple calls to request_exit_np() don't
549 * hurt.
550 *
551 * Job deadline moves handled similarly
552 */
553 if (np && (out_of_time || preempt || sleep)) {
554 unlink(entry->scheduled);
555 request_exit_np(entry->scheduled);
556 }
557 else if (np && needs_move) {
558 move_deadline(entry->scheduled);
559 }
560
561 /* Any task that is preemptable and either exhausts its execution
562 * budget or wants to sleep completes. We may have to reschedule after
563 * this. Don't do a job completion if we block (can't have timers running
564	 * for blocked jobs). Preemptions go first for the same reason.
565 */
566 if (!np && (out_of_time || sleep) && !blocks && !preempt)
567 job_completion(entry->scheduled, !sleep);
568 else if (!np && needs_move && !blocks && !preempt) {
569 move_deadline(entry->scheduled);
570 }
571
572 /* Link pending task if we became unlinked.
573 */
574 if (!entry->linked)
575 link_task_to_cpu(__take_ready(&gsnedf), entry);
576
577 /* The final scheduling decision. Do we need to switch for some reason?
578 * If linked is different from scheduled, then select linked as next.
579 */
580 if ((!np || blocks) &&
581 entry->linked != entry->scheduled) {
582 /* Schedule a linked job? */
583 if (entry->linked) {
584 entry->linked->rt_param.scheduled_on = entry->cpu;
585 next = entry->linked;
586 TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
587 }
588 if (entry->scheduled) {
589 /* not gonna be scheduled soon */
590 entry->scheduled->rt_param.scheduled_on = NO_CPU;
591 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
592 }
593 } else
594 /* Only override Linux scheduler if we have a real-time task
595 * scheduled that needs to continue.
596 */
597 if (exists)
598 next = prev;
599
600 sched_state_task_picked();
601
602 raw_spin_unlock(&gsnedf_lock);
603
604 if (next) {
605 arm_split_timer(entry, next);
606 }
607 else if (entry->timer_armed) {
608 cancel_split_timer(entry);
609 }
610
611#ifdef WANT_ALL_SCHED_EVENTS
612 TRACE("gsnedf_lock released, next=0x%p\n", next);
613
614 if (next)
615 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
616 else if (exists && !next)
617 TRACE("becomes idle at %llu.\n", litmus_clock());
618#endif
619
620
621 return next;
622}
623
624
625/* _finish_switch - we just finished the switch away from prev
626 */
627static void gsnedf_finish_switch(struct task_struct *prev)
628{
629 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
630
631 entry->scheduled = is_realtime(current) ? current : NULL;
632#ifdef WANT_ALL_SCHED_EVENTS
633 TRACE_TASK(prev, "switched away from\n");
634#endif
635}
636
637static void gsnedf_release_at(struct task_struct *t, lt_t start)
638{
639 t->rt_param.job_params.deadline = start;
640 prepare_for_next_period(t);
641 t->rt_param.job_params.subjob_deadline = get_proper_deadline(t);
642 set_rt_flags(t, RT_F_RUNNING);
643}
644
645/* Prepare a task for running in RT mode
646 */
647static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
648{
649 unsigned long flags;
650 cpu_entry_t* entry;
651
652 TRACE("gsn edf: task new %d\n", t->pid);
653
654 raw_spin_lock_irqsave(&gsnedf_lock, flags);
655
656 /* setup job params */
657 gsnedf_release_at(t, litmus_clock());
658
659 if (running) {
660 entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
661 BUG_ON(entry->scheduled);
662
663#ifdef CONFIG_RELEASE_MASTER
664 if (entry->cpu != gsnedf.release_master) {
665#endif
666 entry->scheduled = t;
667 tsk_rt(t)->scheduled_on = task_cpu(t);
668#ifdef CONFIG_RELEASE_MASTER
669 } else {
670 /* do not schedule on release master */
671 preempt(entry); /* force resched */
672 tsk_rt(t)->scheduled_on = NO_CPU;
673 }
674#endif
675 } else {
676 t->rt_param.scheduled_on = NO_CPU;
677 }
678 t->rt_param.linked_on = NO_CPU;
679
680 gsnedf_job_arrival(t);
681 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
682}
683
684static void gsnedf_task_wake_up(struct task_struct *task)
685{
686 unsigned long flags;
687 lt_t now;
688
689 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
690
691 raw_spin_lock_irqsave(&gsnedf_lock, flags);
692 /* We need to take suspensions because of semaphores into
693 * account! If a job resumes after being suspended due to acquiring
694 * a semaphore, it should never be treated as a new job release.
695 */
696 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
697 set_rt_flags(task, RT_F_RUNNING);
698 } else {
699 now = litmus_clock();
700 if (is_tardy(task, now)) {
701 /* new sporadic release */
702 gsnedf_release_at(task, now);
703 sched_trace_task_release(task);
704 }
705 else {
706 if (task->rt.time_slice) {
707 /* came back in time before deadline
708 */
709 set_rt_flags(task, RT_F_RUNNING);
710 }
711 }
712 }
713 gsnedf_job_arrival(task);
714 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
715}
716
717static void gsnedf_task_block(struct task_struct *t)
718{
719 unsigned long flags;
720
721 TRACE_TASK(t, "block at %llu\n", litmus_clock());
722
723 /* unlink if necessary */
724 raw_spin_lock_irqsave(&gsnedf_lock, flags);
725 unlink(t);
726 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
727
728 BUG_ON(!is_realtime(t));
729}
730
731
732static void gsnedf_task_exit(struct task_struct * t)
733{
734 unsigned long flags;
735
736 /* unlink if necessary */
737 raw_spin_lock_irqsave(&gsnedf_lock, flags);
738 unlink(t);
739 if (tsk_rt(t)->scheduled_on != NO_CPU) {
740 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
741 tsk_rt(t)->scheduled_on = NO_CPU;
742 }
743 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
744
745 BUG_ON(!is_realtime(t));
746 TRACE_TASK(t, "RIP\n");
747}
748
749
750static long gsnedf_admit_task(struct task_struct* tsk)
751{
752 return 0;
753}
754
755#ifdef CONFIG_LITMUS_LOCKING
756
757#include <litmus/fdso.h>
758
759/* called with IRQs off */
760static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
761{
762 int linked_on;
763 int check_preempt = 0;
764
765 raw_spin_lock(&gsnedf_lock);
766
767 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
768 tsk_rt(t)->inh_task = prio_inh;
769
770 linked_on = tsk_rt(t)->linked_on;
771
772 /* If it is scheduled, then we need to reorder the CPU heap. */
773 if (linked_on != NO_CPU) {
774 TRACE_TASK(t, "%s: linked on %d\n",
775 __FUNCTION__, linked_on);
776 /* Holder is scheduled; need to re-order CPUs.
777 * We can't use heap_decrease() here since
778 * the cpu_heap is ordered in reverse direction, so
779 * it is actually an increase. */
780 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
781 gsnedf_cpus[linked_on]->hn);
782 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
783 gsnedf_cpus[linked_on]->hn);
784 } else {
785 /* holder may be queued: first stop queue changes */
786 raw_spin_lock(&gsnedf.release_lock);
787 if (is_queued(t)) {
788 TRACE_TASK(t, "%s: is queued\n",
789 __FUNCTION__);
790 /* We need to update the position of holder in some
791			 * heap. Note that this could be a release heap if
792 * budget enforcement is used and this job overran. */
793 check_preempt =
794 !bheap_decrease(edf_ready_order,
795 tsk_rt(t)->heap_node);
796 } else {
797 /* Nothing to do: if it is not queued and not linked
798 * then it is either sleeping or currently being moved
799 * by other code (e.g., a timer interrupt handler) that
800 * will use the correct priority when enqueuing the
801 * task. */
802 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
803 __FUNCTION__);
804 }
805 raw_spin_unlock(&gsnedf.release_lock);
806
807 /* If holder was enqueued in a release heap, then the following
808 * preemption check is pointless, but we can't easily detect
809 * that case. If you want to fix this, then consider that
810 * simply adding a state flag requires O(n) time to update when
811 * releasing n tasks, which conflicts with the goal to have
812 * O(log n) merges. */
813 if (check_preempt) {
814 /* heap_decrease() hit the top level of the heap: make
815 * sure preemption checks get the right task, not the
816 * potentially stale cache. */
817 bheap_uncache_min(edf_ready_order,
818 &gsnedf.ready_queue);
819 check_for_preemptions();
820 }
821 }
822
823 raw_spin_unlock(&gsnedf_lock);
824}
825
826/* called with IRQs off */
827static void update_unlocked_priority(struct task_struct* t)
828{
829 raw_spin_lock(&gsnedf_lock);
830
831 /* A job only stops inheriting a priority when it releases a
832 * resource. Thus we can make the following assumption.*/
833 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
834
835 /* Clear priority inheritance */
836 TRACE_TASK(t, "priority restored\n");
837 tsk_rt(t)->inh_task = NULL;
838
839 /* Update splitting deadline */
840 tsk_rt(t)->job_params.subjob_deadline = get_proper_deadline(t);
841
842 /* Check if rescheduling is necessary. We can't use heap_decrease()
843 * since the priority was effectively lowered. */
844 unlink(t);
845 gsnedf_job_arrival(t);
846
847 raw_spin_unlock(&gsnedf_lock);
848}
849
850
851/* ******************** FMLP support ********************** */
852
853/* struct for semaphore with priority inheritance */
854struct fmlp_semaphore {
855 struct litmus_lock litmus_lock;
856
857 /* current resource holder */
858 struct task_struct *owner;
859
860 /* highest-priority waiter */
861 struct task_struct *hp_waiter;
862
863 /* FIFO queue of waiting tasks */
864 wait_queue_head_t wait;
865};
866
867static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
868{
869 return container_of(lock, struct fmlp_semaphore, litmus_lock);
870}
871
872/* caller is responsible for locking */
873static struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem,
874 struct task_struct* skip)
875{
876 struct list_head *pos;
877 struct task_struct *queued, *found = NULL;
878
879 list_for_each(pos, &sem->wait.task_list) {
880 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
881 task_list)->private;
882
883 /* Compare task prios, find high prio task. */
884 if (queued != skip && edf_higher_prio(queued, found))
885 found = queued;
886 }
887 return found;
888}
889
890int gsnedf_fmlp_lock(struct litmus_lock* l)
891{
892 struct task_struct* t = current;
893 struct fmlp_semaphore *sem = fmlp_from_lock(l);
894 cpu_entry_t* entry;
895 wait_queue_t wait;
896 unsigned long flags;
897
898 if (!is_realtime(t))
899 return -EPERM;
900
901 spin_lock_irqsave(&sem->wait.lock, flags);
902 entry = &__get_cpu_var(gsnedf_cpu_entries);
903
904 tsk_rt(t)->in_crit_section = 1;
905 if (entry->timer_armed) {
906 cancel_split_timer(entry);
907 }
908
909 if (sem->owner) {
910 /* resource is not free => must suspend and wait */
911
912 init_waitqueue_entry(&wait, t);
913
914 /* FIXME: interruptible would be nice some day */
915 set_task_state(t, TASK_UNINTERRUPTIBLE);
916
917 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
918
919 /* check if we need to activate priority inheritance */
920 if (edf_higher_prio(t, sem->hp_waiter)) {
921 sem->hp_waiter = t;
922 if (edf_higher_prio(t, sem->owner))
923 set_priority_inheritance(sem->owner, sem->hp_waiter);
924 }
925
926 TS_LOCK_SUSPEND;
927
928 /* release lock before sleeping */
929 spin_unlock_irqrestore(&sem->wait.lock, flags);
930
931 /* We depend on the FIFO order. Thus, we don't need to recheck
932 * when we wake up; we are guaranteed to have the lock since
933 * there is only one wake up per release.
934 */
935
936 schedule();
937
938 TS_LOCK_RESUME;
939
940 /* Since we hold the lock, no other task will change
941 * ->owner. We can thus check it without acquiring the spin
942 * lock. */
943 BUG_ON(sem->owner != t);
944 } else {
945 /* it's ours now */
946 sem->owner = t;
947
948 spin_unlock_irqrestore(&sem->wait.lock, flags);
949 }
950
951 return 0;
952}
953
954int gsnedf_fmlp_unlock(struct litmus_lock* l)
955{
956 struct task_struct *t = current, *next;
957 struct fmlp_semaphore *sem = fmlp_from_lock(l);
958 unsigned long flags;
959 int err = 0;
960
961 spin_lock_irqsave(&sem->wait.lock, flags);
962
963 if (sem->owner != t) {
964 err = -EINVAL;
965 goto out;
966 }
967
968 /* check if there are jobs waiting for this resource */
969 next = __waitqueue_remove_first(&sem->wait);
970 if (next) {
971		/* next becomes the resource holder */
972 sem->owner = next;
973 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
974
975 /* determine new hp_waiter if necessary */
976 if (next == sem->hp_waiter) {
977 TRACE_TASK(next, "was highest-prio waiter\n");
978 /* next has the highest priority --- it doesn't need to
979 * inherit. However, we need to make sure that the
980 * next-highest priority in the queue is reflected in
981 * hp_waiter. */
982 sem->hp_waiter = find_hp_waiter(sem, next);
983 if (sem->hp_waiter)
984 TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
985 else
986 TRACE("no further waiters\n");
987 } else {
988 /* Well, if next is not the highest-priority waiter,
989 * then it ought to inherit the highest-priority
990 * waiter's priority. */
991 set_priority_inheritance(next, sem->hp_waiter);
992 }
993
994 /* wake up next */
995 wake_up_process(next);
996 } else
997 /* becomes available */
998 sem->owner = NULL;
999
1000 /* We are no longer in a critical section */
1001 tsk_rt(t)->in_crit_section = 0;
1002
1003 /* we lose the benefit of priority inheritance (if any) and may need
1004 * to move the deadline. In either case, may need to reschedule
1005 * due to reduced priority. */
1006 if (tsk_rt(t)->inh_task || needs_deadline_move(t))
1007 update_unlocked_priority(t);
1008 /* TODO: Check that schedule() gets called - it needs to arm the
1009 * enforcement timer. Otherwise we should do it here or in
1010 * update_unlocked_priority. */
1011
1012out:
1013 spin_unlock_irqrestore(&sem->wait.lock, flags);
1014
1015 return err;
1016}
1017
1018int gsnedf_fmlp_close(struct litmus_lock* l)
1019{
1020 struct task_struct *t = current;
1021 struct fmlp_semaphore *sem = fmlp_from_lock(l);
1022 unsigned long flags;
1023
1024 int owner;
1025
1026 spin_lock_irqsave(&sem->wait.lock, flags);
1027
1028 owner = sem->owner == t;
1029
1030 spin_unlock_irqrestore(&sem->wait.lock, flags);
1031
1032 if (owner)
1033 gsnedf_fmlp_unlock(l);
1034
1035 return 0;
1036}
1037
1038void gsnedf_fmlp_free(struct litmus_lock* lock)
1039{
1040 kfree(fmlp_from_lock(lock));
1041}
1042
1043static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
1044 .close = gsnedf_fmlp_close,
1045 .lock = gsnedf_fmlp_lock,
1046 .unlock = gsnedf_fmlp_unlock,
1047 .deallocate = gsnedf_fmlp_free,
1048};
1049
1050static struct litmus_lock* gsnedf_new_fmlp(void)
1051{
1052 struct fmlp_semaphore* sem;
1053
1054 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1055 if (!sem)
1056 return NULL;
1057
1058 sem->owner = NULL;
1059 sem->hp_waiter = NULL;
1060 init_waitqueue_head(&sem->wait);
1061 sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops;
1062
1063 return &sem->litmus_lock;
1064}
1065
1066/* **** lock constructor **** */
1067
1068
1069static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
1070 void* __user unused)
1071{
1072 int err = -ENXIO;
1073
1074 /* GSN-EDF currently only supports the FMLP for global resources. */
1075 switch (type) {
1076
1077 case FMLP_SEM:
1078 /* Flexible Multiprocessor Locking Protocol */
1079 *lock = gsnedf_new_fmlp();
1080 if (*lock)
1081 err = 0;
1082 else
1083 err = -ENOMEM;
1084 break;
1085
1086 };
1087
1088 return err;
1089}
1090
1091#endif
1092
1093
1094static long gsnedf_activate_plugin(void)
1095{
1096 int cpu;
1097 cpu_entry_t *entry;
1098
1099 bheap_init(&gsnedf_cpu_heap);
1100#ifdef CONFIG_RELEASE_MASTER
1101 gsnedf.release_master = atomic_read(&release_master_cpu);
1102#endif
1103
1104 for_each_online_cpu(cpu) {
1105 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1106 bheap_node_init(&entry->hn, entry);
1107 entry->linked = NULL;
1108 entry->scheduled = NULL;
1109#ifdef CONFIG_RELEASE_MASTER
1110 if (cpu != gsnedf.release_master) {
1111#endif
1112 TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
1113 update_cpu_position(entry);
1114#ifdef CONFIG_RELEASE_MASTER
1115 } else {
1116 TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
1117 }
1118#endif
1119 }
1120 return 0;
1121}
1122
1123/* Plugin object */
1124static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
1125 .plugin_name = "GSN-EDF",
1126 .finish_switch = gsnedf_finish_switch,
1127 .tick = gsnedf_tick,
1128 .task_new = gsnedf_task_new,
1129 .complete_job = complete_job,
1130 .task_exit = gsnedf_task_exit,
1131 .schedule = gsnedf_schedule,
1132 .release_at = gsnedf_release_at,
1133 .task_wake_up = gsnedf_task_wake_up,
1134 .task_block = gsnedf_task_block,
1135 .admit_task = gsnedf_admit_task,
1136 .activate_plugin = gsnedf_activate_plugin,
1137#ifdef CONFIG_LITMUS_LOCKING
1138 .allocate_lock = gsnedf_allocate_lock,
1139#endif
1140};
1141
1142
1143static int __init init_gsn_edf(void)
1144{
1145 int cpu;
1146 cpu_entry_t *entry;
1147
1148 bheap_init(&gsnedf_cpu_heap);
1149 /* initialize CPU state */
1150 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1151 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1152 gsnedf_cpus[cpu] = entry;
1153 entry->cpu = cpu;
1154 entry->hn = &gsnedf_heap_node[cpu];
1155 hrtimer_init(&entry->split_timer,
1156 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1157 entry->split_timer.function = on_split_timeout;
1158 bheap_node_init(&entry->hn, entry);
1159 }
1160 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
1161 return register_sched_plugin(&gsn_edf_plugin);
1162}
1163
1164
1165module_init(init_gsn_edf);
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
new file mode 100644
index 000000000000..5877307a996b
--- /dev/null
+++ b/litmus/sched_litmus.c
@@ -0,0 +1,328 @@
1/* This file is included from kernel/sched.c */
2
3#include <litmus/litmus.h>
4#include <litmus/budget.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/preempt.h>
7
8static void update_time_litmus(struct rq *rq, struct task_struct *p)
9{
10 u64 delta = rq->clock - p->se.exec_start;
11 if (unlikely((s64)delta < 0))
12 delta = 0;
13 /* per job counter */
14 p->rt_param.job_params.exec_time += delta;
15 /* task counter */
16 p->se.sum_exec_runtime += delta;
17 /* sched_clock() */
18 p->se.exec_start = rq->clock;
19 cpuacct_charge(p, delta);
20}
21
22static void double_rq_lock(struct rq *rq1, struct rq *rq2);
23static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
24
25/*
26 * litmus_tick gets called by scheduler_tick() with HZ freq
27 * Interrupts are disabled
28 */
29static void litmus_tick(struct rq *rq, struct task_struct *p)
30{
31 TS_PLUGIN_TICK_START;
32
33 if (is_realtime(p))
34 update_time_litmus(rq, p);
35
36 /* plugin tick */
37 litmus->tick(p);
38
39 TS_PLUGIN_TICK_END;
40
41 return;
42}
43
44static struct task_struct *
45litmus_schedule(struct rq *rq, struct task_struct *prev)
46{
47 struct rq* other_rq;
48 struct task_struct *next;
49
50 long was_running;
51 lt_t _maybe_deadlock = 0;
52
53 /* let the plugin schedule */
54 next = litmus->schedule(prev);
55
56 sched_state_plugin_check();
57
58 /* check if a global plugin pulled a task from a different RQ */
59 if (next && task_rq(next) != rq) {
60 /* we need to migrate the task */
61 other_rq = task_rq(next);
62 TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);
63
64 /* while we drop the lock, the prev task could change its
65 * state
66 */
67 was_running = is_running(prev);
68 mb();
69 raw_spin_unlock(&rq->lock);
70
71 /* Don't race with a concurrent switch. This could deadlock in
72 * the case of cross or circular migrations. It's the job of
73 * the plugin to make sure that doesn't happen.
74 */
75 TRACE_TASK(next, "stack_in_use=%d\n",
76 next->rt_param.stack_in_use);
77 if (next->rt_param.stack_in_use != NO_CPU) {
78 TRACE_TASK(next, "waiting to deschedule\n");
79 _maybe_deadlock = litmus_clock();
80 }
81 while (next->rt_param.stack_in_use != NO_CPU) {
82 cpu_relax();
83 mb();
84 if (next->rt_param.stack_in_use == NO_CPU)
85 TRACE_TASK(next,"descheduled. Proceeding.\n");
86
87 if (lt_before(_maybe_deadlock + 10000000,
88 litmus_clock())) {
89 /* We've been spinning for 10ms.
90 * Something can't be right!
91 * Let's abandon the task and bail out; at least
92 * we will have debug info instead of a hard
93 * deadlock.
94 */
95 TRACE_TASK(next,"stack too long in use. "
96 "Deadlock?\n");
97 next = NULL;
98
99 /* bail out */
100 raw_spin_lock(&rq->lock);
101 return next;
102 }
103 }
104#ifdef __ARCH_WANT_UNLOCKED_CTXSW
105 if (next->oncpu)
106 TRACE_TASK(next, "waiting for !oncpu");
107 while (next->oncpu) {
108 cpu_relax();
109 mb();
110 }
111#endif
112 double_rq_lock(rq, other_rq);
113 mb();
114 if (is_realtime(prev) && is_running(prev) != was_running) {
115 TRACE_TASK(prev,
116 "state changed while we dropped"
117 " the lock: is_running=%d, was_running=%d\n",
118 is_running(prev), was_running);
119 if (is_running(prev) && !was_running) {
120 /* prev task became unblocked
121 * we need to simulate normal sequence of events
122 * to scheduler plugins.
123 */
124 litmus->task_block(prev);
125 litmus->task_wake_up(prev);
126 }
127 }
128
129 set_task_cpu(next, smp_processor_id());
130
131 /* DEBUG: now that we have the lock we need to make sure a
132 * couple of things still hold:
133 * - it is still a real-time task
134 * - it is still runnable (could have been stopped)
135 * If either is violated, then the active plugin is
136 * doing something wrong.
137 */
138 if (!is_realtime(next) || !is_running(next)) {
139 /* BAD BAD BAD */
140 TRACE_TASK(next,"BAD: migration invariant FAILED: "
141 "rt=%d running=%d\n",
142 is_realtime(next),
143 is_running(next));
144 /* drop the task */
145 next = NULL;
146 }
147 /* release the other CPU's runqueue, but keep ours */
148 raw_spin_unlock(&other_rq->lock);
149 }
150 if (next) {
151 next->rt_param.stack_in_use = rq->cpu;
152 next->se.exec_start = rq->clock;
153 }
154
155 update_enforcement_timer(next);
156 return next;
157}
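/* Note on the migration hand-off above (descriptive summary only, no new
 * logic): litmus_schedule() drops rq->lock, spins until the pulled task's
 * stack is no longer in use on its previous CPU (bailing out after roughly
 * 10 ms to trade a hard deadlock for debug output), re-acquires both
 * runqueue locks, replays any block/wake-up that happened to prev while the
 * lock was dropped, and finally re-validates that the chosen task is still
 * a runnable real-time task before completing the migration.
 */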
158
159static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
160 int flags)
161{
162 if (flags & ENQUEUE_WAKEUP) {
163 sched_trace_task_resume(p);
164 tsk_rt(p)->present = 1;
165 /* LITMUS^RT plugins need to update the state
166 * _before_ making it available in global structures.
167 * Linux gets away with being lazy about the task state
168 * update. We can't do that, hence we update the task
169 * state already here.
170 *
171 * WARNING: this needs to be re-evaluated when porting
172 * to newer kernel versions.
173 */
174 p->state = TASK_RUNNING;
175 litmus->task_wake_up(p);
176
177 rq->litmus.nr_running++;
178 } else
179 TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
180}
181
182static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
183 int flags)
184{
185 if (flags & DEQUEUE_SLEEP) {
186 litmus->task_block(p);
187 tsk_rt(p)->present = 0;
188 sched_trace_task_block(p);
189
190 rq->litmus.nr_running--;
191 } else
192 TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
193}
194
195static void yield_task_litmus(struct rq *rq)
196{
197 TS_SYSCALL_IN_START;
198
199 TS_SYSCALL_IN_END;
200
201 TRACE_CUR("yields\n");
202
203 BUG_ON(rq->curr != current);
204 /* sched_yield() is called to trigger delayed preemptions.
205 * Thus, mark the current task as needing to be rescheduled.
206 * This will cause the scheduler plugin to be invoked, which can
207 * then determine if a preemption is still required.
208 */
209 clear_exit_np(current);
210 litmus_reschedule_local();
211
212 TS_SYSCALL_OUT_START;
213}
214
215/* Plugins are responsible for this.
216 */
217static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
218{
219}
220
221static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
222{
223}
224
225static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev)
226{
227 update_time_litmus(rq, prev);
228 if (!is_running(prev))
229 tsk_rt(prev)->present = 0;
230}
231
232/* pick_next_task_litmus() - front end for litmus_schedule()
233 *
234 * returns the next task to be scheduled
235 */
236static struct task_struct *pick_next_task_litmus(struct rq *rq)
237{
238 /* get the to-be-switched-out task (prev) */
239 struct task_struct *prev = rq->litmus.prev;
240 struct task_struct *next;
241
242 /* if not called from schedule() but from somewhere
243 * else (e.g., migration), return now!
244 */
245 if(!rq->litmus.prev)
246 return NULL;
247
248 rq->litmus.prev = NULL;
249
250 TS_PLUGIN_SCHED_START;
251 next = litmus_schedule(rq, prev);
252 TS_PLUGIN_SCHED_END;
253
254 return next;
255}
256
257static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
258{
259 /* nothing to do; tick related tasks are done by litmus_tick() */
260 return;
261}
262
263static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running)
264{
265}
266
267static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
268 int oldprio, int running)
269{
270}
271
272unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
273{
274 /* return infinity */
275 return 0;
276}
277
278/* This is called when a task becomes a real-time task, either due to a SCHED_*
279 * class transition or due to PI mutex inheritance. We don't handle Linux PI
280 * mutex inheritance yet (and probably never will). Use the LITMUS-provided
281 * synchronization primitives instead.
282 */
283static void set_curr_task_litmus(struct rq *rq)
284{
285 rq->curr->se.exec_start = rq->clock;
286}
287
288
289#ifdef CONFIG_SMP
290/* execve tries to rebalance the task in this scheduling domain.
291 * We don't care about the scheduling domain; this can get called from
292 * exec, fork, and wakeup.
293 */
294static int select_task_rq_litmus(struct rq *rq, struct task_struct *p,
295 int sd_flag, int flags)
296{
297 /* preemption is already disabled.
298 * We don't want to change cpu here
299 */
300 return task_cpu(p);
301}
302#endif
303
304static const struct sched_class litmus_sched_class = {
305 .next = &rt_sched_class,
306 .enqueue_task = enqueue_task_litmus,
307 .dequeue_task = dequeue_task_litmus,
308 .yield_task = yield_task_litmus,
309
310 .check_preempt_curr = check_preempt_curr_litmus,
311
312 .pick_next_task = pick_next_task_litmus,
313 .put_prev_task = put_prev_task_litmus,
314
315#ifdef CONFIG_SMP
316 .select_task_rq = select_task_rq_litmus,
317
318 .pre_schedule = pre_schedule_litmus,
319#endif
320
321 .set_curr_task = set_curr_task_litmus,
322 .task_tick = task_tick_litmus,
323
324 .get_rr_interval = get_rr_interval_litmus,
325
326 .prio_changed = prio_changed_litmus,
327 .switched_to = switched_to_litmus,
328};
diff --git a/litmus/sched_litmus.c.rej b/litmus/sched_litmus.c.rej
new file mode 100644
index 000000000000..e0750ecbe7a2
--- /dev/null
+++ b/litmus/sched_litmus.c.rej
@@ -0,0 +1,11 @@
1--- litmus/sched_litmus.c
2+++ litmus/sched_litmus.c
3@@ -196,7 +196,7 @@
4 {
5 TS_SYSCALL_IN_START;
6
7- TS_SYSCALL_OUT_END;
8+ TS_SYSCALL_IN_END;
9
10 TRACE_CUR("yields\n");
11
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
new file mode 100644
index 000000000000..c95bde87b5d7
--- /dev/null
+++ b/litmus/sched_pfair.c
@@ -0,0 +1,1056 @@
1/*
2 * kernel/sched_pfair.c
3 *
4 * Implementation of the PD^2 pfair scheduling algorithm. This
5 * implementation realizes "early releasing," i.e., it is work-conserving.
6 *
7 */
8
9#include <asm/div64.h>
10#include <linux/delay.h>
11#include <linux/module.h>
12#include <linux/spinlock.h>
13#include <linux/percpu.h>
14#include <linux/sched.h>
15#include <linux/list.h>
16#include <linux/slab.h>
17
18#include <litmus/litmus.h>
19#include <litmus/jobs.h>
20#include <litmus/preempt.h>
21#include <litmus/rt_domain.h>
22#include <litmus/sched_plugin.h>
23#include <litmus/sched_trace.h>
24
25#include <litmus/bheap.h>
26
27/* to configure the cluster size */
28#include <litmus/litmus_proc.h>
29
30#include <litmus/clustered.h>
31
32static enum cache_level pfair_cluster_level = GLOBAL_CLUSTER;
33
34struct subtask {
35 /* measured in quanta relative to job release */
36 quanta_t release;
37 quanta_t deadline;
38 quanta_t overlap; /* called "b bit" by PD^2 */
39 quanta_t group_deadline;
40};
41
42struct pfair_param {
43 quanta_t quanta; /* number of subtasks */
44 quanta_t cur; /* index of current subtask */
45
46 quanta_t release; /* in quanta */
47 quanta_t period; /* in quanta */
48
49 quanta_t last_quantum; /* when scheduled last */
50 int last_cpu; /* where scheduled last */
51
52 struct pfair_cluster* cluster; /* where this task is scheduled */
53
54 struct subtask subtasks[0]; /* allocate together with pfair_param */
55};
56
57#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)
58
59struct pfair_state {
60 struct cluster_cpu topology;
61
62 volatile quanta_t cur_tick; /* updated by the CPU that is advancing
63 * the time */
64 volatile quanta_t local_tick; /* What tick is the local CPU currently
65 * executing? Updated only by the local
66 * CPU. In QEMU, this may lag behind the
67 * current tick. In a real system, with
68 * proper timers and aligned quanta,
69 * that should only be the case for a
70 * very short time after the time
71 * advanced. With staggered quanta, it
72 * will lag for the duration of the
73 * offset.
74 */
75
76 struct task_struct* linked; /* the task that should be executing */
77 struct task_struct* local; /* the local copy of linked */
78 struct task_struct* scheduled; /* what is actually scheduled */
79
80 lt_t offset; /* stagger offset */
81 unsigned int missed_updates;
82 unsigned int missed_quanta;
83};
84
85struct pfair_cluster {
86 struct scheduling_cluster topology;
87
88 /* The "global" time in this cluster. */
89 quanta_t pfair_time; /* the "official" PFAIR clock */
90
91 /* The ready queue for this cluster. */
92 rt_domain_t pfair;
93
94 /* The set of jobs that should have their release enacted at the next
95 * quantum boundary.
96 */
97 struct bheap release_queue;
98 raw_spinlock_t release_lock;
99};
100
101static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
102{
103 return container_of(state->topology.cluster, struct pfair_cluster, topology);
104}
105
106static inline int cpu_id(struct pfair_state* state)
107{
108 return state->topology.id;
109}
110
111static inline struct pfair_state* from_cluster_list(struct list_head* pos)
112{
113 return list_entry(pos, struct pfair_state, topology.cluster_list);
114}
115
116static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
117{
118 return container_of(rt, struct pfair_cluster, pfair);
119}
120
121static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
122{
123 /* The ready_lock is used to serialize all scheduling events. */
124 return &cluster->pfair.ready_lock;
125}
126
127static inline raw_spinlock_t* cpu_lock(struct pfair_state* state)
128{
129 return cluster_lock(cpu_cluster(state));
130}
131
132DEFINE_PER_CPU(struct pfair_state, pfair_state);
133struct pfair_state **pstate; /* shortcut */
134
135static struct pfair_cluster* pfair_clusters;
136static int num_pfair_clusters;
137
138/* Enable for lots of trace info.
139 * #define PFAIR_DEBUG
140 */
141
142#ifdef PFAIR_DEBUG
143#define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args)
144#define PTRACE(f, args...) TRACE(f, ## args)
145#else
146#define PTRACE_TASK(t, f, args...)
147#define PTRACE(f, args...)
148#endif
149
150/* gcc will inline all of these accessor functions... */
151static struct subtask* cur_subtask(struct task_struct* t)
152{
153 return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
154}
155
156static quanta_t cur_deadline(struct task_struct* t)
157{
158 return cur_subtask(t)->deadline + tsk_pfair(t)->release;
159}
160
161static quanta_t cur_release(struct task_struct* t)
162{
163 /* This is early releasing: only the release of the first subtask
164 * counts. */
165 return tsk_pfair(t)->release;
166}
167
168static quanta_t cur_overlap(struct task_struct* t)
169{
170 return cur_subtask(t)->overlap;
171}
172
173static quanta_t cur_group_deadline(struct task_struct* t)
174{
175 quanta_t gdl = cur_subtask(t)->group_deadline;
176 if (gdl)
177 return gdl + tsk_pfair(t)->release;
178 else
179 return gdl;
180}
181
182
183static int pfair_higher_prio(struct task_struct* first,
184 struct task_struct* second)
185{
186 return /* first task must exist */
187 first && (
188 /* Does the second task exist and is it a real-time task? If
189 * not, the first task (which is a RT task) has higher
190 * priority.
191 */
192 !second || !is_realtime(second) ||
193
194 /* Is the (subtask) deadline of the first task earlier?
195 * Then it has higher priority.
196 */
197 time_before(cur_deadline(first), cur_deadline(second)) ||
198
199 /* Do we have a deadline tie?
200 * Then break by B-bit.
201 */
202 (cur_deadline(first) == cur_deadline(second) &&
203 (cur_overlap(first) > cur_overlap(second) ||
204
205 /* Do we have a B-bit tie?
206 * Then break by group deadline.
207 */
208 (cur_overlap(first) == cur_overlap(second) &&
209 (time_after(cur_group_deadline(first),
210 cur_group_deadline(second)) ||
211
212 /* Do we have a group deadline tie?
213				 * Then break by PID, which is unique.
214 */
215 (cur_group_deadline(first) ==
216 cur_group_deadline(second) &&
217 first->pid < second->pid))))));
218}
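/* Recap of the PD^2 tie-break cascade implemented above: the earlier
 * subtask deadline wins; on a deadline tie, the larger b-bit (overlap)
 * wins; on a b-bit tie, the later group deadline wins; any remaining tie
 * is broken in favor of the smaller (unique) PID.
 */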
219
220int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
221{
222 return pfair_higher_prio(bheap2task(a), bheap2task(b));
223}
224
225static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
226{
227 struct pfair_cluster* cluster = from_domain(rt);
228 unsigned long flags;
229
230 raw_spin_lock_irqsave(&cluster->release_lock, flags);
231
232 bheap_union(pfair_ready_order, &cluster->release_queue, tasks);
233
234 raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
235}
236
237static void prepare_release(struct task_struct* t, quanta_t at)
238{
239 tsk_pfair(t)->release = at;
240 tsk_pfair(t)->cur = 0;
241}
242
243/* pull released tasks from the release queue */
244static void poll_releases(struct pfair_cluster* cluster)
245{
246 raw_spin_lock(&cluster->release_lock);
247 __merge_ready(&cluster->pfair, &cluster->release_queue);
248 raw_spin_unlock(&cluster->release_lock);
249}
250
251static void check_preempt(struct task_struct* t)
252{
253 int cpu = NO_CPU;
254 if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
255 tsk_rt(t)->present) {
256 /* the task can be scheduled and
257 * is not scheduled where it ought to be scheduled
258 */
259 cpu = tsk_rt(t)->linked_on != NO_CPU ?
260 tsk_rt(t)->linked_on :
261 tsk_rt(t)->scheduled_on;
262 PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
263 tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
264 /* preempt */
265 litmus_reschedule(cpu);
266 }
267}
268
269/* caller must hold pfair.ready_lock */
270static void drop_all_references(struct task_struct *t)
271{
272 int cpu;
273 struct pfair_state* s;
274 struct pfair_cluster* cluster;
275 if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
276		/* It must be in the ready queue; drop_all_references() isn't called
277 * when the job is in a release queue. */
278 cluster = tsk_pfair(t)->cluster;
279 bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
280 tsk_rt(t)->heap_node);
281 }
282 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
283 s = &per_cpu(pfair_state, cpu);
284 if (s->linked == t)
285 s->linked = NULL;
286 if (s->local == t)
287 s->local = NULL;
288 if (s->scheduled == t)
289 s->scheduled = NULL;
290 }
291}
292
293static void pfair_prepare_next_period(struct task_struct* t)
294{
295 struct pfair_param* p = tsk_pfair(t);
296
297 prepare_for_next_period(t);
298 get_rt_flags(t) = RT_F_RUNNING;
299 p->release += p->period;
300}
301
302/* returns 1 if the task needs to go the release queue */
303static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
304{
305 struct pfair_param* p = tsk_pfair(t);
306 int to_relq;
307 p->cur = (p->cur + 1) % p->quanta;
308 if (!p->cur) {
309 if (tsk_rt(t)->present) {
310 /* The job overran; we start a new budget allocation. */
311 pfair_prepare_next_period(t);
312 } else {
313 /* remove task from system until it wakes */
314 drop_all_references(t);
315 TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
316 cpu, p->cur);
317 return 0;
318 }
319 }
320 to_relq = time_after(cur_release(t), time);
321 TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n",
322 cpu, p->cur, to_relq, cur_release(t), time);
323 return to_relq;
324}
325
326static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
327{
328 struct task_struct* l;
329 struct pfair_param* p;
330 struct list_head* pos;
331 struct pfair_state* cpu;
332
333 list_for_each(pos, &cluster->topology.cpus) {
334 cpu = from_cluster_list(pos);
335 l = cpu->linked;
336 cpu->missed_updates += cpu->linked != cpu->local;
337 if (l) {
338 p = tsk_pfair(l);
339 p->last_quantum = time;
340 p->last_cpu = cpu_id(cpu);
341 if (advance_subtask(time, l, cpu_id(cpu))) {
342 //cpu->linked = NULL;
343 PTRACE_TASK(l, "should go to release queue. "
344 "scheduled_on=%d present=%d\n",
345 tsk_rt(l)->scheduled_on,
346 tsk_rt(l)->present);
347 }
348 }
349 }
350}
351
352static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
353{
354 int cpu;
355 if (tsk_rt(t)->scheduled_on != NO_CPU) {
356 /* always observe scheduled_on linkage */
357 default_cpu = tsk_rt(t)->scheduled_on;
358 } else if (tsk_pfair(t)->last_quantum == time - 1) {
359 /* back2back quanta */
360 /* Only observe last_quantum if no scheduled_on is in the way.
361 * This should only kick in if a CPU missed quanta, and that
362 * *should* only happen in QEMU.
363 */
364 cpu = tsk_pfair(t)->last_cpu;
365 if (!pstate[cpu]->linked ||
366 tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
367 default_cpu = cpu;
368 }
369 }
370 return default_cpu;
371}
372
373/* returns one if linking was redirected */
374static int pfair_link(quanta_t time, int cpu,
375 struct task_struct* t)
376{
377 int target = target_cpu(time, t, cpu);
378 struct task_struct* prev = pstate[cpu]->linked;
379 struct task_struct* other;
380 struct pfair_cluster* cluster = cpu_cluster(pstate[cpu]);
381
382 if (target != cpu) {
383 BUG_ON(pstate[target]->topology.cluster != pstate[cpu]->topology.cluster);
384 other = pstate[target]->linked;
385 pstate[target]->linked = t;
386 tsk_rt(t)->linked_on = target;
387 if (!other)
388 /* linked ok, but reschedule this CPU */
389 return 1;
390 if (target < cpu) {
391 /* link other to cpu instead */
392 tsk_rt(other)->linked_on = cpu;
393 pstate[cpu]->linked = other;
394 if (prev) {
395 /* prev got pushed back into the ready queue */
396 tsk_rt(prev)->linked_on = NO_CPU;
397 __add_ready(&cluster->pfair, prev);
398 }
399 /* we are done with this cpu */
400 return 0;
401 } else {
402			/* re-add other; its original CPU was not considered yet */
403 tsk_rt(other)->linked_on = NO_CPU;
404 __add_ready(&cluster->pfair, other);
405 /* reschedule this CPU */
406 return 1;
407 }
408 } else {
409 pstate[cpu]->linked = t;
410 tsk_rt(t)->linked_on = cpu;
411 if (prev) {
412 /* prev got pushed back into the ready queue */
413 tsk_rt(prev)->linked_on = NO_CPU;
414 __add_ready(&cluster->pfair, prev);
415 }
416 /* we are done with this CPU */
417 return 0;
418 }
419}
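/* Linking outcomes above, summarized: if the target CPU is the requested
 * CPU, t is linked directly and any previously linked task is pushed back
 * into the ready queue (return 0). If linking is redirected to another CPU
 * that had nothing linked, this CPU must still be reconsidered (return 1).
 * If the target had a task linked, that task is either moved to this CPU
 * (target index smaller; return 0) or re-added to the ready queue because
 * its CPU has not been considered yet in this pass (target index larger;
 * return 1).
 */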
420
421static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
422{
423 int retry;
424 struct list_head *pos;
425 struct pfair_state *cpu_state;
426
427 list_for_each(pos, &cluster->topology.cpus) {
428 cpu_state = from_cluster_list(pos);
429 retry = 1;
430#ifdef CONFIG_RELEASE_MASTER
431 /* skip release master */
432 if (cluster->pfair.release_master == cpu_id(cpu_state))
433 continue;
434#endif
435 while (retry) {
436 if (pfair_higher_prio(__peek_ready(&cluster->pfair),
437 cpu_state->linked))
438 retry = pfair_link(time, cpu_id(cpu_state),
439 __take_ready(&cluster->pfair));
440 else
441 retry = 0;
442 }
443 }
444}
445
446static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
447{
448 struct pfair_state *cpu;
449 struct list_head* pos;
450
451 /* called with interrupts disabled */
452 PTRACE("--- Q %lu at %llu PRE-SPIN\n",
453 time, litmus_clock());
454 raw_spin_lock(cluster_lock(cluster));
455 PTRACE("<<< Q %lu at %llu\n",
456 time, litmus_clock());
457
458 sched_trace_quantum_boundary();
459
460 advance_subtasks(cluster, time);
461 poll_releases(cluster);
462 schedule_subtasks(cluster, time);
463
464 list_for_each(pos, &cluster->topology.cpus) {
465 cpu = from_cluster_list(pos);
466 if (cpu->linked)
467 PTRACE_TASK(cpu->linked,
468 " linked on %d.\n", cpu_id(cpu));
469 else
470 PTRACE("(null) linked on %d.\n", cpu_id(cpu));
471 }
472 /* We are done. Advance time. */
473 mb();
474 list_for_each(pos, &cluster->topology.cpus) {
475 cpu = from_cluster_list(pos);
476 if (cpu->local_tick != cpu->cur_tick) {
477 TRACE("BAD Quantum not acked on %d "
478 "(l:%lu c:%lu p:%lu)\n",
479 cpu_id(cpu),
480 cpu->local_tick,
481 cpu->cur_tick,
482 cluster->pfair_time);
483 cpu->missed_quanta++;
484 }
485 cpu->cur_tick = time;
486 }
487 PTRACE(">>> Q %lu at %llu\n",
488 time, litmus_clock());
489 raw_spin_unlock(cluster_lock(cluster));
490}
491
492static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state)
493{
494 quanta_t loc;
495
496 goto first; /* skip mb() on first iteration */
497 do {
498 cpu_relax();
499 mb();
500 first: loc = state->cur_tick;
501 /* FIXME: what if loc > cur? */
502 } while (time_before(loc, q));
503 PTRACE("observed cur_tick:%lu >= q:%lu\n",
504 loc, q);
505}
506
507static quanta_t current_quantum(struct pfair_state* state)
508{
509 lt_t t = litmus_clock() - state->offset;
510 return time2quanta(t, FLOOR);
511}
512
513static void catchup_quanta(quanta_t from, quanta_t target,
514 struct pfair_state* state)
515{
516 quanta_t cur = from, time;
517 TRACE("+++< BAD catching up quanta from %lu to %lu\n",
518 from, target);
519 while (time_before(cur, target)) {
520 wait_for_quantum(cur, state);
521 cur++;
522 time = cmpxchg(&cpu_cluster(state)->pfair_time,
523 cur - 1, /* expected */
524 cur /* next */
525 );
526 if (time == cur - 1)
527 schedule_next_quantum(cpu_cluster(state), cur);
528 }
529 TRACE("+++> catching up done\n");
530}
531
532/* pfair_tick - this function is called for every local timer
533 * interrupt.
534 */
535static void pfair_tick(struct task_struct* t)
536{
537 struct pfair_state* state = &__get_cpu_var(pfair_state);
538 quanta_t time, cur;
539 int retry = 10;
540
541 do {
542 cur = current_quantum(state);
543 PTRACE("q %lu at %llu\n", cur, litmus_clock());
544
545 /* Attempt to advance time. First CPU to get here
546 * will prepare the next quantum.
547 */
548 time = cmpxchg(&cpu_cluster(state)->pfair_time,
549 cur - 1, /* expected */
550 cur /* next */
551 );
552 if (time == cur - 1) {
553 /* exchange succeeded */
554 wait_for_quantum(cur - 1, state);
555 schedule_next_quantum(cpu_cluster(state), cur);
556 retry = 0;
557 } else if (time_before(time, cur - 1)) {
558 /* the whole system missed a tick !? */
559 catchup_quanta(time, cur, state);
560 retry--;
561 } else if (time_after(time, cur)) {
562			/* our timer is lagging behind!? */
563 TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
564 retry--;
565 } else {
566 /* Some other CPU already started scheduling
567 * this quantum. Let it do its job and then update.
568 */
569 retry = 0;
570 }
571 } while (retry);
572
573 /* Spin locally until time advances. */
574 wait_for_quantum(cur, state);
575
576 /* copy assignment */
577 /* FIXME: what if we race with a future update? Corrupted state? */
578 state->local = state->linked;
579 /* signal that we are done */
580 mb();
581 state->local_tick = state->cur_tick;
582
583 if (state->local != current
584 && (is_realtime(current) || is_present(state->local)))
585 litmus_reschedule_local();
586}
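/* Quantum-advancing protocol used above, in brief: every CPU derives the
 * current quantum from its (possibly staggered) local clock and races on
 * cmpxchg(&pfair_time, cur - 1, cur). The single winner schedules the next
 * quantum for the whole cluster; a CPU that observes pfair_time lagging by
 * more than one quantum runs catchup_quanta(), and everyone else simply
 * waits in wait_for_quantum() until the new quantum has been published.
 */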
587
588static int safe_to_schedule(struct task_struct* t, int cpu)
589{
590 int where = tsk_rt(t)->scheduled_on;
591 if (where != NO_CPU && where != cpu) {
592 TRACE_TASK(t, "BAD: can't be scheduled on %d, "
593 "scheduled already on %d.\n", cpu, where);
594 return 0;
595 } else
596 return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING;
597}
598
599static struct task_struct* pfair_schedule(struct task_struct * prev)
600{
601 struct pfair_state* state = &__get_cpu_var(pfair_state);
602 struct pfair_cluster* cluster = cpu_cluster(state);
603 int blocks, completion, out_of_time;
604 struct task_struct* next = NULL;
605
606#ifdef CONFIG_RELEASE_MASTER
607 /* Bail out early if we are the release master.
608 * The release master never schedules any real-time tasks.
609 */
610 if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
611 sched_state_task_picked();
612 return NULL;
613 }
614#endif
615
616 raw_spin_lock(cpu_lock(state));
617
618 blocks = is_realtime(prev) && !is_running(prev);
619 completion = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP;
620 out_of_time = is_realtime(prev) && time_after(cur_release(prev),
621 state->local_tick);
622
623 if (is_realtime(prev))
624 PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
625 blocks, completion, out_of_time);
626
627 if (completion) {
628 sched_trace_task_completion(prev, 0);
629 pfair_prepare_next_period(prev);
630 prepare_release(prev, cur_release(prev));
631 }
632
633 if (!blocks && (completion || out_of_time)) {
634 drop_all_references(prev);
635 sched_trace_task_release(prev);
636 add_release(&cluster->pfair, prev);
637 }
638
639 if (state->local && safe_to_schedule(state->local, cpu_id(state)))
640 next = state->local;
641
642 if (prev != next) {
643 tsk_rt(prev)->scheduled_on = NO_CPU;
644 if (next)
645 tsk_rt(next)->scheduled_on = cpu_id(state);
646 }
647 sched_state_task_picked();
648 raw_spin_unlock(cpu_lock(state));
649
650 if (next)
651 TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
652 tsk_pfair(next)->release, cpu_cluster(state)->pfair_time, litmus_clock());
653 else if (is_realtime(prev))
654 TRACE("Becomes idle at %lu (%llu)\n", cpu_cluster(state)->pfair_time, litmus_clock());
655
656 return next;
657}
658
659static void pfair_task_new(struct task_struct * t, int on_rq, int running)
660{
661 unsigned long flags;
662 struct pfair_cluster* cluster;
663
664 TRACE("pfair: task new %d state:%d\n", t->pid, t->state);
665
666 cluster = tsk_pfair(t)->cluster;
667
668 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
669
670 prepare_release(t, cluster->pfair_time + 1);
671
672 t->rt_param.scheduled_on = NO_CPU;
673
674 if (running) {
675#ifdef CONFIG_RELEASE_MASTER
676 if (task_cpu(t) != cluster->pfair.release_master)
677#endif
678 t->rt_param.scheduled_on = task_cpu(t);
679 __add_ready(&cluster->pfair, t);
680 }
681
682 check_preempt(t);
683
684 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
685}
686
687static void pfair_task_wake_up(struct task_struct *t)
688{
689 unsigned long flags;
690 lt_t now;
691 struct pfair_cluster* cluster;
692
693 cluster = tsk_pfair(t)->cluster;
694
695 TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
696 litmus_clock(), cur_release(t), cluster->pfair_time);
697
698 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
699
700 /* If a task blocks and wakes before its next job release,
701 * then it may resume if it is currently linked somewhere
702 * (as if it never blocked at all). Otherwise, we have a
703 * new sporadic job release.
704 */
705 now = litmus_clock();
706 if (lt_before(get_deadline(t), now)) {
707 release_at(t, now);
708 prepare_release(t, time2quanta(now, CEIL));
709 sched_trace_task_release(t);
710 }
711
712 /* only add to ready queue if the task isn't still linked somewhere */
713 if (tsk_rt(t)->linked_on == NO_CPU)
714 __add_ready(&cluster->pfair, t);
715
716 check_preempt(t);
717
718 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
719 TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
720}
721
722static void pfair_task_block(struct task_struct *t)
723{
724 BUG_ON(!is_realtime(t));
725 TRACE_TASK(t, "blocks at %llu, state:%d\n",
726 litmus_clock(), t->state);
727}
728
729static void pfair_task_exit(struct task_struct * t)
730{
731 unsigned long flags;
732 struct pfair_cluster *cluster;
733
734 BUG_ON(!is_realtime(t));
735
736 cluster = tsk_pfair(t)->cluster;
737
738	/* Remove the task from the release or ready queue, and ensure
739	 * that it is not the scheduled task for ANY CPU. We
740	 * do this blanket check because occasionally, when
741 * tasks exit while blocked, the task_cpu of the task
742 * might not be the same as the CPU that the PFAIR scheduler
743 * has chosen for it.
744 */
745 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
746
747 TRACE_TASK(t, "RIP, state:%d\n", t->state);
748 drop_all_references(t);
749
750 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
751
752 kfree(t->rt_param.pfair);
753 t->rt_param.pfair = NULL;
754}
755
756
757static void pfair_release_at(struct task_struct* task, lt_t start)
758{
759 unsigned long flags;
760 quanta_t release;
761
762 struct pfair_cluster *cluster;
763
764 cluster = tsk_pfair(task)->cluster;
765
766 BUG_ON(!is_realtime(task));
767
768 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
769 release_at(task, start);
770 release = time2quanta(start, CEIL);
771
772 TRACE_TASK(task, "sys release at %lu\n", release);
773
774 drop_all_references(task);
775 prepare_release(task, release);
776 add_release(&cluster->pfair, task);
777
778 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
779}
780
781static void init_subtask(struct subtask* sub, unsigned long i,
782 lt_t quanta, lt_t period)
783{
784 /* since i is zero-based, the formulas are shifted by one */
785 lt_t tmp;
786
787 /* release */
788 tmp = period * i;
789 do_div(tmp, quanta); /* floor */
790 sub->release = (quanta_t) tmp;
791
792 /* deadline */
793 tmp = period * (i + 1);
794 if (do_div(tmp, quanta)) /* ceil */
795 tmp++;
796 sub->deadline = (quanta_t) tmp;
797
798 /* next release */
799 tmp = period * (i + 1);
800 do_div(tmp, quanta); /* floor */
801 sub->overlap = sub->deadline - (quanta_t) tmp;
802
803 /* Group deadline.
804 * Based on the formula given in Uma's thesis.
805 */
806 if (2 * quanta >= period) {
807 /* heavy */
808 tmp = (sub->deadline - (i + 1)) * period;
809 if (period > quanta &&
810 do_div(tmp, (period - quanta))) /* ceil */
811 tmp++;
812 sub->group_deadline = (quanta_t) tmp;
813 } else
814 sub->group_deadline = 0;
815}
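/* Worked example of the formulas above (illustrative numbers only): for a
 * task with quanta = 3 and period = 5 ("heavy", since 2 * 3 >= 5), the
 * subtasks come out as
 *   i = 0: release = 0, deadline = 2, overlap (b-bit) = 1, group_deadline = 3
 *   i = 1: release = 1, deadline = 4, overlap (b-bit) = 1, group_deadline = 5
 *   i = 2: release = 3, deadline = 5, overlap (b-bit) = 0, group_deadline = 5
 * all measured in quanta relative to the job release.
 */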
816
817static void dump_subtasks(struct task_struct* t)
818{
819 unsigned long i;
820 for (i = 0; i < t->rt_param.pfair->quanta; i++)
821 TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n",
822 i + 1,
823 t->rt_param.pfair->subtasks[i].release,
824 t->rt_param.pfair->subtasks[i].deadline,
825 t->rt_param.pfair->subtasks[i].overlap,
826 t->rt_param.pfair->subtasks[i].group_deadline);
827}
828
829static long pfair_admit_task(struct task_struct* t)
830{
831 lt_t quanta;
832 lt_t period;
833 s64 quantum_length = ktime_to_ns(tick_period);
834 struct pfair_param* param;
835 unsigned long i;
836
837 /* first check that the task is in the right cluster */
838 if (cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]) !=
839 cpu_cluster(pstate[task_cpu(t)]))
840 return -EINVAL;
841
842 /* Pfair is a tick-based method, so the time
843 * of interest is jiffies. Calculate tick-based
844 * times for everything.
845 * (Ceiling of exec cost, floor of period.)
846 */
847
848 quanta = get_exec_cost(t);
849 period = get_rt_period(t);
850
851 quanta = time2quanta(get_exec_cost(t), CEIL);
852
853 if (do_div(period, quantum_length))
854 printk(KERN_WARNING
855 "The period of %s/%d is not a multiple of %llu.\n",
856 t->comm, t->pid, (unsigned long long) quantum_length);
857
858 if (quanta == period) {
859 /* special case: task has weight 1.0 */
860 printk(KERN_INFO
861 "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n",
862 t->comm, t->pid, quanta, period);
863 quanta = 1;
864 period = 1;
865 }
866
867 param = kmalloc(sizeof(*param) +
868 quanta * sizeof(struct subtask), GFP_ATOMIC);
869
870 if (!param)
871 return -ENOMEM;
872
873 param->quanta = quanta;
874 param->cur = 0;
875 param->release = 0;
876 param->period = period;
877
878 param->cluster = cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]);
879
880 for (i = 0; i < quanta; i++)
881 init_subtask(param->subtasks + i, i, quanta, period);
882
883 if (t->rt_param.pfair)
884 /* get rid of stale allocation */
885 kfree(t->rt_param.pfair);
886
887 t->rt_param.pfair = param;
888
889 /* spew out some debug info */
890 dump_subtasks(t);
891
892 return 0;
893}
894
895static void pfair_init_cluster(struct pfair_cluster* cluster)
896{
897 rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
898 bheap_init(&cluster->release_queue);
899 raw_spin_lock_init(&cluster->release_lock);
900 INIT_LIST_HEAD(&cluster->topology.cpus);
901}
902
903static void cleanup_clusters(void)
904{
905 int i;
906
907 if (num_pfair_clusters)
908 kfree(pfair_clusters);
909 pfair_clusters = NULL;
910 num_pfair_clusters = 0;
911
912 /* avoid stale pointers */
913 for (i = 0; i < num_online_cpus(); i++) {
914 pstate[i]->topology.cluster = NULL;
915 printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]),
916 pstate[i]->missed_updates, pstate[i]->missed_quanta);
917 }
918}
919
920static long pfair_activate_plugin(void)
921{
922 int err, i;
923 struct pfair_state* state;
924	struct pfair_cluster* cluster;
925 quanta_t now;
926 int cluster_size;
927 struct cluster_cpu* cpus[NR_CPUS];
928 struct scheduling_cluster* clust[NR_CPUS];
929
930 cluster_size = get_cluster_size(pfair_cluster_level);
931
932 if (cluster_size <= 0 || num_online_cpus() % cluster_size != 0)
933 return -EINVAL;
934
935 num_pfair_clusters = num_online_cpus() / cluster_size;
936
937 pfair_clusters = kzalloc(num_pfair_clusters * sizeof(struct pfair_cluster), GFP_ATOMIC);
938 if (!pfair_clusters) {
939 num_pfair_clusters = 0;
940 printk(KERN_ERR "Could not allocate Pfair clusters!\n");
941 return -ENOMEM;
942 }
943
944 state = &__get_cpu_var(pfair_state);
945 now = current_quantum(state);
946 TRACE("Activating PFAIR at q=%lu\n", now);
947
948 for (i = 0; i < num_pfair_clusters; i++) {
949 cluster = &pfair_clusters[i];
950 pfair_init_cluster(cluster);
951 cluster->pfair_time = now;
952 clust[i] = &cluster->topology;
953#ifdef CONFIG_RELEASE_MASTER
954 cluster->pfair.release_master = atomic_read(&release_master_cpu);
955#endif
956 }
957
958 for (i = 0; i < num_online_cpus(); i++) {
959 state = &per_cpu(pfair_state, i);
960 state->cur_tick = now;
961 state->local_tick = now;
962 state->missed_quanta = 0;
963 state->missed_updates = 0;
964 state->offset = cpu_stagger_offset(i);
965 printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus());
966 cpus[i] = &state->topology;
967 }
968
969 err = assign_cpus_to_clusters(pfair_cluster_level, clust, num_pfair_clusters,
970 cpus, num_online_cpus());
971
972 if (err < 0)
973 cleanup_clusters();
974
975 return err;
976}
977
978static long pfair_deactivate_plugin(void)
979{
980 cleanup_clusters();
981 return 0;
982}
983
984/* Plugin object */
985static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
986 .plugin_name = "PFAIR",
987 .tick = pfair_tick,
988 .task_new = pfair_task_new,
989 .task_exit = pfair_task_exit,
990 .schedule = pfair_schedule,
991 .task_wake_up = pfair_task_wake_up,
992 .task_block = pfair_task_block,
993 .admit_task = pfair_admit_task,
994 .release_at = pfair_release_at,
995 .complete_job = complete_job,
996 .activate_plugin = pfair_activate_plugin,
997 .deactivate_plugin = pfair_deactivate_plugin,
998};
999
1000
1001static struct proc_dir_entry *cluster_file = NULL, *pfair_dir = NULL;
1002
1003static int __init init_pfair(void)
1004{
1005 int cpu, err, fs;
1006 struct pfair_state *state;
1007
1008 /*
1009	 * initialize the pstate shortcut for per-CPU pfair state;
1010	 * this could break if someone removes a CPU while we are doing
1011	 * this initialization, or if CPUs are added or removed later,
1012	 * but we don't support CPU hotplug at the moment anyway.
1013 */
1014 pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL);
1015
1016 /* initialize CPU state */
1017 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
1018 state = &per_cpu(pfair_state, cpu);
1019 state->topology.id = cpu;
1020 state->cur_tick = 0;
1021 state->local_tick = 0;
1022 state->linked = NULL;
1023 state->local = NULL;
1024 state->scheduled = NULL;
1025 state->missed_quanta = 0;
1026 state->offset = cpu_stagger_offset(cpu);
1027 pstate[cpu] = state;
1028 }
1029
1030 pfair_clusters = NULL;
1031 num_pfair_clusters = 0;
1032
1033 err = register_sched_plugin(&pfair_plugin);
1034 if (!err) {
1035 fs = make_plugin_proc_dir(&pfair_plugin, &pfair_dir);
1036 if (!fs)
1037 cluster_file = create_cluster_file(pfair_dir, &pfair_cluster_level);
1038 else
1039 printk(KERN_ERR "Could not allocate PFAIR procfs dir.\n");
1040 }
1041
1042 return err;
1043}
1044
1045static void __exit clean_pfair(void)
1046{
1047 kfree(pstate);
1048
1049 if (cluster_file)
1050 remove_proc_entry("cluster", pfair_dir);
1051 if (pfair_dir)
1052 remove_plugin_proc_dir(&pfair_plugin);
1053}
1054
1055module_init(init_pfair);
1056module_exit(clean_pfair);
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
new file mode 100644
index 000000000000..74a77e7a4959
--- /dev/null
+++ b/litmus/sched_pfp.c
@@ -0,0 +1,1542 @@
1/*
2 * litmus/sched_pfp.c
3 *
4 * Implementation of partitioned fixed-priority scheduling.
5 * Based on PSN-EDF.
6 */
7
8#include <linux/percpu.h>
9#include <linux/sched.h>
10#include <linux/list.h>
11#include <linux/spinlock.h>
12#include <linux/module.h>
13
14#include <litmus/litmus.h>
15#include <litmus/wait.h>
16#include <litmus/jobs.h>
17#include <litmus/preempt.h>
18#include <litmus/fp_common.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22
23#include <linux/uaccess.h>
24
25
26typedef struct {
27 rt_domain_t domain;
28 struct fp_prio_queue ready_queue;
29 int cpu;
30 struct task_struct* scheduled; /* only RT tasks */
31/*
32 * scheduling lock slock
33 * protects the domain and serializes scheduling decisions
34 */
35#define slock domain.ready_lock
36
37} pfp_domain_t;
38
39DEFINE_PER_CPU(pfp_domain_t, pfp_domains);
40
41pfp_domain_t* pfp_doms[NR_CPUS];
42
43#define local_pfp (&__get_cpu_var(pfp_domains))
44#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain)
45#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu))
46#define task_dom(task) remote_dom(get_partition(task))
47#define task_pfp(task) remote_pfp(get_partition(task))
48
49/* we assume the lock is being held */
50static void preempt(pfp_domain_t *pfp)
51{
52 preempt_if_preemptable(pfp->scheduled, pfp->cpu);
53}
54
55static unsigned int priority_index(struct task_struct* t)
56{
57#ifdef CONFIG_LITMUS_LOCKING
58 if (unlikely(t->rt_param.inh_task))
59 /* use effective priority */
60 t = t->rt_param.inh_task;
61
62 if (is_priority_boosted(t)) {
63 /* zero is reserved for priority-boosted tasks */
64 return 0;
65 } else
66#endif
67 return get_priority(t);
68}
69
70
71static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks)
72{
73 pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain);
74 unsigned long flags;
75 struct task_struct* t;
76 struct bheap_node* hn;
77
78 raw_spin_lock_irqsave(&pfp->slock, flags);
79
80 while (!bheap_empty(tasks)) {
81 hn = bheap_take(fp_ready_order, tasks);
82 t = bheap2task(hn);
83 TRACE_TASK(t, "released (part:%d prio:%d)\n",
84 get_partition(t), get_priority(t));
85 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
86 }
87
88 /* do we need to preempt? */
89 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) {
90 TRACE_CUR("preempted by new release\n");
91 preempt(pfp);
92 }
93
94 raw_spin_unlock_irqrestore(&pfp->slock, flags);
95}
96
97static void pfp_domain_init(pfp_domain_t* pfp,
98 int cpu)
99{
100 fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
101 pfp->cpu = cpu;
102 pfp->scheduled = NULL;
103 fp_prio_queue_init(&pfp->ready_queue);
104}
105
106static void requeue(struct task_struct* t, pfp_domain_t *pfp)
107{
108 if (t->state != TASK_RUNNING)
109 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
110
111 set_rt_flags(t, RT_F_RUNNING);
112 if (is_released(t, litmus_clock()))
113 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
114 else
115 add_release(&pfp->domain, t); /* it has got to wait */
116}
117
118static void job_completion(struct task_struct* t, int forced)
119{
120 sched_trace_task_completion(t,forced);
121 TRACE_TASK(t, "job_completion().\n");
122
123 set_rt_flags(t, RT_F_SLEEP);
124 prepare_for_next_period(t);
125}
126
127static void pfp_tick(struct task_struct *t)
128{
129 pfp_domain_t *pfp = local_pfp;
130
131 /* Check for inconsistency. We don't need the lock for this since
132 * ->scheduled is only changed in schedule, which obviously is not
133 * executing in parallel on this CPU
134 */
135 BUG_ON(is_realtime(t) && t != pfp->scheduled);
136
137 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
138 if (!is_np(t)) {
139 litmus_reschedule_local();
140 TRACE("pfp_scheduler_tick: "
141 "%d is preemptable "
142 " => FORCE_RESCHED\n", t->pid);
143 } else if (is_user_np(t)) {
144 TRACE("pfp_scheduler_tick: "
145 "%d is non-preemptable, "
146 "preemption delayed.\n", t->pid);
147 request_exit_np(t);
148 }
149 }
150}
151
152static struct task_struct* pfp_schedule(struct task_struct * prev)
153{
154 pfp_domain_t* pfp = local_pfp;
155 struct task_struct* next;
156
157 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;
158
159 raw_spin_lock(&pfp->slock);
160
161 /* sanity checking
162	 * unlike G-EDF, when a task exits (dead),
163	 * pfp->scheduled may be NULL and prev _is_ a real-time task
164 */
165 BUG_ON(pfp->scheduled && pfp->scheduled != prev);
166 BUG_ON(pfp->scheduled && !is_realtime(prev));
167
168 /* (0) Determine state */
169 exists = pfp->scheduled != NULL;
170 blocks = exists && !is_running(pfp->scheduled);
171 out_of_time = exists &&
172 budget_enforced(pfp->scheduled) &&
173 budget_exhausted(pfp->scheduled);
174 np = exists && is_np(pfp->scheduled);
175 sleep = exists && get_rt_flags(pfp->scheduled) == RT_F_SLEEP;
176 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
177 preempt = migrate || fp_preemption_needed(&pfp->ready_queue, prev);
178
179 /* If we need to preempt do so.
180 * The following checks set resched to 1 in case of special
181 * circumstances.
182 */
183 resched = preempt;
184
185 /* If a task blocks we have no choice but to reschedule.
186 */
187 if (blocks)
188 resched = 1;
189
190 /* Request a sys_exit_np() call if we would like to preempt but cannot.
191 * Multiple calls to request_exit_np() don't hurt.
192 */
193 if (np && (out_of_time || preempt || sleep))
194 request_exit_np(pfp->scheduled);
195
196 /* Any task that is preemptable and either exhausts its execution
197 * budget or wants to sleep completes. We may have to reschedule after
198 * this.
199 */
200 if (!np && (out_of_time || sleep) && !blocks && !migrate) {
201 job_completion(pfp->scheduled, !sleep);
202 resched = 1;
203 }
204
205 /* The final scheduling decision. Do we need to switch for some reason?
206 * Switch if we are in RT mode and have no task or if we need to
207 * resched.
208 */
209 next = NULL;
210 if ((!np || blocks) && (resched || !exists)) {
211 /* When preempting a task that does not block, then
212 * re-insert it into either the ready queue or the
213 * release queue (if it completed). requeue() picks
214 * the appropriate queue.
215 */
216 if (pfp->scheduled && !blocks && !migrate)
217 requeue(pfp->scheduled, pfp);
218 next = fp_prio_take(&pfp->ready_queue);
219 } else
220 /* Only override Linux scheduler if we have a real-time task
221 * scheduled that needs to continue.
222 */
223 if (exists)
224 next = prev;
225
226 if (next) {
227 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
228 set_rt_flags(next, RT_F_RUNNING);
229 } else {
230 TRACE("becoming idle at %llu\n", litmus_clock());
231 }
232
233 pfp->scheduled = next;
234 sched_state_task_picked();
235 raw_spin_unlock(&pfp->slock);
236
237 return next;
238}
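/* Decision logic above, condensed: based on the state bits (exists, blocks,
 * out_of_time, np, sleep, migrate, preempt), a non-preemptable job that
 * should yield is asked to call sys_exit_np(); a preemptable job that
 * exhausted its budget or signalled completion goes through job_completion()
 * and is requeued; and whenever a reschedule is required, the next task is
 * taken from the head of the fixed-priority ready queue via fp_prio_take().
 */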
239
240#ifdef CONFIG_LITMUS_LOCKING
241
242/* prev is no longer scheduled --- see if it needs to migrate */
243static void pfp_finish_switch(struct task_struct *prev)
244{
245 pfp_domain_t *to;
246
247 if (is_realtime(prev) &&
248 is_running(prev) &&
249 get_partition(prev) != smp_processor_id()) {
250 TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
251 smp_processor_id(), get_partition(prev));
252
253 to = task_pfp(prev);
254
255 raw_spin_lock(&to->slock);
256
257 TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
258 requeue(prev, to);
259 if (fp_preemption_needed(&to->ready_queue, to->scheduled))
260 preempt(to);
261
262 raw_spin_unlock(&to->slock);
263
264 }
265}
266
267#endif
268
269/* Prepare a task for running in RT mode
270 */
271static void pfp_task_new(struct task_struct * t, int on_rq, int running)
272{
273 pfp_domain_t* pfp = task_pfp(t);
274 unsigned long flags;
275
276 TRACE_TASK(t, "P-FP: task new, cpu = %d\n",
277 t->rt_param.task_params.cpu);
278
279 /* setup job parameters */
280 release_at(t, litmus_clock());
281
282 /* The task should be running in the queue, otherwise signal
283 * code will try to wake it up with fatal consequences.
284 */
285 raw_spin_lock_irqsave(&pfp->slock, flags);
286 if (running) {
287 /* there shouldn't be anything else running at the time */
288 BUG_ON(pfp->scheduled);
289 pfp->scheduled = t;
290 } else {
291 requeue(t, pfp);
292 /* maybe we have to reschedule */
293 preempt(pfp);
294 }
295 raw_spin_unlock_irqrestore(&pfp->slock, flags);
296}
297
298static void pfp_task_wake_up(struct task_struct *task)
299{
300 unsigned long flags;
301 pfp_domain_t* pfp = task_pfp(task);
302 lt_t now;
303
304 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
305 raw_spin_lock_irqsave(&pfp->slock, flags);
306
307#ifdef CONFIG_LITMUS_LOCKING
308 /* Should only be queued when processing a fake-wake up due to a
309 * migration-related state change. */
310 if (unlikely(is_queued(task))) {
311 TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
312 goto out_unlock;
313 }
314#else
315 BUG_ON(is_queued(task));
316#endif
317 now = litmus_clock();
318 if (is_tardy(task, now)
319#ifdef CONFIG_LITMUS_LOCKING
320 /* We need to take suspensions because of semaphores into
321 * account! If a job resumes after being suspended due to acquiring
322 * a semaphore, it should never be treated as a new job release.
323 */
324 && !is_priority_boosted(task)
325#endif
326 ) {
327 /* new sporadic release */
328 release_at(task, now);
329 sched_trace_task_release(task);
330 }
331
332 /* Only add to ready queue if it is not the currently-scheduled
333 * task. This could be the case if a task was woken up concurrently
334 * on a remote CPU before the executing CPU got around to actually
335 * de-scheduling the task, i.e., wake_up() raced with schedule()
336 * and won. Also, don't requeue if it is still queued, which can
337	 * happen under the DPCP due to wake-ups racing with migrations.
338 */
339 if (pfp->scheduled != task)
340 requeue(task, pfp);
341
342out_unlock:
343 raw_spin_unlock_irqrestore(&pfp->slock, flags);
344 TRACE_TASK(task, "wake up done\n");
345}
346
347static void pfp_task_block(struct task_struct *t)
348{
349 /* only running tasks can block, thus t is in no queue */
350 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
351
352 BUG_ON(!is_realtime(t));
353
354 /* If this task blocked normally, it shouldn't be queued. The exception is
355 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
356 * This should only happen if the DPCP is being used.
357 */
358#ifdef CONFIG_LITMUS_LOCKING
359 if (unlikely(is_queued(t)))
360 TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
361#else
362 BUG_ON(is_queued(t));
363#endif
364}
365
366static void pfp_task_exit(struct task_struct * t)
367{
368 unsigned long flags;
369 pfp_domain_t* pfp = task_pfp(t);
370 rt_domain_t* dom;
371
372 raw_spin_lock_irqsave(&pfp->slock, flags);
373 if (is_queued(t)) {
374 BUG(); /* This currently doesn't work. */
375 /* dequeue */
376 dom = task_dom(t);
377 remove(dom, t);
378 }
379 if (pfp->scheduled == t) {
380 pfp->scheduled = NULL;
381 preempt(pfp);
382 }
383 TRACE_TASK(t, "RIP, now reschedule\n");
384
385 raw_spin_unlock_irqrestore(&pfp->slock, flags);
386}
387
388#ifdef CONFIG_LITMUS_LOCKING
389
390#include <litmus/fdso.h>
391#include <litmus/srp.h>
392
393static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
394{
395 BUG_ON(pfp->scheduled == t && is_queued(t));
396 if (is_queued(t))
397 fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
398}
399
400static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t,
401 struct task_struct* prio_inh)
402{
403 int requeue;
404
405 if (!t || t->rt_param.inh_task == prio_inh) {
406 /* no update required */
407 if (t)
408 TRACE_TASK(t, "no prio-inh update required\n");
409 return;
410 }
411
412 requeue = is_queued(t);
413 TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue);
414
415 if (requeue)
416 /* first remove */
417 fp_dequeue(pfp, t);
418
419 t->rt_param.inh_task = prio_inh;
420
421 if (requeue)
422 /* add again to the right queue */
423 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
424}
425
426static int effective_agent_priority(int prio)
427{
428 /* make sure agents have higher priority */
429 return prio - LITMUS_MAX_PRIORITY;
430}
431
432static lt_t prio_point(int eprio)
433{
434 /* make sure we have non-negative prio points */
435 return eprio + LITMUS_MAX_PRIORITY;
436}
437
438static int prio_from_point(lt_t prio_point)
439{
440 return ((int) prio_point) - LITMUS_MAX_PRIORITY;
441}
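/* The three helpers above map locking-agent priorities into a band that is
 * strictly higher than all regular task priorities: an agent runs at
 * prio - LITMUS_MAX_PRIORITY (numerically smaller, hence higher priority in
 * a fixed-priority setting), while prio_point() and prio_from_point() shift
 * such values into and out of a non-negative lt_t representation.
 */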
442
443static void boost_priority(struct task_struct* t, lt_t priority_point)
444{
445 unsigned long flags;
446 pfp_domain_t* pfp = task_pfp(t);
447
448 raw_spin_lock_irqsave(&pfp->slock, flags);
449
450
451 TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());
452
453 tsk_rt(t)->priority_boosted = 1;
454 /* tie-break by protocol-specific priority point */
455 tsk_rt(t)->boost_start_time = priority_point;
456
457 if (pfp->scheduled != t) {
458 /* holder may be queued: first stop queue changes */
459 raw_spin_lock(&pfp->domain.release_lock);
460 if (is_queued(t) &&
461 /* If it is queued, then we need to re-order. */
462 bheap_decrease(fp_ready_order, tsk_rt(t)->heap_node) &&
463 /* If we bubbled to the top, then we need to check for preemptions. */
464 fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
465 preempt(pfp);
466 raw_spin_unlock(&pfp->domain.release_lock);
467 } /* else: nothing to do since the job is not queued while scheduled */
468
469 raw_spin_unlock_irqrestore(&pfp->slock, flags);
470}
471
472static void unboost_priority(struct task_struct* t)
473{
474 unsigned long flags;
475 pfp_domain_t* pfp = task_pfp(t);
476 lt_t now;
477
478 raw_spin_lock_irqsave(&pfp->slock, flags);
479 now = litmus_clock();
480
481 /* assumption: this only happens when the job is scheduled */
482 BUG_ON(pfp->scheduled != t);
483
484 TRACE_TASK(t, "priority restored at %llu\n", now);
485
486 /* priority boosted jobs must be scheduled */
487 BUG_ON(pfp->scheduled != t);
488
489 tsk_rt(t)->priority_boosted = 0;
490 tsk_rt(t)->boost_start_time = 0;
491
492 /* check if this changes anything */
493 if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
494 preempt(pfp);
495
496 raw_spin_unlock_irqrestore(&pfp->slock, flags);
497}
498
499/* ******************** SRP support ************************ */
500
501static unsigned int pfp_get_srp_prio(struct task_struct* t)
502{
503 return get_priority(t);
504}
505
506/* ******************** FMLP support ********************** */
507
508struct fmlp_semaphore {
509 struct litmus_lock litmus_lock;
510
511 /* current resource holder */
512 struct task_struct *owner;
513
514 /* FIFO queue of waiting tasks */
515 wait_queue_head_t wait;
516};
517
518static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
519{
520 return container_of(lock, struct fmlp_semaphore, litmus_lock);
521}
522int pfp_fmlp_lock(struct litmus_lock* l)
523{
524 struct task_struct* t = current;
525 struct fmlp_semaphore *sem = fmlp_from_lock(l);
526 wait_queue_t wait;
527 unsigned long flags;
528 lt_t time_of_request;
529
530 if (!is_realtime(t))
531 return -EPERM;
532
533 spin_lock_irqsave(&sem->wait.lock, flags);
534
535 /* tie-break by this point in time */
536 time_of_request = litmus_clock();
537
538 /* Priority-boost ourself *before* we suspend so that
539 * our priority is boosted when we resume. */
540 boost_priority(t, time_of_request);
541
542 if (sem->owner) {
543 /* resource is not free => must suspend and wait */
544
545 init_waitqueue_entry(&wait, t);
546
547 /* FIXME: interruptible would be nice some day */
548 set_task_state(t, TASK_UNINTERRUPTIBLE);
549
550 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
551
552 TS_LOCK_SUSPEND;
553
554 /* release lock before sleeping */
555 spin_unlock_irqrestore(&sem->wait.lock, flags);
556
557 /* We depend on the FIFO order. Thus, we don't need to recheck
558 * when we wake up; we are guaranteed to have the lock since
559 * there is only one wake up per release.
560 */
561
562 schedule();
563
564 TS_LOCK_RESUME;
565
566 /* Since we hold the lock, no other task will change
567 * ->owner. We can thus check it without acquiring the spin
568 * lock. */
569 BUG_ON(sem->owner != t);
570 } else {
571 /* it's ours now */
572 sem->owner = t;
573
574 spin_unlock_irqrestore(&sem->wait.lock, flags);
575 }
576
577 return 0;
578}
579
580int pfp_fmlp_unlock(struct litmus_lock* l)
581{
582 struct task_struct *t = current, *next;
583 struct fmlp_semaphore *sem = fmlp_from_lock(l);
584 unsigned long flags;
585 int err = 0;
586
587 spin_lock_irqsave(&sem->wait.lock, flags);
588
589 if (sem->owner != t) {
590 err = -EINVAL;
591 goto out;
592 }
593
594 /* we lose the benefit of priority boosting */
595
596 unboost_priority(t);
597
598 /* check if there are jobs waiting for this resource */
599 next = __waitqueue_remove_first(&sem->wait);
600 if (next) {
601		/* next becomes the resource holder */
602 sem->owner = next;
603
604 /* Wake up next. The waiting job is already priority-boosted. */
605 wake_up_process(next);
606 } else
607 /* resource becomes available */
608 sem->owner = NULL;
609
610out:
611 spin_unlock_irqrestore(&sem->wait.lock, flags);
612 return err;
613}
614
615int pfp_fmlp_close(struct litmus_lock* l)
616{
617 struct task_struct *t = current;
618 struct fmlp_semaphore *sem = fmlp_from_lock(l);
619 unsigned long flags;
620
621 int owner;
622
623 spin_lock_irqsave(&sem->wait.lock, flags);
624
625 owner = sem->owner == t;
626
627 spin_unlock_irqrestore(&sem->wait.lock, flags);
628
629 if (owner)
630 pfp_fmlp_unlock(l);
631
632 return 0;
633}
634
635void pfp_fmlp_free(struct litmus_lock* lock)
636{
637 kfree(fmlp_from_lock(lock));
638}
639
640static struct litmus_lock_ops pfp_fmlp_lock_ops = {
641 .close = pfp_fmlp_close,
642 .lock = pfp_fmlp_lock,
643 .unlock = pfp_fmlp_unlock,
644 .deallocate = pfp_fmlp_free,
645};
646
647static struct litmus_lock* pfp_new_fmlp(void)
648{
649 struct fmlp_semaphore* sem;
650
651 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
652 if (!sem)
653 return NULL;
654
655 sem->owner = NULL;
656 init_waitqueue_head(&sem->wait);
657 sem->litmus_lock.ops = &pfp_fmlp_lock_ops;
658
659 return &sem->litmus_lock;
660}
661
662/* ******************** MPCP support ********************** */
663
664struct mpcp_semaphore {
665 struct litmus_lock litmus_lock;
666
667 /* current resource holder */
668 struct task_struct *owner;
669
670 /* priority queue of waiting tasks */
671 wait_queue_head_t wait;
672
673 /* priority ceiling per cpu */
674 unsigned int prio_ceiling[NR_CPUS];
675
676 /* should jobs spin "virtually" for this resource? */
677 int vspin;
678};
679
680#define OMEGA_CEILING UINT_MAX
681
682/* Since jobs spin "virtually" while waiting to acquire a lock,
683 * they first must acquire a local per-cpu resource.
684 */
685static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
686static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
687
688/* called with preemptions off <=> no local modifications */
689static void mpcp_vspin_enter(void)
690{
691 struct task_struct* t = current;
692
693 while (1) {
694 if (__get_cpu_var(mpcpvs_vspin) == NULL) {
695 /* good, we get to issue our request */
696 __get_cpu_var(mpcpvs_vspin) = t;
697 break;
698 } else {
699 /* some job is spinning => enqueue in request queue */
700 prio_wait_queue_t wait;
701 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
702 unsigned long flags;
703
704 /* ordered by regular priority */
705 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
706
707 spin_lock_irqsave(&vspin->lock, flags);
708
709 set_task_state(t, TASK_UNINTERRUPTIBLE);
710
711 __add_wait_queue_prio_exclusive(vspin, &wait);
712
713 spin_unlock_irqrestore(&vspin->lock, flags);
714
715 TS_LOCK_SUSPEND;
716
717 preempt_enable_no_resched();
718
719 schedule();
720
721 preempt_disable();
722
723 TS_LOCK_RESUME;
724 /* Recheck if we got it --- some higher-priority process might
725 * have swooped in. */
726 }
727 }
728 /* ok, now it is ours */
729}
730
731/* called with preemptions off */
732static void mpcp_vspin_exit(void)
733{
734 struct task_struct* t = current, *next;
735 unsigned long flags;
736 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
737
738 BUG_ON(__get_cpu_var(mpcpvs_vspin) != t);
739
740 /* no spinning job */
741 __get_cpu_var(mpcpvs_vspin) = NULL;
742
743 /* see if anyone is waiting for us to stop "spinning" */
744 spin_lock_irqsave(&vspin->lock, flags);
745 next = __waitqueue_remove_first(vspin);
746
747 if (next)
748 wake_up_process(next);
749
750 spin_unlock_irqrestore(&vspin->lock, flags);
751}
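/* Illustrative usage sketch: when sem->vspin is set, the two helpers above
 * bracket a global MPCP request, roughly as done by pfp_mpcp_lock() and
 * pfp_mpcp_unlock() below:
 *
 *     preempt_disable();
 *     mpcp_vspin_enter();      -- serialize local jobs ("virtual spinning")
 *     ... issue the global request, possibly suspending on sem->wait ...
 *     ... critical section ...
 *     mpcp_vspin_exit();       -- wake the next locally "spinning" job
 *     preempt_enable();
 */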
752
753static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
754{
755 return container_of(lock, struct mpcp_semaphore, litmus_lock);
756}
757
758int pfp_mpcp_lock(struct litmus_lock* l)
759{
760 struct task_struct* t = current;
761 struct mpcp_semaphore *sem = mpcp_from_lock(l);
762 prio_wait_queue_t wait;
763 unsigned long flags;
764
765 if (!is_realtime(t))
766 return -EPERM;
767
768 preempt_disable();
769
770 if (sem->vspin)
771 mpcp_vspin_enter();
772
773 /* Priority-boost ourself *before* we suspend so that
774 * our priority is boosted when we resume. Use the priority
775 * ceiling for the local partition. */
776 boost_priority(t, sem->prio_ceiling[get_partition(t)]);
777
778 spin_lock_irqsave(&sem->wait.lock, flags);
779
780 preempt_enable_no_resched();
781
782 if (sem->owner) {
783 /* resource is not free => must suspend and wait */
784
785 /* ordered by regular priority */
786 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
787
788 /* FIXME: interruptible would be nice some day */
789 set_task_state(t, TASK_UNINTERRUPTIBLE);
790
791 __add_wait_queue_prio_exclusive(&sem->wait, &wait);
792
793 TS_LOCK_SUSPEND;
794
795 /* release lock before sleeping */
796 spin_unlock_irqrestore(&sem->wait.lock, flags);
797
798	/* We depend on the priority-ordered, exclusive wait queue: there is
799	 * exactly one wake-up per release, so when we resume we are guaranteed
800	 * to hold the lock and do not need to recheck.
801	 */
802
803 schedule();
804
805 TS_LOCK_RESUME;
806
807 /* Since we hold the lock, no other task will change
808 * ->owner. We can thus check it without acquiring the spin
809 * lock. */
810 BUG_ON(sem->owner != t);
811 } else {
812 /* it's ours now */
813 sem->owner = t;
814
815 spin_unlock_irqrestore(&sem->wait.lock, flags);
816 }
817
818 return 0;
819}
820
821int pfp_mpcp_unlock(struct litmus_lock* l)
822{
823 struct task_struct *t = current, *next;
824 struct mpcp_semaphore *sem = mpcp_from_lock(l);
825 unsigned long flags;
826 int err = 0;
827
828 spin_lock_irqsave(&sem->wait.lock, flags);
829
830 if (sem->owner != t) {
831 err = -EINVAL;
832 goto out;
833 }
834
835 /* we lose the benefit of priority boosting */
836
837 unboost_priority(t);
838
839 /* check if there are jobs waiting for this resource */
840 next = __waitqueue_remove_first(&sem->wait);
841 if (next) {
842		/* next becomes the resource holder */
843 sem->owner = next;
844
845 /* Wake up next. The waiting job is already priority-boosted. */
846 wake_up_process(next);
847 } else
848 /* resource becomes available */
849 sem->owner = NULL;
850
851out:
852 spin_unlock_irqrestore(&sem->wait.lock, flags);
853
854 if (sem->vspin && err == 0) {
855 preempt_disable();
856 mpcp_vspin_exit();
857 preempt_enable();
858 }
859
860 return err;
861}
862
863int pfp_mpcp_open(struct litmus_lock* l, void* config)
864{
865 struct task_struct *t = current;
866 struct mpcp_semaphore *sem = mpcp_from_lock(l);
867 int cpu, local_cpu;
868 unsigned long flags;
869
870 if (!is_realtime(t))
871 /* we need to know the real-time priority */
872 return -EPERM;
873
874 local_cpu = get_partition(t);
875
876 spin_lock_irqsave(&sem->wait.lock, flags);
877
878 for (cpu = 0; cpu < NR_CPUS; cpu++)
879 if (cpu != local_cpu)
880 {
881 sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu],
882 get_priority(t));
883 TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n",
884 sem, sem->prio_ceiling[cpu], cpu);
885 }
886
887 spin_unlock_irqrestore(&sem->wait.lock, flags);
888
889 return 0;
890}
891
892int pfp_mpcp_close(struct litmus_lock* l)
893{
894 struct task_struct *t = current;
895 struct mpcp_semaphore *sem = mpcp_from_lock(l);
896 unsigned long flags;
897
898 int owner;
899
900 spin_lock_irqsave(&sem->wait.lock, flags);
901
902 owner = sem->owner == t;
903
904 spin_unlock_irqrestore(&sem->wait.lock, flags);
905
906 if (owner)
907 pfp_mpcp_unlock(l);
908
909 return 0;
910}
911
912void pfp_mpcp_free(struct litmus_lock* lock)
913{
914 kfree(mpcp_from_lock(lock));
915}
916
917static struct litmus_lock_ops pfp_mpcp_lock_ops = {
918 .close = pfp_mpcp_close,
919 .lock = pfp_mpcp_lock,
920 .open = pfp_mpcp_open,
921 .unlock = pfp_mpcp_unlock,
922 .deallocate = pfp_mpcp_free,
923};
924
925static struct litmus_lock* pfp_new_mpcp(int vspin)
926{
927 struct mpcp_semaphore* sem;
928 int cpu;
929
930 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
931 if (!sem)
932 return NULL;
933
934 sem->owner = NULL;
935 init_waitqueue_head(&sem->wait);
936 sem->litmus_lock.ops = &pfp_mpcp_lock_ops;
937
938 for (cpu = 0; cpu < NR_CPUS; cpu++)
939 sem->prio_ceiling[cpu] = OMEGA_CEILING;
940
941 /* mark as virtual spinning */
942 sem->vspin = vspin;
943
944 return &sem->litmus_lock;
945}
946
947
948/* ******************** PCP support ********************** */
949
950
951struct pcp_semaphore {
952 struct list_head ceiling;
953
954 /* current resource holder */
955 struct task_struct *owner;
956
957 /* priority ceiling --- can be negative due to DPCP support */
958 int prio_ceiling;
959
960 /* on which processor is this PCP semaphore allocated? */
961 int on_cpu;
962};
963
964struct pcp_state {
965 struct list_head system_ceiling;
966
967 /* highest-priority waiting task */
968 struct task_struct* hp_waiter;
969
970 /* list of jobs waiting to get past the system ceiling */
971 wait_queue_head_t ceiling_blocked;
972};
973
974static void pcp_init_state(struct pcp_state* s)
975{
976 INIT_LIST_HEAD(&s->system_ceiling);
977 s->hp_waiter = NULL;
978 init_waitqueue_head(&s->ceiling_blocked);
979}
980
981static DEFINE_PER_CPU(struct pcp_state, pcp_state);
982
983/* assumes preemptions are off */
984static struct pcp_semaphore* pcp_get_ceiling(void)
985{
986 struct list_head* top = __get_cpu_var(pcp_state).system_ceiling.next;
987
988 if (top)
989 return list_entry(top, struct pcp_semaphore, ceiling);
990 else
991 return NULL;
992}
993
994/* assumes preempt off */
995static void pcp_add_ceiling(struct pcp_semaphore* sem)
996{
997 struct list_head *pos;
998 struct list_head *in_use = &__get_cpu_var(pcp_state).system_ceiling;
999 struct pcp_semaphore* held;
1000
1001 BUG_ON(sem->on_cpu != smp_processor_id());
1002 BUG_ON(in_list(&sem->ceiling));
1003
1004 list_for_each(pos, in_use) {
1005 held = list_entry(pos, struct pcp_semaphore, ceiling);
1006 if (held->prio_ceiling >= sem->prio_ceiling) {
1007 __list_add(&sem->ceiling, pos->prev, pos);
1008 return;
1009 }
1010 }
1011
1012 /* we hit the end of the list */
1013
1014 list_add_tail(&sem->ceiling, in_use);
1015}
1016
1017/* assumes preempt off */
1018static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling,
1019 struct task_struct* task,
1020 int effective_prio)
1021{
1022 return ceiling == NULL ||
1023 ceiling->prio_ceiling > effective_prio ||
1024 ceiling->owner == task;
1025}
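/* For illustration: suppose the ceiling list on this CPU is headed by a
 * semaphore with prio_ceiling 2 (smaller value = higher priority) owned by
 * job A. Then, for another job B:
 *
 *     pcp_exceeds_ceiling(ceiling, B, 4) -> false  (4 does not beat ceiling 2; B blocks)
 *     pcp_exceeds_ceiling(ceiling, B, 1) -> true   (1 beats ceiling 2; B may lock)
 *     pcp_exceeds_ceiling(ceiling, A, 4) -> true   (A owns the ceiling resource itself)
 */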
1026
1027/* assumes preempt off */
1028static void pcp_priority_inheritance(void)
1029{
1030 unsigned long flags;
1031 pfp_domain_t* pfp = local_pfp;
1032
1033 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1034 struct task_struct *blocker, *blocked;
1035
1036 blocker = ceiling ? ceiling->owner : NULL;
1037 blocked = __get_cpu_var(pcp_state).hp_waiter;
1038
1039 raw_spin_lock_irqsave(&pfp->slock, flags);
1040
1041 /* Current is no longer inheriting anything by default. This should be
1042 * the currently scheduled job, and hence not currently queued. */
1043 BUG_ON(current != pfp->scheduled);
1044
1045 fp_set_prio_inh(pfp, current, NULL);
1046 fp_set_prio_inh(pfp, blocked, NULL);
1047 fp_set_prio_inh(pfp, blocker, NULL);
1048
1049
1050 /* Let blocking job inherit priority of blocked job, if required. */
1051 if (blocker && blocked &&
1052 fp_higher_prio(blocked, blocker)) {
1053 TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
1054 blocked->comm, blocked->pid,
1055 get_priority(blocker), get_priority(blocked));
1056 fp_set_prio_inh(pfp, blocker, blocked);
1057 }
1058
1059 /* check if anything changed */
1060 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
1061 preempt(pfp);
1062
1063 raw_spin_unlock_irqrestore(&pfp->slock, flags);
1064}
1065
1066/* called with preemptions off */
1067static void pcp_raise_ceiling(struct pcp_semaphore* sem,
1068 int effective_prio)
1069{
1070 struct task_struct* t = current;
1071 struct pcp_semaphore* ceiling;
1072 prio_wait_queue_t wait;
1073 unsigned int waiting_higher_prio;
1074
1075 do {
1076 ceiling = pcp_get_ceiling();
1077 if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
1078 break;
1079
1080 TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
1081 sem, ceiling->owner->comm, ceiling->owner->pid);
1082
1083 /* we need to wait until the ceiling is lowered */
1084
1085 /* enqueue in priority order */
1086 init_prio_waitqueue_entry(&wait, t, prio_point(effective_prio));
1087 set_task_state(t, TASK_UNINTERRUPTIBLE);
1088 waiting_higher_prio = add_wait_queue_prio_exclusive(
1089 &__get_cpu_var(pcp_state).ceiling_blocked, &wait);
1090
1091 if (waiting_higher_prio == 0) {
1092 TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");
1093
1094 /* we are the new highest-priority waiting job
1095 * => update inheritance */
1096 __get_cpu_var(pcp_state).hp_waiter = t;
1097 pcp_priority_inheritance();
1098 }
1099
1100 TS_LOCK_SUSPEND;
1101
1102 preempt_enable_no_resched();
1103 schedule();
1104 preempt_disable();
1105
1106 /* pcp_resume_unblocked() removed us from wait queue */
1107
1108 TS_LOCK_RESUME;
1109 } while(1);
1110
1111 TRACE_CUR("PCP got the ceiling and sem %p\n", sem);
1112
1113 /* We are good to go. The semaphore should be available. */
1114 BUG_ON(sem->owner != NULL);
1115
1116 sem->owner = t;
1117
1118 pcp_add_ceiling(sem);
1119}
1120
1121static void pcp_resume_unblocked(void)
1122{
1123 wait_queue_head_t *blocked = &__get_cpu_var(pcp_state).ceiling_blocked;
1124 unsigned long flags;
1125 prio_wait_queue_t* q;
1126 struct task_struct* t = NULL;
1127
1128 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1129
1130 spin_lock_irqsave(&blocked->lock, flags);
1131
1132 while (waitqueue_active(blocked)) {
1133 /* check first == highest-priority waiting job */
1134 q = list_entry(blocked->task_list.next,
1135 prio_wait_queue_t, wq.task_list);
1136 t = (struct task_struct*) q->wq.private;
1137
1138 /* can it proceed now? => let it go */
1139 if (pcp_exceeds_ceiling(ceiling, t,
1140 prio_from_point(q->priority))) {
1141 __remove_wait_queue(blocked, &q->wq);
1142 wake_up_process(t);
1143 } else {
1144 /* We are done. Update highest-priority waiter. */
1145 __get_cpu_var(pcp_state).hp_waiter = t;
1146 goto out;
1147 }
1148 }
1149 /* If we get here, then there are no more waiting
1150 * jobs. */
1151 __get_cpu_var(pcp_state).hp_waiter = NULL;
1152out:
1153 spin_unlock_irqrestore(&blocked->lock, flags);
1154}
1155
1156/* assumes preempt off */
1157static void pcp_lower_ceiling(struct pcp_semaphore* sem)
1158{
1159 BUG_ON(!in_list(&sem->ceiling));
1160 BUG_ON(sem->owner != current);
1161 BUG_ON(sem->on_cpu != smp_processor_id());
1162
1163 /* remove from ceiling list */
1164 list_del(&sem->ceiling);
1165
1166 /* release */
1167 sem->owner = NULL;
1168
1169 TRACE_CUR("PCP released sem %p\n", sem);
1170
1171 /* Wake up all ceiling-blocked jobs that now pass the ceiling. */
1172 pcp_resume_unblocked();
1173
1174 pcp_priority_inheritance();
1175}
1176
1177static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
1178 int effective_prio)
1179{
1180 /* This needs to be synchronized on something.
1181 * Might as well use waitqueue lock for the processor.
1182	 * We assume this happens only before the task set starts execution
1183	 * (i.e., during initialization), but it may happen on multiple processors
1184 * at the same time.
1185 */
1186 unsigned long flags;
1187
1188 struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu);
1189
1190 spin_lock_irqsave(&s->ceiling_blocked.lock, flags);
1191
1192 sem->prio_ceiling = min(sem->prio_ceiling, effective_prio);
1193
1194 spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags);
1195}
1196
1197static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
1198{
1199 sem->owner = NULL;
1200 INIT_LIST_HEAD(&sem->ceiling);
1201 sem->prio_ceiling = INT_MAX;
1202 sem->on_cpu = cpu;
1203}
1204
1205
1206/* ******************** DPCP support ********************** */
1207
1208struct dpcp_semaphore {
1209 struct litmus_lock litmus_lock;
1210 struct pcp_semaphore pcp;
1211 int owner_cpu;
1212};
1213
1214static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
1215{
1216 return container_of(lock, struct dpcp_semaphore, litmus_lock);
1217}
1218
1219/* called with preemptions disabled */
1220static void pfp_migrate_to(int target_cpu)
1221{
1222 struct task_struct* t = current;
1223 pfp_domain_t *from;
1224
1225 if (get_partition(t) == target_cpu)
1226 return;
1227
1228 /* make sure target_cpu makes sense */
1229 BUG_ON(!cpu_online(target_cpu));
1230
1231 local_irq_disable();
1232
1233 /* scheduled task should not be in any ready or release queue */
1234 BUG_ON(is_queued(t));
1235
1236	/* lock the source domain while we switch the task's partition */
1237 from = task_pfp(t);
1238
1239 raw_spin_lock(&from->slock);
1240
1241 /* switch partitions */
1242 tsk_rt(t)->task_params.cpu = target_cpu;
1243
1244 raw_spin_unlock(&from->slock);
1245
1246 /* Don't trace scheduler costs as part of
1247 * locking overhead. Scheduling costs are accounted for
1248 * explicitly. */
1249 TS_LOCK_SUSPEND;
1250
1251 local_irq_enable();
1252 preempt_enable_no_resched();
1253
1254 /* deschedule to be migrated */
1255 schedule();
1256
1257 /* we are now on the target processor */
1258 preempt_disable();
1259
1260 /* start recording costs again */
1261 TS_LOCK_RESUME;
1262
1263 BUG_ON(smp_processor_id() != target_cpu);
1264}
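/* Illustrative usage sketch: pfp_dpcp_lock()/pfp_dpcp_unlock() below combine
 * the helper above with the local PCP, roughly:
 *
 *     boost_priority(t, ...);               -- stay schedulable while remote
 *     pfp_migrate_to(sem->pcp.on_cpu);      -- move to the synchronization processor
 *     pcp_raise_ceiling(&sem->pcp, eprio);  -- acquire under the local PCP
 *     ... critical section ...
 *     pcp_lower_ceiling(&sem->pcp);
 *     unboost_priority(t);
 *     pfp_migrate_to(home);                 -- return to the original partition
 */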
1265
1266int pfp_dpcp_lock(struct litmus_lock* l)
1267{
1268 struct task_struct* t = current;
1269 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1270 int eprio = effective_agent_priority(get_priority(t));
1271 int from = get_partition(t);
1272 int to = sem->pcp.on_cpu;
1273
1274 if (!is_realtime(t))
1275 return -EPERM;
1276
1277 preempt_disable();
1278
1279 /* Priority-boost ourself *before* we suspend so that
1280 * our priority is boosted when we resume. */
1281
1282 boost_priority(t, get_priority(t));
1283
1284 pfp_migrate_to(to);
1285
1286 pcp_raise_ceiling(&sem->pcp, eprio);
1287
1288 /* yep, we got it => execute request */
1289 sem->owner_cpu = from;
1290
1291 preempt_enable();
1292
1293 return 0;
1294}
1295
1296int pfp_dpcp_unlock(struct litmus_lock* l)
1297{
1298 struct task_struct *t = current;
1299 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1300 int err = 0;
1301 int home;
1302
1303 preempt_disable();
1304
1305 if (sem->pcp.on_cpu != smp_processor_id() || sem->pcp.owner != t) {
1306 err = -EINVAL;
1307 goto out;
1308 }
1309
1310 home = sem->owner_cpu;
1311
1312 /* give it back */
1313 pcp_lower_ceiling(&sem->pcp);
1314
1315 /* we lose the benefit of priority boosting */
1316 unboost_priority(t);
1317
1318 pfp_migrate_to(home);
1319
1320out:
1321 preempt_enable();
1322
1323 return err;
1324}
1325
1326int pfp_dpcp_open(struct litmus_lock* l, void* __user config)
1327{
1328 struct task_struct *t = current;
1329 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1330 int cpu, eprio;
1331
1332 if (!is_realtime(t))
1333 /* we need to know the real-time priority */
1334 return -EPERM;
1335
1336 if (get_user(cpu, (int*) config))
1337 return -EFAULT;
1338
1339 /* make sure the resource location matches */
1340 if (cpu != sem->pcp.on_cpu)
1341 return -EINVAL;
1342
1343 eprio = effective_agent_priority(get_priority(t));
1344
1345 pcp_update_prio_ceiling(&sem->pcp, eprio);
1346
1347 return 0;
1348}
1349
1350int pfp_dpcp_close(struct litmus_lock* l)
1351{
1352 struct task_struct *t = current;
1353 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1354 int owner = 0;
1355
1356 preempt_disable();
1357
1358 if (sem->pcp.on_cpu == smp_processor_id())
1359 owner = sem->pcp.owner == t;
1360
1361 preempt_enable();
1362
1363 if (owner)
1364 pfp_dpcp_unlock(l);
1365
1366 return 0;
1367}
1368
1369void pfp_dpcp_free(struct litmus_lock* lock)
1370{
1371 kfree(dpcp_from_lock(lock));
1372}
1373
1374static struct litmus_lock_ops pfp_dpcp_lock_ops = {
1375 .close = pfp_dpcp_close,
1376 .lock = pfp_dpcp_lock,
1377 .open = pfp_dpcp_open,
1378 .unlock = pfp_dpcp_unlock,
1379 .deallocate = pfp_dpcp_free,
1380};
1381
1382static struct litmus_lock* pfp_new_dpcp(int on_cpu)
1383{
1384 struct dpcp_semaphore* sem;
1385
1386 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1387 if (!sem)
1388 return NULL;
1389
1390 sem->litmus_lock.ops = &pfp_dpcp_lock_ops;
1391 sem->owner_cpu = NO_CPU;
1392 pcp_init_semaphore(&sem->pcp, on_cpu);
1393
1394 return &sem->litmus_lock;
1395}
1396
1397
1398/* **** lock constructor **** */
1399
1400
1401static long pfp_allocate_lock(struct litmus_lock **lock, int type,
1402 void* __user config)
1403{
1404 int err = -ENXIO, cpu;
1405 struct srp_semaphore* srp;
1406
1407	/* P-FP currently supports the SRP for local resources and the FMLP,
1408	 * MPCP (with and without virtual spinning), and DPCP for global resources. */
1409 switch (type) {
1410 case FMLP_SEM:
1411 /* FIFO Mutex Locking Protocol */
1412 *lock = pfp_new_fmlp();
1413 if (*lock)
1414 err = 0;
1415 else
1416 err = -ENOMEM;
1417 break;
1418
1419 case MPCP_SEM:
1420		/* Multiprocessor Priority Ceiling Protocol */
1421 *lock = pfp_new_mpcp(0);
1422 if (*lock)
1423 err = 0;
1424 else
1425 err = -ENOMEM;
1426 break;
1427
1428 case MPCP_VS_SEM:
1429		/* Multiprocessor Priority Ceiling Protocol with virtual spinning */
1430 *lock = pfp_new_mpcp(1);
1431 if (*lock)
1432 err = 0;
1433 else
1434 err = -ENOMEM;
1435 break;
1436
1437 case DPCP_SEM:
1438 /* Distributed Priority Ceiling Protocol */
1439 if (get_user(cpu, (int*) config))
1440 return -EFAULT;
1441
1442 if (!cpu_online(cpu))
1443 return -EINVAL;
1444
1445 *lock = pfp_new_dpcp(cpu);
1446 if (*lock)
1447 err = 0;
1448 else
1449 err = -ENOMEM;
1450 break;
1451
1452 case SRP_SEM:
1453 /* Baker's Stack Resource Policy */
1454 srp = allocate_srp_semaphore();
1455 if (srp) {
1456 *lock = &srp->litmus_lock;
1457 err = 0;
1458 } else
1459 err = -ENOMEM;
1460 break;
1461 };
1462
1463 return err;
1464}
1465
1466#endif
1467
1468static long pfp_admit_task(struct task_struct* tsk)
1469{
1470 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
1471#ifdef CONFIG_RELEASE_MASTER
1472 /* don't allow tasks on release master CPU */
1473 task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
1474#endif
1475 get_priority(tsk) > 0)
1476 return 0;
1477 else
1478 return -EINVAL;
1479}
1480
1481static long pfp_activate_plugin(void)
1482{
1483	int cpu;
1484
1485#ifdef CONFIG_RELEASE_MASTER
1486 for_each_online_cpu(cpu) {
1487 remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
1488 }
1489#endif
1490
1491#ifdef CONFIG_LITMUS_LOCKING
1492 get_srp_prio = pfp_get_srp_prio;
1493
1494 for_each_online_cpu(cpu) {
1495 init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
1496 per_cpu(mpcpvs_vspin, cpu) = NULL;
1497
1498 pcp_init_state(&per_cpu(pcp_state, cpu));
1499 pfp_doms[cpu] = remote_pfp(cpu);
1500 }
1501
1502#endif
1503
1504 return 0;
1505}
1506
1507
1508/* Plugin object */
1509static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
1510 .plugin_name = "P-FP",
1511 .tick = pfp_tick,
1512 .task_new = pfp_task_new,
1513 .complete_job = complete_job,
1514 .task_exit = pfp_task_exit,
1515 .schedule = pfp_schedule,
1516 .task_wake_up = pfp_task_wake_up,
1517 .task_block = pfp_task_block,
1518 .admit_task = pfp_admit_task,
1519 .activate_plugin = pfp_activate_plugin,
1520#ifdef CONFIG_LITMUS_LOCKING
1521 .allocate_lock = pfp_allocate_lock,
1522 .finish_switch = pfp_finish_switch,
1523#endif
1524};
1525
1526
1527static int __init init_pfp(void)
1528{
1529 int i;
1530
1531 /* We do not really want to support cpu hotplug, do we? ;)
1532 * However, if we are so crazy to do so,
1533	 * we cannot use num_online_cpus()
1534 */
1535 for (i = 0; i < num_online_cpus(); i++) {
1536 pfp_domain_init(remote_pfp(i), i);
1537 }
1538 return register_sched_plugin(&pfp_plugin);
1539}
1540
1541module_init(init_pfp);
1542
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 000000000000..950fe5e6a1ab
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,233 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin, some dummy functions, and some helper functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9#include <linux/sched.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/preempt.h>
14#include <litmus/jobs.h>
15
16/*
17 * Generic function to trigger preemption on either local or remote cpu
18 * from scheduler plugins. The key feature is that this function is
19 * non-preemptive section aware and does not invoke the scheduler / send
20 * IPIs if the to-be-preempted task is actually non-preemptive.
21 */
22void preempt_if_preemptable(struct task_struct* t, int cpu)
23{
24	/* t is the real-time task executing on CPU cpu. If t is NULL, then
25	 * CPU cpu is currently scheduling background work.
26 */
27
28 int reschedule = 0;
29
30 if (!t)
31 /* move non-real-time task out of the way */
32 reschedule = 1;
33 else {
34 if (smp_processor_id() == cpu) {
35 /* local CPU case */
36 /* check if we need to poke userspace */
37 if (is_user_np(t))
38 /* Yes, poke it. This doesn't have to be atomic since
39 * the task is definitely not executing. */
40 request_exit_np(t);
41 else if (!is_kernel_np(t))
42 /* only if we are allowed to preempt the
43 * currently-executing task */
44 reschedule = 1;
45 } else {
46 /* Remote CPU case. Only notify if it's not a kernel
47 * NP section and if we didn't set the userspace
48 * flag. */
49 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
50 }
51 }
52 if (likely(reschedule))
53 litmus_reschedule(cpu);
54}
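/* Illustrative usage: partitioned plugins typically wrap this helper with the
 * domain's currently-scheduled task and CPU, e.g. (from sched_psn_edf.c):
 *
 *     static void preempt(psnedf_domain_t *pedf)
 *     {
 *             preempt_if_preemptable(pedf->scheduled, pedf->cpu);
 *     }
 */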
55
56
57/*************************************************************
58 * Dummy plugin functions *
59 *************************************************************/
60
61static void litmus_dummy_finish_switch(struct task_struct * prev)
62{
63}
64
65static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
66{
67 sched_state_task_picked();
68 return NULL;
69}
70
71static void litmus_dummy_tick(struct task_struct* tsk)
72{
73}
74
75static long litmus_dummy_admit_task(struct task_struct* tsk)
76{
77 printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
78 tsk->comm, tsk->pid);
79 return -EINVAL;
80}
81
82static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
83{
84}
85
86static void litmus_dummy_task_wake_up(struct task_struct *task)
87{
88}
89
90static void litmus_dummy_task_block(struct task_struct *task)
91{
92}
93
94static void litmus_dummy_task_exit(struct task_struct *task)
95{
96}
97
98static void litmus_dummy_pre_setsched(struct task_struct *task, int policy)
99{
100}
101
102
103static long litmus_dummy_complete_job(void)
104{
105 return -ENOSYS;
106}
107
108static long litmus_dummy_activate_plugin(void)
109{
110 return 0;
111}
112
113static long litmus_dummy_deactivate_plugin(void)
114{
115 return 0;
116}
117
118#ifdef CONFIG_LITMUS_LOCKING
119
120static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
121 void* __user config)
122{
123 return -ENXIO;
124}
125
126#endif
127
128
129/* The default scheduler plugin. It doesn't do anything and lets Linux do its
130 * job.
131 */
132struct sched_plugin linux_sched_plugin = {
133 .plugin_name = "Linux",
134 .tick = litmus_dummy_tick,
135 .task_new = litmus_dummy_task_new,
136 .task_exit = litmus_dummy_task_exit,
137 .task_wake_up = litmus_dummy_task_wake_up,
138 .task_block = litmus_dummy_task_block,
139 .complete_job = litmus_dummy_complete_job,
140 .schedule = litmus_dummy_schedule,
141 .finish_switch = litmus_dummy_finish_switch,
142 .activate_plugin = litmus_dummy_activate_plugin,
143 .deactivate_plugin = litmus_dummy_deactivate_plugin,
144#ifdef CONFIG_LITMUS_LOCKING
145 .allocate_lock = litmus_dummy_allocate_lock,
146#endif
147 .admit_task = litmus_dummy_admit_task
148};
149
150/*
151 * The reference to current plugin that is used to schedule tasks within
152 * the system. It stores references to the actual function implementations.
153 * It should be initialized by calling "init_***_plugin()".
154 */
155struct sched_plugin *litmus = &linux_sched_plugin;
156
157/* the list of registered scheduling plugins */
158static LIST_HEAD(sched_plugins);
159static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
160
161#define CHECK(func) {\
162 if (!plugin->func) \
163 plugin->func = litmus_dummy_ ## func;}
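/* For example, CHECK(schedule) expands (up to braces) to
 *
 *     if (!plugin->schedule)
 *             plugin->schedule = litmus_dummy_schedule;
 *
 * so any callback a plugin leaves NULL falls back to its no-op default.
 */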
164
165/* FIXME: get reference to module */
166int register_sched_plugin(struct sched_plugin* plugin)
167{
168 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
169 plugin->plugin_name);
170
171 /* make sure we don't trip over null pointers later */
172 CHECK(finish_switch);
173 CHECK(schedule);
174 CHECK(tick);
175 CHECK(task_wake_up);
176 CHECK(task_exit);
177 CHECK(task_block);
178 CHECK(task_new);
179 CHECK(complete_job);
180 CHECK(activate_plugin);
181 CHECK(deactivate_plugin);
182#ifdef CONFIG_LITMUS_LOCKING
183 CHECK(allocate_lock);
184#endif
185 CHECK(admit_task);
186 CHECK(pre_setsched);
187
188 if (!plugin->release_at)
189 plugin->release_at = release_at;
190
191 raw_spin_lock(&sched_plugins_lock);
192 list_add(&plugin->list, &sched_plugins);
193 raw_spin_unlock(&sched_plugins_lock);
194
195 return 0;
196}
197
198
199/* FIXME: reference counting, etc. */
200struct sched_plugin* find_sched_plugin(const char* name)
201{
202 struct list_head *pos;
203 struct sched_plugin *plugin;
204
205 raw_spin_lock(&sched_plugins_lock);
206 list_for_each(pos, &sched_plugins) {
207 plugin = list_entry(pos, struct sched_plugin, list);
208 if (!strcmp(plugin->plugin_name, name))
209 goto out_unlock;
210 }
211 plugin = NULL;
212
213out_unlock:
214 raw_spin_unlock(&sched_plugins_lock);
215 return plugin;
216}
217
218int print_sched_plugins(char* buf, int max)
219{
220 int count = 0;
221 struct list_head *pos;
222 struct sched_plugin *plugin;
223
224 raw_spin_lock(&sched_plugins_lock);
225 list_for_each(pos, &sched_plugins) {
226 plugin = list_entry(pos, struct sched_plugin, list);
227 count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name);
228 if (max - count <= 0)
229 break;
230 }
231 raw_spin_unlock(&sched_plugins_lock);
232 return count;
233}
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 000000000000..7b12689ab61a
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,917 @@
1/*
2 * kernel/sched_psn_edf.c
3 *
4 * Implementation of the PSN-EDF scheduler plugin.
5 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
6 *
7 * Suspensions and non-preemptable sections are supported.
8 * Priority inheritance is not supported.
9 */
10
11#include <linux/percpu.h>
12#include <linux/sched.h>
13#include <linux/list.h>
14#include <linux/spinlock.h>
15#include <linux/module.h>
16
17#include <litmus/litmus.h>
18#include <litmus/wait.h>
19#include <litmus/jobs.h>
20#include <litmus/preempt.h>
21#include <litmus/sched_plugin.h>
22#include <litmus/edf_common.h>
23#include <litmus/sched_trace.h>
24#include <litmus/trace.h>
25
26typedef struct {
27 rt_domain_t domain;
28 int cpu;
29 struct task_struct* scheduled; /* only RT tasks */
30/*
31 * scheduling lock slock
32 * protects the domain and serializes scheduling decisions
33 */
34#define slock domain.ready_lock
35
36} psnedf_domain_t;
37
38DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
39
40#define local_edf (&__get_cpu_var(psnedf_domains).domain)
41#define local_pedf (&__get_cpu_var(psnedf_domains))
42#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
43#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
44#define task_edf(task) remote_edf(get_partition(task))
45#define task_pedf(task) remote_pedf(get_partition(task))
46
47
48static void psnedf_domain_init(psnedf_domain_t* pedf,
49 check_resched_needed_t check,
50 release_jobs_t release,
51 int cpu)
52{
53 edf_domain_init(&pedf->domain, check, release);
54 pedf->cpu = cpu;
55 pedf->scheduled = NULL;
56}
57
58static void requeue(struct task_struct* t, rt_domain_t *edf)
59{
60 if (t->state != TASK_RUNNING)
61 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
62
63 set_rt_flags(t, RT_F_RUNNING);
64 if (is_released(t, litmus_clock()))
65 __add_ready(edf, t);
66 else
67 add_release(edf, t); /* it has got to wait */
68}
69
70/* we assume the lock is being held */
71static void preempt(psnedf_domain_t *pedf)
72{
73 preempt_if_preemptable(pedf->scheduled, pedf->cpu);
74}
75
76#ifdef CONFIG_LITMUS_LOCKING
77
78static void boost_priority(struct task_struct* t)
79{
80 unsigned long flags;
81 psnedf_domain_t* pedf = task_pedf(t);
82 lt_t now;
83
84 raw_spin_lock_irqsave(&pedf->slock, flags);
85 now = litmus_clock();
86
87 TRACE_TASK(t, "priority boosted at %llu\n", now);
88
89 tsk_rt(t)->priority_boosted = 1;
90 tsk_rt(t)->boost_start_time = now;
91
92 if (pedf->scheduled != t) {
93 /* holder may be queued: first stop queue changes */
94 raw_spin_lock(&pedf->domain.release_lock);
95 if (is_queued(t) &&
96 /* If it is queued, then we need to re-order. */
97 bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
98 /* If we bubbled to the top, then we need to check for preemptions. */
99 edf_preemption_needed(&pedf->domain, pedf->scheduled))
100 preempt(pedf);
101 raw_spin_unlock(&pedf->domain.release_lock);
102 } /* else: nothing to do since the job is not queued while scheduled */
103
104 raw_spin_unlock_irqrestore(&pedf->slock, flags);
105}
106
107static void unboost_priority(struct task_struct* t)
108{
109 unsigned long flags;
110 psnedf_domain_t* pedf = task_pedf(t);
111 lt_t now;
112
113 raw_spin_lock_irqsave(&pedf->slock, flags);
114 now = litmus_clock();
115
116 /* assumption: this only happens when the job is scheduled */
117 BUG_ON(pedf->scheduled != t);
118
119 TRACE_TASK(t, "priority restored at %llu\n", now);
120
121 /* priority boosted jobs must be scheduled */
122 BUG_ON(pedf->scheduled != t);
123
124 tsk_rt(t)->priority_boosted = 0;
125 tsk_rt(t)->boost_start_time = 0;
126
127 /* check if this changes anything */
128 if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
129 preempt(pedf);
130
131 raw_spin_unlock_irqrestore(&pedf->slock, flags);
132}
133
134#endif
135
136/* This check is trivial in partitioned systems as we only have to consider
137 * the CPU of the partition.
138 */
139static int psnedf_check_resched(rt_domain_t *edf)
140{
141 psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
142
143 /* because this is a callback from rt_domain_t we already hold
144 * the necessary lock for the ready queue
145 */
146 if (edf_preemption_needed(edf, pedf->scheduled)) {
147 preempt(pedf);
148 return 1;
149 } else
150 return 0;
151}
152
153static void job_completion(struct task_struct* t, int forced)
154{
155 sched_trace_task_completion(t,forced);
156 TRACE_TASK(t, "job_completion().\n");
157
158 set_rt_flags(t, RT_F_SLEEP);
159 prepare_for_next_period(t);
160}
161
162static void psnedf_tick(struct task_struct *t)
163{
164 psnedf_domain_t *pedf = local_pedf;
165
166 /* Check for inconsistency. We don't need the lock for this since
167 * ->scheduled is only changed in schedule, which obviously is not
168 * executing in parallel on this CPU
169 */
170 BUG_ON(is_realtime(t) && t != pedf->scheduled);
171
172 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
173 if (!is_np(t)) {
174 litmus_reschedule_local();
175 TRACE("psnedf_scheduler_tick: "
176 "%d is preemptable "
177 " => FORCE_RESCHED\n", t->pid);
178 } else if (is_user_np(t)) {
179 TRACE("psnedf_scheduler_tick: "
180 "%d is non-preemptable, "
181 "preemption delayed.\n", t->pid);
182 request_exit_np(t);
183 }
184 }
185}
186
187static struct task_struct* psnedf_schedule(struct task_struct * prev)
188{
189 psnedf_domain_t* pedf = local_pedf;
190 rt_domain_t* edf = &pedf->domain;
191 struct task_struct* next;
192
193 int out_of_time, sleep, preempt,
194 np, exists, blocks, resched;
195
196 raw_spin_lock(&pedf->slock);
197
198 /* sanity checking
199	 * unlike in gedf, when a task exits (is dead),
200	 * pedf->scheduled may be NULL while prev _is_ a real-time task
201 */
202 BUG_ON(pedf->scheduled && pedf->scheduled != prev);
203 BUG_ON(pedf->scheduled && !is_realtime(prev));
204
205 /* (0) Determine state */
206 exists = pedf->scheduled != NULL;
207 blocks = exists && !is_running(pedf->scheduled);
208 out_of_time = exists &&
209 budget_enforced(pedf->scheduled) &&
210 budget_exhausted(pedf->scheduled);
211 np = exists && is_np(pedf->scheduled);
212 sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
213 preempt = edf_preemption_needed(edf, prev);
214
215	/* If we need to preempt, do so.
216 * The following checks set resched to 1 in case of special
217 * circumstances.
218 */
219 resched = preempt;
220
221	/* If a task blocks, we have no choice but to reschedule.
222 */
223 if (blocks)
224 resched = 1;
225
226 /* Request a sys_exit_np() call if we would like to preempt but cannot.
227 * Multiple calls to request_exit_np() don't hurt.
228 */
229 if (np && (out_of_time || preempt || sleep))
230 request_exit_np(pedf->scheduled);
231
232 /* Any task that is preemptable and either exhausts its execution
233 * budget or wants to sleep completes. We may have to reschedule after
234 * this.
235 */
236 if (!np && (out_of_time || sleep) && !blocks) {
237 job_completion(pedf->scheduled, !sleep);
238 resched = 1;
239 }
240
241 /* The final scheduling decision. Do we need to switch for some reason?
242 * Switch if we are in RT mode and have no task or if we need to
243 * resched.
244 */
245 next = NULL;
246 if ((!np || blocks) && (resched || !exists)) {
247		/* When preempting a task that does not block,
248 * re-insert it into either the ready queue or the
249 * release queue (if it completed). requeue() picks
250 * the appropriate queue.
251 */
252 if (pedf->scheduled && !blocks)
253 requeue(pedf->scheduled, edf);
254 next = __take_ready(edf);
255 } else
256 /* Only override Linux scheduler if we have a real-time task
257 * scheduled that needs to continue.
258 */
259 if (exists)
260 next = prev;
261
262 if (next) {
263 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
264 set_rt_flags(next, RT_F_RUNNING);
265 } else {
266 TRACE("becoming idle at %llu\n", litmus_clock());
267 }
268
269 pedf->scheduled = next;
270 sched_state_task_picked();
271 raw_spin_unlock(&pedf->slock);
272
273 return next;
274}
275
276
277/* Prepare a task for running in RT mode
278 */
279static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
280{
281 rt_domain_t* edf = task_edf(t);
282 psnedf_domain_t* pedf = task_pedf(t);
283 unsigned long flags;
284
285 TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
286 t->rt_param.task_params.cpu);
287
288 /* setup job parameters */
289 release_at(t, litmus_clock());
290
291 /* The task should be running in the queue, otherwise signal
292 * code will try to wake it up with fatal consequences.
293 */
294 raw_spin_lock_irqsave(&pedf->slock, flags);
295 if (running) {
296 /* there shouldn't be anything else running at the time */
297 BUG_ON(pedf->scheduled);
298 pedf->scheduled = t;
299 } else {
300 requeue(t, edf);
301 /* maybe we have to reschedule */
302 preempt(pedf);
303 }
304 raw_spin_unlock_irqrestore(&pedf->slock, flags);
305}
306
307static void psnedf_task_wake_up(struct task_struct *task)
308{
309 unsigned long flags;
310 psnedf_domain_t* pedf = task_pedf(task);
311 rt_domain_t* edf = task_edf(task);
312 lt_t now;
313
314 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
315 raw_spin_lock_irqsave(&pedf->slock, flags);
316 BUG_ON(is_queued(task));
317 now = litmus_clock();
318 if (is_tardy(task, now)
319#ifdef CONFIG_LITMUS_LOCKING
320 /* We need to take suspensions because of semaphores into
321 * account! If a job resumes after being suspended due to acquiring
322 * a semaphore, it should never be treated as a new job release.
323 */
324 && !is_priority_boosted(task)
325#endif
326 ) {
327 /* new sporadic release */
328 release_at(task, now);
329 sched_trace_task_release(task);
330 }
331
332 /* Only add to ready queue if it is not the currently-scheduled
333 * task. This could be the case if a task was woken up concurrently
334 * on a remote CPU before the executing CPU got around to actually
335 * de-scheduling the task, i.e., wake_up() raced with schedule()
336 * and won.
337 */
338 if (pedf->scheduled != task)
339 requeue(task, edf);
340
341 raw_spin_unlock_irqrestore(&pedf->slock, flags);
342 TRACE_TASK(task, "wake up done\n");
343}
344
345static void psnedf_task_block(struct task_struct *t)
346{
347 /* only running tasks can block, thus t is in no queue */
348 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
349
350 BUG_ON(!is_realtime(t));
351 BUG_ON(is_queued(t));
352}
353
354static void psnedf_task_exit(struct task_struct * t)
355{
356 unsigned long flags;
357 psnedf_domain_t* pedf = task_pedf(t);
358 rt_domain_t* edf;
359
360 raw_spin_lock_irqsave(&pedf->slock, flags);
361 if (is_queued(t)) {
362 /* dequeue */
363 edf = task_edf(t);
364 remove(edf, t);
365 }
366 if (pedf->scheduled == t)
367 pedf->scheduled = NULL;
368
369 TRACE_TASK(t, "RIP, now reschedule\n");
370
371 preempt(pedf);
372 raw_spin_unlock_irqrestore(&pedf->slock, flags);
373}
374
375#ifdef CONFIG_LITMUS_LOCKING
376
377#include <litmus/fdso.h>
378#include <litmus/srp.h>
379
380/* ******************** SRP support ************************ */
381
382static unsigned int psnedf_get_srp_prio(struct task_struct* t)
383{
384 /* assumes implicit deadlines */
385 return get_rt_period(t);
386}
387
388/* ******************** FMLP support ********************** */
389
390/* struct for semaphore with priority inheritance */
391struct fmlp_semaphore {
392 struct litmus_lock litmus_lock;
393
394 /* current resource holder */
395 struct task_struct *owner;
396
397 /* FIFO queue of waiting tasks */
398 wait_queue_head_t wait;
399};
400
401static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
402{
403 return container_of(lock, struct fmlp_semaphore, litmus_lock);
404}
405int psnedf_fmlp_lock(struct litmus_lock* l)
406{
407 struct task_struct* t = current;
408 struct fmlp_semaphore *sem = fmlp_from_lock(l);
409 wait_queue_t wait;
410 unsigned long flags;
411
412 if (!is_realtime(t))
413 return -EPERM;
414
415 preempt_disable();
416
417 TRACE_CUR("want FMLP sem %p\n", sem);
418
419 boost_priority(t);
420
421 spin_lock_irqsave(&sem->wait.lock, flags);
422
423 if (sem->owner) {
424 /* resource is not free => must suspend and wait */
425
426 init_waitqueue_entry(&wait, t);
427
428 /* FIXME: interruptible would be nice some day */
429 set_task_state(t, TASK_UNINTERRUPTIBLE);
430
431 TRACE_CUR("blocking on FMLP sem %p\n", sem);
432 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
433
434 /* release lock before sleeping */
435 spin_unlock_irqrestore(&sem->wait.lock, flags);
436
437 /* We depend on the FIFO order. Thus, we don't need to recheck
438 * when we wake up; we are guaranteed to have the lock since
439 * there is only one wake up per release.
440 */
441
442 TS_LOCK_SUSPEND;
443
444 preempt_enable_no_resched();
445
446 schedule();
447
448 preempt_disable();
449
450 TS_LOCK_RESUME;
451
452 /* Since we hold the lock, no other task will change
453 * ->owner. We can thus check it without acquiring the spin
454 * lock. */
455 BUG_ON(sem->owner != t);
456 } else {
457 /* it's ours now */
458 sem->owner = t;
459
460 spin_unlock_irqrestore(&sem->wait.lock, flags);
461 }
462
463 TRACE_CUR("got FMLP sem %p\n", sem);
464
465 preempt_enable();
466
467 return 0;
468}
469
470int psnedf_fmlp_unlock(struct litmus_lock* l)
471{
472 struct task_struct *t = current, *next;
473 struct fmlp_semaphore *sem = fmlp_from_lock(l);
474 unsigned long flags;
475 int err = 0;
476
477 spin_lock_irqsave(&sem->wait.lock, flags);
478
479 if (sem->owner != t) {
480 err = -EINVAL;
481 goto out;
482 }
483
484 TRACE_CUR("releasing FMLP sem %p\n", sem);
485
486 /* we lose the benefit of priority boosting */
487
488 unboost_priority(t);
489
490 /* check if there are jobs waiting for this resource */
491 next = __waitqueue_remove_first(&sem->wait);
492 if (next) {
493		/* next becomes the resource holder */
494 sem->owner = next;
495
496 /* wake up next */
497 wake_up_process(next);
498 } else
499 /* resource becomes available */
500 sem->owner = NULL;
501
502out:
503 spin_unlock_irqrestore(&sem->wait.lock, flags);
504 return err;
505}
506
507int psnedf_fmlp_close(struct litmus_lock* l)
508{
509 struct task_struct *t = current;
510 struct fmlp_semaphore *sem = fmlp_from_lock(l);
511 unsigned long flags;
512
513 int owner;
514
515 spin_lock_irqsave(&sem->wait.lock, flags);
516
517 owner = sem->owner == t;
518
519 spin_unlock_irqrestore(&sem->wait.lock, flags);
520
521 if (owner)
522 psnedf_fmlp_unlock(l);
523
524 return 0;
525}
526
527void psnedf_fmlp_free(struct litmus_lock* lock)
528{
529 kfree(fmlp_from_lock(lock));
530}
531
532static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
533 .close = psnedf_fmlp_close,
534 .lock = psnedf_fmlp_lock,
535 .unlock = psnedf_fmlp_unlock,
536 .deallocate = psnedf_fmlp_free,
537};
538
539static struct litmus_lock* psnedf_new_fmlp(void)
540{
541 struct fmlp_semaphore* sem;
542
543 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
544 if (!sem)
545 return NULL;
546
547 sem->owner = NULL;
548 init_waitqueue_head(&sem->wait);
549 sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;
550
551 return &sem->litmus_lock;
552}
553
554
555
556/* ******************** OMLP support **********************/
557
558/* Under the partitioned OMLP, a job must first acquire its processor's
559 * contention token (a per-cpu resource, waited for in priority order)
560 * before it may compete in the semaphore's global FIFO queue. */
561static DEFINE_PER_CPU(wait_queue_head_t, omlp_token_wait);
562static DEFINE_PER_CPU(struct task_struct*, omlp_token);
563
564/* called with preemptions off <=> no local modifications */
565static void omlp_grab_token(void)
566{
567 struct task_struct* t = current;
568
569 while (1) {
570 if (__get_cpu_var(omlp_token) == NULL) {
571 /* take it */
572 __get_cpu_var(omlp_token) = t;
573 break;
574 } else {
575 /* some job is spinning => enqueue in request queue */
576 prio_wait_queue_t wait;
577 wait_queue_head_t* token_waiters = &__get_cpu_var(omlp_token_wait);
578 unsigned long flags;
579
580			/* ordered by regular (EDF) priority; ties broken by lower PID */
581 init_prio_waitqueue_entry_tie(&wait, t, get_deadline(t), t->pid);
582
583 spin_lock_irqsave(&token_waiters->lock, flags);
584
585 set_task_state(t, TASK_UNINTERRUPTIBLE);
586
587 __add_wait_queue_prio_exclusive(token_waiters, &wait);
588
589 TRACE_CUR("waiting for OMLP token\n");
590
591 spin_unlock_irqrestore(&token_waiters->lock, flags);
592
593 TS_LOCK_SUSPEND;
594
595 preempt_enable_no_resched();
596
597 schedule();
598
599 preempt_disable();
600
601 TS_LOCK_RESUME;
602 /* Recheck if we got it */
603 }
604 }
605 /* ok, now it is ours */
606 TRACE_CUR("got OMLP token\n");
607}
608
609/* called with preemptions off */
610static void omlp_release_token(void)
611{
612 struct task_struct* t = current, *next;
613 unsigned long flags;
614 wait_queue_head_t* token_waiters = &__get_cpu_var(omlp_token_wait);
615
616 BUG_ON(__get_cpu_var(omlp_token) != t);
617
618 __get_cpu_var(omlp_token) = NULL;
619
620 TRACE_CUR("released OMLP token\n");
621
622 spin_lock_irqsave(&token_waiters->lock, flags);
623 next = __waitqueue_remove_first(token_waiters);
624
625 if (next)
626 wake_up_process(next);
627
628 spin_unlock_irqrestore(&token_waiters->lock, flags);
629}
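/* Illustrative usage sketch: like the MPCP virtual-spinning helpers in
 * sched_pfp.c, the token helpers above bracket the global request issued by
 * psnedf_omlp_lock()/psnedf_omlp_unlock() below, roughly:
 *
 *     preempt_disable();
 *     omlp_grab_token();       -- contend for the per-cpu token (priority-ordered)
 *     boost_priority(t);
 *     ... suspend on the semaphore's FIFO queue if needed; critical section ...
 *     unboost_priority(t);
 *     omlp_release_token();    -- pass the token to the next local waiter
 *     preempt_enable();
 */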
630
631
632struct omlp_semaphore {
633 struct litmus_lock litmus_lock;
634
635 /* current resource holder */
636 struct task_struct *owner;
637
638 /* FIFO queue of waiting tasks */
639 wait_queue_head_t wait;
640};
641
642static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock)
643{
644 return container_of(lock, struct omlp_semaphore, litmus_lock);
645}
646int psnedf_omlp_lock(struct litmus_lock* l)
647{
648 struct task_struct* t = current;
649 struct omlp_semaphore *sem = omlp_from_lock(l);
650 wait_queue_t wait;
651 unsigned long flags;
652
653 if (!is_realtime(t))
654 return -EPERM;
655
656 preempt_disable();
657
658 omlp_grab_token();
659
660 /* Priority-boost ourself *before* we suspend so that
661 * our priority is boosted when we resume. */
662 boost_priority(t);
663
664 spin_lock_irqsave(&sem->wait.lock, flags);
665
666 if (sem->owner) {
667 /* resource is not free => must suspend and wait */
668
669 init_waitqueue_entry(&wait, t);
670
671 /* FIXME: interruptible would be nice some day */
672 set_task_state(t, TASK_UNINTERRUPTIBLE);
673
674 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
675
676 /* release lock before sleeping */
677 spin_unlock_irqrestore(&sem->wait.lock, flags);
678
679 /* We depend on the FIFO order. Thus, we don't need to recheck
680 * when we wake up; we are guaranteed to have the lock since
681 * there is only one wake up per release.
682 */
683 TS_LOCK_SUSPEND;
684
685 preempt_enable_no_resched();
686
687 schedule();
688
689 preempt_disable();
690
691 TS_LOCK_RESUME;
692
693 /* Since we hold the lock, no other task will change
694 * ->owner. We can thus check it without acquiring the spin
695 * lock. */
696 BUG_ON(sem->owner != t);
697 } else {
698 /* it's ours now */
699 sem->owner = t;
700
701 spin_unlock_irqrestore(&sem->wait.lock, flags);
702 }
703
704 preempt_enable();
705
706 return 0;
707}
708
709int psnedf_omlp_unlock(struct litmus_lock* l)
710{
711 struct task_struct *t = current, *next;
712 struct omlp_semaphore *sem = omlp_from_lock(l);
713 unsigned long flags;
714 int err = 0;
715
716 preempt_disable();
717
718 spin_lock_irqsave(&sem->wait.lock, flags);
719
720 if (sem->owner != t) {
721 err = -EINVAL;
722 spin_unlock_irqrestore(&sem->wait.lock, flags);
723 goto out;
724 }
725
726 /* we lose the benefit of priority boosting */
727
728 unboost_priority(t);
729
730 /* check if there are jobs waiting for this resource */
731 next = __waitqueue_remove_first(&sem->wait);
732 if (next) {
733		/* next becomes the resource holder */
734 sem->owner = next;
735
736 /* Wake up next. The waiting job is already priority-boosted. */
737 wake_up_process(next);
738 } else
739 /* resource becomes available */
740 sem->owner = NULL;
741
742 spin_unlock_irqrestore(&sem->wait.lock, flags);
743
744 omlp_release_token();
745
746out:
747 preempt_enable();
748 return err;
749}
750
751int psnedf_omlp_close(struct litmus_lock* l)
752{
753 struct task_struct *t = current;
754 struct omlp_semaphore *sem = omlp_from_lock(l);
755 unsigned long flags;
756
757 int owner;
758
759 spin_lock_irqsave(&sem->wait.lock, flags);
760
761 owner = sem->owner == t;
762
763 spin_unlock_irqrestore(&sem->wait.lock, flags);
764
765 if (owner)
766 psnedf_omlp_unlock(l);
767
768 return 0;
769}
770
771void psnedf_omlp_free(struct litmus_lock* lock)
772{
773 kfree(omlp_from_lock(lock));
774}
775
776static struct litmus_lock_ops psnedf_omlp_lock_ops = {
777 .close = psnedf_omlp_close,
778 .lock = psnedf_omlp_lock,
779 .unlock = psnedf_omlp_unlock,
780 .deallocate = psnedf_omlp_free,
781};
782
783static struct litmus_lock* psnedf_new_omlp(void)
784{
785 struct omlp_semaphore* sem;
786
787 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
788 if (!sem)
789 return NULL;
790
791 sem->owner = NULL;
792 init_waitqueue_head(&sem->wait);
793 sem->litmus_lock.ops = &psnedf_omlp_lock_ops;
794
795 return &sem->litmus_lock;
796}
797
798
799/* **** lock constructor **** */
800
801
802static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
803 void* __user unused)
804{
805 int err = -ENXIO;
806 struct srp_semaphore* srp;
807
808	/* PSN-EDF currently supports the SRP for local resources and the FMLP
809	 * and OMLP for global resources. */
810 switch (type) {
811 case FMLP_SEM:
812 /* Flexible Multiprocessor Locking Protocol */
813 *lock = psnedf_new_fmlp();
814 if (*lock)
815 err = 0;
816 else
817 err = -ENOMEM;
818 break;
819
820 case OMLP_SEM:
821 /* O(m) Locking Protocol */
822 *lock = psnedf_new_omlp();
823 if (*lock)
824 err = 0;
825 else
826 err = -ENOMEM;
827 break;
828
829 case SRP_SEM:
830 /* Baker's Stack Resource Policy */
831 srp = allocate_srp_semaphore();
832 if (srp) {
833 *lock = &srp->litmus_lock;
834 err = 0;
835 } else
836 err = -ENOMEM;
837 break;
838 };
839
840 return err;
841}
842
843#endif
844
845
846static long psnedf_activate_plugin(void)
847{
848
849 int cpu;
850
851 for_each_online_cpu(cpu) {
852#ifdef CONFIG_RELEASE_MASTER
853 remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
854#endif
855#ifdef CONFIG_LITMUS_LOCKING
856 init_waitqueue_head(&per_cpu(omlp_token_wait, cpu));
857 per_cpu(omlp_token, cpu) = NULL;
858#endif
859 }
860
861
862#ifdef CONFIG_LITMUS_LOCKING
863 get_srp_prio = psnedf_get_srp_prio;
864#endif
865
866 return 0;
867}
868
869static long psnedf_admit_task(struct task_struct* tsk)
870{
871 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
872#ifdef CONFIG_RELEASE_MASTER
873 /* don't allow tasks on release master CPU */
874 && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
875#endif
876 )
877 return 0;
878 else
879 return -EINVAL;
880}
881
882/* Plugin object */
883static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
884 .plugin_name = "PSN-EDF",
885 .tick = psnedf_tick,
886 .task_new = psnedf_task_new,
887 .complete_job = complete_job,
888 .task_exit = psnedf_task_exit,
889 .schedule = psnedf_schedule,
890 .task_wake_up = psnedf_task_wake_up,
891 .task_block = psnedf_task_block,
892 .admit_task = psnedf_admit_task,
893 .activate_plugin = psnedf_activate_plugin,
894#ifdef CONFIG_LITMUS_LOCKING
895 .allocate_lock = psnedf_allocate_lock,
896#endif
897};
898
899
900static int __init init_psn_edf(void)
901{
902 int i;
903
904 /* We do not really want to support cpu hotplug, do we? ;)
905 * However, if we are so crazy to do so,
906	 * we cannot use num_online_cpus()
907 */
908 for (i = 0; i < num_online_cpus(); i++) {
909 psnedf_domain_init(remote_pedf(i),
910 psnedf_check_resched,
911 NULL, i);
912 }
913 return register_sched_plugin(&psn_edf_plugin);
914}
915
916module_init(init_psn_edf);
917
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
new file mode 100644
index 000000000000..5ef8d09ab41f
--- /dev/null
+++ b/litmus/sched_task_trace.c
@@ -0,0 +1,241 @@
1/*
2 * sched_task_trace.c -- record scheduling events to a byte stream
3 */
4
5#define NO_TASK_TRACE_DECLS
6
7#include <linux/module.h>
8#include <linux/sched.h>
9#include <linux/percpu.h>
10
11#include <litmus/ftdev.h>
12#include <litmus/litmus.h>
13
14#include <litmus/sched_trace.h>
15#include <litmus/feather_trace.h>
16#include <litmus/ftdev.h>
17
18
19#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
20
21#define now() litmus_clock()
22
23struct local_buffer {
24 struct st_event_record record[NO_EVENTS];
25 char flag[NO_EVENTS];
26 struct ft_buffer ftbuf;
27};
28
29DEFINE_PER_CPU(struct local_buffer, st_event_buffer);
30
31static struct ftdev st_dev;
32
33static int st_dev_can_open(struct ftdev *dev, unsigned int cpu)
34{
35 return cpu_online(cpu) ? 0 : -ENODEV;
36}
37
38static int __init init_sched_task_trace(void)
39{
40 struct local_buffer* buf;
41 int i, ok = 0, err;
42 printk("Allocated %u sched_trace_xxx() events per CPU "
43 "(buffer size: %d bytes)\n",
44 NO_EVENTS, (int) sizeof(struct local_buffer));
45
46 err = ftdev_init(&st_dev, THIS_MODULE,
47 num_online_cpus(), "sched_trace");
48 if (err)
49 goto err_out;
50
51 for (i = 0; i < st_dev.minor_cnt; i++) {
52 buf = &per_cpu(st_event_buffer, i);
53 ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
54 sizeof(struct st_event_record),
55 buf->flag,
56 buf->record);
57 st_dev.minor[i].buf = &buf->ftbuf;
58 }
59 if (ok == st_dev.minor_cnt) {
60 st_dev.can_open = st_dev_can_open;
61 err = register_ftdev(&st_dev);
62 if (err)
63 goto err_dealloc;
64 } else {
65 err = -EINVAL;
66 goto err_dealloc;
67 }
68
69 return 0;
70
71err_dealloc:
72 ftdev_exit(&st_dev);
73err_out:
74 printk(KERN_WARNING "Could not register sched_trace module\n");
75 return err;
76}
77
78static void __exit exit_sched_task_trace(void)
79{
80 ftdev_exit(&st_dev);
81}
82
83module_init(init_sched_task_trace);
84module_exit(exit_sched_task_trace);
85
86
87static inline struct st_event_record* get_record(u8 type, struct task_struct* t)
88{
89 struct st_event_record* rec = NULL;
90 struct local_buffer* buf;
91
92 buf = &get_cpu_var(st_event_buffer);
93 if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) {
94 rec->hdr.type = type;
95 rec->hdr.cpu = smp_processor_id();
96 rec->hdr.pid = t ? t->pid : 0;
97 rec->hdr.job = t ? t->rt_param.job_params.job_no : 0;
98 } else {
99 put_cpu_var(st_event_buffer);
100 }
101 /* rec will be NULL if it failed */
102 return rec;
103}
104
105static inline void put_record(struct st_event_record* rec)
106{
107 struct local_buffer* buf;
108 buf = &__get_cpu_var(st_event_buffer);
109 ft_buffer_finish_write(&buf->ftbuf, rec);
110 put_cpu_var(st_event_buffer);
111}
112
113feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task)
114{
115 struct task_struct *t = (struct task_struct*) _task;
116 struct st_event_record* rec = get_record(ST_NAME, t);
117 int i;
118 if (rec) {
119 for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++)
120 rec->data.name.cmd[i] = t->comm[i];
121 put_record(rec);
122 }
123}
124
125feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task)
126{
127 struct task_struct *t = (struct task_struct*) _task;
128 struct st_event_record* rec = get_record(ST_PARAM, t);
129 if (rec) {
130 rec->data.param.wcet = get_exec_cost(t);
131 rec->data.param.period = get_rt_period(t);
132 rec->data.param.phase = get_rt_phase(t);
133 rec->data.param.partition = get_partition(t);
134 rec->data.param.class = get_class(t);
135 put_record(rec);
136 }
137}
138
139feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task)
140{
141 struct task_struct *t = (struct task_struct*) _task;
142 struct st_event_record* rec = get_record(ST_RELEASE, t);
143 if (rec) {
144 rec->data.release.release = get_release(t);
145 rec->data.release.deadline = get_deadline(t);
146 put_record(rec);
147 }
148}
149
150/* skipped: st_assigned_data, we do not use it at the moment */
151
152feather_callback void do_sched_trace_task_switch_to(unsigned long id,
153 unsigned long _task)
154{
155 struct task_struct *t = (struct task_struct*) _task;
156 struct st_event_record* rec;
157 if (is_realtime(t)) {
158 rec = get_record(ST_SWITCH_TO, t);
159 if (rec) {
160 rec->data.switch_to.when = now();
161 rec->data.switch_to.exec_time = get_exec_time(t);
162 put_record(rec);
163 }
164 }
165}
166
167feather_callback void do_sched_trace_task_switch_away(unsigned long id,
168 unsigned long _task)
169{
170 struct task_struct *t = (struct task_struct*) _task;
171 struct st_event_record* rec;
172 if (is_realtime(t)) {
173 rec = get_record(ST_SWITCH_AWAY, t);
174 if (rec) {
175 rec->data.switch_away.when = now();
176 rec->data.switch_away.exec_time = get_exec_time(t);
177 put_record(rec);
178 }
179 }
180}
181
182feather_callback void do_sched_trace_task_completion(unsigned long id,
183 unsigned long _task,
184 unsigned long forced)
185{
186 struct task_struct *t = (struct task_struct*) _task;
187 struct st_event_record* rec = get_record(ST_COMPLETION, t);
188 if (rec) {
189 rec->data.completion.when = now();
190 rec->data.completion.forced = forced;
191 put_record(rec);
192 }
193}
194
195feather_callback void do_sched_trace_task_block(unsigned long id,
196 unsigned long _task)
197{
198 struct task_struct *t = (struct task_struct*) _task;
199 struct st_event_record* rec = get_record(ST_BLOCK, t);
200 if (rec) {
201 rec->data.block.when = now();
202 put_record(rec);
203 }
204}
205
206feather_callback void do_sched_trace_task_resume(unsigned long id,
207 unsigned long _task)
208{
209 struct task_struct *t = (struct task_struct*) _task;
210 struct st_event_record* rec = get_record(ST_RESUME, t);
211 if (rec) {
212 rec->data.resume.when = now();
213 put_record(rec);
214 }
215}
216
217feather_callback void do_sched_trace_sys_release(unsigned long id,
218 unsigned long _start)
219{
220 lt_t *start = (lt_t*) _start;
221 struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL);
222 if (rec) {
223 rec->data.sys_release.when = now();
224 rec->data.sys_release.release = *start;
225 put_record(rec);
226 }
227}
228
229feather_callback void do_sched_trace_action(unsigned long id,
230 unsigned long _task,
231 unsigned long action)
232{
233 struct task_struct *t = (struct task_struct*) _task;
234 struct st_event_record* rec = get_record(ST_ACTION, t);
235
236 if (rec) {
237 rec->data.action.when = now();
238 rec->data.action.action = action;
239 put_record(rec);
240 }
241}
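
The feather_callback functions above are not called directly; plugins and core code emit events through sched_trace_*() wrapper macros from litmus/sched_trace.h (sched_trace_task_release() and sched_trace_sys_release() are used that way in litmus/sync.c later in this patch). A sketch of how a plugin's completion path would emit records, assuming the wrapper names mirror the do_sched_trace_*() callbacks above:

/* illustrative only: sched_trace_task_completion() is an assumed wrapper
 * corresponding to do_sched_trace_task_completion() above */
static void example_job_completion_path(struct task_struct *t, int forced)
{
	/* record that the current job finished (forced == budget exhaustion) */
	sched_trace_task_completion(t, forced);

	/* ... set up the next job (release time, deadline, job_no) ... */

	/* record the release parameters of the next job */
	sched_trace_task_release(t);
}
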
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
new file mode 100644
index 000000000000..f4171fddbbb1
--- /dev/null
+++ b/litmus/sched_trace.c
@@ -0,0 +1,252 @@
1/*
2 * sched_trace.c -- record scheduling events to a byte stream.
3 */
4#include <linux/spinlock.h>
5#include <linux/mutex.h>
6
7#include <linux/fs.h>
8#include <linux/slab.h>
9#include <linux/miscdevice.h>
10#include <asm/uaccess.h>
11#include <linux/module.h>
12#include <linux/sysrq.h>
13
14#include <linux/kfifo.h>
15
16#include <litmus/sched_trace.h>
17#include <litmus/litmus.h>
18
19#define SCHED_TRACE_NAME "litmus/log"
20
21/* Compute size of TRACE() buffer */
22#define LITMUS_TRACE_BUF_SIZE (1 << CONFIG_SCHED_DEBUG_TRACE_SHIFT)
23
24/* Max length of one read from the buffer */
25#define MAX_READ_LEN (64 * 1024)
26
27/* Max length for one write --- by TRACE() --- to the buffer. This is used to
28 * allocate a per-cpu buffer for printf() formatting. */
29#define MSG_SIZE 255
30
31
32static DEFINE_MUTEX(reader_mutex);
33static atomic_t reader_cnt = ATOMIC_INIT(0);
34static DEFINE_KFIFO(debug_buffer, char, LITMUS_TRACE_BUF_SIZE);
35
36
37static DEFINE_RAW_SPINLOCK(log_buffer_lock);
38static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
39
40/*
41 * sched_trace_log_message - Write to the trace buffer (log_buffer)
42 *
43 * This is the only function accessing the log_buffer from inside the
44 * kernel for writing.
45 * Concurrent access to sched_trace_log_message must be serialized using
46 * log_buffer_lock
47 * The maximum length of a formatted message is MSG_SIZE - 1 bytes
48 */
49void sched_trace_log_message(const char* fmt, ...)
50{
51 unsigned long flags;
52 va_list args;
53 size_t len;
54 char* buf;
55
56 if (!atomic_read(&reader_cnt))
57 /* early exit if nobody is listening */
58 return;
59
60 va_start(args, fmt);
61 local_irq_save(flags);
62
63 /* format message */
64 buf = __get_cpu_var(fmt_buffer);
65 len = vscnprintf(buf, MSG_SIZE, fmt, args);
66
67 raw_spin_lock(&log_buffer_lock);
68 /* Don't copy the trailing null byte, we don't want null bytes in a
69 * text file.
70 */
71 kfifo_in(&debug_buffer, buf, len);
72 raw_spin_unlock(&log_buffer_lock);
73
74 local_irq_restore(flags);
75 va_end(args);
76}
77
78
79/*
80 * log_read - Read the trace buffer
81 *
82 * This function is called as a file operation from userspace.
83 * Readers can sleep. Access is serialized through reader_mutex
84 */
85static ssize_t log_read(struct file *filp,
86 char __user *to, size_t len,
87 loff_t *f_pos)
88{
89 /* we ignore f_pos, this is strictly sequential */
90
91 ssize_t error = -EINVAL;
92 char* mem;
93
94 if (mutex_lock_interruptible(&reader_mutex)) {
95 error = -ERESTARTSYS;
96 goto out;
97 }
98
99 if (len > MAX_READ_LEN)
100 len = MAX_READ_LEN;
101
102 mem = kmalloc(len, GFP_KERNEL);
103 if (!mem) {
104 error = -ENOMEM;
105 goto out_unlock;
106 }
107
108 error = kfifo_out(&debug_buffer, mem, len);
109 while (!error) {
110 set_current_state(TASK_INTERRUPTIBLE);
111 schedule_timeout(110);
112 if (signal_pending(current))
113 error = -ERESTARTSYS;
114 else
115 error = kfifo_out(&debug_buffer, mem, len);
116 }
117
118 if (error > 0 && copy_to_user(to, mem, error))
119 error = -EFAULT;
120
121 kfree(mem);
122 out_unlock:
123 mutex_unlock(&reader_mutex);
124 out:
125 return error;
126}
127
128/*
129 * Enable redirection of printk() messages to the trace buffer.
130 * Defined in kernel/printk.c
131 */
132extern int trace_override;
133extern int trace_recurse;
134
135/*
136 * log_open - open the global log message ring buffer.
137 */
138static int log_open(struct inode *in, struct file *filp)
139{
140 int error = -EINVAL;
141
142 if (mutex_lock_interruptible(&reader_mutex)) {
143 error = -ERESTARTSYS;
144 goto out;
145 }
146
147 atomic_inc(&reader_cnt);
148 error = 0;
149
150 printk(KERN_DEBUG
151 "sched_trace kfifo with buffer starting at: 0x%p\n",
152 debug_buffer.buf);
153
154 /* override printk() */
155 trace_override++;
156
157 mutex_unlock(&reader_mutex);
158 out:
159 return error;
160}
161
162static int log_release(struct inode *in, struct file *filp)
163{
164 int error = -EINVAL;
165
166 if (mutex_lock_interruptible(&reader_mutex)) {
167 error = -ERESTARTSYS;
168 goto out;
169 }
170
171 atomic_dec(&reader_cnt);
172
173 /* release printk() overriding */
174 trace_override--;
175
176 printk(KERN_DEBUG "sched_trace kfifo released\n");
177
178 mutex_unlock(&reader_mutex);
179 out:
180 return error;
181}
182
183/*
184 * log_fops - The file operations for accessing the global LITMUS log message
185 * buffer.
186 *
187 * Except for opening the device file it uses the same operations as trace_fops.
188 */
189static struct file_operations log_fops = {
190 .owner = THIS_MODULE,
191 .open = log_open,
192 .release = log_release,
193 .read = log_read,
194};
195
196static struct miscdevice litmus_log_dev = {
197 .name = SCHED_TRACE_NAME,
198 .minor = MISC_DYNAMIC_MINOR,
199 .fops = &log_fops,
200};
201
202#ifdef CONFIG_MAGIC_SYSRQ
203void dump_trace_buffer(int max)
204{
205 char line[80];
206 int len;
207 int count = 0;
208
209 /* potential, but very unlikely, race... */
210 trace_recurse = 1;
211 while ((max == 0 || count++ < max) &&
212	       (len = kfifo_out(&debug_buffer, line, sizeof(line) - 1)) > 0) {
213 line[len] = '\0';
214 printk("%s", line);
215 }
216 trace_recurse = 0;
217}
218
219static void sysrq_dump_trace_buffer(int key)
220{
221 dump_trace_buffer(100);
222}
223
224static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
225 .handler = sysrq_dump_trace_buffer,
226 .help_msg = "dump-trace-buffer(Y)",
227 .action_msg = "writing content of TRACE() buffer",
228};
229#endif
230
231static int __init init_sched_trace(void)
232{
233 printk("Initializing TRACE() device\n");
234
235#ifdef CONFIG_MAGIC_SYSRQ
236 /* offer some debugging help */
237 if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op))
238 printk("Registered dump-trace-buffer(Y) magic sysrq.\n");
239 else
240 printk("Could not register dump-trace-buffer(Y) magic sysrq.\n");
241#endif
242
243 return misc_register(&litmus_log_dev);
244}
245
246static void __exit exit_sched_trace(void)
247{
248 misc_deregister(&litmus_log_dev);
249}
250
251module_init(init_sched_trace);
252module_exit(exit_sched_trace);
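
A minimal userspace reader sketch for the TRACE() log. The misc device is registered under the name "litmus/log"; the node path below is an assumption (it depends on udev), and since log_read() blocks until data arrives a plain read loop suffices:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int dump_litmus_log(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/litmus/log", O_RDONLY);	/* path assumed */

	if (fd < 0)
		return -1;
	/* log_read() returns plain text produced by TRACE()/TRACE_CUR() */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t) n, stdout);
	close(fd);
	return 0;
}
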
diff --git a/litmus/srp.c b/litmus/srp.c
new file mode 100644
index 000000000000..2ed4ec12a9d3
--- /dev/null
+++ b/litmus/srp.c
@@ -0,0 +1,295 @@
1/* ************************************************************************** */
2/* STACK RESOURCE POLICY */
3/* ************************************************************************** */
4
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/wait.h>
8
9#include <litmus/litmus.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/fdso.h>
12#include <litmus/trace.h>
13
14
15#ifdef CONFIG_LITMUS_LOCKING
16
17#include <litmus/srp.h>
18
19srp_prioritization_t get_srp_prio;
20
21struct srp {
22 struct list_head ceiling;
23 wait_queue_head_t ceiling_blocked;
24};
25#define system_ceiling(srp) list2prio(srp->ceiling.next)
26#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
27
28#define UNDEF_SEM -2
29
30atomic_t srp_objects_in_use = ATOMIC_INIT(0);
31
32DEFINE_PER_CPU(struct srp, srp);
33
34/* Initialize SRP semaphores at boot time. */
35static int __init srp_init(void)
36{
37 int i;
38
39 printk("Initializing SRP per-CPU ceilings...");
40 for (i = 0; i < NR_CPUS; i++) {
41 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
42 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
43 }
44 printk(" done!\n");
45
46 return 0;
47}
48module_init(srp_init);
49
50/* SRP task priority comparison function. Smaller numeric values have higher
51 * priority, tie-break is PID. Special case: priority == 0 <=> no priority
52 */
53static int srp_higher_prio(struct srp_priority* first,
54 struct srp_priority* second)
55{
56 if (!first->priority)
57 return 0;
58 else
59 return !second->priority ||
60 first->priority < second->priority || (
61 first->priority == second->priority &&
62 first->pid < second->pid);
63}
64
65
66static int srp_exceeds_ceiling(struct task_struct* first,
67 struct srp* srp)
68{
69 struct srp_priority prio;
70
71 if (list_empty(&srp->ceiling))
72 return 1;
73 else {
74 prio.pid = first->pid;
75 prio.priority = get_srp_prio(first);
76 return srp_higher_prio(&prio, system_ceiling(srp)) ||
77 ceiling2sem(system_ceiling(srp))->owner == first;
78 }
79}
80
81static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
82{
83 struct list_head *pos;
84 if (in_list(&prio->list)) {
85 printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
86 "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
87 return;
88 }
89 list_for_each(pos, &srp->ceiling)
90 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
91 __list_add(&prio->list, pos->prev, pos);
92 return;
93 }
94
95 list_add_tail(&prio->list, &srp->ceiling);
96}
97
98
99static int lock_srp_semaphore(struct litmus_lock* l)
100{
101 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
102
103 if (!is_realtime(current))
104 return -EPERM;
105
106 preempt_disable();
107
108 /* Update ceiling. */
109 srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
110
111 /* SRP invariant: all resources available */
112 BUG_ON(sem->owner != NULL);
113
114 sem->owner = current;
115 TRACE_CUR("acquired srp 0x%p\n", sem);
116
117 preempt_enable();
118
119 return 0;
120}
121
122static int unlock_srp_semaphore(struct litmus_lock* l)
123{
124 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
125 int err = 0;
126
127 preempt_disable();
128
129 if (sem->owner != current) {
130 err = -EINVAL;
131 } else {
132 /* Determine new system priority ceiling for this CPU. */
133 BUG_ON(!in_list(&sem->ceiling.list));
134
135 list_del(&sem->ceiling.list);
136 sem->owner = NULL;
137
138 /* Wake tasks on this CPU, if they exceed current ceiling. */
139 TRACE_CUR("released srp 0x%p\n", sem);
140 wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
141 }
142
143 preempt_enable();
144 return err;
145}
146
147static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
148{
149 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
150 int err = 0;
151 struct task_struct* t = current;
152 struct srp_priority t_prio;
153
154 if (!is_realtime(t))
155 return -EPERM;
156
157 TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
158
159 preempt_disable();
160
161 if (sem->owner != NULL)
162 err = -EBUSY;
163
164 if (err == 0) {
165 if (sem->cpu == UNDEF_SEM)
166 sem->cpu = get_partition(t);
167 else if (sem->cpu != get_partition(t))
168 err = -EPERM;
169 }
170
171 if (err == 0) {
172 t_prio.priority = get_srp_prio(t);
173 t_prio.pid = t->pid;
174 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
175 sem->ceiling.priority = t_prio.priority;
176 sem->ceiling.pid = t_prio.pid;
177 }
178 }
179
180 preempt_enable();
181
182 return err;
183}
184
185static int close_srp_semaphore(struct litmus_lock* l)
186{
187 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
188 int err = 0;
189
190 preempt_disable();
191
192 if (sem->owner == current)
193 unlock_srp_semaphore(l);
194
195 preempt_enable();
196
197 return err;
198}
199
200static void deallocate_srp_semaphore(struct litmus_lock* l)
201{
202 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
203 atomic_dec(&srp_objects_in_use);
204 kfree(sem);
205}
206
207static struct litmus_lock_ops srp_lock_ops = {
208 .open = open_srp_semaphore,
209 .close = close_srp_semaphore,
210 .lock = lock_srp_semaphore,
211 .unlock = unlock_srp_semaphore,
212 .deallocate = deallocate_srp_semaphore,
213};
214
215struct srp_semaphore* allocate_srp_semaphore(void)
216{
217 struct srp_semaphore* sem;
218
219 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
220 if (!sem)
221 return NULL;
222
223 INIT_LIST_HEAD(&sem->ceiling.list);
224 sem->ceiling.priority = 0;
225 sem->cpu = UNDEF_SEM;
226 sem->owner = NULL;
227
228 sem->litmus_lock.ops = &srp_lock_ops;
229
230 atomic_inc(&srp_objects_in_use);
231 return sem;
232}
233
234static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
235 void *key)
236{
237 int cpu = smp_processor_id();
238 struct task_struct *tsk = wait->private;
239 if (cpu != get_partition(tsk))
240		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
241 get_partition(tsk));
242 else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
243 return default_wake_function(wait, mode, sync, key);
244 return 0;
245}
246
247static void do_ceiling_block(struct task_struct *tsk)
248{
249 wait_queue_t wait = {
250 .private = tsk,
251 .func = srp_wake_up,
252 .task_list = {NULL, NULL}
253 };
254
255 tsk->state = TASK_UNINTERRUPTIBLE;
256 add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
257 tsk->rt_param.srp_non_recurse = 1;
258 preempt_enable_no_resched();
259 schedule();
260 preempt_disable();
261 tsk->rt_param.srp_non_recurse = 0;
262 remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
263}
264
265/* Wait for current task priority to exceed system-wide priority ceiling.
266 * FIXME: the hotpath should be inline.
267 */
268void srp_ceiling_block(void)
269{
270 struct task_struct *tsk = current;
271
272 /* Only applies to real-time tasks, but optimize for RT tasks. */
273 if (unlikely(!is_realtime(tsk)))
274 return;
275
276 /* Avoid recursive ceiling blocking. */
277 if (unlikely(tsk->rt_param.srp_non_recurse))
278 return;
279
280 /* Bail out early if there aren't any SRP resources around. */
281 if (likely(!atomic_read(&srp_objects_in_use)))
282 return;
283
284 preempt_disable();
285 if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
286 TRACE_CUR("is priority ceiling blocked.\n");
287 while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
288 do_ceiling_block(tsk);
289 TRACE_CUR("finally exceeds system ceiling.\n");
290 } else
291 TRACE_CUR("is not priority ceiling blocked\n");
292 preempt_enable();
293}
294
295#endif
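
The SRP layer is plugin-agnostic: it only consumes the get_srp_prio hook, which psnedf_activate_plugin() earlier in this patch points at psnedf_get_srp_prio(). A sketch of the plugin side of that contract; the exact srp_prioritization_t signature lives in litmus/srp.h and is not shown here, so the return type below is an assumption, and the only requirement visible in this file is that smaller values mean higher priority while 0 means "no priority":

/* illustrative plugin-side hook; return type and accessor choice assumed */
static unsigned int example_get_srp_prio(struct task_struct *t)
{
	/* EDF-style choice: rank by relative deadline, using the period
	 * accessor that appears elsewhere in this patch; the truncation is
	 * for illustration only */
	return (unsigned int) get_rt_period(t);
}

static long example_activate_plugin(void)
{
	/* consumed by srp_exceeds_ceiling() via get_srp_prio above */
	get_srp_prio = example_get_srp_prio;
	return 0;
}
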
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 000000000000..bf75fde5450b
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,104 @@
1/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
2 *
3 *
4 */
5
6#include <asm/atomic.h>
7#include <asm/uaccess.h>
8#include <linux/spinlock.h>
9#include <linux/list.h>
10#include <linux/sched.h>
11#include <linux/completion.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/jobs.h>
16
17#include <litmus/sched_trace.h>
18
19static DECLARE_COMPLETION(ts_release);
20
21static long do_wait_for_ts_release(void)
22{
23 long ret = 0;
24
25 /* If the interruption races with a release, the completion object
26 * may have a non-zero counter. To avoid this problem, this should
27 * be replaced by wait_for_completion().
28 *
29 * For debugging purposes, this is interruptible for now.
30 */
31 ret = wait_for_completion_interruptible(&ts_release);
32
33 return ret;
34}
35
36int count_tasks_waiting_for_release(void)
37{
38 unsigned long flags;
39 int task_count = 0;
40 struct list_head *pos;
41
42 spin_lock_irqsave(&ts_release.wait.lock, flags);
43 list_for_each(pos, &ts_release.wait.task_list) {
44 task_count++;
45 }
46 spin_unlock_irqrestore(&ts_release.wait.lock, flags);
47
48 return task_count;
49}
50
51static long do_release_ts(lt_t start)
52{
53 int task_count = 0;
54 unsigned long flags;
55 struct list_head *pos;
56 struct task_struct *t;
57
58
59 spin_lock_irqsave(&ts_release.wait.lock, flags);
60 TRACE("<<<<<< synchronous task system release >>>>>>\n");
61
62 sched_trace_sys_release(&start);
63 list_for_each(pos, &ts_release.wait.task_list) {
64 t = (struct task_struct*) list_entry(pos,
65 struct __wait_queue,
66 task_list)->private;
67 task_count++;
68 litmus->release_at(t, start + t->rt_param.task_params.phase);
69 sched_trace_task_release(t);
70 }
71
72 spin_unlock_irqrestore(&ts_release.wait.lock, flags);
73
74 complete_n(&ts_release, task_count);
75
76 return task_count;
77}
78
79
80asmlinkage long sys_wait_for_ts_release(void)
81{
82 long ret = -EPERM;
83 struct task_struct *t = current;
84
85 if (is_realtime(t))
86 ret = do_wait_for_ts_release();
87
88 return ret;
89}
90
91
92asmlinkage long sys_release_ts(lt_t __user *__delay)
93{
94 long ret;
95 lt_t delay;
96
97 /* FIXME: check capabilities... */
98
99	ret = copy_from_user(&delay, __delay, sizeof(delay)) ? -EFAULT : 0;
100 if (ret == 0)
101 ret = do_release_ts(litmus_clock() + delay);
102
103 return ret;
104}
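
From userspace, the two syscalls above are typically reached via liblitmus wrappers (wait_for_ts_release() and release_ts() are assumed names, not part of this patch). Each real-time task blocks in sys_wait_for_ts_release(); a controller process then releases all waiting tasks at litmus_clock() + delay, with per-task phases applied by do_release_ts():

#include <litmus.h>	/* assumed liblitmus header providing the wrappers */

/* called by each real-time task once it has entered real-time mode */
static int wait_for_common_release(void)
{
	/* blocks in sys_wait_for_ts_release() until the controller fires */
	return wait_for_ts_release();
}

/* called by a separate controller process */
static int release_all_tasks(void)
{
	lt_t when = 1000000ULL;		/* 1 ms from now; lt_t is in ns */
	return release_ts(&when);	/* -> sys_release_ts() */
}
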
diff --git a/litmus/trace.c b/litmus/trace.c
new file mode 100644
index 000000000000..39200c8ff74e
--- /dev/null
+++ b/litmus/trace.c
@@ -0,0 +1,213 @@
1#include <linux/sched.h>
2#include <linux/module.h>
3#include <linux/uaccess.h>
4
5#include <litmus/ftdev.h>
6#include <litmus/litmus.h>
7#include <litmus/trace.h>
8
9/******************************************************************************/
10/* Allocation */
11/******************************************************************************/
12
13static struct ftdev overhead_dev;
14
15#define trace_ts_buf overhead_dev.minor[0].buf
16
17static unsigned int ts_seq_no = 0;
18
19static inline void __save_timestamp_cpu(unsigned long event,
20 uint8_t type, uint8_t cpu)
21{
22 unsigned int seq_no;
23 struct timestamp *ts;
24 seq_no = fetch_and_inc((int *) &ts_seq_no);
25 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
26 ts->event = event;
27 ts->timestamp = ft_timestamp();
28 ts->seq_no = seq_no;
29 ts->cpu = cpu;
30 ts->task_type = type;
31 ft_buffer_finish_write(trace_ts_buf, ts);
32 }
33}
34
35static void __add_timestamp_user(struct timestamp *pre_recorded)
36{
37 unsigned int seq_no;
38 struct timestamp *ts;
39 seq_no = fetch_and_inc((int *) &ts_seq_no);
40
41 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
42 *ts = *pre_recorded;
43 ts->seq_no = seq_no;
44 ft_buffer_finish_write(trace_ts_buf, ts);
45 }
46}
47
48static inline void __save_timestamp(unsigned long event,
49 uint8_t type)
50{
51 __save_timestamp_cpu(event, type, raw_smp_processor_id());
52}
53
54/* hack: fake timestamp to user-reported time, and record parts of the PID */
55feather_callback void save_timestamp_time(unsigned long event, unsigned long ptr)
56{
57 uint64_t* time = (uint64_t*) ptr;
58 unsigned int seq_no;
59 struct timestamp *ts;
60 seq_no = fetch_and_inc((int *) &ts_seq_no);
61 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
62 ts->event = event;
63 ts->timestamp = *time;
64 ts->seq_no = seq_no;
65 /* type takes lowest byte of PID */
66 ts->task_type = (uint8_t) current->pid;
67 /* cpu takes second-lowest byte of PID*/
68 ts->cpu = (uint8_t) (current->pid >> 8);
69
70 ft_buffer_finish_write(trace_ts_buf, ts);
71 }
72}
73
74feather_callback void save_timestamp_pid(unsigned long event)
75{
76 /* Abuse existing fields to partially export PID. */
77 __save_timestamp_cpu(event,
78 /* type takes lowest byte of PID */
79 (uint8_t) current->pid,
80 /* cpu takes second-lowest byte of PID*/
81 (uint8_t) (current->pid >> 8));
82}
83
84feather_callback void save_timestamp(unsigned long event)
85{
86 __save_timestamp(event, TSK_UNKNOWN);
87}
88
89feather_callback void save_timestamp_def(unsigned long event,
90 unsigned long type)
91{
92 __save_timestamp(event, (uint8_t) type);
93}
94
95feather_callback void save_timestamp_task(unsigned long event,
96 unsigned long t_ptr)
97{
98 int rt = is_realtime((struct task_struct *) t_ptr);
99 __save_timestamp(event, rt ? TSK_RT : TSK_BE);
100}
101
102feather_callback void save_timestamp_cpu(unsigned long event,
103 unsigned long cpu)
104{
105 __save_timestamp_cpu(event, TSK_UNKNOWN, cpu);
106}
107
108feather_callback void save_task_latency(unsigned long event,
109 unsigned long when_ptr)
110{
111 lt_t now = litmus_clock();
112 lt_t *when = (lt_t*) when_ptr;
113 unsigned int seq_no;
114 int cpu = raw_smp_processor_id();
115 struct timestamp *ts;
116
117 seq_no = fetch_and_inc((int *) &ts_seq_no);
118 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
119 ts->event = event;
120 ts->timestamp = now - *when;
121 ts->seq_no = seq_no;
122 ts->cpu = cpu;
123 ts->task_type = TSK_RT;
124 ft_buffer_finish_write(trace_ts_buf, ts);
125 }
126}
127
128/******************************************************************************/
129/* DEVICE FILE DRIVER */
130/******************************************************************************/
131
132/*
133 * should be 8M; it is the most we can ask of the buddy system allocator (MAX_ORDER)
134 * and we might not get as much
135 */
136#define NO_TIMESTAMPS (2 << 16)
137
138static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
139{
140 unsigned int count = NO_TIMESTAMPS;
141 while (count && !trace_ts_buf) {
142 printk("time stamp buffer: trying to allocate %u time stamps.\n", count);
143 ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
144 count /= 2;
145 }
146 return ftdev->minor[idx].buf ? 0 : -ENOMEM;
147}
148
149static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
150{
151 free_ft_buffer(ftdev->minor[idx].buf);
152 ftdev->minor[idx].buf = NULL;
153}
154
155static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
156 const char __user *from)
157{
158 ssize_t consumed = 0;
159 struct timestamp ts;
160
161 /* don't give us partial timestamps */
162 if (len % sizeof(ts))
163 return -EINVAL;
164
165 while (len >= sizeof(ts)) {
166 if (copy_from_user(&ts, from, sizeof(ts))) {
167 consumed = -EFAULT;
168 goto out;
169 }
170 len -= sizeof(ts);
171 from += sizeof(ts);
172 consumed += sizeof(ts);
173
174 __add_timestamp_user(&ts);
175 }
176
177out:
178 return consumed;
179}
180
181static int __init init_ft_overhead_trace(void)
182{
183 int err;
184
185 printk("Initializing Feather-Trace overhead tracing device.\n");
186 err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace");
187 if (err)
188 goto err_out;
189
190 overhead_dev.alloc = alloc_timestamp_buffer;
191 overhead_dev.free = free_timestamp_buffer;
192 overhead_dev.write = write_timestamp_from_user;
193
194 err = register_ftdev(&overhead_dev);
195 if (err)
196 goto err_dealloc;
197
198 return 0;
199
200err_dealloc:
201 ftdev_exit(&overhead_dev);
202err_out:
203 printk(KERN_WARNING "Could not register ft_trace module.\n");
204 return err;
205}
206
207static void __exit exit_ft_overhead_trace(void)
208{
209 ftdev_exit(&overhead_dev);
210}
211
212module_init(init_ft_overhead_trace);
213module_exit(exit_ft_overhead_trace);
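
A minimal reader sketch for the overhead-trace device registered above. The device node path is an assumption (ftdev naming is not part of this patch; the usual consumer is Feather-Trace's ftcat tool), and since the buffer holds fixed-size struct timestamp records the reader simply forwards raw bytes:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int dump_overhead_trace(const char *out_path)
{
	char buf[8192];
	ssize_t n;
	FILE *out;
	int in = open("/dev/litmus/ft_trace0", O_RDONLY);	/* path assumed */

	if (in < 0)
		return -1;
	out = fopen(out_path, "wb");
	if (!out) {
		close(in);
		return -1;
	}
	/* raw stream of struct timestamp records written by the callbacks above */
	while ((n = read(in, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t) n, out);
	fclose(out);
	close(in);
	return 0;
}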