aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile4
-rw-r--r--arch/arm/Kconfig8
-rw-r--r--arch/arm/include/asm/timex.h2
-rw-r--r--arch/arm/include/asm/unistd.h3
-rw-r--r--arch/arm/kernel/calls.S12
-rw-r--r--arch/arm/kernel/smp.c4
-rw-r--r--arch/arm/mach-realview/include/mach/timex.h27
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/include/asm/entry_arch.h1
-rw-r--r--arch/x86/include/asm/feather_trace.h17
-rw-r--r--arch/x86/include/asm/feather_trace_32.h115
-rw-r--r--arch/x86/include/asm/feather_trace_64.h124
-rw-r--r--arch/x86/include/asm/hw_irq.h3
-rw-r--r--arch/x86/include/asm/irq_vectors.h7
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/unistd_32.h6
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c17
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/ft_event.c118
-rw-r--r--arch/x86/kernel/irqinit.c3
-rw-r--r--arch/x86/kernel/smp.c31
-rw-r--r--arch/x86/kernel/syscall_table_32.S12
-rw-r--r--fs/exec.c13
-rw-r--r--fs/inode.c2
-rw-r--r--include/linux/completion.h1
-rw-r--r--include/linux/fs.h21
-rw-r--r--include/linux/hardirq.h4
-rw-r--r--include/linux/hrtimer.h32
-rw-r--r--include/linux/sched.h18
-rw-r--r--include/linux/smp.h5
-rw-r--r--include/linux/tick.h5
-rw-r--r--include/litmus/affinity.h80
-rw-r--r--include/litmus/bheap.h77
-rw-r--r--include/litmus/binheap.h206
-rw-r--r--include/litmus/budget.h35
-rw-r--r--include/litmus/clustered.h44
-rw-r--r--include/litmus/debug_trace.h37
-rw-r--r--include/litmus/edf_common.h25
-rw-r--r--include/litmus/fdso.h77
-rw-r--r--include/litmus/feather_buffer.h94
-rw-r--r--include/litmus/feather_trace.h65
-rw-r--r--include/litmus/fp_common.h105
-rw-r--r--include/litmus/fpmath.h145
-rw-r--r--include/litmus/ftdev.h55
-rw-r--r--include/litmus/jobs.h9
-rw-r--r--include/litmus/litmus.h262
-rw-r--r--include/litmus/litmus_proc.h25
-rw-r--r--include/litmus/locking.h28
-rw-r--r--include/litmus/preempt.h164
-rw-r--r--include/litmus/rt_domain.h182
-rw-r--r--include/litmus/rt_param.h237
-rw-r--r--include/litmus/sched_plugin.h111
-rw-r--r--include/litmus/sched_trace.h259
-rw-r--r--include/litmus/srp.h28
-rw-r--r--include/litmus/trace.h116
-rw-r--r--include/litmus/trace_irq.h23
-rw-r--r--include/litmus/unistd_32.h21
-rw-r--r--include/litmus/unistd_64.h33
-rw-r--r--include/litmus/wait.h57
-rw-r--r--include/trace/events/litmus.h231
-rw-r--r--kernel/exit.c4
-rw-r--r--kernel/fork.c7
-rw-r--r--kernel/hrtimer.c95
-rw-r--r--kernel/printk.c14
-rw-r--r--kernel/sched.c151
-rw-r--r--kernel/sched_fair.c3
-rw-r--r--kernel/sched_rt.c2
-rw-r--r--kernel/time/tick-sched.c47
-rw-r--r--litmus/Kconfig282
-rw-r--r--litmus/Makefile32
-rw-r--r--litmus/affinity.c42
-rw-r--r--litmus/bheap.c314
-rw-r--r--litmus/binheap.c388
-rw-r--r--litmus/budget.c113
-rw-r--r--litmus/clustered.c111
-rw-r--r--litmus/ctrldev.c149
-rw-r--r--litmus/edf_common.c200
-rw-r--r--litmus/fdso.c297
-rw-r--r--litmus/fp_common.c119
-rw-r--r--litmus/ft_event.c43
-rw-r--r--litmus/ftdev.c439
-rw-r--r--litmus/jobs.c57
-rw-r--r--litmus/litmus.c571
-rw-r--r--litmus/litmus_proc.c347
-rw-r--r--litmus/locking.c171
-rw-r--r--litmus/preempt.c133
-rw-r--r--litmus/rt_domain.c359
-rw-r--r--litmus/sched_cedf.c864
-rw-r--r--litmus/sched_gsn_edf.c1030
-rw-r--r--litmus/sched_litmus.c325
-rw-r--r--litmus/sched_pfair.c1074
-rw-r--r--litmus/sched_pfp.c1693
-rw-r--r--litmus/sched_plugin.c227
-rw-r--r--litmus/sched_psn_edf.c653
-rw-r--r--litmus/sched_task_trace.c241
-rw-r--r--litmus/sched_trace.c252
-rw-r--r--litmus/srp.c295
-rw-r--r--litmus/sync.c104
-rw-r--r--litmus/trace.c225
101 files changed, 14903 insertions, 36 deletions
diff --git a/Makefile b/Makefile
index 05f0d7b9aaf..8152a1b9f14 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 3 1VERSION = 3
2PATCHLEVEL = 0 2PATCHLEVEL = 0
3SUBLEVEL = 42 3SUBLEVEL = 42
4EXTRAVERSION = 4EXTRAVERSION =-litmus
5NAME = Sneaky Weasel 5NAME = Sneaky Weasel
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
@@ -711,7 +711,7 @@ export mod_strip_cmd
711 711
712 712
713ifeq ($(KBUILD_EXTMOD),) 713ifeq ($(KBUILD_EXTMOD),)
714core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ 714core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
715 715
716vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ 716vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
717 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ 717 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 77ed9c01735..6a05096dc42 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2152,3 +2152,11 @@ source "security/Kconfig"
2152source "crypto/Kconfig" 2152source "crypto/Kconfig"
2153 2153
2154source "lib/Kconfig" 2154source "lib/Kconfig"
2155
2156config ARCH_HAS_SEND_PULL_TIMERS
2157 def_bool n
2158
2159config ARCH_HAS_FEATHER_TRACE
2160 def_bool n
2161
2162source "litmus/Kconfig"
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
index 3be8de3adab..8a102a383a3 100644
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -16,9 +16,11 @@
16 16
17typedef unsigned long cycles_t; 17typedef unsigned long cycles_t;
18 18
19#ifndef get_cycles
19static inline cycles_t get_cycles (void) 20static inline cycles_t get_cycles (void)
20{ 21{
21 return 0; 22 return 0;
22} 23}
24#endif
23 25
24#endif 26#endif
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 2c04ed5efeb..0196edf6ee5 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -403,6 +403,9 @@
403#define __NR_sendmmsg (__NR_SYSCALL_BASE+374) 403#define __NR_sendmmsg (__NR_SYSCALL_BASE+374)
404#define __NR_setns (__NR_SYSCALL_BASE+375) 404#define __NR_setns (__NR_SYSCALL_BASE+375)
405 405
406#define __NR_LITMUS (__NR_SYSCALL_BASE+376)
407#include <litmus/unistd_32.h>
408
406/* 409/*
407 * The following SWIs are ARM private. 410 * The following SWIs are ARM private.
408 */ 411 */
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 80f7896cc01..ed2ae934127 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -385,6 +385,18 @@
385 CALL(sys_syncfs) 385 CALL(sys_syncfs)
386 CALL(sys_sendmmsg) 386 CALL(sys_sendmmsg)
387/* 375 */ CALL(sys_setns) 387/* 375 */ CALL(sys_setns)
388 CALL(sys_set_rt_task_param)
389 CALL(sys_get_rt_task_param)
390 CALL(sys_complete_job)
391 CALL(sys_od_open)
392/* 380 */ CALL(sys_od_close)
393 CALL(sys_litmus_lock)
394 CALL(sys_litmus_unlock)
395 CALL(sys_query_job_no)
396 CALL(sys_wait_for_job_release)
397/* 385 */ CALL(sys_wait_for_ts_release)
398 CALL(sys_release_ts)
399 CALL(sys_null_call)
388#ifndef syscalls_counted 400#ifndef syscalls_counted
389.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls 401.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
390#define syscalls_counted 402#define syscalls_counted
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index d225fc8f6cd..ad4d4b93977 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -41,6 +41,8 @@
41#include <asm/ptrace.h> 41#include <asm/ptrace.h>
42#include <asm/localtimer.h> 42#include <asm/localtimer.h>
43 43
44#include <litmus/preempt.h>
45
44/* 46/*
45 * as from 2.5, kernels no longer have an init_tasks structure 47 * as from 2.5, kernels no longer have an init_tasks structure
46 * so we need some other way of telling a new secondary core 48 * so we need some other way of telling a new secondary core
@@ -644,6 +646,8 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
644 break; 646 break;
645 647
646 case IPI_RESCHEDULE: 648 case IPI_RESCHEDULE:
649 /* LITMUS^RT: take action based on scheduler state */
650 sched_state_ipi();
647 scheduler_ipi(); 651 scheduler_ipi();
648 break; 652 break;
649 653
diff --git a/arch/arm/mach-realview/include/mach/timex.h b/arch/arm/mach-realview/include/mach/timex.h
index 4eeb069373c..e8bcc40d1f0 100644
--- a/arch/arm/mach-realview/include/mach/timex.h
+++ b/arch/arm/mach-realview/include/mach/timex.h
@@ -21,3 +21,30 @@
21 */ 21 */
22 22
23#define CLOCK_TICK_RATE (50000000 / 16) 23#define CLOCK_TICK_RATE (50000000 / 16)
24
25#if defined(CONFIG_MACH_REALVIEW_PB11MP) || defined(CONFIG_MACH_REALVIEW_PB1176)
26
/*
 * Cycle-counter read used as get_cycles on the PB11MP / PB1176 builds
 * selected by the surrounding #if. Returns whatever the CP15 read below
 * (coprocessor register c15, c12, opcode 1) yields.
 */
27static inline unsigned long realview_get_arm11_cp15_ccnt(void)
28{
29 unsigned long cycles;
30 /* Read CP15 CCNT register. */
31 asm volatile ("mrc p15, 0, %0, c15, c12, 1" : "=r" (cycles));
32 return cycles;
33}
34
35#define get_cycles realview_get_arm11_cp15_ccnt
36
37#elif defined(CONFIG_MACH_REALVIEW_PBA8)
38
39
/*
 * Cycle-counter read used as get_cycles on the PBA8 build selected by the
 * surrounding #elif. Returns whatever the CP15 read below (coprocessor
 * register c9, c13, opcode 0) yields.
 */
40static inline unsigned long realview_get_a8_cp15_ccnt(void)
41{
42 unsigned long cycles;
43 /* Read CP15 CCNT register. */
44 asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
45 return cycles;
46}
47
48#define get_cycles realview_get_a8_cp15_ccnt
49
50#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 37357a599dc..9f5e14388e1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2166,3 +2166,11 @@ source "crypto/Kconfig"
2166source "arch/x86/kvm/Kconfig" 2166source "arch/x86/kvm/Kconfig"
2167 2167
2168source "lib/Kconfig" 2168source "lib/Kconfig"
2169
2170config ARCH_HAS_FEATHER_TRACE
2171 def_bool y
2172
2173config ARCH_HAS_SEND_PULL_TIMERS
2174 def_bool y
2175
2176source "litmus/Kconfig"
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 1cd6d26a0a8..3b0d7ef959b 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -13,6 +13,7 @@
13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) 13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) 14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) 15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
16BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR)
16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 17BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
17BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) 18BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
18 19
diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h
new file mode 100644
index 00000000000..4fd31633405
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace.h
@@ -0,0 +1,17 @@
1#ifndef _ARCH_FEATHER_TRACE_H
2#define _ARCH_FEATHER_TRACE_H
3
4#include <asm/msr.h>
5
/* Feather-Trace timestamp on x86: the value returned by __native_read_tsc(). */
6static inline unsigned long long ft_timestamp(void)
7{
8 return __native_read_tsc();
9}
10
11#ifdef CONFIG_X86_32
12#include "feather_trace_32.h"
13#else
14#include "feather_trace_64.h"
15#endif
16
17#endif
diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h
new file mode 100644
index 00000000000..75e81a9f938
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace_32.h
@@ -0,0 +1,115 @@
1/* Copyright (c) 2007-2012 Björn Brandenburg, <bbb@mpi-sws.org>
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24/* Do not directly include this file. Include feather_trace.h instead */
25
26#define feather_callback __attribute__((regparm(3))) __attribute__((used))
27
28/*
29 * Make the compiler reload any register that is not saved in a cdecl function
30 * call (minus the registers that we explicitly clobber as output registers).
31 */
32#define __FT_CLOBBER_LIST0 "memory", "cc", "eax", "edx", "ecx"
33#define __FT_CLOBBER_LIST1 "memory", "cc", "eax", "ecx"
34#define __FT_CLOBBER_LIST2 "memory", "cc", "eax"
35#define __FT_CLOBBER_LIST3 "memory", "cc", "eax"
36
37#define __FT_TMP1(x) "=d" (x)
38#define __FT_ARG1(x) "0" ((long) (x))
39#define __FT_TMP2(x) "=c" (x)
40#define __FT_ARG2(x) "1" ((long) (x))
41
42#define __FT_ARG3(x) "r" ((long) (x))
43
/*
 * Emit a trace point (32-bit) that calls `callback` with no arguments.
 * The leading jmp targets label 2, i.e. it skips the call as assembled.
 * An entry (id, 0, address of the jmp, address of label 2) is placed in
 * the __event_table section so the jmp can be located and patched later.
 */
44#define ft_event(id, callback) \
45 __asm__ __volatile__( \
46 "1: jmp 2f \n\t" \
47 " call " #callback " \n\t" \
48 ".section __event_table, \"aw\" \n\t" \
49 ".long " #id ", 0, 1b, 2f \n\t" \
50 ".previous \n\t" \
51 "2: \n\t" \
52 : : : __FT_CLOBBER_LIST0)
53
/*
 * Like ft_event, but loads `id` into eax before the call so the callback
 * receives it as its first register argument (feather_callback is declared
 * regparm(3) in this header).
 */
54#define ft_event0(id, callback) \
55 __asm__ __volatile__( \
56 "1: jmp 2f \n\t" \
57 " movl $" #id ", %%eax \n\t" \
58 " call " #callback " \n\t" \
59 ".section __event_table, \"aw\" \n\t" \
60 ".long " #id ", 0, 1b, 2f \n\t" \
61 ".previous \n\t" \
62 "2: \n\t" \
63 : : : __FT_CLOBBER_LIST0)
64
/*
 * Trace point (32-bit) passing `id` in eax and `param` in edx.
 * `param` is tied to the edx output temporary (__FT_TMP1 / __FT_ARG1), which
 * tells the compiler that edx does not survive the asm statement.
 * NOTE(review): the expansion ends in "while (0);" -- a call site that adds
 * its own ';' produces an extra empty statement (relevant in unbraced
 * if/else).
 */
65#define ft_event1(id, callback, param) \
66 do { \
67 long __ft_tmp1; \
68 __asm__ __volatile__( \
69 "1: jmp 2f \n\t" \
70 " movl $" #id ", %%eax \n\t" \
71 " call " #callback " \n\t" \
72 ".section __event_table, \"aw\" \n\t" \
73 ".long " #id ", 0, 1b, 2f \n\t" \
74 ".previous \n\t" \
75 "2: \n\t" \
76 : __FT_TMP1(__ft_tmp1) \
77 : __FT_ARG1(param) \
78 : __FT_CLOBBER_LIST1); \
79 } while (0);
80
/*
 * Trace point (32-bit) passing `id` in eax, `param` in edx and `param2` in
 * ecx. Both parameters are tied to output temporaries so the compiler treats
 * edx and ecx as overwritten by the asm statement.
 * NOTE(review): the expansion ends in "while (0);" -- a call site that adds
 * its own ';' produces an extra empty statement.
 */
81#define ft_event2(id, callback, param, param2) \
82 do { \
83 long __ft_tmp1, __ft_tmp2; \
84 __asm__ __volatile__( \
85 "1: jmp 2f \n\t" \
86 " movl $" #id ", %%eax \n\t" \
87 " call " #callback " \n\t" \
88 ".section __event_table, \"aw\" \n\t" \
89 ".long " #id ", 0, 1b, 2f \n\t" \
90 ".previous \n\t" \
91 "2: \n\t" \
92 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \
93 : __FT_ARG1(param), __FT_ARG2(param2) \
94 : __FT_CLOBBER_LIST2); \
95 } while (0);
96
97
/*
 * Trace point (32-bit) passing `id` in eax, `param` in edx, `param2` in ecx
 * and `param3` in a 4-byte stack slot that is reserved before and released
 * after the call.
 *
 * Operand numbering: outputs are numbered first, so %0/%1 are the edx/ecx
 * temporaries, %2/%3 are param/param2 (tied to %0/%1) and %4 is param3.
 * The stack slot must therefore be filled from %4; using %2 would store
 * `param` a second time and never pass `param3`.
 *
 * NOTE(review): the expansion ends in "while (0);" -- a call site that adds
 * its own ';' produces an extra empty statement.
 */
98#define ft_event3(id, callback, param, param2, param3) \
99 do { \
100 long __ft_tmp1, __ft_tmp2; \
101 __asm__ __volatile__( \
102 "1: jmp 2f \n\t" \
103 " subl $4, %%esp \n\t" \
104 " movl $" #id ", %%eax \n\t" \
105 " movl %4, (%%esp) \n\t" \
106 " call " #callback " \n\t" \
107 " addl $4, %%esp \n\t" \
108 ".section __event_table, \"aw\" \n\t" \
109 ".long " #id ", 0, 1b, 2f \n\t" \
110 ".previous \n\t" \
111 "2: \n\t" \
112 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \
113 : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \
114 : __FT_CLOBBER_LIST3); \
115 } while (0);
diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h
new file mode 100644
index 00000000000..5ce49e2eebb
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace_64.h
@@ -0,0 +1,124 @@
1/* Copyright (c) 2010 Andrea Bastoni, <bastoni@cs.unc.edu>
2 * Copyright (c) 2012 Björn Brandenburg, <bbb@mpi-sws.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25/* Do not directly include this file. Include feather_trace.h instead */
26
27/* regparm is the default on x86_64 */
28#define feather_callback __attribute__((used))
29
/*
 * Assembler fragment that appends one 8-byte-aligned record of four quads
 * (id, 0, from, to) to the __event_table section and then switches back to
 * the previous section.
 */
30#define __FT_EVENT_TABLE(id,from,to) \
31 ".section __event_table, \"aw\"\n\t" \
32 ".balign 8\n\t" \
33 ".quad " #id ", 0, " #from ", " #to " \n\t" \
34 ".previous \n\t"
35
36/*
37 * x86_64 caller only owns rbp, rbx, r12-r15;
38 * the callee can freely modify the others.
39 */
40#define __FT_CLOBBER_LIST0 "memory", "cc", "rdi", "rsi", "rdx", "rcx", \
41 "r8", "r9", "r10", "r11", "rax"
42
43#define __FT_CLOBBER_LIST1 "memory", "cc", "rdi", "rdx", "rcx", \
44 "r8", "r9", "r10", "r11", "rax"
45
46#define __FT_CLOBBER_LIST2 "memory", "cc", "rdi", "rcx", \
47 "r8", "r9", "r10", "r11", "rax"
48
49#define __FT_CLOBBER_LIST3 "memory", "cc", "rdi", \
50 "r8", "r9", "r10", "r11", "rax"
51
52/* The registers RDI, RSI, RDX, RCX, R8 and R9 are used for integer and pointer
53 * arguments. */
54
55/* RSI */
56#define __FT_TMP1(x) "=S" (x)
57#define __FT_ARG1(x) "0" ((long) (x))
58
59/* RDX */
60#define __FT_TMP2(x) "=d" (x)
61#define __FT_ARG2(x) "1" ((long) (x))
62
63/* RCX */
64#define __FT_TMP3(x) "=c" (x)
65#define __FT_ARG3(x) "2" ((long) (x))
66
/*
 * Emit a trace point (64-bit) that calls `callback` with no arguments.
 * The leading jmp targets label 2, i.e. it skips the call as assembled; the
 * table record (id, 0, 1b, 2f) lets the jmp be located and patched later.
 */
67#define ft_event(id, callback) \
68 __asm__ __volatile__( \
69 "1: jmp 2f \n\t" \
70 " call " #callback " \n\t" \
71 __FT_EVENT_TABLE(id,1b,2f) \
72 "2: \n\t" \
73 : : : __FT_CLOBBER_LIST0)
74
/* Like ft_event, but loads `id` into rdi before calling `callback`. */
75#define ft_event0(id, callback) \
76 __asm__ __volatile__( \
77 "1: jmp 2f \n\t" \
78 " movq $" #id ", %%rdi \n\t" \
79 " call " #callback " \n\t" \
80 __FT_EVENT_TABLE(id,1b,2f) \
81 "2: \n\t" \
82 : : : __FT_CLOBBER_LIST0)
83
/*
 * Trace point (64-bit) passing `id` in rdi and `param` in rsi. `param` is
 * tied to the rsi output temporary (__FT_TMP1 / __FT_ARG1), marking rsi as
 * overwritten by the asm statement.
 * NOTE(review): the expansion ends in "while (0);" -- a call site that adds
 * its own ';' produces an extra empty statement.
 */
84#define ft_event1(id, callback, param) \
85 do { \
86 long __ft_tmp1; \
87 __asm__ __volatile__( \
88 "1: jmp 2f \n\t" \
89 " movq $" #id ", %%rdi \n\t" \
90 " call " #callback " \n\t" \
91 __FT_EVENT_TABLE(id,1b,2f) \
92 "2: \n\t" \
93 : __FT_TMP1(__ft_tmp1) \
94 : __FT_ARG1(param) \
95 : __FT_CLOBBER_LIST1); \
96 } while (0);
97
/*
 * Trace point (64-bit) passing `id` in rdi, `param` in rsi and `param2` in
 * rdx; both parameters are tied to output temporaries for those registers.
 * NOTE(review): the expansion ends in "while (0);" -- a call site that adds
 * its own ';' produces an extra empty statement.
 */
98#define ft_event2(id, callback, param, param2) \
99 do { \
100 long __ft_tmp1, __ft_tmp2; \
101 __asm__ __volatile__( \
102 "1: jmp 2f \n\t" \
103 " movq $" #id ", %%rdi \n\t" \
104 " call " #callback " \n\t" \
105 __FT_EVENT_TABLE(id,1b,2f) \
106 "2: \n\t" \
107 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \
108 : __FT_ARG1(param), __FT_ARG2(param2) \
109 : __FT_CLOBBER_LIST2); \
110 } while (0);
111
/*
 * Trace point (64-bit) passing `id` in rdi, `param` in rsi, `param2` in rdx
 * and `param3` in rcx; each parameter is tied to an output temporary for its
 * register, so no stack slot is needed here.
 * NOTE(review): the expansion ends in "while (0);" -- a call site that adds
 * its own ';' produces an extra empty statement.
 */
112#define ft_event3(id, callback, param, param2, param3) \
113 do { \
114 long __ft_tmp1, __ft_tmp2, __ft_tmp3; \
115 __asm__ __volatile__( \
116 "1: jmp 2f \n\t" \
117 " movq $" #id ", %%rdi \n\t" \
118 " call " #callback " \n\t" \
119 __FT_EVENT_TABLE(id,1b,2f) \
120 "2: \n\t" \
121 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2), __FT_TMP3(__ft_tmp3) \
122 : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \
123 : __FT_CLOBBER_LIST3); \
124 } while (0);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index bb9efe8706e..c490d89a9b7 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -77,6 +77,8 @@ extern void threshold_interrupt(void);
77extern void call_function_interrupt(void); 77extern void call_function_interrupt(void);
78extern void call_function_single_interrupt(void); 78extern void call_function_single_interrupt(void);
79 79
80extern void pull_timers_interrupt(void);
81
80/* IOAPIC */ 82/* IOAPIC */
81#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) 83#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
82extern unsigned long io_apic_irqs; 84extern unsigned long io_apic_irqs;
@@ -155,6 +157,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
155extern void smp_reschedule_interrupt(struct pt_regs *); 157extern void smp_reschedule_interrupt(struct pt_regs *);
156extern void smp_call_function_interrupt(struct pt_regs *); 158extern void smp_call_function_interrupt(struct pt_regs *);
157extern void smp_call_function_single_interrupt(struct pt_regs *); 159extern void smp_call_function_single_interrupt(struct pt_regs *);
160extern void smp_pull_timers_interrupt(struct pt_regs *);
158#ifdef CONFIG_X86_32 161#ifdef CONFIG_X86_32
159extern void smp_invalidate_interrupt(struct pt_regs *); 162extern void smp_invalidate_interrupt(struct pt_regs *);
160#else 163#else
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6e976ee3b3e..99a44cf9845 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -135,6 +135,13 @@
135#define INVALIDATE_TLB_VECTOR_START \ 135#define INVALIDATE_TLB_VECTOR_START \
136 (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1) 136 (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
137 137
138/*
139 * LITMUS^RT pull timers IRQ vector
140 * Make sure it's below the above max 32 vectors.
141 */
142#define PULL_TIMERS_VECTOR 0xce
143
144
138#define NR_VECTORS 256 145#define NR_VECTORS 256
139 146
140#define FPU_IRQ 13 147#define FPU_IRQ 13
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e5f724834ed..d8e4b738bec 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -164,6 +164,10 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
164extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); 164extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
165extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 165extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
166extern unsigned short num_cache_leaves; 166extern unsigned short num_cache_leaves;
167#ifdef CONFIG_SYSFS
168extern int get_shared_cpu_map(cpumask_var_t mask,
169 unsigned int cpu, int index);
170#endif
167 171
168extern void detect_extended_topology(struct cpuinfo_x86 *c); 172extern void detect_extended_topology(struct cpuinfo_x86 *c);
169extern void detect_ht(struct cpuinfo_x86 *c); 173extern void detect_ht(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 593485b38ab..2f6e127db30 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -353,9 +353,13 @@
353#define __NR_sendmmsg 345 353#define __NR_sendmmsg 345
354#define __NR_setns 346 354#define __NR_setns 346
355 355
356#define __NR_LITMUS 347
357
358#include "litmus/unistd_32.h"
359
356#ifdef __KERNEL__ 360#ifdef __KERNEL__
357 361
358#define NR_syscalls 347 362#define NR_syscalls (347 + NR_litmus_syscalls) /* parenthesized: the sum must stay one value inside larger expressions, e.g. sizeof(x) * NR_syscalls */
359 363
360#define __ARCH_WANT_IPC_PARSE_VERSION 364#define __ARCH_WANT_IPC_PARSE_VERSION
361#define __ARCH_WANT_OLD_READDIR 365#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 705bf139288..e347f077378 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -682,6 +682,10 @@ __SYSCALL(__NR_sendmmsg, sys_sendmmsg)
682#define __NR_setns 308 682#define __NR_setns 308
683__SYSCALL(__NR_setns, sys_setns) 683__SYSCALL(__NR_setns, sys_setns)
684 684
685#define __NR_LITMUS 309
686
687#include "litmus/unistd_64.h"
688
685#ifndef __NO_STUBS 689#ifndef __NO_STUBS
686#define __ARCH_WANT_OLD_READDIR 690#define __ARCH_WANT_OLD_READDIR
687#define __ARCH_WANT_OLD_STAT 691#define __ARCH_WANT_OLD_STAT
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 90b06d4daee..d727f8f9433 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -116,6 +116,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
116obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o 116obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
117obj-$(CONFIG_OF) += devicetree.o 117obj-$(CONFIG_OF) += devicetree.o
118 118
119obj-$(CONFIG_FEATHER_TRACE) += ft_event.o
120
119### 121###
120# 64 bit specific files 122# 64 bit specific files
121ifeq ($(CONFIG_X86_64),y) 123ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fde44284cf2..d486002087a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -746,6 +746,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
746static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); 746static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
747#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) 747#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
748 748
749/* returns CPUs that share the index cache with cpu */
/*
 * Copies the shared_cpu_map of cache leaf `index` of `cpu` into `mask`.
 * If `index` is >= num_cache_leaves it is clamped to the last leaf and the
 * clamped index is returned; otherwise the return value is 0.
 * NOTE(review): there is no lower-bound check -- a negative `index`, or
 * num_cache_leaves == 0 (clamp yields -1), indexes before the per-cpu array.
 * Confirm callers cannot trigger this.
 */
750int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
751{
752 int ret = 0;
753 struct _cpuid4_info *this_leaf;
754
755 if (index >= num_cache_leaves) {
756 index = num_cache_leaves - 1;
757 ret = index;
758 }
759
760 this_leaf = CPUID4_INFO_IDX(cpu,index);
761 cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map));
762
763 return ret;
764}
765
749#ifdef CONFIG_SMP 766#ifdef CONFIG_SMP
750 767
751static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) 768static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8a445a0c989..47a4bcd2e50 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1003,6 +1003,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \
1003 call_function_interrupt smp_call_function_interrupt 1003 call_function_interrupt smp_call_function_interrupt
1004apicinterrupt RESCHEDULE_VECTOR \ 1004apicinterrupt RESCHEDULE_VECTOR \
1005 reschedule_interrupt smp_reschedule_interrupt 1005 reschedule_interrupt smp_reschedule_interrupt
1006apicinterrupt PULL_TIMERS_VECTOR \
1007 pull_timers_interrupt smp_pull_timers_interrupt
1006#endif 1008#endif
1007 1009
1008apicinterrupt ERROR_APIC_VECTOR \ 1010apicinterrupt ERROR_APIC_VECTOR \
diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c
new file mode 100644
index 00000000000..37cc3325271
--- /dev/null
+++ b/arch/x86/kernel/ft_event.c
@@ -0,0 +1,118 @@
1#include <linux/types.h>
2
3#include <litmus/feather_trace.h>
4
5/* the feather trace management functions assume
6 * exclusive access to the event table
7 */
8
9#ifndef CONFIG_DEBUG_RODATA
10
11#define BYTE_JUMP 0xeb
12#define BYTE_JUMP_LEN 0x02
13
14/* for each event, there is an entry in the event table */
/*
 * Field order matches the four values the ft_event* macros emit per trace
 * point: (id, 0, address of the jmp, address after the call).
 */
15struct trace_event {
16 long id; /* event id given to the ft_event* macro */
17 long count; /* enable count; emitted as 0 by the macros */
18 long start_addr; /* address of the jmp instruction (label 1) */
19 long end_addr; /* address just past the call (label 2) */
20};
21
22extern struct trace_event __start___event_table[];
23extern struct trace_event __stop___event_table[];
24
25/* Workaround: if no events are defined, then the event_table section does not
26 * exist and the above references cause linker errors. This could probably be
27 * fixed by adjusting the linker script, but it is easier to maintain for us if
28 * we simply create a dummy symbol in the event table section.
29 */
30int __event_table_dummy[0] __attribute__ ((section("__event_table")));
31
/*
 * Increment the enable count of every event-table entry whose id matches.
 * When an entry's count becomes 1 and the byte at start_addr is BYTE_JUMP,
 * the following byte (the jump displacement) is set to 0, so the jump
 * lands on the instruction right after it instead of skipping the call.
 *
 * Returns the number of table entries with a matching id (counted whether
 * or not they were patched) and logs that number at KERN_DEBUG.
 */
32int ft_enable_event(unsigned long id)
33{
34 struct trace_event* te = __start___event_table;
35 int count = 0;
36 char* delta;
37 unsigned char* instr;
38
39 while (te < __stop___event_table) {
40 if (te->id == id && ++te->count == 1) {
41 instr = (unsigned char*) te->start_addr;
42 /* make sure we don't clobber something wrong */
43 if (*instr == BYTE_JUMP) {
44 delta = (((unsigned char*) te->start_addr) + 1);
45 *delta = 0;
46 }
47 }
48 if (te->id == id)
49 count++;
50 te++;
51 }
52
53 printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count);
54 return count;
55}
56
/*
 * Decrement the enable count of every event-table entry whose id matches.
 * When an entry's count drops to 0 and the byte at start_addr is BYTE_JUMP,
 * the jump displacement is restored to (end_addr - start_addr -
 * BYTE_JUMP_LEN), so the jump skips the call again.
 *
 * Entries whose count is already 0 (or below) are left alone: letting the
 * count go negative on an unbalanced disable would keep a later
 * ft_enable_event() from ever seeing the count reach 1.
 *
 * Returns the number of table entries with a matching id (counted whether
 * or not they were patched) and logs that number at KERN_DEBUG.
 */
57int ft_disable_event(unsigned long id)
58{
59 struct trace_event* te = __start___event_table;
60 int count = 0;
61 unsigned char* delta;
62 unsigned char* instr;
63
64 while (te < __stop___event_table) {
/* only decrement entries that are currently enabled */
65 if (te->id == id && te->count > 0 && --te->count == 0) {
66 instr = (unsigned char*) te->start_addr;
67 if (*instr == BYTE_JUMP) {
68 delta = (((unsigned char*) te->start_addr) + 1);
69 *delta = te->end_addr - te->start_addr -
70 BYTE_JUMP_LEN;
71 }
72 }
73 if (te->id == id)
74 count++;
75 te++;
76 }
77
78 printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count);
79 return count;
80}
81
/*
 * Disable every event-table entry with a non-zero count: if the byte at
 * start_addr is BYTE_JUMP, restore the jump displacement to
 * (end_addr - start_addr - BYTE_JUMP_LEN) and reset the count to 0.
 * The count is only reset when the BYTE_JUMP check passes.
 *
 * Returns the number of entries that were reset.
 */
82int ft_disable_all_events(void)
83{
84 struct trace_event* te = __start___event_table;
85 int count = 0;
86 char* delta;
87 unsigned char* instr;
88
89 while (te < __stop___event_table) {
90 if (te->count) {
91 instr = (unsigned char*) te->start_addr;
92 if (*instr == BYTE_JUMP) {
93 delta = (((unsigned char*) te->start_addr)
94 + 1);
95 *delta = te->end_addr - te->start_addr -
96 BYTE_JUMP_LEN;
97 te->count = 0;
98 count++;
99 }
100 }
101 te++;
102 }
103 return count;
104}
105
/*
 * Return the enable count of the first event-table entry whose id matches,
 * or 0 if no entry has that id. Later entries with the same id are not
 * examined.
 */
106int ft_is_event_enabled(unsigned long id)
107{
108 struct trace_event* te = __start___event_table;
109
110 while (te < __stop___event_table) {
111 if (te->id == id)
112 return te->count;
113 te++;
114 }
115 return 0;
116}
117
118#endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f470e4ef993..48acf71c653 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -252,6 +252,9 @@ static void __init smp_intr_init(void)
252 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, 252 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
253 call_function_single_interrupt); 253 call_function_single_interrupt);
254 254
255 /* IPI for hrtimer pulling on remote cpus */
256 alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt);
257
255 /* Low priority IPI to cleanup after moving an irq */ 258 /* Low priority IPI to cleanup after moving an irq */
256 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 259 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
257 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); 260 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 013e7eba83b..ed4c4f54e2a 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -23,6 +23,10 @@
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/gfp.h> 24#include <linux/gfp.h>
25 25
26#include <litmus/preempt.h>
27#include <litmus/debug_trace.h>
28#include <litmus/trace.h>
29
26#include <asm/mtrr.h> 30#include <asm/mtrr.h>
27#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
28#include <asm/mmu_context.h> 32#include <asm/mmu_context.h>
@@ -118,6 +122,7 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 122 WARN_ON(1);
119 return; 123 return;
120 } 124 }
125 TS_SEND_RESCHED_START(cpu);
121 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); 126 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 127}
123 128
@@ -147,6 +152,16 @@ void native_send_call_func_ipi(const struct cpumask *mask)
147 free_cpumask_var(allbutself); 152 free_cpumask_var(allbutself);
148} 153}
149 154
155/* trigger timers on remote cpu */
/*
 * Sends an IPI with PULL_TIMERS_VECTOR to `cpu`. If `cpu` is offline, a
 * warning is raised via WARN_ON(1) and nothing is sent.
 */
156void smp_send_pull_timers(int cpu)
157{
158 if (unlikely(cpu_is_offline(cpu))) {
159 WARN_ON(1);
160 return;
161 }
162 apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR);
163}
164
150/* 165/*
151 * this function calls the 'stop' function on all other CPUs in the system. 166 * this function calls the 'stop' function on all other CPUs in the system.
152 */ 167 */
@@ -199,8 +214,15 @@ static void native_stop_other_cpus(int wait)
199void smp_reschedule_interrupt(struct pt_regs *regs) 214void smp_reschedule_interrupt(struct pt_regs *regs)
200{ 215{
201 ack_APIC_irq(); 216 ack_APIC_irq();
217 /* LITMUS^RT: this IPI might need to trigger the sched state machine. */
218 sched_state_ipi();
202 inc_irq_stat(irq_resched_count); 219 inc_irq_stat(irq_resched_count);
220 /*
221 * LITMUS^RT: starting from 3.0 scheduler_ipi() actually does something.
222 * This may increase IPI latencies compared with previous versions.
223 */
203 scheduler_ipi(); 224 scheduler_ipi();
225 TS_SEND_RESCHED_END;
204 /* 226 /*
205 * KVM uses this interrupt to force a cpu out of guest mode 227 * KVM uses this interrupt to force a cpu out of guest mode
206 */ 228 */
@@ -224,6 +246,15 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
224 irq_exit(); 246 irq_exit();
225} 247}
226 248
249extern void hrtimer_pull(void);
250
251void smp_pull_timers_interrupt(struct pt_regs *regs)
252{
253 ack_APIC_irq();
254 TRACE("pull timer interrupt\n");
255 hrtimer_pull();
256}
257
227struct smp_ops smp_ops = { 258struct smp_ops smp_ops = {
228 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, 259 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
229 .smp_prepare_cpus = native_smp_prepare_cpus, 260 .smp_prepare_cpus = native_smp_prepare_cpus,
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index fbb0a045a1a..d0126222b39 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -346,3 +346,15 @@ ENTRY(sys_call_table)
346 .long sys_syncfs 346 .long sys_syncfs
347 .long sys_sendmmsg /* 345 */ 347 .long sys_sendmmsg /* 345 */
348 .long sys_setns 348 .long sys_setns
349 .long sys_set_rt_task_param /* LITMUS^RT 347 */
350 .long sys_get_rt_task_param
351 .long sys_complete_job
352 .long sys_od_open
353 .long sys_od_close
354 .long sys_litmus_lock /* +5 */
355 .long sys_litmus_unlock
356 .long sys_query_job_no
357 .long sys_wait_for_job_release
358 .long sys_wait_for_ts_release
359 .long sys_release_ts /* +10 */
360 .long sys_null_call
diff --git a/fs/exec.c b/fs/exec.c
index 044c13ffdc4..188d5974f3e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -19,7 +19,7 @@
19 * current->executable is only used by the procfs. This allows a dispatch 19 * current->executable is only used by the procfs. This allows a dispatch
20 * table to check for several different types of binary formats. We keep 20 * table to check for several different types of binary formats. We keep
21 * trying until we recognize the file or we run out of supported binary 21 * trying until we recognize the file or we run out of supported binary
22 * formats. 22 * formats.
23 */ 23 */
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
@@ -56,6 +56,8 @@
56#include <linux/oom.h> 56#include <linux/oom.h>
57#include <linux/compat.h> 57#include <linux/compat.h>
58 58
59#include <litmus/litmus.h>
60
59#include <asm/uaccess.h> 61#include <asm/uaccess.h>
60#include <asm/mmu_context.h> 62#include <asm/mmu_context.h>
61#include <asm/tlb.h> 63#include <asm/tlb.h>
@@ -85,7 +87,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert)
85 insert ? list_add(&fmt->lh, &formats) : 87 insert ? list_add(&fmt->lh, &formats) :
86 list_add_tail(&fmt->lh, &formats); 88 list_add_tail(&fmt->lh, &formats);
87 write_unlock(&binfmt_lock); 89 write_unlock(&binfmt_lock);
88 return 0; 90 return 0;
89} 91}
90 92
91EXPORT_SYMBOL(__register_binfmt); 93EXPORT_SYMBOL(__register_binfmt);
@@ -1160,7 +1162,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1160 group */ 1162 group */
1161 1163
1162 current->self_exec_id++; 1164 current->self_exec_id++;
1163 1165
1164 flush_signal_handlers(current, 0); 1166 flush_signal_handlers(current, 0);
1165 flush_old_files(current->files); 1167 flush_old_files(current->files);
1166} 1168}
@@ -1250,8 +1252,8 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1250 return res; 1252 return res;
1251} 1253}
1252 1254
1253/* 1255/*
1254 * Fill the binprm structure from the inode. 1256 * Fill the binprm structure from the inode.
1255 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes 1257 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
1256 * 1258 *
1257 * This may be called multiple times for binary chains (scripts for example). 1259 * This may be called multiple times for binary chains (scripts for example).
@@ -1461,6 +1463,7 @@ static int do_execve_common(const char *filename,
1461 goto out_unmark; 1463 goto out_unmark;
1462 1464
1463 sched_exec(); 1465 sched_exec();
1466 litmus_exec();
1464 1467
1465 bprm->file = file; 1468 bprm->file = file;
1466 bprm->filename = filename; 1469 bprm->filename = filename;
diff --git a/fs/inode.c b/fs/inode.c
index 09f334bf27d..7aa468a4b19 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -307,6 +307,8 @@ void inode_init_once(struct inode *inode)
307#ifdef CONFIG_FSNOTIFY 307#ifdef CONFIG_FSNOTIFY
308 INIT_HLIST_HEAD(&inode->i_fsnotify_marks); 308 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
309#endif 309#endif
310 INIT_LIST_HEAD(&inode->i_obj_list);
311 mutex_init(&inode->i_obj_mutex);
310} 312}
311EXPORT_SYMBOL(inode_init_once); 313EXPORT_SYMBOL(inode_init_once);
312 314
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 51494e6b554..9d727271c9f 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -90,6 +90,7 @@ extern bool completion_done(struct completion *x);
90 90
91extern void complete(struct completion *); 91extern void complete(struct completion *);
92extern void complete_all(struct completion *); 92extern void complete_all(struct completion *);
93extern void complete_n(struct completion *, int n);
93 94
94/** 95/**
95 * INIT_COMPLETION - reinitialize a completion structure 96 * INIT_COMPLETION - reinitialize a completion structure
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 212ea7ba3f1..c83d3d59100 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -17,8 +17,8 @@
17 * nr_file rlimit, so it's safe to set up a ridiculously high absolute 17 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
18 * upper limit on files-per-process. 18 * upper limit on files-per-process.
19 * 19 *
20 * Some programs (notably those using select()) may have to be 20 * Some programs (notably those using select()) may have to be
21 * recompiled to take full advantage of the new limits.. 21 * recompiled to take full advantage of the new limits..
22 */ 22 */
23 23
24/* Fixed constants first: */ 24/* Fixed constants first: */
@@ -172,7 +172,7 @@ struct inodes_stat_t {
172#define SEL_EX 4 172#define SEL_EX 4
173 173
174/* public flags for file_system_type */ 174/* public flags for file_system_type */
175#define FS_REQUIRES_DEV 1 175#define FS_REQUIRES_DEV 1
176#define FS_BINARY_MOUNTDATA 2 176#define FS_BINARY_MOUNTDATA 2
177#define FS_HAS_SUBTYPE 4 177#define FS_HAS_SUBTYPE 4
178#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 178#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
@@ -480,7 +480,7 @@ struct iattr {
480 */ 480 */
481#include <linux/quota.h> 481#include <linux/quota.h>
482 482
483/** 483/**
484 * enum positive_aop_returns - aop return codes with specific semantics 484 * enum positive_aop_returns - aop return codes with specific semantics
485 * 485 *
486 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has 486 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
@@ -490,7 +490,7 @@ struct iattr {
490 * be a candidate for writeback again in the near 490 * be a candidate for writeback again in the near
491 * future. Other callers must be careful to unlock 491 * future. Other callers must be careful to unlock
492 * the page if they get this return. Returned by 492 * the page if they get this return. Returned by
493 * writepage(); 493 * writepage();
494 * 494 *
495 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has 495 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
496 * unlocked it and the page might have been truncated. 496 * unlocked it and the page might have been truncated.
@@ -738,6 +738,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
738 738
739struct posix_acl; 739struct posix_acl;
740#define ACL_NOT_CACHED ((void *)(-1)) 740#define ACL_NOT_CACHED ((void *)(-1))
741struct inode_obj_id_table;
741 742
742struct inode { 743struct inode {
743 /* RCU path lookup touches following: */ 744 /* RCU path lookup touches following: */
@@ -811,6 +812,8 @@ struct inode {
811 struct posix_acl *i_acl; 812 struct posix_acl *i_acl;
812 struct posix_acl *i_default_acl; 813 struct posix_acl *i_default_acl;
813#endif 814#endif
815 struct list_head i_obj_list;
816 struct mutex i_obj_mutex;
814 void *i_private; /* fs or device private pointer */ 817 void *i_private; /* fs or device private pointer */
815}; 818};
816 819
@@ -1037,10 +1040,10 @@ static inline int file_check_writeable(struct file *filp)
1037 1040
1038#define MAX_NON_LFS ((1UL<<31) - 1) 1041#define MAX_NON_LFS ((1UL<<31) - 1)
1039 1042
1040/* Page cache limit. The filesystems should put that into their s_maxbytes 1043/* Page cache limit. The filesystems should put that into their s_maxbytes
1041 limits, otherwise bad things can happen in VM. */ 1044 limits, otherwise bad things can happen in VM. */
1042#if BITS_PER_LONG==32 1045#if BITS_PER_LONG==32
1043#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) 1046#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
1044#elif BITS_PER_LONG==64 1047#elif BITS_PER_LONG==64
1045#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL 1048#define MAX_LFS_FILESIZE 0x7fffffffffffffffUL
1046#endif 1049#endif
@@ -2241,7 +2244,7 @@ extern void free_write_pipe(struct file *);
2241 2244
2242extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2245extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2243extern struct file * open_exec(const char *); 2246extern struct file * open_exec(const char *);
2244 2247
2245/* fs/dcache.c -- generic fs support functions */ 2248/* fs/dcache.c -- generic fs support functions */
2246extern int is_subdir(struct dentry *, struct dentry *); 2249extern int is_subdir(struct dentry *, struct dentry *);
2247extern int path_is_under(struct path *, struct path *); 2250extern int path_is_under(struct path *, struct path *);
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index de48bdad279..fecfe27d1f0 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -6,6 +6,8 @@
6#include <linux/ftrace_irq.h> 6#include <linux/ftrace_irq.h>
7#include <asm/hardirq.h> 7#include <asm/hardirq.h>
8 8
9#include <litmus/trace_irq.h>
10
9/* 11/*
10 * We put the hardirq and softirq counter into the preemption 12 * We put the hardirq and softirq counter into the preemption
11 * counter. The bitmask has the following meaning: 13 * counter. The bitmask has the following meaning:
@@ -192,6 +194,7 @@ extern void rcu_nmi_exit(void);
192 account_system_vtime(current); \ 194 account_system_vtime(current); \
193 add_preempt_count(HARDIRQ_OFFSET); \ 195 add_preempt_count(HARDIRQ_OFFSET); \
194 trace_hardirq_enter(); \ 196 trace_hardirq_enter(); \
197 ft_irq_fired(); \
195 } while (0) 198 } while (0)
196 199
197/* 200/*
@@ -222,6 +225,7 @@ extern void irq_exit(void);
222 lockdep_off(); \ 225 lockdep_off(); \
223 rcu_nmi_enter(); \ 226 rcu_nmi_enter(); \
224 trace_hardirq_enter(); \ 227 trace_hardirq_enter(); \
228 ft_irq_fired(); \
225 } while (0) 229 } while (0)
226 230
227#define nmi_exit() \ 231#define nmi_exit() \
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index cc07d2777bb..6eb8bc57e42 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -175,6 +175,7 @@ enum hrtimer_base_type {
175 * @nr_hangs: Total number of hrtimer interrupt hangs 175 * @nr_hangs: Total number of hrtimer interrupt hangs
176 * @max_hang_time: Maximum time spent in hrtimer_interrupt 176 * @max_hang_time: Maximum time spent in hrtimer_interrupt
177 * @clock_base: array of clock bases for this cpu 177 * @clock_base: array of clock bases for this cpu
178 * @to_pull: LITMUS^RT list of timers to be pulled on this cpu
178 */ 179 */
179struct hrtimer_cpu_base { 180struct hrtimer_cpu_base {
180 raw_spinlock_t lock; 181 raw_spinlock_t lock;
@@ -190,8 +191,32 @@ struct hrtimer_cpu_base {
190 ktime_t max_hang_time; 191 ktime_t max_hang_time;
191#endif 192#endif
192 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; 193 struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
194 struct list_head to_pull;
193}; 195};
194 196
197#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
198
199#define HRTIMER_START_ON_INACTIVE 0
200#define HRTIMER_START_ON_QUEUED 1
201
202/*
203 * struct hrtimer_start_on_info - save timer info on remote cpu
204 * @list: list of hrtimer_start_on_info on remote cpu (to_pull)
205 * @timer: timer to be triggered on remote cpu
206 * @time: time event
207 * @mode: timer mode
208 * @state: activity flag
209 */
210struct hrtimer_start_on_info {
211 struct list_head list;
212 struct hrtimer *timer;
213 ktime_t time;
214 enum hrtimer_mode mode;
215 atomic_t state;
216};
217
218#endif
219
195static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) 220static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
196{ 221{
197 timer->node.expires = time; 222 timer->node.expires = time;
@@ -363,6 +388,13 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
363 unsigned long delta_ns, 388 unsigned long delta_ns,
364 const enum hrtimer_mode mode, int wakeup); 389 const enum hrtimer_mode mode, int wakeup);
365 390
391#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
392extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info);
393extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
394 struct hrtimer *timer, ktime_t time,
395 const enum hrtimer_mode mode);
396#endif
397
366extern int hrtimer_cancel(struct hrtimer *timer); 398extern int hrtimer_cancel(struct hrtimer *timer);
367extern int hrtimer_try_to_cancel(struct hrtimer *timer); 399extern int hrtimer_try_to_cancel(struct hrtimer *timer);
368 400
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c44f22adae..a59c93266bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -39,6 +39,7 @@
39#define SCHED_BATCH 3 39#define SCHED_BATCH 3
40/* SCHED_ISO: reserved but not implemented yet */ 40/* SCHED_ISO: reserved but not implemented yet */
41#define SCHED_IDLE 5 41#define SCHED_IDLE 5
42#define SCHED_LITMUS 6
42/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ 43/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
43#define SCHED_RESET_ON_FORK 0x40000000 44#define SCHED_RESET_ON_FORK 0x40000000
44 45
@@ -93,6 +94,9 @@ struct sched_param {
93 94
94#include <asm/processor.h> 95#include <asm/processor.h>
95 96
97#include <litmus/rt_param.h>
98#include <litmus/preempt.h>
99
96struct exec_domain; 100struct exec_domain;
97struct futex_pi_state; 101struct futex_pi_state;
98struct robust_list_head; 102struct robust_list_head;
@@ -1210,6 +1214,7 @@ struct sched_rt_entity {
1210}; 1214};
1211 1215
1212struct rcu_node; 1216struct rcu_node;
1217struct od_table_entry;
1213 1218
1214enum perf_event_task_context { 1219enum perf_event_task_context {
1215 perf_invalid_context = -1, 1220 perf_invalid_context = -1,
@@ -1314,9 +1319,9 @@ struct task_struct {
1314 unsigned long stack_canary; 1319 unsigned long stack_canary;
1315#endif 1320#endif
1316 1321
1317 /* 1322 /*
1318 * pointers to (original) parent process, youngest child, younger sibling, 1323 * pointers to (original) parent process, youngest child, younger sibling,
1319 * older sibling, respectively. (p->father can be replaced with 1324 * older sibling, respectively. (p->father can be replaced with
1320 * p->real_parent->pid) 1325 * p->real_parent->pid)
1321 */ 1326 */
1322 struct task_struct *real_parent; /* real parent process */ 1327 struct task_struct *real_parent; /* real parent process */
@@ -1534,6 +1539,12 @@ struct task_struct {
1534 int nr_dirtied; 1539 int nr_dirtied;
1535 int nr_dirtied_pause; 1540 int nr_dirtied_pause;
1536 1541
1542 /* LITMUS RT parameters and state */
1543 struct rt_param rt_param;
1544
1545 /* references to PI semaphores, etc. */
1546 struct od_table_entry *od_table;
1547
1537#ifdef CONFIG_LATENCYTOP 1548#ifdef CONFIG_LATENCYTOP
1538 int latency_record_count; 1549 int latency_record_count;
1539 struct latency_record latency_record[LT_SAVECOUNT]; 1550 struct latency_record latency_record[LT_SAVECOUNT];
@@ -2149,7 +2160,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s
2149 spin_unlock_irqrestore(&tsk->sighand->siglock, flags); 2160 spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
2150 2161
2151 return ret; 2162 return ret;
2152} 2163}
2153 2164
2154extern void block_all_signals(int (*notifier)(void *priv), void *priv, 2165extern void block_all_signals(int (*notifier)(void *priv), void *priv,
2155 sigset_t *mask); 2166 sigset_t *mask);
@@ -2459,6 +2470,7 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
2459static inline void set_tsk_need_resched(struct task_struct *tsk) 2470static inline void set_tsk_need_resched(struct task_struct *tsk)
2460{ 2471{
2461 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); 2472 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2473 sched_state_will_schedule(tsk);
2462} 2474}
2463 2475
2464static inline void clear_tsk_need_resched(struct task_struct *tsk) 2476static inline void clear_tsk_need_resched(struct task_struct *tsk)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 8cc38d3bab0..53b1beef27a 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -82,6 +82,11 @@ int smp_call_function_any(const struct cpumask *mask,
82 smp_call_func_t func, void *info, int wait); 82 smp_call_func_t func, void *info, int wait);
83 83
84/* 84/*
85 * sends a 'pull timer' event to a remote CPU
86 */
87extern void smp_send_pull_timers(int cpu);
88
89/*
85 * Generic and arch helpers 90 * Generic and arch helpers
86 */ 91 */
87#ifdef CONFIG_USE_GENERIC_SMP_HELPERS 92#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
diff --git a/include/linux/tick.h b/include/linux/tick.h
index b232ccc0ee2..1e29bd5b18a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -74,6 +74,11 @@ extern int tick_is_oneshot_available(void);
74extern struct tick_device *tick_get_device(int cpu); 74extern struct tick_device *tick_get_device(int cpu);
75 75
76# ifdef CONFIG_HIGH_RES_TIMERS 76# ifdef CONFIG_HIGH_RES_TIMERS
77/* LITMUS^RT tick alignment */
78#define LINUX_DEFAULT_TICKS 0
79#define LITMUS_ALIGNED_TICKS 1
80#define LITMUS_STAGGERED_TICKS 2
81
77extern int tick_init_highres(void); 82extern int tick_init_highres(void);
78extern int tick_program_event(ktime_t expires, int force); 83extern int tick_program_event(ktime_t expires, int force);
79extern void tick_setup_sched_timer(void); 84extern void tick_setup_sched_timer(void);
diff --git a/include/litmus/affinity.h b/include/litmus/affinity.h
new file mode 100644
index 00000000000..ca2e442eb54
--- /dev/null
+++ b/include/litmus/affinity.h
@@ -0,0 +1,80 @@
1#ifndef __LITMUS_AFFINITY_H
2#define __LITMUS_AFFINITY_H
3
4#include <linux/cpumask.h>
5
6/*
7 L1 (instr) = depth 0
8 L1 (data) = depth 1
9 L2 = depth 2
10 L3 = depth 3
11 */
12#define NUM_CACHE_LEVELS 4
13
14struct neighborhood
15{
16 unsigned int size[NUM_CACHE_LEVELS];
17 cpumask_var_t neighbors[NUM_CACHE_LEVELS];
18};
19
20/* topology info is stored redundantly in a big array for fast lookups */
21extern struct neighborhood neigh_info[NR_CPUS];
22
23void init_topology(void); /* called by Litmus module's _init_litmus() */
24
25/* Works like:
26void get_nearest_available_cpu(
27 cpu_entry_t **nearest,
28 cpu_entry_t *start,
29 cpu_entry_t *entries,
30 int release_master)
31
32Set release_master = NO_CPU for no Release Master.
33
34We use a macro here to exploit the fact that C-EDF and G-EDF
35have similar structures for their cpu_entry_t structs, even though
36they do not share a common base-struct. The macro allows us to
37avoid code duplication.
38
39TODO: Factor out the job-to-processor linking from C/G-EDF into
40a reusable "processor mapping". (See B.B.'s RTSS'09 paper &
41dissertation.)
42 */
43#define get_nearest_available_cpu(nearest, start, entries, release_master) \
44{ \
45 (nearest) = NULL; \
46 if (!(start)->linked) { \
47 (nearest) = (start); \
48 } else { \
49 int __level; \
50 int __cpu; \
51 int __release_master = ((release_master) == NO_CPU) ? -1 : (release_master); \
52 struct neighborhood *__neighbors = &neigh_info[(start)->cpu]; \
53 \
54 for (__level = 0; (__level < NUM_CACHE_LEVELS) && !(nearest); ++__level) { \
55 if (__neighbors->size[__level] > 1) { \
56 for_each_cpu(__cpu, __neighbors->neighbors[__level]) { \
57 if (__cpu != __release_master) { \
58 cpu_entry_t *__entry = &per_cpu((entries), __cpu); \
59 if (!__entry->linked) { \
60 (nearest) = __entry; \
61 break; \
62 } \
63 } \
64 } \
65 } else if (__neighbors->size[__level] == 0) { \
66 break; \
67 } \
68 } \
69 } \
70 \
71 if ((nearest)) { \
72 TRACE("P%d is closest available CPU to P%d\n", \
73 (nearest)->cpu, (start)->cpu); \
74 } else { \
75 TRACE("Could not find an available CPU close to P%d\n", \
76 (start)->cpu); \
77 } \
78}
79
80#endif
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
new file mode 100644
index 00000000000..cf4864a498d
--- /dev/null
+++ b/include/litmus/bheap.h
@@ -0,0 +1,77 @@
1/* bheap.h -- Binomial Heaps
2 *
3 * (c) 2008, 2009 Bjoern Brandenburg
4 */
5
6#ifndef BHEAP_H
7#define BHEAP_H
8
9#define NOT_IN_HEAP UINT_MAX
10
11struct bheap_node {
12 struct bheap_node* parent;
13 struct bheap_node* next;
14 struct bheap_node* child;
15
16 unsigned int degree;
17 void* value;
18 struct bheap_node** ref;
19};
20
21struct bheap {
22 struct bheap_node* head;
23 /* We cache the minimum of the heap.
24 * This speeds up repeated peek operations.
25 */
26 struct bheap_node* min;
27};
28
29typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
30
31void bheap_init(struct bheap* heap);
32void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
33
34static inline int bheap_node_in_heap(struct bheap_node* h)
35{
36 return h->degree != NOT_IN_HEAP;
37}
38
39static inline int bheap_empty(struct bheap* heap)
40{
41 return heap->head == NULL && heap->min == NULL;
42}
43
44/* insert (and reinitialize) a node into the heap */
45void bheap_insert(bheap_prio_t higher_prio,
46 struct bheap* heap,
47 struct bheap_node* node);
48
49/* merge addition into target */
50void bheap_union(bheap_prio_t higher_prio,
51 struct bheap* target,
52 struct bheap* addition);
53
54struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
55 struct bheap* heap);
56
57struct bheap_node* bheap_take(bheap_prio_t higher_prio,
58 struct bheap* heap);
59
60void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
61int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
62
63void bheap_delete(bheap_prio_t higher_prio,
64 struct bheap* heap,
65 struct bheap_node* node);
66
67/* allocate from memcache */
68struct bheap_node* bheap_node_alloc(int gfp_flags);
69void bheap_node_free(struct bheap_node* hn);
70
71/* allocate a heap node for value and insert into the heap */
72int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
73 void* value, int gfp_flags);
74
75void* bheap_take_del(bheap_prio_t higher_prio,
76 struct bheap* heap);
77#endif
diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
new file mode 100644
index 00000000000..901a30a3e29
--- /dev/null
+++ b/include/litmus/binheap.h
@@ -0,0 +1,206 @@
1#ifndef LITMUS_BINARY_HEAP_H
2#define LITMUS_BINARY_HEAP_H
3
4#include <linux/kernel.h>
5
6/**
7 * Simple binary heap with add, arbitrary delete, delete_root, and top
8 * operations.
9 *
10 * Style meant to conform with list.h.
11 *
12 * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
13 * heap may be overkill (and perhaps not general enough) for some applications.
14 *
15 * Note: In order to make node swaps fast, a node inserted with a data pointer
16 * may not always hold said data pointer. This is similar to the binomial heap
17 * implementation. This does make node deletion tricky since we have to
18 * (1) locate the node that holds the data pointer to delete, and (2) the
19 * node that was originally inserted with said data pointer. These have to be
20 * coalesced into a single node before removal (see usage of
21 * __binheap_safe_swap()). We have to track node references to accomplish this.
22 */
23
24struct binheap_node {
25 void *data;
26 struct binheap_node *parent;
27 struct binheap_node *left;
28 struct binheap_node *right;
29
30 /* pointer to binheap_node that holds *data for which this binheap_node
31 * was originally inserted. (*data "owns" this node)
32 */
33 struct binheap_node *ref;
34 struct binheap_node **ref_ptr;
35};
36
37/**
38 * Signature of comparator function. Assumed 'less-than' (min-heap).
39 * Pass in 'greater-than' for max-heap.
40 *
41 * TODO: Consider macro-based implementation that allows comparator to be
42 * inlined (similar to Linux red/black tree) for greater efficiency.
43 */
44typedef int (*binheap_order_t)(struct binheap_node *a,
45 struct binheap_node *b);
46
47
48struct binheap {
49 struct binheap_node *root;
50
51 /* pointer to node to take next inserted child */
52 struct binheap_node *next;
53
54 /* pointer to last node in complete binary tree */
55 struct binheap_node *last;
56
57 /* comparator function pointer */
58 binheap_order_t compare;
59};
60
61
62/* Initialized heap nodes not in a heap have parent
63 * set to BINHEAP_POISON.
64 */
65#define BINHEAP_POISON ((void*)(0xdeadbeef))
66
67
68/**
69 * binheap_entry - get the struct for this heap node.
70 * Only valid when called upon heap nodes other than the root handle.
71 * @ptr: the heap node.
72 * @type: the type of struct pointed to by binheap_node::data.
73 * @member: unused.
74 */
75#define binheap_entry(ptr, type, member) \
76((type *)((ptr)->data))
77
78/**
79 * binheap_node_container - get the struct that contains this node.
80 * Only valid when called upon heap nodes other than the root handle.
81 * @ptr: the heap node.
82 * @type: the type of struct the node is embedded in.
83 * @member: the name of the binheap_struct within the (type) struct.
84 */
85#define binheap_node_container(ptr, type, member) \
86container_of((ptr), type, member)
87
88/**
89 * binheap_top_entry - get the struct for the node at the top of the heap.
90 * Only valid when called upon the heap handle node.
91 * @ptr: the special heap-handle node.
92 * @type: the type of the struct the head is embedded in.
93 * @member: the name of the binheap_struct within the (type) struct.
94 */
95#define binheap_top_entry(ptr, type, member) \
96binheap_entry((ptr)->root, type, member)
97
98/**
99 * binheap_delete_root - remove the root element from the heap.
100 * @handle: handle to the heap.
101 * @type: the type of the struct the head is embedded in.
102 * @member: the name of the binheap_struct within the (type) struct.
103 */
104#define binheap_delete_root(handle, type, member) \
105__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
106
107/**
108 * binheap_delete - remove an arbitrary element from the heap.
109 * @to_delete: pointer to node to be removed.
110 * @handle: handle to the heap.
111 */
112#define binheap_delete(to_delete, handle) \
113__binheap_delete((to_delete), (handle))
114
115/**
116 * binheap_add - insert an element to the heap
117 * new_node: node to add.
118 * @handle: handle to the heap.
119 * @type: the type of the struct the head is embedded in.
120 * @member: the name of the binheap_struct within the (type) struct.
121 */
122#define binheap_add(new_node, handle, type, member) \
123__binheap_add((new_node), (handle), container_of((new_node), type, member))
124
125/**
126 * binheap_decrease - re-eval the position of a node (based upon its
127 * original data pointer).
128 * @handle: handle to the heap.
129 * @orig_node: node that was associated with the data pointer
130 * (whose value has changed) when said pointer was
131 * added to the heap.
132 */
133#define binheap_decrease(orig_node, handle) \
134__binheap_decrease((orig_node), (handle))
135
136#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL , NULL, NULL}
137
138#define BINHEAP_NODE(name) \
139 struct binheap_node name = BINHEAP_NODE_INIT()
140
141
142static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
143{
144 n->data = NULL;
145 n->parent = BINHEAP_POISON;
146 n->left = NULL;
147 n->right = NULL;
148 n->ref = NULL;
149 n->ref_ptr = NULL;
150}
151
152static inline void INIT_BINHEAP_HANDLE(struct binheap *handle,
153 binheap_order_t compare)
154{
155 handle->root = NULL;
156 handle->next = NULL;
157 handle->last = NULL;
158 handle->compare = compare;
159}
160
161/* Returns true if binheap is empty. */
162static inline int binheap_empty(struct binheap *handle)
163{
164 return(handle->root == NULL);
165}
166
167/* Returns true if binheap node is in a heap. */
168static inline int binheap_is_in_heap(struct binheap_node *node)
169{
170 return (node->parent != BINHEAP_POISON);
171}
172
173/* Returns true if binheap node is in given heap. */
174int binheap_is_in_this_heap(struct binheap_node *node, struct binheap* heap);
175
176/* Add a node to a heap */
177void __binheap_add(struct binheap_node *new_node,
178 struct binheap *handle,
179 void *data);
180
181/**
182 * Removes the root node from the heap. The node is removed after coalescing
183 * the binheap_node with its original data pointer at the root of the tree.
184 *
185 * The 'last' node in the tree is then swapped up to the root and bubbled
186 * down.
187 */
188void __binheap_delete_root(struct binheap *handle,
189 struct binheap_node *container);
190
191/**
192 * Delete an arbitrary node. Bubble node to delete up to the root,
193 * and then delete to root.
194 */
195void __binheap_delete(struct binheap_node *node_to_delete,
196 struct binheap *handle);
197
198/**
199 * Bubble up a node whose pointer has decreased in value.
200 */
201void __binheap_decrease(struct binheap_node *orig_node,
202 struct binheap *handle);
203
204
205#endif
206
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
new file mode 100644
index 00000000000..33344ee8d5f
--- /dev/null
+++ b/include/litmus/budget.h
@@ -0,0 +1,35 @@
1#ifndef _LITMUS_BUDGET_H_
2#define _LITMUS_BUDGET_H_
3
4/* Update the per-processor enforcement timer (arm/reprogram/cancel) for
5 * the next task. */
6void update_enforcement_timer(struct task_struct* t);
7
8inline static int budget_exhausted(struct task_struct* t)
9{
10 return get_exec_time(t) >= get_exec_cost(t);
11}
12
13inline static lt_t budget_remaining(struct task_struct* t)
14{
15 if (!budget_exhausted(t))
16 return get_exec_cost(t) - get_exec_time(t);
17 else
18 /* avoid overflow */
19 return 0;
20}
21
22#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
23
24#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
25 == PRECISE_ENFORCEMENT)
26
27static inline int requeue_preempted_job(struct task_struct* t)
28{
29 /* Add task to ready queue only if not subject to budget enforcement or
30 * if the job has budget remaining. t may be NULL.
31 */
32 return t && (!budget_exhausted(t) || !budget_enforced(t));
33}
34
35#endif
diff --git a/include/litmus/clustered.h b/include/litmus/clustered.h
new file mode 100644
index 00000000000..0c18dcb15e6
--- /dev/null
+++ b/include/litmus/clustered.h
@@ -0,0 +1,44 @@
1#ifndef CLUSTERED_H
2#define CLUSTERED_H
3
4/* Which cache level should be used to group CPUs into clusters?
5 * GLOBAL_CLUSTER means that all CPUs form a single cluster (just like under
6 * global scheduling).
7 */
8enum cache_level {
9 GLOBAL_CLUSTER = 0,
10 L1_CLUSTER = 1,
11 L2_CLUSTER = 2,
12 L3_CLUSTER = 3
13};
14
15int parse_cache_level(const char *str, enum cache_level *level);
16const char* cache_level_name(enum cache_level level);
17
18/* expose a cache level in a /proc dir */
19struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
20 enum cache_level* level);
21
22
23
24struct scheduling_cluster {
25 unsigned int id;
26 /* list of CPUs that are part of this cluster */
27 struct list_head cpus;
28};
29
30struct cluster_cpu {
31 unsigned int id; /* which CPU is this? */
32 struct list_head cluster_list; /* List of the CPUs in this cluster. */
33 struct scheduling_cluster* cluster; /* The cluster that this CPU belongs to. */
34};
35
36int get_cluster_size(enum cache_level level);
37
38int assign_cpus_to_clusters(enum cache_level level,
39 struct scheduling_cluster* clusters[],
40 unsigned int num_clusters,
41 struct cluster_cpu* cpus[],
42 unsigned int num_cpus);
43
44#endif
diff --git a/include/litmus/debug_trace.h b/include/litmus/debug_trace.h
new file mode 100644
index 00000000000..48d086d5a44
--- /dev/null
+++ b/include/litmus/debug_trace.h
@@ -0,0 +1,37 @@
1#ifndef LITMUS_DEBUG_TRACE_H
2#define LITMUS_DEBUG_TRACE_H
3
4#ifdef CONFIG_SCHED_DEBUG_TRACE
5void sched_trace_log_message(const char* fmt, ...);
6void dump_trace_buffer(int max);
7#else
8
9#define sched_trace_log_message(fmt, ...)
10
11#endif
12
13extern atomic_t __log_seq_no;
14
15#ifdef CONFIG_SCHED_DEBUG_TRACE_CALLER
16#define TRACE_PREFIX "%d P%d [%s@%s:%d]: "
17#define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \
18 raw_smp_processor_id(), \
19 __FUNCTION__, __FILE__, __LINE__
20#else
21#define TRACE_PREFIX "%d P%d: "
22#define TRACE_ARGS atomic_add_return(1, &__log_seq_no), \
23 raw_smp_processor_id()
24#endif
25
26#define TRACE(fmt, args...) \
27 sched_trace_log_message(TRACE_PREFIX fmt, \
28 TRACE_ARGS, ## args)
29
30#define TRACE_TASK(t, fmt, args...) \
31 TRACE("(%s/%d:%d) " fmt, (t)->comm, (t)->pid, \
32 (t)->rt_param.job_params.job_no, ##args)
33
34#define TRACE_CUR(fmt, args...) \
35 TRACE_TASK(current, fmt, ## args)
36
37#endif
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
new file mode 100644
index 00000000000..bbaf22ea7f1
--- /dev/null
+++ b/include/litmus/edf_common.h
@@ -0,0 +1,25 @@
1/*
2 * EDF common data structures and utility functions shared by all EDF
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_EDF_COMMON_H__
11#define __UNC_EDF_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int edf_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 00000000000..f2115b83f1e
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,77 @@
1/* fdso.h - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 */
5
6#ifndef _LINUX_FDSO_H_
7#define _LINUX_FDSO_H_
8
9#include <linux/list.h>
10#include <asm/atomic.h>
11
12#include <linux/fs.h>
13#include <linux/slab.h>
14
15#define MAX_OBJECT_DESCRIPTORS 85
16
17typedef enum {
18 MIN_OBJ_TYPE = 0,
19
20 FMLP_SEM = 0,
21 SRP_SEM = 1,
22
23 MPCP_SEM = 2,
24 MPCP_VS_SEM = 3,
25 DPCP_SEM = 4,
26
27 PCP_SEM = 5,
28
29 MAX_OBJ_TYPE = 5
30} obj_type_t;
31
32struct inode_obj_id {
33 struct list_head list;
34 atomic_t count;
35 struct inode* inode;
36
37 obj_type_t type;
38 void* obj;
39 unsigned int id;
40};
41
42struct fdso_ops;
43
44struct od_table_entry {
45 unsigned int used;
46
47 struct inode_obj_id* obj;
48 const struct fdso_ops* class;
49};
50
51struct fdso_ops {
52 int (*create)(void** obj_ref, obj_type_t type, void* __user);
53 void (*destroy)(obj_type_t type, void*);
54 int (*open) (struct od_table_entry*, void* __user);
55 int (*close) (struct od_table_entry*);
56};
57
58/* translate a userspace supplied od into the raw table entry
59 * returns NULL if od is invalid
60 */
61struct od_table_entry* get_entry_for_od(int od);
62
63/* translate a userspace supplied od into the associated object
64 * returns NULL if od is invalid
65 */
66static inline void* od_lookup(int od, obj_type_t type)
67{
68 struct od_table_entry* e = get_entry_for_od(od);
69 return e && e->obj->type == type ? e->obj->obj : NULL;
70}
71
72#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
73#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
74#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
75
76
77#endif
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
new file mode 100644
index 00000000000..6c18277fdfc
--- /dev/null
+++ b/include/litmus/feather_buffer.h
@@ -0,0 +1,94 @@
1#ifndef _FEATHER_BUFFER_H_
2#define _FEATHER_BUFFER_H_
3
4/* requires UINT_MAX and memcpy */
5
/* Per-slot states stored in ft_buffer.slots[]. */
#define SLOT_FREE	0
#define SLOT_BUSY	1
#define SLOT_READY	2

/* Bookkeeping for a buffer made of slot_count slots of slot_size bytes. */
struct ft_buffer {
	unsigned int	slot_count;
	unsigned int	slot_size;

	int		free_count;	/* decremented by writers, incremented by the reader */
	unsigned int	write_idx;	/* free-running; reduced modulo slot_count on use */
	unsigned int	read_idx;	/* free-running; reduced modulo slot_count on use */

	char*		slots;		/* one SLOT_* state byte per slot */
	void*		buffer_mem;	/* slot payload memory */
	unsigned int	failed_writes;	/* bumped when a write finds no free slot */
};

/* Initialize buf to manage slot_count slots of slot_size bytes each, using
 * the caller-provided state array (slots) and payload memory (buffer_mem).
 *
 * Returns 1 on success. Returns 0 (leaving buf untouched) if slot_count is
 * zero or does not divide UINT_MAX + 1.
 */
static inline int init_ft_buffer(struct ft_buffer*	buf,
				 unsigned int		slot_count,
				 unsigned int		slot_size,
				 char*			slots,
				 void*			buffer_mem)
{
	/* unsigned like slot_count: a signed index would overflow for
	 * slot counts above INT_MAX (e.g., 2^31 passes the check below) */
	unsigned int i;

	if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
		/* The slot count must divide UINT_MAX + 1 so that when it
		 * wraps around the index correctly points to 0.
		 */
		return 0;
	}

	buf->slot_count    = slot_count;
	buf->slot_size     = slot_size;
	buf->slots         = slots;
	buf->buffer_mem    = buffer_mem;
	buf->free_count    = slot_count;
	buf->write_idx     = 0;
	buf->read_idx      = 0;
	buf->failed_writes = 0;
	for (i = 0; i < slot_count; i++)
		buf->slots[i] = SLOT_FREE;
	return 1;
}
49
50static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
51{
52 int free = fetch_and_dec(&buf->free_count);
53 unsigned int idx;
54 if (free <= 0) {
55 fetch_and_inc(&buf->free_count);
56 *ptr = 0;
57 fetch_and_inc(&buf->failed_writes);
58 return 0;
59 } else {
60 idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
61 buf->slots[idx] = SLOT_BUSY;
62 *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
63 return 1;
64 }
65}
66
67static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
68{
69 unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
70 buf->slots[idx] = SLOT_READY;
71}
72
73
74/* exclusive reader access is assumed */
75static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
76{
77 unsigned int idx;
78 if (buf->free_count == buf->slot_count)
79 /* nothing available */
80 return 0;
81 idx = buf->read_idx % buf->slot_count;
82 if (buf->slots[idx] == SLOT_READY) {
83 memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
84 buf->slot_size);
85 buf->slots[idx] = SLOT_FREE;
86 buf->read_idx++;
87 fetch_and_inc(&buf->free_count);
88 return 1;
89 } else
90 return 0;
91}
92
93
94#endif
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
new file mode 100644
index 00000000000..028dfb206fb
--- /dev/null
+++ b/include/litmus/feather_trace.h
@@ -0,0 +1,65 @@
1#ifndef _FEATHER_TRACE_H_
2#define _FEATHER_TRACE_H_
3
4#include <asm/atomic.h>
5
6int ft_enable_event(unsigned long id);
7int ft_disable_event(unsigned long id);
8int ft_is_event_enabled(unsigned long id);
9int ft_disable_all_events(void);
10
11/* atomic_* funcitons are inline anyway */
12static inline int fetch_and_inc(int *val)
13{
14 return atomic_add_return(1, (atomic_t*) val) - 1;
15}
16
17static inline int fetch_and_dec(int *val)
18{
19 return atomic_sub_return(1, (atomic_t*) val) + 1;
20}
21
22/* Don't use rewriting implementation if kernel text pages are read-only.
23 * Ftrace gets around this by using the identity mapping, but that's more
24 * effort than is warranted right now for Feather-Trace.
25 * Eventually, it may make sense to replace Feather-Trace with ftrace.
26 */
27#if defined(CONFIG_ARCH_HAS_FEATHER_TRACE) && !defined(CONFIG_DEBUG_RODATA)
28
29#include <asm/feather_trace.h>
30
31#else /* !__ARCH_HAS_FEATHER_TRACE */
32
33/* provide default implementation */
34
35#include <asm/timex.h> /* for get_cycles() */
36
37static inline unsigned long long ft_timestamp(void)
38{
39 return get_cycles();
40}
41
/* Expands to nothing in the default implementation. */
#define feather_callback

#define MAX_EVENTS 1024

extern int ft_events[MAX_EVENTS];

/* Each ft_event*() macro invokes callback only if ft_events[id] is non-zero.
 * The variants differ in what is passed: ft_event() passes nothing,
 * ft_event0() passes id, ft_event1..3() pass id plus the extra parameters.
 *
 * The bodies are wrapped in do { } while (0) so that each use is a single
 * statement: it needs a terminating ';' and is safe in an unbraced
 * if/else.
 */
#define ft_event(id, callback)						\
	do { if (ft_events[id]) callback(); } while (0)

#define ft_event0(id, callback)						\
	do { if (ft_events[id]) callback(id); } while (0)

#define ft_event1(id, callback, param)					\
	do { if (ft_events[id]) callback(id, param); } while (0)

#define ft_event2(id, callback, param, param2)				\
	do { if (ft_events[id]) callback(id, param, param2); } while (0)

#define ft_event3(id, callback, p, p2, p3)				\
	do { if (ft_events[id]) callback(id, p, p2, p3); } while (0)
62
63#endif /* __ARCH_HAS_FEATHER_TRACE */
64
65#endif
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h
new file mode 100644
index 00000000000..dd1f7bf1e34
--- /dev/null
+++ b/include/litmus/fp_common.h
@@ -0,0 +1,105 @@
1/* Fixed-priority scheduler support.
2 */
3
4#ifndef __FP_COMMON_H__
5#define __FP_COMMON_H__
6
7#include <litmus/rt_domain.h>
8
9#include <asm/bitops.h>
10
11
12void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
13 release_jobs_t release);
14
15int fp_higher_prio(struct task_struct* first,
16 struct task_struct* second);
17
18int fp_ready_order(struct bheap_node* a, struct bheap_node* b);
19
20#define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG)
21
22#if (LITMUS_MAX_PRIORITY % BITS_PER_LONG)
23#error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG
24#endif
25
26/* bitmask-indexed priority queue */
27struct fp_prio_queue {
28 unsigned long bitmask[FP_PRIO_BIT_WORDS];
29 struct bheap queue[LITMUS_MAX_PRIORITY];
30};
31
32void fp_prio_queue_init(struct fp_prio_queue* q);
33
34static inline void fpq_set(struct fp_prio_queue* q, unsigned int index)
35{
36 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
37 __set_bit(index % BITS_PER_LONG, word);
38}
39
40static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index)
41{
42 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
43 __clear_bit(index % BITS_PER_LONG, word);
44}
45
46static inline unsigned int fpq_find(struct fp_prio_queue* q)
47{
48 int i;
49
50 /* loop optimizer should unroll this */
51 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
52 if (q->bitmask[i])
53 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
54
55 return LITMUS_MAX_PRIORITY; /* nothing found */
56}
57
58static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
59{
60
61 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
62
63 fpq_set(q, index);
64 bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
65}
66
67static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
68{
69 BUG_ON(!is_queued(t));
70
71 bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
72 if (likely(bheap_empty(&q->queue[index])))
73 fpq_clear(q, index);
74}
75
76static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q)
77{
78 unsigned int idx = fpq_find(q);
79 struct bheap_node* hn;
80
81 if (idx < LITMUS_MAX_PRIORITY) {
82 hn = bheap_peek(fp_ready_order, &q->queue[idx]);
83 return bheap2task(hn);
84 } else
85 return NULL;
86}
87
88static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q)
89{
90 unsigned int idx = fpq_find(q);
91 struct bheap_node* hn;
92
93 if (idx < LITMUS_MAX_PRIORITY) {
94 hn = bheap_take(fp_ready_order, &q->queue[idx]);
95 if (likely(bheap_empty(&q->queue[idx])))
96 fpq_clear(q, idx);
97 return bheap2task(hn);
98 } else
99 return NULL;
100}
101
102int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t);
103
104
105#endif
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 00000000000..04d4bcaeae9
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,145 @@
#ifndef __FP_MATH_H__
#define __FP_MATH_H__

#ifndef __KERNEL__
#include <stdint.h>
#define abs(x) (((x) < 0) ? -(x) : (x))
#endif

// Use 64-bit because we want to track things at the nanosecond scale.
// This can lead to very large numbers.
typedef int64_t fpbuf_t;

/* Fixed-point number: val holds the value scaled by 2^FP_SHIFT. */
typedef struct
{
	fpbuf_t val;
} fp_t;

#define FP_SHIFT 10
#define ROUND_BIT (FP_SHIFT - 1)

/* wrap an already-scaled raw value */
#define _fp(x) ((fp_t) {x})

#ifdef __KERNEL__
static const fp_t LITMUS_FP_ZERO = {.val = 0};
static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
#endif

/* convert an integer to its fixed point representation */
static inline fp_t FP(fpbuf_t x)
{
	return _fp(((fpbuf_t) x) << FP_SHIFT);
}

/* divide two integers to obtain a fixed point value */
static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
{
	return _fp(FP(a).val / (b));
}

/* remainder of the raw value modulo 2^FP_SHIFT (the fractional bits) */
static inline fpbuf_t _point(fp_t x)
{
	return (x.val % (1 << FP_SHIFT));

}

/* Printing helpers, to be used together: printf(_FP_, fp2str(x)).
 * The raw value is cast to long long so that the format matches the
 * argument regardless of how int64_t is defined on the platform.
 */
#define fp2str(x) ((long long) (x).val)
/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
#define _FP_ "%lld/1024"

/* integer part: raw value shifted right by FP_SHIFT */
static inline fpbuf_t _floor(fp_t x)
{
	return x.val >> FP_SHIFT;
}

/* FIXME: negative rounding */
static inline fpbuf_t _round(fp_t x)
{
	return _floor(x) + ((x.val >> ROUND_BIT) & 1);
}

/* multiply two fixed point values */
static inline fp_t _mul(fp_t a, fp_t b)
{
	return _fp((a.val * b.val) >> FP_SHIFT);
}

/* divide two fixed point values */
static inline fp_t _div(fp_t a, fp_t b)
{
#if !defined(__KERNEL__) && !defined(unlikely)
#define unlikely(x) (x)
#define DO_UNDEF_UNLIKELY
#endif
	/* a.val << FP_SHIFT only fits into fpbuf_t while |a.val| stays
	 * below 2^(bits - 1 - FP_SHIFT) */
	const fpbuf_t limit =
		((fpbuf_t) 1) << ((sizeof(fpbuf_t) * 8) - 1 - FP_SHIFT);

	/* try not to overflow: for large magnitudes divide first (losing
	 * the fractional bits of the quotient) and scale afterwards */
	if (unlikely(a.val >= limit || a.val <= -limit))
		return _fp((a.val / b.val) << FP_SHIFT);
	else
		return _fp((a.val << FP_SHIFT) / b.val);
#ifdef DO_UNDEF_UNLIKELY
#undef unlikely
#undef DO_UNDEF_UNLIKELY
#endif
}

static inline fp_t _add(fp_t a, fp_t b)
{
	return _fp(a.val + b.val);
}

static inline fp_t _sub(fp_t a, fp_t b)
{
	return _fp(a.val - b.val);
}

static inline fp_t _neg(fp_t x)
{
	return _fp(-x.val);
}

static inline fp_t _abs(fp_t x)
{
	/* open-coded so that the full 64-bit value is handled no matter
	 * which abs() happens to be in scope */
	return _fp((x.val < 0) ? -x.val : x.val);
}

/* works the same as casting float/double to integer */
static inline fpbuf_t _fp_to_integer(fp_t x)
{
	return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
}

static inline fp_t _integer_to_fp(fpbuf_t x)
{
	return _frac(x,1);
}

static inline int _leq(fp_t a, fp_t b)
{
	return a.val <= b.val;
}

static inline int _geq(fp_t a, fp_t b)
{
	return a.val >= b.val;
}

static inline int _lt(fp_t a, fp_t b)
{
	return a.val < b.val;
}

static inline int _gt(fp_t a, fp_t b)
{
	return a.val > b.val;
}

static inline int _eq(fp_t a, fp_t b)
{
	return a.val == b.val;
}

/* returns b if a < b, otherwise a */
static inline fp_t _max(fp_t a, fp_t b)
{
	if (a.val < b.val)
		return b;
	else
		return a;
}
#endif
diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h
new file mode 100644
index 00000000000..0b959874dd7
--- /dev/null
+++ b/include/litmus/ftdev.h
@@ -0,0 +1,55 @@
1#ifndef _LITMUS_FTDEV_H_
2#define _LITMUS_FTDEV_H_
3
4#include <litmus/feather_trace.h>
5#include <litmus/feather_buffer.h>
6#include <linux/mutex.h>
7#include <linux/cdev.h>
8
9#define FTDEV_ENABLE_CMD 0
10#define FTDEV_DISABLE_CMD 1
11
12struct ftdev;
13
14/* return 0 if buffer can be opened, otherwise -$REASON */
15typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no);
16/* return 0 on success, otherwise -$REASON */
17typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no);
18typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no);
19/* Let devices handle writes from userspace. No synchronization provided. */
20typedef ssize_t (*ftdev_write_t)(struct ft_buffer* buf, size_t len, const char __user *from);
21
22struct ftdev_event;
23
24struct ftdev_minor {
25 struct ft_buffer* buf;
26 unsigned int readers;
27 struct mutex lock;
28 /* FIXME: filter for authorized events */
29 struct ftdev_event* events;
30 struct device* device;
31 struct ftdev* ftdev;
32};
33
34struct ftdev {
35 dev_t major;
36 struct cdev cdev;
37 struct class* class;
38 const char* name;
39 struct ftdev_minor* minor;
40 unsigned int minor_cnt;
41 ftdev_alloc_t alloc;
42 ftdev_free_t free;
43 ftdev_can_open_t can_open;
44 ftdev_write_t write;
45};
46
47struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size);
48void free_ft_buffer(struct ft_buffer* buf);
49
50int ftdev_init( struct ftdev* ftdev, struct module* owner,
51 const int minor_cnt, const char* name);
52void ftdev_exit(struct ftdev* ftdev);
53int register_ftdev(struct ftdev* ftdev);
54
55#endif
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
new file mode 100644
index 00000000000..9bd361ef394
--- /dev/null
+++ b/include/litmus/jobs.h
@@ -0,0 +1,9 @@
1#ifndef __LITMUS_JOBS_H__
2#define __LITMUS_JOBS_H__
3
4void prepare_for_next_period(struct task_struct *t);
5void release_at(struct task_struct *t, lt_t start);
6long complete_job(void);
7
8#endif
9
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
new file mode 100644
index 00000000000..807b7888695
--- /dev/null
+++ b/include/litmus/litmus.h
@@ -0,0 +1,262 @@
1/*
2 * Constant definitions related to
3 * scheduling policy.
4 */
5
6#ifndef _LINUX_LITMUS_H_
7#define _LINUX_LITMUS_H_
8
9#include <litmus/debug_trace.h>
10
11#ifdef CONFIG_RELEASE_MASTER
12extern atomic_t release_master_cpu;
13#endif
14
15/* in_list - is a given list_head queued on some list?
16 */
17static inline int in_list(struct list_head* list)
18{
19 return !( /* case 1: deleted */
20 (list->next == LIST_POISON1 &&
21 list->prev == LIST_POISON2)
22 ||
23 /* case 2: initialized */
24 (list->next == list &&
25 list->prev == list)
26 );
27}
28
29struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
30
31#define NO_CPU 0xffffffff
32
33void litmus_fork(struct task_struct *tsk);
34void litmus_exec(void);
35/* clean up real-time state of a task */
36void exit_litmus(struct task_struct *dead_tsk);
37
38long litmus_admit_task(struct task_struct *tsk);
39void litmus_exit_task(struct task_struct *tsk);
40
41#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
42#define rt_transition_pending(t) \
43 ((t)->rt_param.transition_pending)
44
45#define tsk_rt(t) (&(t)->rt_param)
46
47/* Realtime utility macros */
48#define get_rt_flags(t) (tsk_rt(t)->flags)
49#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f))
50#define is_priority_boosted(t) (tsk_rt(t)->priority_boosted)
51#define get_boost_start(t) (tsk_rt(t)->boost_start_time)
52
53/* task_params macros */
54#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost)
55#define get_rt_period(t) (tsk_rt(t)->task_params.period)
56#define get_rt_relative_deadline(t) (tsk_rt(t)->task_params.relative_deadline)
57#define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
58#define get_partition(t) (tsk_rt(t)->task_params.cpu)
59#define get_priority(t) (tsk_rt(t)->task_params.priority)
60#define get_class(t) (tsk_rt(t)->task_params.cls)
61
62/* job_param macros */
63#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
64#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
65#define get_release(t) (tsk_rt(t)->job_params.release)
66#define get_lateness(t) (tsk_rt(t)->job_params.lateness)
67
68#define is_hrt(t) \
69 (tsk_rt(t)->task_params.cls == RT_CLASS_HARD)
70#define is_srt(t) \
71 (tsk_rt(t)->task_params.cls == RT_CLASS_SOFT)
72#define is_be(t) \
73 (tsk_rt(t)->task_params.cls == RT_CLASS_BEST_EFFORT)
74
75/* Our notion of time within LITMUS: kernel monotonic time. */
76static inline lt_t litmus_clock(void)
77{
78 return ktime_to_ns(ktime_get());
79}
80
81/* A macro to convert from nanoseconds to ktime_t. */
82#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
83
84#define get_domain(t) (tsk_rt(t)->domain)
85
86/* Honor the flag in the preempt_count variable that is set
87 * when scheduling is in progress.
88 */
89#define is_running(t) \
90 ((t)->state == TASK_RUNNING || \
91 task_thread_info(t)->preempt_count & PREEMPT_ACTIVE)
92
93#define is_blocked(t) \
94 (!is_running(t))
95#define is_released(t, now) \
96 (lt_before_eq(get_release(t), now))
97#define is_tardy(t, now) \
98 (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
99
100/* real-time comparison macros */
101#define earlier_deadline(a, b) (lt_before(\
102 (a)->rt_param.job_params.deadline,\
103 (b)->rt_param.job_params.deadline))
104#define earlier_release(a, b) (lt_before(\
105 (a)->rt_param.job_params.release,\
106 (b)->rt_param.job_params.release))
107
108void preempt_if_preemptable(struct task_struct* t, int on_cpu);
109
110#ifdef CONFIG_LITMUS_LOCKING
111void srp_ceiling_block(void);
112#else
113#define srp_ceiling_block() /* nothing */
114#endif
115
/* Interpret the value stored in heap node hn as a task pointer. The argument
 * is parenthesized so that expressions such as 'p + 1' work as expected. */
#define bheap2task(hn) ((struct task_struct*) (hn)->value)
117
118#ifdef CONFIG_NP_SECTION
119
120static inline int is_kernel_np(struct task_struct *t)
121{
122 return tsk_rt(t)->kernel_np;
123}
124
125static inline int is_user_np(struct task_struct *t)
126{
127 return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->sched.np.flag : 0;
128}
129
130static inline void request_exit_np(struct task_struct *t)
131{
132 if (is_user_np(t)) {
133 /* Set the flag that tells user space to call
134 * into the kernel at the end of a critical section. */
135 if (likely(tsk_rt(t)->ctrl_page)) {
136 TRACE_TASK(t, "setting delayed_preemption flag\n");
137 tsk_rt(t)->ctrl_page->sched.np.preempt = 1;
138 }
139 }
140}
141
142static inline void make_np(struct task_struct *t)
143{
144 tsk_rt(t)->kernel_np++;
145}
146
147/* Caller should check if preemption is necessary when
148 * the function returns 0.
149 */
150static inline int take_np(struct task_struct *t)
151{
152 return --tsk_rt(t)->kernel_np;
153}
154
155/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
156static inline int request_exit_np_atomic(struct task_struct *t)
157{
158 union np_flag old, new;
159
160 if (tsk_rt(t)->ctrl_page) {
161 old.raw = tsk_rt(t)->ctrl_page->sched.raw;
162 if (old.np.flag == 0) {
163 /* no longer non-preemptive */
164 return 0;
165 } else if (old.np.preempt) {
166 /* already set, nothing for us to do */
167 return 1;
168 } else {
169 /* non preemptive and flag not set */
170 new.raw = old.raw;
171 new.np.preempt = 1;
172 /* if we get old back, then we atomically set the flag */
173 return cmpxchg(&tsk_rt(t)->ctrl_page->sched.raw, old.raw, new.raw) == old.raw;
174 /* If we raced with a concurrent change, then so be
175 * it. Deliver it by IPI. We don't want an unbounded
176 * retry loop here since tasks might exploit that to
177 * keep the kernel busy indefinitely. */
178 }
179 } else
180 return 0;
181}
182
183#else
184
185static inline int is_kernel_np(struct task_struct* t)
186{
187 return 0;
188}
189
190static inline int is_user_np(struct task_struct* t)
191{
192 return 0;
193}
194
195static inline void request_exit_np(struct task_struct *t)
196{
197 /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
198 BUG();
199}
200
201static inline int request_exit_np_atomic(struct task_struct *t)
202{
203 return 0;
204}
205
206#endif
207
208static inline void clear_exit_np(struct task_struct *t)
209{
210 if (likely(tsk_rt(t)->ctrl_page))
211 tsk_rt(t)->ctrl_page->sched.np.preempt = 0;
212}
213
/* Returns non-zero if t is non-preemptive according to is_kernel_np() or
 * is_user_np(). With CONFIG_SCHED_DEBUG_TRACE, a positive result is also
 * logged via TRACE_TASK(). */
static inline int is_np(struct task_struct *t)
{
#ifdef CONFIG_SCHED_DEBUG_TRACE
	int kernel = is_kernel_np(t);
	int user = is_user_np(t);
	int np = kernel || user;

	if (np)
		TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
			   kernel, user);
	return np;
#else
	return unlikely(is_kernel_np(t) || is_user_np(t));
#endif
}
229
230static inline int is_present(struct task_struct* t)
231{
232 return t && tsk_rt(t)->present;
233}
234
235
236/* make the unit explicit */
237typedef unsigned long quanta_t;
238
239enum round {
240 FLOOR,
241 CEIL
242};
243
244
245/* Tick period is used to convert ns-specified execution
246 * costs and periods into tick-based equivalents.
247 */
248extern ktime_t tick_period;
249
250static inline quanta_t time2quanta(lt_t time, enum round round)
251{
252 s64 quantum_length = ktime_to_ns(tick_period);
253
254 if (do_div(time, quantum_length) && round == CEIL)
255 time++;
256 return (quanta_t) time;
257}
258
259/* By how much is cpu staggered behind CPU 0? */
260u64 cpu_stagger_offset(int cpu);
261
262#endif
diff --git a/include/litmus/litmus_proc.h b/include/litmus/litmus_proc.h
new file mode 100644
index 00000000000..6800e725d48
--- /dev/null
+++ b/include/litmus/litmus_proc.h
@@ -0,0 +1,25 @@
1#include <litmus/sched_plugin.h>
2#include <linux/proc_fs.h>
3
4int __init init_litmus_proc(void);
5void exit_litmus_proc(void);
6
7/*
8 * On success, returns 0 and sets the pointer to the location of the new
9 * proc dir entry, otherwise returns an error code and sets pde to NULL.
10 */
11long make_plugin_proc_dir(struct sched_plugin* plugin,
12 struct proc_dir_entry** pde);
13
14/*
15 * Plugins should deallocate all child proc directory entries before
16 * calling this, to avoid memory leaks.
17 */
18void remove_plugin_proc_dir(struct sched_plugin* plugin);
19
20
21/* Copy at most size-1 bytes from ubuf into kbuf, null-terminate buf, and
22 * remove a '\n' if present. Returns the number of bytes that were read or
23 * -EFAULT. */
24int copy_and_chomp(char *kbuf, unsigned long ksize,
25 __user const char* ubuf, unsigned long ulength);
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
new file mode 100644
index 00000000000..4d7b870cb44
--- /dev/null
+++ b/include/litmus/locking.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_LOCKING_H
2#define LITMUS_LOCKING_H
3
4struct litmus_lock_ops;
5
6/* Generic base struct for LITMUS^RT userspace semaphores.
7 * This structure should be embedded in protocol-specific semaphores.
8 */
9struct litmus_lock {
10 struct litmus_lock_ops *ops;
11 int type;
12};
13
14struct litmus_lock_ops {
15 /* Current task tries to obtain / drop a reference to a lock.
16 * Optional methods, allowed by default. */
17 int (*open)(struct litmus_lock*, void* __user);
18 int (*close)(struct litmus_lock*);
19
20 /* Current tries to lock/unlock this lock (mandatory methods). */
21 int (*lock)(struct litmus_lock*);
22 int (*unlock)(struct litmus_lock*);
23
24 /* The lock is no longer being referenced (mandatory method). */
25 void (*deallocate)(struct litmus_lock*);
26};
27
28#endif
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
new file mode 100644
index 00000000000..380b886d78f
--- /dev/null
+++ b/include/litmus/preempt.h
@@ -0,0 +1,164 @@
1#ifndef LITMUS_PREEMPT_H
2#define LITMUS_PREEMPT_H
3
4#include <linux/types.h>
5#include <linux/cache.h>
6#include <linux/percpu.h>
7#include <asm/atomic.h>
8
9#include <litmus/debug_trace.h>
10
/* Declaration only. Uses the DECLARE_ variant of the per-CPU macro instead of
 * prefixing the DEFINE_ variant with 'extern'. */
DECLARE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
12
#ifdef CONFIG_PREEMPT_STATE_TRACE
const char* sched_state_name(int s);
/* '##' drops the preceding comma when no arguments follow the format */
#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, ## args)
#else
#define TRACE_STATE(fmt, args...) /* ignore */
#endif

/* Trace a message if the local scheduling state is not one of the states in
 * the bitmask x. Expands to a single statement: there is no trailing ';'
 * after 'while (0)', so the caller supplies it and the macro can be used in
 * an unbraced if/else.
 */
#define VERIFY_SCHED_STATE(x)						\
	do { int __s = get_sched_state();				\
		if ((__s & (x)) == 0)					\
			TRACE_STATE("INVALID s=0x%x (%s) not "		\
				    "in 0x%x (%s) [%s]\n",		\
				    __s, sched_state_name(__s),		\
				    (x), #x, __FUNCTION__);		\
	} while (0)

/* Trace a transition from state x to state y on the given cpu. */
#define TRACE_SCHED_STATE_CHANGE(x, y, cpu)				\
	TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n",			\
		    cpu,  (x), sched_state_name(x),			\
		    (y), sched_state_name(y))
33
34
35typedef enum scheduling_state {
36 TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that
37 * should be scheduled, and the processor does not
38 * plan to invoke schedule(). */
39 SHOULD_SCHEDULE = (1 << 1), /* A remote processor has determined that the
40 * processor should reschedule, but this has not
41 * been communicated yet (IPI still pending). */
42 WILL_SCHEDULE = (1 << 2), /* The processor has noticed that it has to
43 * reschedule and will do so shortly. */
44 TASK_PICKED = (1 << 3), /* The processor is currently executing schedule(),
45 * has selected a new task to schedule, but has not
46 * yet performed the actual context switch. */
47 PICKED_WRONG_TASK = (1 << 4), /* The processor has not yet performed the context
48 * switch, but a remote processor has already
49 * determined that a higher-priority task became
50 * eligible after the task was picked. */
51} sched_state_t;
52
53static inline sched_state_t get_sched_state_on(int cpu)
54{
55 return atomic_read(&per_cpu(resched_state, cpu));
56}
57
58static inline sched_state_t get_sched_state(void)
59{
60 return atomic_read(&__get_cpu_var(resched_state));
61}
62
63static inline int is_in_sched_state(int possible_states)
64{
65 return get_sched_state() & possible_states;
66}
67
68static inline int cpu_is_in_sched_state(int cpu, int possible_states)
69{
70 return get_sched_state_on(cpu) & possible_states;
71}
72
73static inline void set_sched_state(sched_state_t s)
74{
75 TRACE_SCHED_STATE_CHANGE(get_sched_state(), s, smp_processor_id());
76 atomic_set(&__get_cpu_var(resched_state), s);
77}
78
79static inline int sched_state_transition(sched_state_t from, sched_state_t to)
80{
81 sched_state_t old_state;
82
83 old_state = atomic_cmpxchg(&__get_cpu_var(resched_state), from, to);
84 if (old_state == from) {
85 TRACE_SCHED_STATE_CHANGE(from, to, smp_processor_id());
86 return 1;
87 } else
88 return 0;
89}
90
91static inline int sched_state_transition_on(int cpu,
92 sched_state_t from,
93 sched_state_t to)
94{
95 sched_state_t old_state;
96
97 old_state = atomic_cmpxchg(&per_cpu(resched_state, cpu), from, to);
98 if (old_state == from) {
99 TRACE_SCHED_STATE_CHANGE(from, to, cpu);
100 return 1;
101 } else
102 return 0;
103}
104
105/* Plugins must call this function after they have decided which job to
106 * schedule next. IMPORTANT: this function must be called while still holding
107 * the lock that is used to serialize scheduling decisions.
108 *
109 * (Ideally, we would like to use runqueue locks for this purpose, but that
110 * would lead to deadlocks with the migration code.)
111 */
112static inline void sched_state_task_picked(void)
113{
114 VERIFY_SCHED_STATE(WILL_SCHEDULE);
115
116 /* WILL_SCHEDULE has only a local tansition => simple store is ok */
117 set_sched_state(TASK_PICKED);
118}
119
120static inline void sched_state_entered_schedule(void)
121{
122 /* Update state for the case that we entered schedule() not due to
123 * set_tsk_need_resched() */
124 set_sched_state(WILL_SCHEDULE);
125}
126
127/* Called by schedule() to check if the scheduling decision is still valid
128 * after a context switch. Returns 1 if the CPU needs to reschdule. */
129static inline int sched_state_validate_switch(void)
130{
131 int left_state_ok = 0;
132
133 VERIFY_SCHED_STATE(PICKED_WRONG_TASK | TASK_PICKED);
134
135 if (is_in_sched_state(TASK_PICKED)) {
136 /* Might be good; let's try to transition out of this
137 * state. This must be done atomically since remote processors
138 * may try to change the state, too. */
139 left_state_ok = sched_state_transition(TASK_PICKED, TASK_SCHEDULED);
140 }
141
142 if (!left_state_ok) {
143 /* We raced with a higher-priority task arrival => not
144 * valid. The CPU needs to reschedule. */
145 set_sched_state(WILL_SCHEDULE);
146 return 1;
147 } else
148 return 0;
149}
150
151/* State transition events. See litmus/preempt.c for details. */
152void sched_state_will_schedule(struct task_struct* tsk);
153void sched_state_ipi(void);
154/* Cause a CPU (remote or local) to reschedule. */
155void litmus_reschedule(int cpu);
156void litmus_reschedule_local(void);
157
158#ifdef CONFIG_DEBUG_KERNEL
159void sched_state_plugin_check(void);
160#else
161#define sched_state_plugin_check() /* no check */
162#endif
163
164#endif
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
new file mode 100644
index 00000000000..ac249292e86
--- /dev/null
+++ b/include/litmus/rt_domain.h
@@ -0,0 +1,182 @@
1/* CLEANUP: Add comments and make it less messy.
2 *
3 */
4
5#ifndef __UNC_RT_DOMAIN_H__
6#define __UNC_RT_DOMAIN_H__
7
8#include <litmus/bheap.h>
9
10#define RELEASE_QUEUE_SLOTS 127 /* prime */
11
12struct _rt_domain;
13
14typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
15typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
16
17struct release_queue {
18 /* each slot maintains a list of release heaps sorted
19 * by release time */
20 struct list_head slot[RELEASE_QUEUE_SLOTS];
21};
22
23typedef struct _rt_domain {
24 /* runnable rt tasks are in here */
25 raw_spinlock_t ready_lock;
26 struct bheap ready_queue;
27
28 /* real-time tasks waiting for release are in here */
29 raw_spinlock_t release_lock;
30 struct release_queue release_queue;
31
32#ifdef CONFIG_RELEASE_MASTER
33 int release_master;
34#endif
35
36 /* for moving tasks to the release queue */
37 raw_spinlock_t tobe_lock;
38 struct list_head tobe_released;
39
40 /* how do we check if we need to kick another CPU? */
41 check_resched_needed_t check_resched;
42
43 /* how do we release jobs? */
44 release_jobs_t release_jobs;
45
46 /* how are tasks ordered in the ready queue? */
47 bheap_prio_t order;
48} rt_domain_t;
49
50struct release_heap {
51 /* list_head for per-time-slot list */
52 struct list_head list;
53 lt_t release_time;
54 /* all tasks to be released at release_time */
55 struct bheap heap;
56 /* used to trigger the release */
57 struct hrtimer timer;
58
59#ifdef CONFIG_RELEASE_MASTER
60 /* used to delegate releases */
61 struct hrtimer_start_on_info info;
62#endif
63 /* required for the timer callback */
64 rt_domain_t* dom;
65};
66
67
68static inline struct task_struct* __next_ready(rt_domain_t* rt)
69{
70 struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
71 if (hn)
72 return bheap2task(hn);
73 else
74 return NULL;
75}
76
77void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
78 check_resched_needed_t check,
79 release_jobs_t relase);
80
81void __add_ready(rt_domain_t* rt, struct task_struct *new);
82void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
83void __add_release(rt_domain_t* rt, struct task_struct *task);
84
85static inline struct task_struct* __take_ready(rt_domain_t* rt)
86{
87 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
88 if (hn)
89 return bheap2task(hn);
90 else
91 return NULL;
92}
93
94static inline struct task_struct* __peek_ready(rt_domain_t* rt)
95{
96 struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
97 if (hn)
98 return bheap2task(hn);
99 else
100 return NULL;
101}
102
103static inline int is_queued(struct task_struct *t)
104{
105 BUG_ON(!tsk_rt(t)->heap_node);
106 return bheap_node_in_heap(tsk_rt(t)->heap_node);
107}
108
109static inline void remove(rt_domain_t* rt, struct task_struct *t)
110{
111 bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
112}
113
114static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
115{
116 unsigned long flags;
117 /* take ready_lock (saving/disabling IRQs) before modifying the ready queue */
118 raw_spin_lock_irqsave(&rt->ready_lock, flags);
119 __add_ready(rt, new);
120 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
121}
122
123static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
124{
125 unsigned long flags;
126 raw_spin_lock_irqsave(&rt->ready_lock, flags);
127 __merge_ready(rt, tasks);
128 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
129}
130
131static inline struct task_struct* take_ready(rt_domain_t* rt)
132{
133 unsigned long flags;
134 struct task_struct* ret;
135 /* take ready_lock (saving/disabling IRQs) before dequeuing from the ready queue */
136 raw_spin_lock_irqsave(&rt->ready_lock, flags);
137 ret = __take_ready(rt);
138 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
139 return ret;
140}
141
142
143static inline void add_release(rt_domain_t* rt, struct task_struct *task)
144{
145 unsigned long flags;
146 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
147 __add_release(rt, task);
148 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
149}
150
151#ifdef CONFIG_RELEASE_MASTER
152void __add_release_on(rt_domain_t* rt, struct task_struct *task,
153 int target_cpu);
154
155static inline void add_release_on(rt_domain_t* rt,
156 struct task_struct *task,
157 int target_cpu)
158{
159 unsigned long flags;
160 raw_spin_lock_irqsave(&rt->tobe_lock, flags);
161 __add_release_on(rt, task, target_cpu);
162 raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
163}
164#endif
165
166static inline int __jobs_pending(rt_domain_t* rt)
167{
168 return !bheap_empty(&rt->ready_queue);
169}
170
171static inline int jobs_pending(rt_domain_t* rt)
172{
173 unsigned long flags;
174 int ret;
175 /* first we need the write lock for rt_ready_queue */
176 raw_spin_lock_irqsave(&rt->ready_lock, flags);
177 ret = !bheap_empty(&rt->ready_queue);
178 raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
179 return ret;
180}
181
182#endif
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
new file mode 100644
index 00000000000..fac939dbd33
--- /dev/null
+++ b/include/litmus/rt_param.h
@@ -0,0 +1,237 @@
1/*
2 * Definition of the real-time task parameters (rt_task, rt_job, rt_param).
3 *
4 */
5#ifndef _LINUX_RT_PARAM_H_
6#define _LINUX_RT_PARAM_H_
7
/* Litmus time type. */
typedef unsigned long long lt_t;

/* Ordering helpers for lt_t time stamps.
 *
 * The difference is computed in unsigned arithmetic (which wraps, and is
 * therefore always defined) and only then reinterpreted as signed. Casting
 * both operands to signed first and subtracting afterwards can overflow,
 * which is undefined behaviour for signed types.
 */

/* Returns non-zero iff a is strictly later than b. */
static inline int lt_after(lt_t a, lt_t b)
{
	return (long long) (b - a) < 0;
}
#define lt_before(a, b) lt_after(b, a)

/* Returns non-zero iff a is later than or equal to b. */
static inline int lt_after_eq(lt_t a, lt_t b)
{
	return (long long) (a - b) >= 0;
}
#define lt_before_eq(a, b) lt_after_eq(b, a)
22
23/* different types of clients */
24typedef enum {
25 RT_CLASS_HARD,
26 RT_CLASS_SOFT,
27 RT_CLASS_BEST_EFFORT
28} task_class_t;
29
30typedef enum {
31 NO_ENFORCEMENT, /* job may overrun unhindered */
32 QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
33 PRECISE_ENFORCEMENT /* budgets are enforced with hrtimers */
34} budget_policy_t;
35
36/* We use the common priority interpretation "lower index == higher priority",
37 * which is commonly used in fixed-priority schedulability analysis papers.
38 * So, a numerically lower priority value implies higher scheduling priority,
39 * with priority 1 being the highest priority. Priority 0 is reserved for
40 * priority boosting. LITMUS_MAX_PRIORITY denotes the maximum priority value
41 * range.
42 */
43
#define LITMUS_MAX_PRIORITY	512
#define LITMUS_HIGHEST_PRIORITY	1
#define LITMUS_LOWEST_PRIORITY	(LITMUS_MAX_PRIORITY - 1)

/* Provide generic comparison macros for userspace,
 * in case that we change this later.
 *
 * A numerically lower value is the higher priority. Every argument is
 * parenthesized so that arbitrary expressions (e.g. a conditional) can be
 * passed without the comparison binding to only part of them. */
#define litmus_higher_fixed_prio(a, b)	((a) < (b))
#define litmus_lower_fixed_prio(a, b)	((a) > (b))
/* Non-zero iff p lies in [LITMUS_HIGHEST_PRIORITY, LITMUS_LOWEST_PRIORITY]. */
#define litmus_is_valid_fixed_prio(p)		\
	((p) >= LITMUS_HIGHEST_PRIORITY &&	\
	 (p) <= LITMUS_LOWEST_PRIORITY)
55
56struct rt_task {
57 lt_t exec_cost;
58 lt_t period;
59 lt_t relative_deadline;
60 lt_t phase;
61 unsigned int cpu;
62 unsigned int priority;
63 task_class_t cls;
64 budget_policy_t budget_policy; /* ignored by pfair */
65};
66
67union np_flag {
68 uint32_t raw;
69 struct {
70 /* Is the task currently in a non-preemptive section? */
71 uint32_t flag:31;
72 /* Should the task call into the scheduler? */
73 uint32_t preempt:1;
74 } np;
75};
76
77/* The definition of the data that is shared between the kernel and real-time
78 * tasks via a shared page (see litmus/ctrldev.c).
79 *
80 * WARNING: User space can write to this, so don't trust
81 * the correctness of the fields!
82 *
83 * This serves two purposes: to enable efficient signaling
84 * of non-preemptive sections (user->kernel) and
85 * delayed preemptions (kernel->user), and to export
86 * some real-time relevant statistics such as preemption and
87 * migration data to user space. (We can't use a device to export
88 * statistics because we want to avoid system call overhead when
89 * determining preemption/migration overheads.)
90 */
91struct control_page {
92 volatile union np_flag sched;
93
94 /* to be extended */
95};
96
97/* don't export internal data structures to user space (liblitmus) */
98#ifdef __KERNEL__
99
100struct _rt_domain;
101struct bheap_node;
102struct release_heap;
103
104struct rt_job {
105 /* Time instant the job was or will be released. */
106 lt_t release;
107 /* What is the current deadline? */
108 lt_t deadline;
109
110 /* How much service has this job received so far? */
111 lt_t exec_time;
112
113 /* By how much did the prior job miss its deadline?
114 * Value differs from tardiness in that lateness may
115 * be negative (when job finishes before its deadline).
116 */
117 long long lateness;
118
119 /* Which job is this. This is used to let user space
120 * specify which job to wait for, which is important if jobs
121 * overrun. If we just call sys_sleep_next_period() then we
122 * will unintentionally miss jobs after an overrun.
123 *
124 * Increase this sequence number when a job is released.
125 */
126 unsigned int job_no;
127};
128
129struct pfair_param;
130
131/* RT task parameters for scheduling extensions
132 * These parameters are inherited during clone and therefore must
133 * be explicitly set up before the task set is launched.
134 */
135struct rt_param {
136 /* is the task sleeping? */
137 unsigned int flags:8;
138
139 /* do we need to check for srp blocking? */
140 unsigned int srp_non_recurse:1;
141
142 /* is the task present? (true if it can be scheduled) */
143 unsigned int present:1;
144
145#ifdef CONFIG_LITMUS_LOCKING
146 /* Is the task being priority-boosted by a locking protocol? */
147 unsigned int priority_boosted:1;
148 /* If so, when did this start? */
149 lt_t boost_start_time;
150#endif
151
152 /* user controlled parameters */
153 struct rt_task task_params;
154
155 /* timing parameters */
156 struct rt_job job_params;
157
158 /* task representing the current "inherited" task
159 * priority, assigned by inherit_priority and
160 * return priority in the scheduler plugins.
161 * could point to self if PI does not result in
162 * an increased task priority.
163 */
164 struct task_struct* inh_task;
165
166#ifdef CONFIG_NP_SECTION
167 /* For the FMLP under PSN-EDF, it is required to make the task
168 * non-preemptive from kernel space. In order not to interfere with
169 * user space, this counter indicates the kernel space np setting.
170 * kernel_np > 0 => task is non-preemptive
171 */
172 unsigned int kernel_np;
173#endif
174
175 /* This field can be used by plugins to store where the task
176 * is currently scheduled. It is the responsibility of the
177 * plugin to avoid race conditions.
178 *
179 * This is used by GSN-EDF and PFAIR.
180 */
181 volatile int scheduled_on;
182
183 /* Is the stack of the task currently in use? This is updated by
184 * the LITMUS core.
185 *
186 * Be careful to avoid deadlocks!
187 */
188 volatile int stack_in_use;
189
190 /* This field can be used by plugins to store where the task
191 * is currently linked. It is the responsibility of the plugin
192 * to avoid race conditions.
193 *
194 * Used by GSN-EDF.
195 */
196 volatile int linked_on;
197
198 /* PFAIR/PD^2 state. Allocated on demand. */
199 struct pfair_param* pfair;
200
201 /* Fields saved before BE->RT transition.
202 */
203 int old_policy;
204 int old_prio;
205
206 /* ready queue for this task */
207 struct _rt_domain* domain;
208
209 /* heap element for this task
210 *
211 * Warning: Don't statically allocate this node. The heap
212 * implementation swaps these between tasks, thus after
213 * dequeuing from a heap you may end up with a different node
214 * than the one you had when enqueuing the task. For the same
215 * reason, don't obtain and store references to this node
216 * other than this pointer (which is updated by the heap
217 * implementation).
218 */
219 struct bheap_node* heap_node;
220 struct release_heap* rel_heap;
221
222 /* Used by rt_domain to queue task in release list.
223 */
224 struct list_head list;
225
226 /* Pointer to the page shared between userspace and kernel. */
227 struct control_page * ctrl_page;
228};
229
230/* Possible RT flags */
231#define RT_F_RUNNING 0x00000000
232#define RT_F_SLEEP 0x00000001
233#define RT_F_EXIT_SEM 0x00000008
234
235#endif
236
237#endif
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
new file mode 100644
index 00000000000..6e7cabdddae
--- /dev/null
+++ b/include/litmus/sched_plugin.h
@@ -0,0 +1,111 @@
1/*
2 * Definition of the scheduler plugin interface.
3 *
4 */
5#ifndef _LINUX_SCHED_PLUGIN_H_
6#define _LINUX_SCHED_PLUGIN_H_
7
8#include <linux/sched.h>
9
10#ifdef CONFIG_LITMUS_LOCKING
11#include <litmus/locking.h>
12#endif
13
14/************************ setup/tear down ********************/
15
16typedef long (*activate_plugin_t) (void);
17typedef long (*deactivate_plugin_t) (void);
18
19
20
21/********************* scheduler invocation ******************/
22
23/* Plugin-specific realtime tick handler */
24typedef void (*scheduler_tick_t) (struct task_struct *cur);
25/* Novell make sched decision function */
26typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
27/* Clean up after the task switch has occurred.
28 * This function is called after every (even non-rt) task switch.
29 */
30typedef void (*finish_switch_t)(struct task_struct *prev);
31
32
33/********************* task state changes ********************/
34
35/* Called to setup a new real-time task.
36 * Release the first job, enqueue, etc.
37 * Task may already be running.
38 */
39typedef void (*task_new_t) (struct task_struct *task,
40 int on_rq,
41 int running);
42
43/* Called to re-introduce a task after blocking.
44 * Can potentially be called multiple times.
45 */
46typedef void (*task_wake_up_t) (struct task_struct *task);
47/* called to notify the plugin of a blocking real-time task
48 * it will only be called for real-time tasks and before schedule is called */
49typedef void (*task_block_t) (struct task_struct *task);
50/* Called when a real-time task exits or changes to a different scheduling
51 * class.
52 * Free any allocated resources
53 */
54typedef void (*task_exit_t) (struct task_struct *);
55
56/* Called when the current task attempts to create a new lock of a given
57 * protocol type. */
58typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
59 void* __user config);
60
61
62/********************* sys call backends ********************/
63/* This function causes the caller to sleep until the next release */
64typedef long (*complete_job_t) (void);
65
66typedef long (*admit_task_t)(struct task_struct* tsk);
67
68typedef void (*release_at_t)(struct task_struct *t, lt_t start);
69
70struct sched_plugin {
71 struct list_head list;
72 /* basic info */
73 char *plugin_name;
74
75 /* setup */
76 activate_plugin_t activate_plugin;
77 deactivate_plugin_t deactivate_plugin;
78
79 /* scheduler invocation */
80 scheduler_tick_t tick;
81 schedule_t schedule;
82 finish_switch_t finish_switch;
83
84 /* syscall backend */
85 complete_job_t complete_job;
86 release_at_t release_at;
87
88 /* task state changes */
89 admit_task_t admit_task;
90
91 task_new_t task_new;
92 task_wake_up_t task_wake_up;
93 task_block_t task_block;
94 task_exit_t task_exit;
95
96#ifdef CONFIG_LITMUS_LOCKING
97 /* locking protocols */
98 allocate_lock_t allocate_lock;
99#endif
100} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
101
102
103extern struct sched_plugin *litmus;
104
105int register_sched_plugin(struct sched_plugin* plugin);
106struct sched_plugin* find_sched_plugin(const char* name);
107int print_sched_plugins(char* buf, int max);
108
109extern struct sched_plugin linux_sched_plugin;
110
111#endif
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
new file mode 100644
index 00000000000..82bde824129
--- /dev/null
+++ b/include/litmus/sched_trace.h
@@ -0,0 +1,259 @@
1/*
2 * sched_trace.h -- record scheduler events to a byte stream for offline analysis.
3 */
4#ifndef _LINUX_SCHED_TRACE_H_
5#define _LINUX_SCHED_TRACE_H_
6
7/* all times in nanoseconds */
8
9struct st_trace_header {
10 u8 type; /* Of what type is this record? */
11 u8 cpu; /* On which CPU was it recorded? */
12 u16 pid; /* PID of the task. */
13 u32 job; /* The job sequence number. */
14};
15
16#define ST_NAME_LEN 16
17struct st_name_data {
18 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
19};
20
21struct st_param_data { /* regular params */
22 u32 wcet;
23 u32 period;
24 u32 phase;
25 u8 partition;
26 u8 class;
27 u8 __unused[2];
28};
29
30struct st_release_data { /* A job was/is going to be released. */
31 u64 release; /* What's the release time? */
32 u64 deadline; /* By when must it finish? */
33};
34
35struct st_assigned_data { /* A job was assigned to a CPU. */
36 u64 when;
37 u8 target; /* Where should it execute? */
38 u8 __unused[7];
39};
40
41struct st_switch_to_data { /* A process was switched to on a given CPU. */
42 u64 when; /* When did this occur? */
43 u32 exec_time; /* Time the current job has executed. */
44 u8 __unused[4];
45
46};
47
48struct st_switch_away_data { /* A process was switched away from on a given CPU. */
49 u64 when;
50 u64 exec_time;
51};
52
53struct st_completion_data { /* A job completed. */
54 u64 when;
55 u8 forced:1; /* Set to 1 if job overran and kernel advanced to the
56 * next task automatically; set to 0 otherwise.
57 */
58 u8 __uflags:7;
59 u8 __unused[7];
60};
61
62struct st_block_data { /* A task blocks. */
63 u64 when;
64 u64 __unused;
65};
66
67struct st_resume_data { /* A task resumes. */
68 u64 when;
69 u64 __unused;
70};
71
72struct st_action_data {
73 u64 when;
74 u8 action;
75 u8 __unused[7];
76};
77
78struct st_sys_release_data {
79 u64 when;
80 u64 release;
81};
82
83#define DATA(x) struct st_ ## x ## _data x;
84
85typedef enum {
86 ST_NAME = 1, /* Start at one, so that we can spot
87 * uninitialized records. */
88 ST_PARAM,
89 ST_RELEASE,
90 ST_ASSIGNED,
91 ST_SWITCH_TO,
92 ST_SWITCH_AWAY,
93 ST_COMPLETION,
94 ST_BLOCK,
95 ST_RESUME,
96 ST_ACTION,
97 ST_SYS_RELEASE
98} st_event_record_type_t;
99
100struct st_event_record {
101 struct st_trace_header hdr;
102 union {
103 u64 raw[2];
104
105 DATA(name);
106 DATA(param);
107 DATA(release);
108 DATA(assigned);
109 DATA(switch_to);
110 DATA(switch_away);
111 DATA(completion);
112 DATA(block);
113 DATA(resume);
114 DATA(action);
115 DATA(sys_release);
116 } data;
117};
118
119#undef DATA
120
121#ifdef __KERNEL__
122
123#include <linux/sched.h>
124#include <litmus/feather_trace.h>
125
126#ifdef CONFIG_SCHED_TASK_TRACE
127
128#define SCHED_TRACE(id, callback, task) \
129 ft_event1(id, callback, task)
130#define SCHED_TRACE2(id, callback, task, xtra) \
131 ft_event2(id, callback, task, xtra)
132
133/* provide prototypes; needed on sparc64 */
134#ifndef NO_TASK_TRACE_DECLS
135feather_callback void do_sched_trace_task_name(unsigned long id,
136 struct task_struct* task);
137feather_callback void do_sched_trace_task_param(unsigned long id,
138 struct task_struct* task);
139feather_callback void do_sched_trace_task_release(unsigned long id,
140 struct task_struct* task);
141feather_callback void do_sched_trace_task_switch_to(unsigned long id,
142 struct task_struct* task);
143feather_callback void do_sched_trace_task_switch_away(unsigned long id,
144 struct task_struct* task);
145feather_callback void do_sched_trace_task_completion(unsigned long id,
146 struct task_struct* task,
147 unsigned long forced);
148feather_callback void do_sched_trace_task_block(unsigned long id,
149 struct task_struct* task);
150feather_callback void do_sched_trace_task_resume(unsigned long id,
151 struct task_struct* task);
152feather_callback void do_sched_trace_action(unsigned long id,
153 struct task_struct* task,
154 unsigned long action);
155feather_callback void do_sched_trace_sys_release(unsigned long id,
156 lt_t* start);
157
158#endif
159
160#else
161
162#define SCHED_TRACE(id, callback, task) /* no tracing */
163#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
164
165#endif
166
167#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
168
169#include <trace/events/litmus.h>
170
171#else
172
173/* Override trace macros to actually do nothing */
174#define trace_litmus_task_param(t)
175#define trace_litmus_task_release(t)
176#define trace_litmus_switch_to(t)
177#define trace_litmus_switch_away(prev)
178#define trace_litmus_task_completion(t, forced)
179#define trace_litmus_task_block(t)
180#define trace_litmus_task_resume(t)
181#define trace_litmus_sys_release(start)
182
183#endif
184
185
186#define SCHED_TRACE_BASE_ID 500
187
188
189#define sched_trace_task_name(t) \
190 SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, \
191 do_sched_trace_task_name, t)
192
193#define sched_trace_task_param(t) \
194 do { \
195 SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, \
196 do_sched_trace_task_param, t); \
197 trace_litmus_task_param(t); \
198 } while (0)
199
200#define sched_trace_task_release(t) \
201 do { \
202 SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, \
203 do_sched_trace_task_release, t); \
204 trace_litmus_task_release(t); \
205 } while (0)
206
207#define sched_trace_task_switch_to(t) \
208 do { \
209 SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, \
210 do_sched_trace_task_switch_to, t); \
211 trace_litmus_switch_to(t); \
212 } while (0)
213
214#define sched_trace_task_switch_away(t) \
215 do { \
216 SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, \
217 do_sched_trace_task_switch_away, t); \
218 trace_litmus_switch_away(t); \
219 } while (0)
220
221#define sched_trace_task_completion(t, forced) \
222 do { \
223 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, \
224 do_sched_trace_task_completion, t, \
225 (unsigned long) forced); \
226 trace_litmus_task_completion(t, forced); \
227 } while (0)
228
229#define sched_trace_task_block(t) \
230 do { \
231 SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, \
232 do_sched_trace_task_block, t); \
233 trace_litmus_task_block(t); \
234 } while (0)
235
236#define sched_trace_task_resume(t) \
237 do { \
238 SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, \
239 do_sched_trace_task_resume, t); \
240 trace_litmus_task_resume(t); \
241 } while (0)
242
/* Record a generic action for task t via SCHED_TRACE2 (which expands to
 * nothing when CONFIG_SCHED_TASK_TRACE is off).
 *
 * The macro deliberately carries no trailing semicolon: the caller's own
 * ';' terminates the statement, which keeps the macro usable in an
 * unbraced if/else. The action argument is parenthesized so that the cast
 * applies to the whole expression. */
#define sched_trace_action(t, action)					\
	SCHED_TRACE2(SCHED_TRACE_BASE_ID + 9,				\
		     do_sched_trace_action, t, (unsigned long) (action))
246
247/* when is a pointer, it does not need an explicit cast to unsigned long */
248#define sched_trace_sys_release(when) \
249 do { \
250 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, \
251 do_sched_trace_sys_release, when); \
252 trace_litmus_sys_release(when); \
253 } while (0)
254
255#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
256
257#endif /* __KERNEL__ */
258
259#endif
diff --git a/include/litmus/srp.h b/include/litmus/srp.h
new file mode 100644
index 00000000000..c9a4552b2bf
--- /dev/null
+++ b/include/litmus/srp.h
@@ -0,0 +1,28 @@
1#ifndef LITMUS_SRP_H
2#define LITMUS_SRP_H
3
4struct srp_semaphore;
5
6struct srp_priority {
7 struct list_head list;
8 unsigned int priority;
9 pid_t pid;
10};
11#define list2prio(l) list_entry(l, struct srp_priority, list)
12
13/* struct for uniprocessor SRP "semaphore" */
14struct srp_semaphore {
15 struct litmus_lock litmus_lock;
16 struct srp_priority ceiling;
17 struct task_struct* owner;
18 int cpu; /* cpu associated with this "semaphore" and resource */
19};
20
21/* map a task to its SRP preemption level priority */
22typedef unsigned int (*srp_prioritization_t)(struct task_struct* t);
23/* Must be updated by each plugin that uses SRP.*/
24extern srp_prioritization_t get_srp_prio;
25
26struct srp_semaphore* allocate_srp_semaphore(void);
27
28#endif
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
new file mode 100644
index 00000000000..e809376d648
--- /dev/null
+++ b/include/litmus/trace.h
@@ -0,0 +1,116 @@
1#ifndef _SYS_TRACE_H_
2#define _SYS_TRACE_H_
3
4#ifdef CONFIG_SCHED_OVERHEAD_TRACE
5
6#include <litmus/feather_trace.h>
7#include <litmus/feather_buffer.h>
8
9
10/*********************** TIMESTAMPS ************************/
11
12enum task_type_marker {
13 TSK_BE,
14 TSK_RT,
15 TSK_UNKNOWN
16};
17
18struct timestamp {
19 uint64_t timestamp;
20 uint32_t seq_no;
21 uint8_t cpu;
22 uint8_t event;
23 uint8_t task_type:2;
24 uint8_t irq_flag:1;
25 uint8_t irq_count:5;
26};
27
28/* tracing callbacks */
29feather_callback void save_timestamp(unsigned long event);
30feather_callback void save_timestamp_def(unsigned long event, unsigned long type);
31feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr);
32feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu);
33feather_callback void save_task_latency(unsigned long event, unsigned long when_ptr);
34
/* Timestamp macros: thin wrappers that pass an event id, one of the
 * save_* callbacks declared above, and (for all but TIMESTAMP) a single
 * argument cast to unsigned long to the ft_event0()/ft_event1() macros.
 *
 * Arguments are parenthesized before the cast so that the cast applies to
 * the entire expression rather than to its first operand only. */
#define TIMESTAMP(id) ft_event0(id, save_timestamp)

#define DTIMESTAMP(id, def) \
	ft_event1(id, save_timestamp_def, (unsigned long) (def))

#define TTIMESTAMP(id, task) \
	ft_event1(id, save_timestamp_task, (unsigned long) (task))

#define CTIMESTAMP(id, cpu) \
	ft_event1(id, save_timestamp_cpu, (unsigned long) (cpu))

/* when_ptr matches the parameter name of save_task_latency() and of the
 * no-tracing variant of this macro. */
#define LTIMESTAMP(id, when_ptr) \
	ft_event1(id, save_task_latency, (unsigned long) (when_ptr))
47
48#else /* !CONFIG_SCHED_OVERHEAD_TRACE */
49
50#define TIMESTAMP(id) /* no tracing */
51
52#define DTIMESTAMP(id, def) /* no tracing */
53
54#define TTIMESTAMP(id, task) /* no tracing */
55
56#define CTIMESTAMP(id, cpu) /* no tracing */
57
58#define LTIMESTAMP(id, when_ptr) /* no tracing */
59
60#endif
61
62
63/* Convention for timestamps
64 * =========================
65 *
66 * In order to process the trace files with a common tool, we use the following
67 * convention to measure execution times: The end time id of a code segment is
68 * always the next number after the start time event id.
69 */
70
71
72
73#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only
74 * care
75 * about
76 * next */
77#define TS_SCHED_END(t) TTIMESTAMP(101, t)
78#define TS_SCHED2_START(t) TTIMESTAMP(102, t)
79#define TS_SCHED2_END(t) TTIMESTAMP(103, t)
80
81#define TS_CXS_START(t) TTIMESTAMP(104, t)
82#define TS_CXS_END(t) TTIMESTAMP(105, t)
83
84#define TS_RELEASE_START DTIMESTAMP(106, TSK_RT)
85#define TS_RELEASE_END DTIMESTAMP(107, TSK_RT)
86
87#define TS_TICK_START(t) TTIMESTAMP(110, t)
88#define TS_TICK_END(t) TTIMESTAMP(111, t)
89
90
91#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
92#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
93
94#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */
95#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */
96
97#define TS_ENTER_NP_START TIMESTAMP(140)
98#define TS_ENTER_NP_END TIMESTAMP(141)
99
100#define TS_EXIT_NP_START TIMESTAMP(150)
101#define TS_EXIT_NP_END TIMESTAMP(151)
102
103#define TS_LOCK_START TIMESTAMP(170)
104#define TS_LOCK_SUSPEND TIMESTAMP(171)
105#define TS_LOCK_RESUME TIMESTAMP(172)
106#define TS_LOCK_END TIMESTAMP(173)
107
108#define TS_UNLOCK_START TIMESTAMP(180)
109#define TS_UNLOCK_END TIMESTAMP(181)
110
111#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c)
112#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
113
114#define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when))
115
116#endif /* !_SYS_TRACE_H_ */
diff --git a/include/litmus/trace_irq.h b/include/litmus/trace_irq.h
new file mode 100644
index 00000000000..b717b1d5539
--- /dev/null
+++ b/include/litmus/trace_irq.h
@@ -0,0 +1,23 @@
1#ifndef _LITMUS_TRACE_IRQ_H_
2#define _LITMUS_TRACE_IRQ_H_
3
4#ifdef CONFIG_SCHED_OVERHEAD_TRACE
5
6#include <linux/percpu.h>
7
8extern DEFINE_PER_CPU(atomic_t, irq_fired_count);
9
10static inline void ft_irq_fired(void)
11{
12 /* Only called with preemptions disabled. */
13 atomic_inc(&__get_cpu_var(irq_fired_count));
14}
15
16
17#else
18
19#define ft_irq_fired() /* nothing to do */
20
21#endif
22
23#endif
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
new file mode 100644
index 00000000000..94264c27d9a
--- /dev/null
+++ b/include/litmus/unistd_32.h
@@ -0,0 +1,21 @@
1/*
2 * included from arch/x86/include/asm/unistd_32.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
/* Map a LITMUS^RT-relative syscall index to its absolute number.
 * The argument is parenthesized so any expression can be passed safely. */
#define __LSC(x) (__NR_LITMUS + (x))

#define __NR_set_rt_task_param	__LSC(0)
#define __NR_get_rt_task_param	__LSC(1)
#define __NR_complete_job	__LSC(2)
#define __NR_od_open		__LSC(3)
#define __NR_od_close		__LSC(4)
#define __NR_litmus_lock	__LSC(5)
#define __NR_litmus_unlock	__LSC(6)
#define __NR_query_job_no	__LSC(7)
#define __NR_wait_for_job_release __LSC(8)
#define __NR_wait_for_ts_release __LSC(9)
#define __NR_release_ts		__LSC(10)
#define __NR_null_call		__LSC(11)

/* Number of syscalls listed above; keep in sync when adding one. */
#define NR_litmus_syscalls 12
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
new file mode 100644
index 00000000000..d5ced0d2642
--- /dev/null
+++ b/include/litmus/unistd_64.h
@@ -0,0 +1,33 @@
1/*
2 * included from arch/x86/include/asm/unistd_64.h
3 *
4 * LITMUS^RT syscalls with "relative" numbers
5 */
6#define __LSC(x) (__NR_LITMUS + x)
7
8#define __NR_set_rt_task_param __LSC(0)
9__SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param)
10#define __NR_get_rt_task_param __LSC(1)
11__SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param)
12#define __NR_complete_job __LSC(2)
13__SYSCALL(__NR_complete_job, sys_complete_job)
14#define __NR_od_open __LSC(3)
15__SYSCALL(__NR_od_open, sys_od_open)
16#define __NR_od_close __LSC(4)
17__SYSCALL(__NR_od_close, sys_od_close)
18#define __NR_litmus_lock __LSC(5)
19__SYSCALL(__NR_litmus_lock, sys_litmus_lock)
20#define __NR_litmus_unlock __LSC(6)
21__SYSCALL(__NR_litmus_unlock, sys_litmus_unlock)
22#define __NR_query_job_no __LSC(7)
23__SYSCALL(__NR_query_job_no, sys_query_job_no)
24#define __NR_wait_for_job_release __LSC(8)
25__SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release)
26#define __NR_wait_for_ts_release __LSC(9)
27__SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
28#define __NR_release_ts __LSC(10)
29__SYSCALL(__NR_release_ts, sys_release_ts)
30#define __NR_null_call __LSC(11)
31__SYSCALL(__NR_null_call, sys_null_call)
32
33#define NR_litmus_syscalls 12
diff --git a/include/litmus/wait.h b/include/litmus/wait.h
new file mode 100644
index 00000000000..ce1347c355f
--- /dev/null
+++ b/include/litmus/wait.h
@@ -0,0 +1,57 @@
1#ifndef _LITMUS_WAIT_H_
2#define _LITMUS_WAIT_H_
3
4struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
5
6/* wrap regular wait_queue_t head */
7struct __prio_wait_queue {
8 wait_queue_t wq;
9
10 /* some priority point */
11 lt_t priority;
12 /* break ties in priority by lower tie_breaker */
13 unsigned int tie_breaker;
14};
15
16typedef struct __prio_wait_queue prio_wait_queue_t;
17
18static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq,
19 struct task_struct* t,
20 lt_t priority)
21{
22 init_waitqueue_entry(&pwq->wq, t);
23 pwq->priority = priority;
24 pwq->tie_breaker = 0;
25}
26
27static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq,
28 struct task_struct* t,
29 lt_t priority,
30 unsigned int tie_breaker)
31{
32 init_waitqueue_entry(&pwq->wq, t);
33 pwq->priority = priority;
34 pwq->tie_breaker = tie_breaker;
35}
36
37unsigned int __add_wait_queue_prio_exclusive(
38 wait_queue_head_t* head,
39 prio_wait_queue_t *new);
40
41static inline unsigned int add_wait_queue_prio_exclusive(
42 wait_queue_head_t* head,
43 prio_wait_queue_t *new)
44{
45 unsigned long flags;
46 unsigned int passed;
47
48 spin_lock_irqsave(&head->lock, flags);
49 passed = __add_wait_queue_prio_exclusive(head, new);
50
51 spin_unlock_irqrestore(&head->lock, flags);
52
53 return passed;
54}
55
56
57#endif
diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h
new file mode 100644
index 00000000000..0fffcee02be
--- /dev/null
+++ b/include/trace/events/litmus.h
@@ -0,0 +1,231 @@
1/*
2 * LITMUS^RT kernel style scheduling tracepoints
3 */
4#undef TRACE_SYSTEM
5#define TRACE_SYSTEM litmus
6
7#if !defined(_SCHED_TASK_TRACEPOINT_H) || defined(TRACE_HEADER_MULTI_READ)
8#define _SCHED_TASK_TRACEPOINT_H
9
10#include <linux/tracepoint.h>
11
12#include <litmus/litmus.h>
13#include <litmus/rt_param.h>
14
15/*
16 * Tracing task admission
17 */
18TRACE_EVENT(litmus_task_param,
19
20 TP_PROTO(struct task_struct *t),
21
22 TP_ARGS(t),
23
24 TP_STRUCT__entry(
25 __field( pid_t, pid )
26 __field( unsigned int, job )
27 __field( lt_t, wcet )
28 __field( lt_t, period )
29 __field( lt_t, phase )
30 __field( int, partition )
31 ),
32
33 TP_fast_assign(
34 __entry->pid = t ? t->pid : 0;
35 __entry->job = t ? t->rt_param.job_params.job_no : 0;
36 __entry->wcet = get_exec_cost(t);
37 __entry->period = get_rt_period(t);
38 __entry->phase = get_rt_phase(t);
39 __entry->partition = get_partition(t);
40 ),
41
42 TP_printk("period(%d, %Lu).\nwcet(%d, %Lu).\n",
43 __entry->pid, __entry->period,
44 __entry->pid, __entry->wcet)
45);
46
47/*
48 * Tracing jobs release
49 */
50TRACE_EVENT(litmus_task_release,
51
52 TP_PROTO(struct task_struct *t),
53
54 TP_ARGS(t),
55
56 TP_STRUCT__entry(
57 __field( pid_t, pid )
58 __field( unsigned int, job )
59 __field( lt_t, release )
60 __field( lt_t, deadline )
61 ),
62
63 TP_fast_assign(
64 __entry->pid = t ? t->pid : 0;
65 __entry->job = t ? t->rt_param.job_params.job_no : 0;
66 __entry->release = get_release(t);
67 __entry->deadline = get_deadline(t);
68 ),
69
70 TP_printk("release(job(%u, %u)): %Lu\ndeadline(job(%u, %u)): %Lu\n",
71 __entry->pid, __entry->job, __entry->release,
72 __entry->pid, __entry->job, __entry->deadline)
73);
74
75/*
76 * Tracepoint for switching to new task
77 */
78TRACE_EVENT(litmus_switch_to,
79
80 TP_PROTO(struct task_struct *t),
81
82 TP_ARGS(t),
83
84 TP_STRUCT__entry(
85 __field( pid_t, pid )
86 __field( unsigned int, job )
87 __field( lt_t, when )
88 __field( lt_t, exec_time )
89 ),
90
91 TP_fast_assign(
92 __entry->pid = is_realtime(t) ? t->pid : 0;
93 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
94 __entry->when = litmus_clock();
95 __entry->exec_time = get_exec_time(t);
96 ),
97
98 TP_printk("switch_to(job(%u, %u)): %Lu (exec: %Lu)\n",
99 __entry->pid, __entry->job,
100 __entry->when, __entry->exec_time)
101);
102
103/*
104 * Tracepoint for switching away previous task
105 */
106TRACE_EVENT(litmus_switch_away,
107
108 TP_PROTO(struct task_struct *t),
109
110 TP_ARGS(t),
111
112 TP_STRUCT__entry(
113 __field( pid_t, pid )
114 __field( unsigned int, job )
115 __field( lt_t, when )
116 __field( lt_t, exec_time )
117 ),
118
119 TP_fast_assign(
120 __entry->pid = is_realtime(t) ? t->pid : 0;
121 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
122 __entry->when = litmus_clock();
123 __entry->exec_time = get_exec_time(t);
124 ),
125
126 TP_printk("switch_away(job(%u, %u)): %Lu (exec: %Lu)\n",
127 __entry->pid, __entry->job,
128 __entry->when, __entry->exec_time)
129);
130
131/*
132 * Tracing jobs completion
133 */
134TRACE_EVENT(litmus_task_completion,
135
136 TP_PROTO(struct task_struct *t, unsigned long forced),
137
138 TP_ARGS(t, forced),
139
140 TP_STRUCT__entry(
141 __field( pid_t, pid )
142 __field( unsigned int, job )
143 __field( lt_t, when )
144 __field( unsigned long, forced )
145 ),
146
147 TP_fast_assign(
148 __entry->pid = t ? t->pid : 0;
149 __entry->job = t ? t->rt_param.job_params.job_no : 0;
150 __entry->when = litmus_clock();
151 __entry->forced = forced;
152 ),
153
154 TP_printk("completed(job(%u, %u)): %Lu (forced: %lu)\n",
155 __entry->pid, __entry->job,
156 __entry->when, __entry->forced)
157);
158
159/*
160 * Trace blocking tasks.
161 */
162TRACE_EVENT(litmus_task_block,
163
164 TP_PROTO(struct task_struct *t),
165
166 TP_ARGS(t),
167
168 TP_STRUCT__entry(
169 __field( pid_t, pid )
170 __field( lt_t, when )
171 ),
172
173 TP_fast_assign(
174 __entry->pid = t ? t->pid : 0;
175 __entry->when = litmus_clock();
176 ),
177
178 TP_printk("(%u) blocks: %Lu\n", __entry->pid, __entry->when)
179);
180
181/*
182 * Tracing jobs resume
183 */
184TRACE_EVENT(litmus_task_resume,
185
186 TP_PROTO(struct task_struct *t),
187
188 TP_ARGS(t),
189
190 TP_STRUCT__entry(
191 __field( pid_t, pid )
192 __field( unsigned int, job )
193 __field( lt_t, when )
194 ),
195
196 TP_fast_assign(
197 __entry->pid = t ? t->pid : 0;
198 __entry->job = t ? t->rt_param.job_params.job_no : 0;
199 __entry->when = litmus_clock();
200 ),
201
202 TP_printk("resume(job(%u, %u)): %Lu\n",
203 __entry->pid, __entry->job, __entry->when)
204);
205
206/*
207 * Trace synchronous release
208 */
209TRACE_EVENT(litmus_sys_release,
210
211 TP_PROTO(lt_t *start),
212
213 TP_ARGS(start),
214
215 TP_STRUCT__entry(
216 __field( lt_t, rel )
217 __field( lt_t, when )
218 ),
219
220 TP_fast_assign(
221 __entry->rel = *start;
222 __entry->when = litmus_clock();
223 ),
224
225 TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when)
226);
227
228#endif /* _SCHED_TASK_TRACEPOINT_H */
229
230/* Must stay outside the protection */
231#include <trace/define_trace.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index 303bed2966b..94464e0546a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -57,6 +57,8 @@
57#include <asm/pgtable.h> 57#include <asm/pgtable.h>
58#include <asm/mmu_context.h> 58#include <asm/mmu_context.h>
59 59
60extern void exit_od_table(struct task_struct *t);
61
60static void exit_mm(struct task_struct * tsk); 62static void exit_mm(struct task_struct * tsk);
61 63
62static void __unhash_process(struct task_struct *p, bool group_dead) 64static void __unhash_process(struct task_struct *p, bool group_dead)
@@ -980,6 +982,8 @@ NORET_TYPE void do_exit(long code)
980 if (unlikely(tsk->audit_context)) 982 if (unlikely(tsk->audit_context))
981 audit_free(tsk); 983 audit_free(tsk);
982 984
985 exit_od_table(tsk);
986
983 tsk->exit_code = code; 987 tsk->exit_code = code;
984 taskstats_exit(tsk, group_dead); 988 taskstats_exit(tsk, group_dead);
985 989
diff --git a/kernel/fork.c b/kernel/fork.c
index 992f5d10a05..a39e5c25d45 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -79,6 +79,9 @@
79 79
80#include <trace/events/sched.h> 80#include <trace/events/sched.h>
81 81
82#include <litmus/litmus.h>
83#include <litmus/sched_plugin.h>
84
82/* 85/*
83 * Protected counters by write_lock_irq(&tasklist_lock) 86 * Protected counters by write_lock_irq(&tasklist_lock)
84 */ 87 */
@@ -208,6 +211,7 @@ void __put_task_struct(struct task_struct *tsk)
208 WARN_ON(atomic_read(&tsk->usage)); 211 WARN_ON(atomic_read(&tsk->usage));
209 WARN_ON(tsk == current); 212 WARN_ON(tsk == current);
210 213
214 exit_litmus(tsk);
211 exit_creds(tsk); 215 exit_creds(tsk);
212 delayacct_tsk_free(tsk); 216 delayacct_tsk_free(tsk);
213 put_signal_struct(tsk->signal); 217 put_signal_struct(tsk->signal);
@@ -293,6 +297,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
293 297
294 tsk->stack = ti; 298 tsk->stack = ti;
295 299
300 /* Don't let the new task be a real-time task. */
301 litmus_fork(tsk);
302
296 err = prop_local_init_single(&tsk->dirties); 303 err = prop_local_init_single(&tsk->dirties);
297 if (err) 304 if (err)
298 goto out; 305 goto out;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 957869fd596..2f3c57417c5 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -46,6 +46,8 @@
46#include <linux/sched.h> 46#include <linux/sched.h>
47#include <linux/timer.h> 47#include <linux/timer.h>
48 48
49#include <litmus/litmus.h>
50
49#include <asm/uaccess.h> 51#include <asm/uaccess.h>
50 52
51#include <trace/events/timer.h> 53#include <trace/events/timer.h>
@@ -1038,6 +1040,98 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
1038} 1040}
1039EXPORT_SYMBOL_GPL(hrtimer_start); 1041EXPORT_SYMBOL_GPL(hrtimer_start);
1040 1042
1043#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
1044
1045/**
1046 * hrtimer_start_on_info_init - Initialize hrtimer_start_on_info
1047 */
1048void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info)
1049{
1050 memset(info, 0, sizeof(struct hrtimer_start_on_info));
1051 atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
1052}
1053
1054/**
1055 * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu
1056 */
1057void hrtimer_pull(void)
1058{
1059 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
1060 struct hrtimer_start_on_info *info;
1061 struct list_head *pos, *safe, list;
1062
1063 raw_spin_lock(&base->lock);
1064 list_replace_init(&base->to_pull, &list);
1065 raw_spin_unlock(&base->lock);
1066
1067 list_for_each_safe(pos, safe, &list) {
1068 info = list_entry(pos, struct hrtimer_start_on_info, list);
1069 TRACE("pulled timer 0x%x\n", info->timer);
1070 list_del(pos);
1071 hrtimer_start(info->timer, info->time, info->mode);
1072 }
1073}
1074
1075/**
1076 * hrtimer_start_on - trigger timer arming on remote cpu
1077 * @cpu: remote cpu
1078 * @info: save timer information for enqueuing on remote cpu
1079 * @timer: timer to be pulled
1080 * @time: expire time
1081 * @mode: timer mode
1082 */
1083int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
1084 struct hrtimer *timer, ktime_t time,
1085 const enum hrtimer_mode mode)
1086{
1087 unsigned long flags;
1088 struct hrtimer_cpu_base* base;
1089 int in_use = 0, was_empty;
1090
1091 /* serialize access to info through the timer base */
1092 lock_hrtimer_base(timer, &flags);
1093
1094 in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE);
1095 if (!in_use) {
1096 INIT_LIST_HEAD(&info->list);
1097 info->timer = timer;
1098 info->time = time;
1099 info->mode = mode;
1100 /* mark as in use */
1101 atomic_set(&info->state, HRTIMER_START_ON_QUEUED);
1102 }
1103
1104 unlock_hrtimer_base(timer, &flags);
1105
1106 if (!in_use) {
1107 /* initiate pull */
1108 preempt_disable();
1109 if (cpu == smp_processor_id()) {
1110 /* start timer locally; we may get called
1111 * with rq->lock held, do not wake up anything
1112 */
1113 TRACE("hrtimer_start_on: starting on local CPU\n");
1114 __hrtimer_start_range_ns(info->timer, info->time,
1115 0, info->mode, 0);
1116 } else {
1117 TRACE("hrtimer_start_on: pulling to remote CPU\n");
1118 base = &per_cpu(hrtimer_bases, cpu);
1119 raw_spin_lock_irqsave(&base->lock, flags);
1120 was_empty = list_empty(&base->to_pull);
1121 list_add(&info->list, &base->to_pull);
1122 raw_spin_unlock_irqrestore(&base->lock, flags);
1123 if (was_empty)
1124 /* only send IPI if other no else
1125 * has done so already
1126 */
1127 smp_send_pull_timers(cpu);
1128 }
1129 preempt_enable();
1130 }
1131 return in_use;
1132}
1133
1134#endif
1041 1135
1042/** 1136/**
1043 * hrtimer_try_to_cancel - try to deactivate a timer 1137 * hrtimer_try_to_cancel - try to deactivate a timer
@@ -1648,6 +1742,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
1648 } 1742 }
1649 1743
1650 hrtimer_init_hres(cpu_base); 1744 hrtimer_init_hres(cpu_base);
1745 INIT_LIST_HEAD(&cpu_base->to_pull);
1651} 1746}
1652 1747
1653#ifdef CONFIG_HOTPLUG_CPU 1748#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/printk.c b/kernel/printk.c
index 24146142bc0..ec7b0bad58a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -74,6 +74,13 @@ int console_printk[4] = {
74}; 74};
75 75
76/* 76/*
77 * divert printk() messages when there is a LITMUS^RT debug listener
78 */
79#include <litmus/litmus.h>
80int trace_override = 0;
81int trace_recurse = 0;
82
83/*
77 * Low level drivers may need that to know if they can schedule in 84 * Low level drivers may need that to know if they can schedule in
78 * their unblank() callback or not. So let's export it. 85 * their unblank() callback or not. So let's export it.
79 */ 86 */
@@ -924,6 +931,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
924 /* Emit the output into the temporary buffer */ 931 /* Emit the output into the temporary buffer */
925 printed_len += vscnprintf(printk_buf + printed_len, 932 printed_len += vscnprintf(printk_buf + printed_len,
926 sizeof(printk_buf) - printed_len, fmt, args); 933 sizeof(printk_buf) - printed_len, fmt, args);
934 /* if LITMUS^RT tracer is active divert printk() msgs */
935 if (trace_override && !trace_recurse)
936 TRACE("%s", printk_buf);
927 937
928#ifdef CONFIG_DEBUG_LL 938#ifdef CONFIG_DEBUG_LL
929 printascii(printk_buf); 939 printascii(printk_buf);
@@ -1004,7 +1014,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
1004 * Try to acquire and then immediately release the 1014 * Try to acquire and then immediately release the
1005 * console semaphore. The release will do all the 1015 * console semaphore. The release will do all the
1006 * actual magic (print out buffers, wake up klogd, 1016 * actual magic (print out buffers, wake up klogd,
1007 * etc). 1017 * etc).
1008 * 1018 *
1009 * The console_trylock_for_printk() function 1019 * The console_trylock_for_printk() function
1010 * will release 'logbuf_lock' regardless of whether it 1020 * will release 'logbuf_lock' regardless of whether it
@@ -1276,7 +1286,7 @@ int printk_needs_cpu(int cpu)
1276 1286
1277void wake_up_klogd(void) 1287void wake_up_klogd(void)
1278{ 1288{
1279 if (waitqueue_active(&log_wait)) 1289 if (!trace_override && waitqueue_active(&log_wait))
1280 this_cpu_write(printk_pending, 1); 1290 this_cpu_write(printk_pending, 1);
1281} 1291}
1282 1292
diff --git a/kernel/sched.c b/kernel/sched.c
index 71c64ba10af..6d400c11b6f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -84,6 +84,11 @@
84#define CREATE_TRACE_POINTS 84#define CREATE_TRACE_POINTS
85#include <trace/events/sched.h> 85#include <trace/events/sched.h>
86 86
87#include <litmus/sched_trace.h>
88#include <litmus/trace.h>
89
90static void litmus_tick(struct rq*, struct task_struct*);
91
87/* 92/*
88 * Convert user-nice values [ -20 ... 0 ... 19 ] 93 * Convert user-nice values [ -20 ... 0 ... 19 ]
89 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], 94 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -411,6 +416,12 @@ struct rt_rq {
411#endif 416#endif
412}; 417};
413 418
/* LITMUS^RT related fields in a runqueue */
struct litmus_rq {
	/* presumably the number of runnable LITMUS^RT tasks -- TODO confirm */
	unsigned long nr_running;

	/* previously scheduled task, saved for the next scheduling decision */
	struct task_struct *prev;
};
424
414#ifdef CONFIG_SMP 425#ifdef CONFIG_SMP
415 426
416/* 427/*
@@ -476,6 +487,7 @@ struct rq {
476 487
477 struct cfs_rq cfs; 488 struct cfs_rq cfs;
478 struct rt_rq rt; 489 struct rt_rq rt;
490 struct litmus_rq litmus;
479 491
480#ifdef CONFIG_FAIR_GROUP_SCHED 492#ifdef CONFIG_FAIR_GROUP_SCHED
481 /* list of leaf cfs_rq on this cpu: */ 493 /* list of leaf cfs_rq on this cpu: */
@@ -1050,6 +1062,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1050 raw_spin_lock(&rq->lock); 1062 raw_spin_lock(&rq->lock);
1051 update_rq_clock(rq); 1063 update_rq_clock(rq);
1052 rq->curr->sched_class->task_tick(rq, rq->curr, 1); 1064 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
1065 litmus_tick(rq, rq->curr);
1053 raw_spin_unlock(&rq->lock); 1066 raw_spin_unlock(&rq->lock);
1054 1067
1055 return HRTIMER_NORESTART; 1068 return HRTIMER_NORESTART;
@@ -1793,7 +1806,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1793 1806
1794static const struct sched_class rt_sched_class; 1807static const struct sched_class rt_sched_class;
1795 1808
1796#define sched_class_highest (&stop_sched_class) 1809#define sched_class_highest (&litmus_sched_class)
1797#define for_each_class(class) \ 1810#define for_each_class(class) \
1798 for (class = sched_class_highest; class; class = class->next) 1811 for (class = sched_class_highest; class; class = class->next)
1799 1812
@@ -2051,6 +2064,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
2051#include "sched_rt.c" 2064#include "sched_rt.c"
2052#include "sched_autogroup.c" 2065#include "sched_autogroup.c"
2053#include "sched_stoptask.c" 2066#include "sched_stoptask.c"
2067#include "../litmus/sched_litmus.c"
2054#ifdef CONFIG_SCHED_DEBUG 2068#ifdef CONFIG_SCHED_DEBUG
2055# include "sched_debug.c" 2069# include "sched_debug.c"
2056#endif 2070#endif
@@ -2173,6 +2187,10 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
2173 * A queue event has occurred, and we're going to schedule. In 2187 * A queue event has occurred, and we're going to schedule. In
2174 * this case, we can save a useless back to back clock update. 2188 * this case, we can save a useless back to back clock update.
2175 */ 2189 */
2190 /* LITMUS^RT:
2191 * The "disable-clock-update" approach was buggy in Linux 2.6.36.
2192 * The issue has been solved in 2.6.37.
2193 */
2176 if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) 2194 if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
2177 rq->skip_clock_update = 1; 2195 rq->skip_clock_update = 1;
2178} 2196}
@@ -2663,7 +2681,12 @@ static void ttwu_queue(struct task_struct *p, int cpu)
2663 struct rq *rq = cpu_rq(cpu); 2681 struct rq *rq = cpu_rq(cpu);
2664 2682
2665#if defined(CONFIG_SMP) 2683#if defined(CONFIG_SMP)
2666 if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { 2684 /*
2685 * LITMUS^RT: whether to send an IPI to the remote CPU
2686 * is plugin specific.
2687 */
2688 if (!is_realtime(p) &&
2689 sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
2667 sched_clock_cpu(cpu); /* sync clocks x-cpu */ 2690 sched_clock_cpu(cpu); /* sync clocks x-cpu */
2668 ttwu_queue_remote(p, cpu); 2691 ttwu_queue_remote(p, cpu);
2669 return; 2692 return;
@@ -2696,6 +2719,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2696 unsigned long flags; 2719 unsigned long flags;
2697 int cpu, success = 0; 2720 int cpu, success = 0;
2698 2721
2722 if (is_realtime(p))
2723 TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state);
2724
2699 smp_wmb(); 2725 smp_wmb();
2700 raw_spin_lock_irqsave(&p->pi_lock, flags); 2726 raw_spin_lock_irqsave(&p->pi_lock, flags);
2701 if (!(p->state & state)) 2727 if (!(p->state & state))
@@ -2732,6 +2758,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2732 */ 2758 */
2733 smp_rmb(); 2759 smp_rmb();
2734 2760
2761 /* LITMUS^RT: once the task can be safely referenced by this
2762 * CPU, don't mess up with Linux load balancing stuff.
2763 */
2764 if (is_realtime(p))
2765 goto litmus_out_activate;
2766
2735 p->sched_contributes_to_load = !!task_contributes_to_load(p); 2767 p->sched_contributes_to_load = !!task_contributes_to_load(p);
2736 p->state = TASK_WAKING; 2768 p->state = TASK_WAKING;
2737 2769
@@ -2743,12 +2775,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2743 wake_flags |= WF_MIGRATED; 2775 wake_flags |= WF_MIGRATED;
2744 set_task_cpu(p, cpu); 2776 set_task_cpu(p, cpu);
2745 } 2777 }
2778
2779litmus_out_activate:
2746#endif /* CONFIG_SMP */ 2780#endif /* CONFIG_SMP */
2747 2781
2748 ttwu_queue(p, cpu); 2782 ttwu_queue(p, cpu);
2749stat: 2783stat:
2750 ttwu_stat(p, cpu, wake_flags); 2784 ttwu_stat(p, cpu, wake_flags);
2751out: 2785out:
2786 if (is_realtime(p))
2787 TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state);
2752 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 2788 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2753 2789
2754 return success; 2790 return success;
@@ -2859,7 +2895,8 @@ void sched_fork(struct task_struct *p)
2859 * Revert to default priority/policy on fork if requested. 2895 * Revert to default priority/policy on fork if requested.
2860 */ 2896 */
2861 if (unlikely(p->sched_reset_on_fork)) { 2897 if (unlikely(p->sched_reset_on_fork)) {
2862 if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { 2898 if (p->policy == SCHED_FIFO || p->policy == SCHED_RR ||
2899 p->policy == SCHED_LITMUS) {
2863 p->policy = SCHED_NORMAL; 2900 p->policy = SCHED_NORMAL;
2864 p->normal_prio = p->static_prio; 2901 p->normal_prio = p->static_prio;
2865 } 2902 }
@@ -3088,6 +3125,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
3088 */ 3125 */
3089 prev_state = prev->state; 3126 prev_state = prev->state;
3090 finish_arch_switch(prev); 3127 finish_arch_switch(prev);
3128 litmus->finish_switch(prev);
3129 prev->rt_param.stack_in_use = NO_CPU;
3091#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 3130#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
3092 local_irq_disable(); 3131 local_irq_disable();
3093#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ 3132#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -3117,6 +3156,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
3117{ 3156{
3118 if (prev->sched_class->pre_schedule) 3157 if (prev->sched_class->pre_schedule)
3119 prev->sched_class->pre_schedule(rq, prev); 3158 prev->sched_class->pre_schedule(rq, prev);
3159
3160 /* LITMUS^RT not very clean hack: we need to save the prev task
3161 * as our scheduling decision rely on it (as we drop the rq lock
3162 * something in prev can change...); there is no way to escape
3163 * this ack apart from modifying pick_nex_task(rq, _prev_) or
3164 * falling back on the previous solution of decoupling
3165 * scheduling decisions
3166 */
3167 rq->litmus.prev = prev;
3120} 3168}
3121 3169
3122/* rq->lock is NOT held, but preemption is disabled */ 3170/* rq->lock is NOT held, but preemption is disabled */
@@ -3153,16 +3201,26 @@ static inline void post_schedule(struct rq *rq)
3153asmlinkage void schedule_tail(struct task_struct *prev) 3201asmlinkage void schedule_tail(struct task_struct *prev)
3154 __releases(rq->lock) 3202 __releases(rq->lock)
3155{ 3203{
3156 struct rq *rq = this_rq(); 3204 struct rq *rq;
3157 3205
3206 preempt_disable();
3207
3208 rq = this_rq();
3158 finish_task_switch(rq, prev); 3209 finish_task_switch(rq, prev);
3159 3210
3211 sched_trace_task_switch_to(current);
3212
3160 /* 3213 /*
3161 * FIXME: do we need to worry about rq being invalidated by the 3214 * FIXME: do we need to worry about rq being invalidated by the
3162 * task_switch? 3215 * task_switch?
3163 */ 3216 */
3164 post_schedule(rq); 3217 post_schedule(rq);
3165 3218
3219 if (sched_state_validate_switch())
3220 litmus_reschedule_local();
3221
3222 preempt_enable();
3223
3166#ifdef __ARCH_WANT_UNLOCKED_CTXSW 3224#ifdef __ARCH_WANT_UNLOCKED_CTXSW
3167 /* In this case, finish_task_switch does not reenable preemption */ 3225 /* In this case, finish_task_switch does not reenable preemption */
3168 preempt_enable(); 3226 preempt_enable();
@@ -4107,18 +4165,26 @@ void scheduler_tick(void)
4107 4165
4108 sched_clock_tick(); 4166 sched_clock_tick();
4109 4167
4168 TS_TICK_START(current);
4169
4110 raw_spin_lock(&rq->lock); 4170 raw_spin_lock(&rq->lock);
4111 update_rq_clock(rq); 4171 update_rq_clock(rq);
4112 update_cpu_load_active(rq); 4172 update_cpu_load_active(rq);
4113 curr->sched_class->task_tick(rq, curr, 0); 4173 curr->sched_class->task_tick(rq, curr, 0);
4174
4175 /* litmus_tick may force current to resched */
4176 litmus_tick(rq, curr);
4177
4114 raw_spin_unlock(&rq->lock); 4178 raw_spin_unlock(&rq->lock);
4115 4179
4116 perf_event_task_tick(); 4180 perf_event_task_tick();
4117 4181
4118#ifdef CONFIG_SMP 4182#ifdef CONFIG_SMP
4119 rq->idle_at_tick = idle_cpu(cpu); 4183 rq->idle_at_tick = idle_cpu(cpu);
4120 trigger_load_balance(rq, cpu); 4184 if (!is_realtime(current))
4185 trigger_load_balance(rq, cpu);
4121#endif 4186#endif
4187 TS_TICK_END(current);
4122} 4188}
4123 4189
4124notrace unsigned long get_parent_ip(unsigned long addr) 4190notrace unsigned long get_parent_ip(unsigned long addr)
@@ -4238,12 +4304,20 @@ pick_next_task(struct rq *rq)
4238 /* 4304 /*
4239 * Optimization: we know that if all tasks are in 4305 * Optimization: we know that if all tasks are in
4240 * the fair class we can call that function directly: 4306 * the fair class we can call that function directly:
4241 */ 4307
4242 if (likely(rq->nr_running == rq->cfs.nr_running)) { 4308 * NOT IN LITMUS^RT!
4309
4310 * This breaks many assumptions in the plugins.
4311 * Do not uncomment without thinking long and hard
4312 * about how this affects global plugins such as GSN-EDF.
4313
4314 if (rq->nr_running == rq->cfs.nr_running) {
4315 TRACE("taking shortcut in pick_next_task()\n");
4243 p = fair_sched_class.pick_next_task(rq); 4316 p = fair_sched_class.pick_next_task(rq);
4244 if (likely(p)) 4317 if (likely(p))
4245 return p; 4318 return p;
4246 } 4319 }
4320 */
4247 4321
4248 for_each_class(class) { 4322 for_each_class(class) {
4249 p = class->pick_next_task(rq); 4323 p = class->pick_next_task(rq);
@@ -4266,11 +4340,19 @@ static void __sched __schedule(void)
4266 4340
4267need_resched: 4341need_resched:
4268 preempt_disable(); 4342 preempt_disable();
4343 sched_state_entered_schedule();
4269 cpu = smp_processor_id(); 4344 cpu = smp_processor_id();
4270 rq = cpu_rq(cpu); 4345 rq = cpu_rq(cpu);
4271 rcu_note_context_switch(cpu); 4346 rcu_note_context_switch(cpu);
4272 prev = rq->curr; 4347 prev = rq->curr;
4273 4348
4349 /* LITMUS^RT: quickly re-evaluate the scheduling decision
4350 * if the previous one is no longer valid after CTX.
4351 */
4352litmus_need_resched_nonpreemptible:
4353 TS_SCHED_START;
4354 sched_trace_task_switch_away(prev);
4355
4274 schedule_debug(prev); 4356 schedule_debug(prev);
4275 4357
4276 if (sched_feat(HRTICK)) 4358 if (sched_feat(HRTICK))
@@ -4320,7 +4402,10 @@ need_resched:
4320#endif 4402#endif
4321 ++*switch_count; 4403 ++*switch_count;
4322 4404
4405 TS_SCHED_END(next);
4406 TS_CXS_START(next);
4323 context_switch(rq, prev, next); /* unlocks the rq */ 4407 context_switch(rq, prev, next); /* unlocks the rq */
4408 TS_CXS_END(current);
4324 /* 4409 /*
4325 * The context switch have flipped the stack from under us 4410 * The context switch have flipped the stack from under us
4326 * and restored the local variables which were saved when 4411 * and restored the local variables which were saved when
@@ -4329,14 +4414,23 @@ need_resched:
4329 */ 4414 */
4330 cpu = smp_processor_id(); 4415 cpu = smp_processor_id();
4331 rq = cpu_rq(cpu); 4416 rq = cpu_rq(cpu);
4332 } else 4417 } else {
4418 TS_SCHED_END(prev);
4333 raw_spin_unlock_irq(&rq->lock); 4419 raw_spin_unlock_irq(&rq->lock);
4420 }
4421
4422 sched_trace_task_switch_to(current);
4334 4423
4335 post_schedule(rq); 4424 post_schedule(rq);
4336 4425
4426 if (sched_state_validate_switch())
4427 goto litmus_need_resched_nonpreemptible;
4428
4337 preempt_enable_no_resched(); 4429 preempt_enable_no_resched();
4338 if (need_resched()) 4430 if (need_resched())
4339 goto need_resched; 4431 goto need_resched;
4432
4433 srp_ceiling_block();
4340} 4434}
4341 4435
4342static inline void sched_submit_work(struct task_struct *tsk) 4436static inline void sched_submit_work(struct task_struct *tsk)
@@ -4626,6 +4720,17 @@ void complete_all(struct completion *x)
4626} 4720}
4627EXPORT_SYMBOL(complete_all); 4721EXPORT_SYMBOL(complete_all);
4628 4722
4723void complete_n(struct completion *x, int n)
4724{
4725 unsigned long flags;
4726
4727 spin_lock_irqsave(&x->wait.lock, flags);
4728 x->done += n;
4729 __wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL);
4730 spin_unlock_irqrestore(&x->wait.lock, flags);
4731}
4732EXPORT_SYMBOL(complete_n);
4733
4629static inline long __sched 4734static inline long __sched
4630do_wait_for_common(struct completion *x, long timeout, int state) 4735do_wait_for_common(struct completion *x, long timeout, int state)
4631{ 4736{
@@ -5065,7 +5170,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
5065 p->normal_prio = normal_prio(p); 5170 p->normal_prio = normal_prio(p);
5066 /* we are holding p->pi_lock already */ 5171 /* we are holding p->pi_lock already */
5067 p->prio = rt_mutex_getprio(p); 5172 p->prio = rt_mutex_getprio(p);
5068 if (rt_prio(p->prio)) 5173 if (p->policy == SCHED_LITMUS)
5174 p->sched_class = &litmus_sched_class;
5175 else if (rt_prio(p->prio))
5069 p->sched_class = &rt_sched_class; 5176 p->sched_class = &rt_sched_class;
5070 else 5177 else
5071 p->sched_class = &fair_sched_class; 5178 p->sched_class = &fair_sched_class;
@@ -5113,7 +5220,7 @@ recheck:
5113 5220
5114 if (policy != SCHED_FIFO && policy != SCHED_RR && 5221 if (policy != SCHED_FIFO && policy != SCHED_RR &&
5115 policy != SCHED_NORMAL && policy != SCHED_BATCH && 5222 policy != SCHED_NORMAL && policy != SCHED_BATCH &&
5116 policy != SCHED_IDLE) 5223 policy != SCHED_IDLE && policy != SCHED_LITMUS)
5117 return -EINVAL; 5224 return -EINVAL;
5118 } 5225 }
5119 5226
@@ -5128,6 +5235,8 @@ recheck:
5128 return -EINVAL; 5235 return -EINVAL;
5129 if (rt_policy(policy) != (param->sched_priority != 0)) 5236 if (rt_policy(policy) != (param->sched_priority != 0))
5130 return -EINVAL; 5237 return -EINVAL;
5238 if (policy == SCHED_LITMUS && policy == p->policy)
5239 return -EINVAL;
5131 5240
5132 /* 5241 /*
5133 * Allow unprivileged RT tasks to decrease priority: 5242 * Allow unprivileged RT tasks to decrease priority:
@@ -5171,6 +5280,12 @@ recheck:
5171 return retval; 5280 return retval;
5172 } 5281 }
5173 5282
5283 if (policy == SCHED_LITMUS) {
5284 retval = litmus_admit_task(p);
5285 if (retval)
5286 return retval;
5287 }
5288
5174 /* 5289 /*
5175 * make sure no PI-waiters arrive (or leave) while we are 5290 * make sure no PI-waiters arrive (or leave) while we are
5176 * changing the priority of the task: 5291 * changing the priority of the task:
@@ -5229,10 +5344,19 @@ recheck:
5229 5344
5230 p->sched_reset_on_fork = reset_on_fork; 5345 p->sched_reset_on_fork = reset_on_fork;
5231 5346
5347 if (p->policy == SCHED_LITMUS)
5348 litmus_exit_task(p);
5349
5232 oldprio = p->prio; 5350 oldprio = p->prio;
5233 prev_class = p->sched_class; 5351 prev_class = p->sched_class;
5234 __setscheduler(rq, p, policy, param->sched_priority); 5352 __setscheduler(rq, p, policy, param->sched_priority);
5235 5353
5354 if (policy == SCHED_LITMUS) {
5355 p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
5356 p->rt_param.present = running;
5357 litmus->task_new(p, on_rq, running);
5358 }
5359
5236 if (running) 5360 if (running)
5237 p->sched_class->set_curr_task(rq); 5361 p->sched_class->set_curr_task(rq);
5238 if (on_rq) 5362 if (on_rq)
@@ -5400,10 +5524,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5400 rcu_read_lock(); 5524 rcu_read_lock();
5401 5525
5402 p = find_process_by_pid(pid); 5526 p = find_process_by_pid(pid);
5403 if (!p) { 5527 /* Don't set affinity if task not found and for LITMUS tasks */
5528 if (!p || is_realtime(p)) {
5404 rcu_read_unlock(); 5529 rcu_read_unlock();
5405 put_online_cpus(); 5530 put_online_cpus();
5406 return -ESRCH; 5531 return p ? -EPERM : -ESRCH;
5407 } 5532 }
5408 5533
5409 /* Prevent p going away */ 5534 /* Prevent p going away */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index afa7412406b..48645150b6e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1892,6 +1892,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1892 int scale = cfs_rq->nr_running >= sched_nr_latency; 1892 int scale = cfs_rq->nr_running >= sched_nr_latency;
1893 int next_buddy_marked = 0; 1893 int next_buddy_marked = 0;
1894 1894
1895 if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS)
1896 goto preempt;
1897
1895 if (unlikely(se == pse)) 1898 if (unlikely(se == pse))
1896 return; 1899 return;
1897 1900
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ac79f9e34fd..fbd3be9c869 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1078,7 +1078,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1078 */ 1078 */
1079static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) 1079static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1080{ 1080{
1081 if (p->prio < rq->curr->prio) { 1081 if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) {
1082 resched_task(rq->curr); 1082 resched_task(rq->curr);
1083 return; 1083 return;
1084 } 1084 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c473ce246cb..61e31a35e98 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -766,12 +766,53 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
766} 766}
767 767
768/** 768/**
769 * tick_set_quanta_type - set the quanta type from the "quanta=" boot option
770 * Default is standard setup with ticks staggered over first
771 * half of tick period.
772 */
773int quanta_type = LINUX_DEFAULT_TICKS;
774static int __init tick_set_quanta_type(char *str)
775{
776 if (strcmp("aligned", str) == 0) {
777 quanta_type = LITMUS_ALIGNED_TICKS;
778 printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n");
779 }
780 else if (strcmp("staggered", str) == 0) {
781 quanta_type = LITMUS_STAGGERED_TICKS;
782 printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n");
783 }
784 return 1;
785}
786__setup("quanta=", tick_set_quanta_type);
787
788u64 cpu_stagger_offset(int cpu)
789{
790 u64 offset = 0;
791 switch (quanta_type) {
792 case LITMUS_ALIGNED_TICKS:
793 offset = 0;
794 break;
795 case LITMUS_STAGGERED_TICKS:
796 offset = ktime_to_ns(tick_period);
797 do_div(offset, num_possible_cpus());
798 offset *= cpu;
799 break;
800 default:
801 offset = ktime_to_ns(tick_period) >> 1;
802 do_div(offset, num_possible_cpus());
803 offset *= cpu;
804 }
805 return offset;
806}
807
808/**
769 * tick_setup_sched_timer - setup the tick emulation timer 809 * tick_setup_sched_timer - setup the tick emulation timer
770 */ 810 */
771void tick_setup_sched_timer(void) 811void tick_setup_sched_timer(void)
772{ 812{
773 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 813 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
774 ktime_t now = ktime_get(); 814 ktime_t now = ktime_get();
815 u64 offset;
775 816
776 /* 817 /*
777 * Emulate tick processing via per-CPU hrtimers: 818 * Emulate tick processing via per-CPU hrtimers:
@@ -782,6 +823,12 @@ void tick_setup_sched_timer(void)
782 /* Get the next period (per cpu) */ 823 /* Get the next period (per cpu) */
783 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); 824 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
784 825
826 /* Offset must be set correctly to achieve desired quanta type. */
827 offset = cpu_stagger_offset(smp_processor_id());
828
829 /* Add the correct offset to expiration time */
830 hrtimer_add_expires_ns(&ts->sched_timer, offset);
831
785 for (;;) { 832 for (;;) {
786 hrtimer_forward(&ts->sched_timer, now, tick_period); 833 hrtimer_forward(&ts->sched_timer, now, tick_period);
787 hrtimer_start_expires(&ts->sched_timer, 834 hrtimer_start_expires(&ts->sched_timer,
diff --git a/litmus/Kconfig b/litmus/Kconfig
new file mode 100644
index 00000000000..f2dbfb39688
--- /dev/null
+++ b/litmus/Kconfig
@@ -0,0 +1,282 @@
1menu "LITMUS^RT"
2
3menu "Scheduling"
4
5config PLUGIN_CEDF
6 bool "Clustered-EDF"
7 depends on X86 && SYSFS
8 default y
9 help
10 Include the Clustered EDF (C-EDF) plugin in the kernel.
11 This is appropriate for large platforms with shared caches.
12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
13 makes little sense since there aren't any shared caches.
14
15config PLUGIN_PFAIR
16 bool "PFAIR"
17 depends on HIGH_RES_TIMERS && !NO_HZ
18 default y
19 help
20 Include the PFAIR plugin (i.e., the PD^2 scheduler) in the kernel.
21 The PFAIR plugin requires high resolution timers (for staggered quanta)
22 and does not support NO_HZ (quanta could be missed when the system is idle).
23
24 If unsure, say Yes.
25
26config RELEASE_MASTER
27 bool "Release-master Support"
28 depends on ARCH_HAS_SEND_PULL_TIMERS
29 default n
30 help
31 Allow one processor to act as a dedicated interrupt processor
32 that services all timer interrupts, but that does not schedule
33 real-time tasks. See RTSS'09 paper for details
34 (http://www.cs.unc.edu/~anderson/papers.html).
35 Currently only supported by GSN-EDF.
36
37endmenu
38
39menu "Real-Time Synchronization"
40
41config NP_SECTION
42 bool "Non-preemptive section support"
43 default n
44 help
45 Allow tasks to become non-preemptable.
46 Note that plugins still need to explicitly support non-preemptivity.
47 Currently, only GSN-EDF and PSN-EDF have such support.
48
49 This is required to support locking protocols such as the FMLP.
50 If disabled, all tasks will be considered preemptable at all times.
51
52config LITMUS_LOCKING
53 bool "Support for real-time locking protocols"
54 depends on NP_SECTION
55 default n
56 help
57 Enable LITMUS^RT's deterministic multiprocessor real-time
58 locking protocols.
59
60 Say Yes if you want to include locking protocols such as the FMLP and
61 Baker's SRP.
62
63endmenu
64
65menu "Performance Enhancements"
66
67config SCHED_CPU_AFFINITY
68 bool "Local Migration Affinity"
69 depends on X86
70 default y
71 help
72 Rescheduled tasks prefer CPUs near to their previously used CPU. This
73 may improve performance through possible preservation of cache affinity.
74
75 Warning: May make bugs harder to find since tasks may migrate less often.
76
77 NOTES:
78 * Feature is not utilized by PFair/PD^2.
79
80 Say Yes if unsure.
81
82choice
83 prompt "EDF Tie-Break Behavior"
84 default EDF_TIE_BREAK_LATENESS_NORM
85 help
86 Allows the configuration of tie-breaking behavior when the deadlines
87 of two EDF-scheduled tasks are equal.
88
89 config EDF_TIE_BREAK_LATENESS
90 bool "Lateness-based Tie Break"
91 help
92 Break ties between two jobs, A and B, based upon the lateness of their
93 prior jobs. The job with the greatest lateness has priority. Note that
94 lateness has a negative value if the prior job finished before its
95 deadline.
96
97 config EDF_TIE_BREAK_LATENESS_NORM
98 bool "Normalized Lateness-based Tie Break"
99 help
100 Break ties between two jobs, A and B, based upon the lateness, normalized
101 by relative deadline, of their prior jobs. The job with the greatest
102 normalized lateness has priority. Note that lateness has a negative value
103 if the prior job finished before its deadline.
104
105 Normalized lateness tie-breaks are likely desirable over non-normalized
106 tie-breaks if the execution times and/or relative deadlines of tasks in a
107 task set vary greatly.
108
109 config EDF_TIE_BREAK_HASH
110 bool "Hash-based Tie Breaks"
111 help
112 Break ties between two jobs, A and B, with equal deadlines by using a
113 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
114 A has a ~50% chance of winning a given tie-break.
115
116 config EDF_PID_TIE_BREAK
117 bool "PID-based Tie Breaks"
118 help
119 Break ties based upon OS-assigned thread IDs. Use this option if
120 required by the algorithm's real-time analysis or if per-task response-time
121 jitter must be minimized.
122
123 NOTES:
124 * This tie-breaking method was default in Litmus 2012.2 and before.
125
126endchoice
127
128endmenu
129
130menu "Tracing"
131
132config FEATHER_TRACE
133 bool "Feather-Trace Infrastructure"
134 default y
135 help
136 Feather-Trace basic tracing infrastructure. Includes device file
137 driver and instrumentation point support.
138
139 There are actually two implementations of Feather-Trace.
140 1) A slower, but portable, default implementation.
141 2) Architecture-specific implementations that rewrite kernel .text at runtime.
142
143 If enabled, Feather-Trace will be based on 2) if available (currently only for x86).
144 However, if DEBUG_RODATA=y, then Feather-Trace will choose option 1) in any case
145 to avoid problems with write-protected .text pages.
146
147 Bottom line: to avoid increased overheads, choose DEBUG_RODATA=n.
148
149 Note that this option only enables the basic Feather-Trace infrastructure;
150 you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to
151 actually enable any events.
152
153config SCHED_TASK_TRACE
154 bool "Trace real-time tasks"
155 depends on FEATHER_TRACE
156 default y
157 help
158 Include support for the sched_trace_XXX() tracing functions. This
159 allows the collection of real-time task events such as job
160 completions, job releases, early completions, etc. This results in a
161 small overhead in the scheduling code. Disable if the overhead is not
162 acceptable (e.g., benchmarking).
163
164 Say Yes for debugging.
165 Say No for overhead tracing.
166
167config SCHED_TASK_TRACE_SHIFT
168 int "Buffer size for sched_trace_xxx() events"
169 depends on SCHED_TASK_TRACE
170 range 8 13
171 default 9
172 help
173
174 Select the buffer size of sched_trace_xxx() events as a power of two.
175 These buffers are statically allocated as per-CPU data. Each event
176 requires 24 bytes storage plus one additional flag byte. Too large
177 buffers can cause issues with the per-cpu allocator (and waste
178 memory). Too small buffers can cause scheduling events to be lost. The
179 "right" size is workload dependent and depends on the number of tasks,
180 each task's period, each task's number of suspensions, and how often
181 the buffer is flushed.
182
183 Examples: 12 => 4k events
184 10 => 1k events
185 8 => 256 events
186
187config SCHED_LITMUS_TRACEPOINT
188 bool "Enable Event/Tracepoint Tracing for real-time task tracing"
189 depends on TRACEPOINTS
190 default n
191 help
192 Enable kernel-style events (tracepoint) for Litmus. Litmus events
193 trace the same functions as the above sched_trace_XXX(), but can
194 be enabled independently.
195 Litmus tracepoints can be recorded and analyzed together (single
196 time reference) with all other kernel tracing events (e.g.,
197 sched:sched_switch, etc.).
198
199 This also enables a quick way to visualize schedule traces using
200 trace-cmd utility and kernelshark visualizer.
201
202 Say Yes for debugging and visualization purposes.
203 Say No for overhead tracing.
204
205config SCHED_OVERHEAD_TRACE
206 bool "Record timestamps for overhead measurements"
207 depends on FEATHER_TRACE
208 default n
209 help
210 Export event stream for overhead tracing.
211 Say Yes for overhead tracing.
212
213config SCHED_DEBUG_TRACE
214 bool "TRACE() debugging"
215 default y
216 help
217 Include support for sched_trace_log_message(), which is used to
218 implement TRACE(). If disabled, no TRACE() messages will be included
219 in the kernel, and no overheads due to debugging statements will be
220 incurred by the scheduler. Disable if the overhead is not acceptable
221 (e.g. benchmarking).
222
223 Say Yes for debugging.
224 Say No for overhead tracing.
225
226config SCHED_DEBUG_TRACE_SHIFT
227 int "Buffer size for TRACE() buffer"
228 depends on SCHED_DEBUG_TRACE
229 range 14 22
230 default 18
231 help
232
233 Select the amount of memory needed for the TRACE() buffer, as a
234 power of two. The TRACE() buffer is global and statically allocated. If
235 the buffer is too small, there will be holes in the TRACE() log if the
236 buffer-flushing task is starved.
237
238 The default should be sufficient for most systems. Increase the buffer
239 size if the log contains holes. Reduce the buffer size when running on
240 a memory-constrained system.
241
242 Examples: 14 => 16KB
243 18 => 256KB
244 20 => 1MB
245
246 This buffer is exported to userspace using a misc device as
247 'litmus/log'. On a system with default udev rules, a corresponding
248 character device node should be created at /dev/litmus/log. The buffer
249 can be flushed using cat, e.g., 'cat /dev/litmus/log > my_log_file.txt'.
250
251config SCHED_DEBUG_TRACE_CALLER
252 bool "Include [function@file:line] tag in TRACE() log"
253 depends on SCHED_DEBUG_TRACE
254 default n
255 help
256 With this option enabled, TRACE() prepends
257
258 "[<function name>@<filename>:<line number>]"
259
260 to each message in the debug log. Enable this to aid in figuring out
261 what was called in which order. The downside is that it adds a lot of
262 clutter.
263
264 If unsure, say No.
265
266config PREEMPT_STATE_TRACE
267 bool "Trace preemption state machine transitions"
268 depends on SCHED_DEBUG_TRACE
269 default n
270 help
271 With this option enabled, each CPU will log when it transitions
272 states in the preemption state machine. This state machine is
273 used to determine how to react to IPIs (avoid races with in-flight IPIs).
274
275 Warning: this creates a lot of information in the debug trace. Only
276 recommended when you are debugging preemption-related races.
277
278 If unsure, say No.
279
280endmenu
281
282endmenu
diff --git a/litmus/Makefile b/litmus/Makefile
new file mode 100644
index 00000000000..d26ca7076b6
--- /dev/null
+++ b/litmus/Makefile
@@ -0,0 +1,32 @@
1#
2# Makefile for LITMUS^RT
3#
4
5obj-y = sched_plugin.o litmus.o \
6 preempt.o \
7 litmus_proc.o \
8 budget.o \
9 clustered.o \
10 jobs.o \
11 sync.o \
12 rt_domain.o \
13 edf_common.o \
14 fp_common.o \
15 fdso.o \
16 locking.o \
17 srp.o \
18 bheap.o \
19 binheap.o \
20 ctrldev.o \
21 sched_gsn_edf.o \
22 sched_psn_edf.o \
23 sched_pfp.o
24
25obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
26obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
27obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
28
29obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
30obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
31obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
32obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
diff --git a/litmus/affinity.c b/litmus/affinity.c
new file mode 100644
index 00000000000..3fa6dd78940
--- /dev/null
+++ b/litmus/affinity.c
@@ -0,0 +1,42 @@
1#include <linux/cpu.h>
2
3#include <litmus/affinity.h>
4
5struct neighborhood neigh_info[NR_CPUS];
6
7/* called by _init_litmus() */
8void init_topology(void) {
9 int cpu;
10 int i;
11 int chk;
12 int depth = num_cache_leaves;
13
14 if (depth > NUM_CACHE_LEVELS)
15 depth = NUM_CACHE_LEVELS;
16
17 for_each_online_cpu(cpu) {
18 for (i = 0; i < depth; ++i) {
19 chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
20 if (chk) {
21 /* failed */
22 neigh_info[cpu].size[i] = 0;
23 } else {
24 /* size = num bits in mask */
25 neigh_info[cpu].size[i] =
26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
27 }
28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
29 cpu, neigh_info[cpu].size[i], i,
30 *cpumask_bits(neigh_info[cpu].neighbors[i]));
31 }
32
33 /* set data for non-existent levels */
34 for (; i < NUM_CACHE_LEVELS; ++i) {
35 neigh_info[cpu].size[i] = 0;
36
37 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
38 cpu, neigh_info[cpu].size[i], i, 0lu);
39 }
40 }
41}
42
diff --git a/litmus/bheap.c b/litmus/bheap.c
new file mode 100644
index 00000000000..528af97f18a
--- /dev/null
+++ b/litmus/bheap.c
@@ -0,0 +1,314 @@
1#include "linux/kernel.h"
2#include "litmus/bheap.h"
3
4void bheap_init(struct bheap* heap)
5{
6 heap->head = NULL;
7 heap->min = NULL;
8}
9
10void bheap_node_init(struct bheap_node** _h, void* value)
11{
12 struct bheap_node* h = *_h;
13 h->parent = NULL;
14 h->next = NULL;
15 h->child = NULL;
16 h->degree = NOT_IN_HEAP;
17 h->value = value;
18 h->ref = _h;
19}
20
21
22/* make child a subtree of root */
23static void __bheap_link(struct bheap_node* root,
24 struct bheap_node* child)
25{
26 child->parent = root;
27 child->next = root->child;
28 root->child = child;
29 root->degree++;
30}
31
32/* merge root lists */
33static struct bheap_node* __bheap_merge(struct bheap_node* a,
34 struct bheap_node* b)
35{
36 struct bheap_node* head = NULL;
37 struct bheap_node** pos = &head;
38
39 while (a && b) {
40 if (a->degree < b->degree) {
41 *pos = a;
42 a = a->next;
43 } else {
44 *pos = b;
45 b = b->next;
46 }
47 pos = &(*pos)->next;
48 }
49 if (a)
50 *pos = a;
51 else
52 *pos = b;
53 return head;
54}
55
56/* reverse a linked list of nodes. also clears parent pointer */
57static struct bheap_node* __bheap_reverse(struct bheap_node* h)
58{
59 struct bheap_node* tail = NULL;
60 struct bheap_node* next;
61
62 if (!h)
63 return h;
64
65 h->parent = NULL;
66 while (h->next) {
67 next = h->next;
68 h->next = tail;
69 tail = h;
70 h = next;
71 h->parent = NULL;
72 }
73 h->next = tail;
74 return h;
75}
76
77static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
78 struct bheap_node** prev, struct bheap_node** node)
79{
80 struct bheap_node *_prev, *cur;
81 *prev = NULL;
82
83 if (!heap->head) {
84 *node = NULL;
85 return;
86 }
87
88 *node = heap->head;
89 _prev = heap->head;
90 cur = heap->head->next;
91 while (cur) {
92 if (higher_prio(cur, *node)) {
93 *node = cur;
94 *prev = _prev;
95 }
96 _prev = cur;
97 cur = cur->next;
98 }
99}
100
101static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
102 struct bheap_node* h2)
103{
104 struct bheap_node* h1;
105 struct bheap_node *prev, *x, *next;
106 if (!h2)
107 return;
108 h1 = heap->head;
109 if (!h1) {
110 heap->head = h2;
111 return;
112 }
113 h1 = __bheap_merge(h1, h2);
114 prev = NULL;
115 x = h1;
116 next = x->next;
117 while (next) {
118 if (x->degree != next->degree ||
119 (next->next && next->next->degree == x->degree)) {
120 /* nothing to do, advance */
121 prev = x;
122 x = next;
123 } else if (higher_prio(x, next)) {
124 /* x becomes the root of next */
125 x->next = next->next;
126 __bheap_link(x, next);
127 } else {
128 /* next becomes the root of x */
129 if (prev)
130 prev->next = next;
131 else
132 h1 = next;
133 __bheap_link(next, x);
134 x = next;
135 }
136 next = x->next;
137 }
138 heap->head = h1;
139}
140
141static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
142 struct bheap* heap)
143{
144 struct bheap_node *prev, *node;
145 __bheap_min(higher_prio, heap, &prev, &node);
146 if (!node)
147 return NULL;
148 if (prev)
149 prev->next = node->next;
150 else
151 heap->head = node->next;
152 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
153 return node;
154}
155
156/* insert (and reinitialize) a node into the heap */
157void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
158 struct bheap_node* node)
159{
160 struct bheap_node *min;
161 node->child = NULL;
162 node->parent = NULL;
163 node->next = NULL;
164 node->degree = 0;
165 if (heap->min && higher_prio(node, heap->min)) {
166 /* swap min cache */
167 min = heap->min;
168 min->child = NULL;
169 min->parent = NULL;
170 min->next = NULL;
171 min->degree = 0;
172 __bheap_union(higher_prio, heap, min);
173 heap->min = node;
174 } else
175 __bheap_union(higher_prio, heap, node);
176}
177
178void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
179{
180 struct bheap_node* min;
181 if (heap->min) {
182 min = heap->min;
183 heap->min = NULL;
184 bheap_insert(higher_prio, heap, min);
185 }
186}
187
188/* merge addition into target */
189void bheap_union(bheap_prio_t higher_prio,
190 struct bheap* target, struct bheap* addition)
191{
192 /* first insert any cached minima, if necessary */
193 bheap_uncache_min(higher_prio, target);
194 bheap_uncache_min(higher_prio, addition);
195 __bheap_union(higher_prio, target, addition->head);
196 /* this is a destructive merge */
197 addition->head = NULL;
198}
199
200struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
201 struct bheap* heap)
202{
203 if (!heap->min)
204 heap->min = __bheap_extract_min(higher_prio, heap);
205 return heap->min;
206}
207
208struct bheap_node* bheap_take(bheap_prio_t higher_prio,
209 struct bheap* heap)
210{
211 struct bheap_node *node;
212 if (!heap->min)
213 heap->min = __bheap_extract_min(higher_prio, heap);
214 node = heap->min;
215 heap->min = NULL;
216 if (node)
217 node->degree = NOT_IN_HEAP;
218 return node;
219}
220
221int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
222{
223 struct bheap_node *parent;
224 struct bheap_node** tmp_ref;
225 void* tmp;
226
227 /* bubble up */
228 parent = node->parent;
229 while (parent && higher_prio(node, parent)) {
230 /* swap parent and node */
231 tmp = parent->value;
232 parent->value = node->value;
233 node->value = tmp;
234 /* swap references */
235 *(parent->ref) = node;
236 *(node->ref) = parent;
237 tmp_ref = parent->ref;
238 parent->ref = node->ref;
239 node->ref = tmp_ref;
240 /* step up */
241 node = parent;
242 parent = node->parent;
243 }
244
245 return parent != NULL;
246}
247
248void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
249 struct bheap_node* node)
250{
251 struct bheap_node *parent, *prev, *pos;
252 struct bheap_node** tmp_ref;
253 void* tmp;
254
255 if (heap->min != node) {
256 /* bubble up */
257 parent = node->parent;
258 while (parent) {
259 /* swap parent and node */
260 tmp = parent->value;
261 parent->value = node->value;
262 node->value = tmp;
263 /* swap references */
264 *(parent->ref) = node;
265 *(node->ref) = parent;
266 tmp_ref = parent->ref;
267 parent->ref = node->ref;
268 node->ref = tmp_ref;
269 /* step up */
270 node = parent;
271 parent = node->parent;
272 }
273 /* now delete:
274 * first find prev */
275 prev = NULL;
276 pos = heap->head;
277 while (pos != node) {
278 prev = pos;
279 pos = pos->next;
280 }
281 /* we have prev, now remove node */
282 if (prev)
283 prev->next = node->next;
284 else
285 heap->head = node->next;
286 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
287 } else
288 heap->min = NULL;
289 node->degree = NOT_IN_HEAP;
290}
291
292/* allocate a heap node for value and insert into the heap */
293int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
294 void* value, int gfp_flags)
295{
296 struct bheap_node* hn = bheap_node_alloc(gfp_flags);
297 if (likely(hn)) {
298 bheap_node_init(&hn, value);
299 bheap_insert(higher_prio, heap, hn);
300 }
301 return hn != NULL;
302}
303
304void* bheap_take_del(bheap_prio_t higher_prio,
305 struct bheap* heap)
306{
307 struct bheap_node* hn = bheap_take(higher_prio, heap);
308 void* ret = NULL;
309 if (hn) {
310 ret = hn->value;
311 bheap_node_free(hn);
312 }
313 return ret;
314}
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 00000000000..40a913f4b5a
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,388 @@
1#include <litmus/binheap.h>
2
3/* Returns true if the root ancestor of node is the root of the given heap. */
4int binheap_is_in_this_heap(struct binheap_node *node,
5 struct binheap* heap)
6{
7 if(!binheap_is_in_heap(node)) {
8 return 0;
9 }
10
11 while(node->parent != NULL) {
12 node = node->parent;
13 }
14
15 return (node == heap->root);
16}
17
18
19/* Update the node reference pointers. Same logic as Litmus binomial heap. */
20static void __update_ref(struct binheap_node *parent,
21 struct binheap_node *child)
22{
23 *(parent->ref_ptr) = child;
24 *(child->ref_ptr) = parent;
25
26 swap(parent->ref_ptr, child->ref_ptr);
27}
28
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34 swap(parent->data, child->data);
35 __update_ref(parent, child);
36}
37
38
39/* Swaps memory and data between two nodes. Actual nodes swap instead of
40 * just data. Needed when we delete nodes from the heap.
41 */
42static void __binheap_swap_safe(struct binheap *handle,
43 struct binheap_node *a,
44 struct binheap_node *b)
45{
46 swap(a->data, b->data);
47 __update_ref(a, b);
48
49 if((a->parent != NULL) && (a->parent == b->parent)) {
50 /* special case: shared parent */
51 swap(a->parent->left, a->parent->right);
52 }
53 else {
54 /* Update pointers to swap parents. */
55
56 if(a->parent) {
57 if(a == a->parent->left) {
58 a->parent->left = b;
59 }
60 else {
61 a->parent->right = b;
62 }
63 }
64
65 if(b->parent) {
66 if(b == b->parent->left) {
67 b->parent->left = a;
68 }
69 else {
70 b->parent->right = a;
71 }
72 }
73
74 swap(a->parent, b->parent);
75 }
76
77 /* swap children */
78
79 if(a->left) {
80 a->left->parent = b;
81
82 if(a->right) {
83 a->right->parent = b;
84 }
85 }
86
87 if(b->left) {
88 b->left->parent = a;
89
90 if(b->right) {
91 b->right->parent = a;
92 }
93 }
94
95 swap(a->left, b->left);
96 swap(a->right, b->right);
97
98
99 /* update next/last/root pointers */
100
101 if(a == handle->next) {
102 handle->next = b;
103 }
104 else if(b == handle->next) {
105 handle->next = a;
106 }
107
108 if(a == handle->last) {
109 handle->last = b;
110 }
111 else if(b == handle->last) {
112 handle->last = a;
113 }
114
115 if(a == handle->root) {
116 handle->root = b;
117 }
118 else if(b == handle->root) {
119 handle->root = a;
120 }
121}
122
123
124/**
125 * Update the pointer to the last node in the complete binary tree.
126 * Called internally after the root node has been deleted.
127 */
128static void __binheap_update_last(struct binheap *handle)
129{
130 struct binheap_node *temp = handle->last;
131
132 /* find a "bend" in the tree. */
133 while(temp->parent && (temp == temp->parent->left)) {
134 temp = temp->parent;
135 }
136
137 /* step over to sibling if we're not at root */
138 if(temp->parent != NULL) {
139 temp = temp->parent->left;
140 }
141
142 /* now travel right as far as possible. */
143 while(temp->right != NULL) {
144 temp = temp->right;
145 }
146
147 /* take one step to the left if we're not at the bottom-most level. */
148 if(temp->left != NULL) {
149 temp = temp->left;
150 }
151
152 handle->last = temp;
153}
154
155
156/**
157 * Update the pointer to the node that will take the next inserted node.
158 * Called internally after a node has been inserted.
159 */
160static void __binheap_update_next(struct binheap *handle)
161{
162 struct binheap_node *temp = handle->next;
163
164 /* find a "bend" in the tree. */
165 while(temp->parent && (temp == temp->parent->right)) {
166 temp = temp->parent;
167 }
168
169 /* step over to sibling if we're not at root */
170 if(temp->parent != NULL) {
171 temp = temp->parent->right;
172 }
173
174 /* now travel left as far as possible. */
175 while(temp->left != NULL) {
176 temp = temp->left;
177 }
178
179 handle->next = temp;
180}
181
182
183
184/* bubble node up towards root */
185static void __binheap_bubble_up(struct binheap *handle,
186 struct binheap_node *node)
187{
188 /* let BINHEAP_POISON data bubble to the top */
189
190 while((node->parent != NULL) &&
191 ((node->data == BINHEAP_POISON) ||
192 handle->compare(node, node->parent))) {
193 __binheap_swap(node->parent, node);
194 node = node->parent;
195 }
196}
197
198
199/* bubble node down, swapping with min-child */
200static void __binheap_bubble_down(struct binheap *handle)
201{
202 struct binheap_node *node = handle->root;
203
204 while(node->left != NULL) {
205 if(node->right && handle->compare(node->right, node->left)) {
206 if(handle->compare(node->right, node)) {
207 __binheap_swap(node, node->right);
208 node = node->right;
209 }
210 else {
211 break;
212 }
213 }
214 else {
215 if(handle->compare(node->left, node)) {
216 __binheap_swap(node, node->left);
217 node = node->left;
218 }
219 else {
220 break;
221 }
222 }
223 }
224}
225
226
227void __binheap_add(struct binheap_node *new_node,
228 struct binheap *handle,
229 void *data)
230{
231 new_node->data = data;
232 new_node->ref = new_node;
233 new_node->ref_ptr = &(new_node->ref);
234
235 if(!binheap_empty(handle)) {
236 /* insert left side first */
237 if(handle->next->left == NULL) {
238 handle->next->left = new_node;
239 new_node->parent = handle->next;
240 new_node->left = NULL;
241 new_node->right = NULL;
242
243 handle->last = new_node;
244
245 __binheap_bubble_up(handle, new_node);
246 }
247 else {
248 /* left occupied. insert right. */
249 handle->next->right = new_node;
250 new_node->parent = handle->next;
251 new_node->left = NULL;
252 new_node->right = NULL;
253
254 handle->last = new_node;
255
256 __binheap_update_next(handle);
257 __binheap_bubble_up(handle, new_node);
258 }
259 }
260 else {
261 /* first node in heap */
262
263 new_node->parent = NULL;
264 new_node->left = NULL;
265 new_node->right = NULL;
266
267 handle->root = new_node;
268 handle->next = new_node;
269 handle->last = new_node;
270 }
271}
272
273
274/**
275 * Removes the root node from the heap. The node is removed after coalescing
276 * the binheap_node with its original data pointer at the root of the tree.
277 *
278 * The 'last' node in the tree is then swapped up to the root and bubbled
279 * down.
280 */
281void __binheap_delete_root(struct binheap *handle,
282 struct binheap_node *container)
283{
284 struct binheap_node *root = handle->root;
285
286 if(root != container) {
287 /* coalesce */
288 __binheap_swap_safe(handle, root, container);
289 root = container;
290 }
291
292 if(handle->last != root) {
293 /* swap 'last' node up to root and bubble it down. */
294
295 struct binheap_node *to_move = handle->last;
296
297 if(to_move->parent != root) {
298 handle->next = to_move->parent;
299
300 if(handle->next->right == to_move) {
301 /* disconnect from parent */
302 to_move->parent->right = NULL;
303 handle->last = handle->next->left;
304 }
305 else {
306 /* find new 'last' before we disconnect */
307 __binheap_update_last(handle);
308
309 /* disconnect from parent */
310 to_move->parent->left = NULL;
311 }
312 }
313 else {
314 /* 'last' is direct child of root */
315
316 handle->next = to_move;
317
318 if(to_move == to_move->parent->right) {
319 to_move->parent->right = NULL;
320 handle->last = to_move->parent->left;
321 }
322 else {
323 to_move->parent->left = NULL;
324 handle->last = to_move;
325 }
326 }
327 to_move->parent = NULL;
328
329 /* reconnect as root. We can't just swap data ptrs since root node
330 * may be freed after this function returns.
331 */
332 to_move->left = root->left;
333 to_move->right = root->right;
334 if(to_move->left != NULL) {
335 to_move->left->parent = to_move;
336 }
337 if(to_move->right != NULL) {
338 to_move->right->parent = to_move;
339 }
340
341 handle->root = to_move;
342
343 /* bubble down */
344 __binheap_bubble_down(handle);
345 }
346 else {
347 /* removing last node in tree */
348 handle->root = NULL;
349 handle->next = NULL;
350 handle->last = NULL;
351 }
352
353 /* mark as removed */
354 container->parent = BINHEAP_POISON;
355}
356
357
358/**
359 * Delete an arbitrary node. Bubble node to delete up to the root,
360 * and then delete to root.
361 */
362void __binheap_delete(struct binheap_node *node_to_delete,
363 struct binheap *handle)
364{
365 struct binheap_node *target = node_to_delete->ref;
366 void *temp_data = target->data;
367
368 /* temporarily set data to BINHEAP_POISON to allow node to bubble up to the top. */
369 target->data = BINHEAP_POISON;
370
371 __binheap_bubble_up(handle, target);
372 __binheap_delete_root(handle, node_to_delete);
373
374 node_to_delete->data = temp_data; /* restore node data pointer */
375}
376
377
378/**
379 * Bubble up a node whose pointer has decreased in value.
380 */
381void __binheap_decrease(struct binheap_node *orig_node,
382 struct binheap *handle)
383{
384 struct binheap_node *target = orig_node->ref;
385
386 __binheap_bubble_up(handle, target);
387}
388
diff --git a/litmus/budget.c b/litmus/budget.c
new file mode 100644
index 00000000000..f7712be29ad
--- /dev/null
+++ b/litmus/budget.c
@@ -0,0 +1,113 @@
1#include <linux/sched.h>
2#include <linux/percpu.h>
3#include <linux/hrtimer.h>
4
5#include <litmus/litmus.h>
6#include <litmus/preempt.h>
7
8#include <litmus/budget.h>
9
10struct enforcement_timer {
11 /* The enforcement timer is used to accurately police
12 * slice budgets. */
13 struct hrtimer timer;
14 int armed;
15};
16
17DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
18
19static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
20{
21 struct enforcement_timer* et = container_of(timer,
22 struct enforcement_timer,
23 timer);
24 unsigned long flags;
25
26 local_irq_save(flags);
27 TRACE("enforcement timer fired.\n");
28 et->armed = 0;
29 /* activate scheduler */
30 litmus_reschedule_local();
31 local_irq_restore(flags);
32
33 return HRTIMER_NORESTART;
34}
35
36/* assumes called with IRQs off */
37static void cancel_enforcement_timer(struct enforcement_timer* et)
38{
39 int ret;
40
41 TRACE("cancelling enforcement timer.\n");
42
43 /* Since interrupts are disabled and et->armed is only
44 * modified locally, we do not need any locks.
45 */
46
47 if (et->armed) {
48 ret = hrtimer_try_to_cancel(&et->timer);
49 /* Should never be inactive. */
50 BUG_ON(ret == 0);
51 /* Should never be running concurrently. */
52 BUG_ON(ret == -1);
53
54 et->armed = 0;
55 }
56}
57
58/* assumes called with IRQs off */
59static void arm_enforcement_timer(struct enforcement_timer* et,
60 struct task_struct* t)
61{
62 lt_t when_to_fire;
63 TRACE_TASK(t, "arming enforcement timer.\n");
64
65 /* Calling this when there is no budget left for the task
66 * makes no sense, unless the task is non-preemptive. */
67 BUG_ON(budget_exhausted(t) && (!is_np(t)));
68
69 /* __hrtimer_start_range_ns() cancels the timer
70 * anyway, so we don't have to check whether it is still armed */
71
72 if (likely(!is_np(t))) {
73 when_to_fire = litmus_clock() + budget_remaining(t);
74 __hrtimer_start_range_ns(&et->timer,
75 ns_to_ktime(when_to_fire),
76 0 /* delta */,
77 HRTIMER_MODE_ABS_PINNED,
78 0 /* no wakeup */);
79 et->armed = 1;
80 }
81}
82
83
84/* expects to be called with IRQs off */
85void update_enforcement_timer(struct task_struct* t)
86{
87 struct enforcement_timer* et = &__get_cpu_var(budget_timer);
88
89 if (t && budget_precisely_enforced(t)) {
90 /* Make sure we call into the scheduler when this budget
91 * expires. */
92 arm_enforcement_timer(et, t);
93 } else if (et->armed) {
94 /* Make sure we don't cause unnecessary interrupts. */
95 cancel_enforcement_timer(et);
96 }
97}
98
99
100static int __init init_budget_enforcement(void)
101{
102 int cpu;
103 struct enforcement_timer* et;
104
105 for (cpu = 0; cpu < NR_CPUS; cpu++) {
106 et = &per_cpu(budget_timer, cpu);
107 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
108 et->timer.function = on_enforcement_timeout;
109 }
110 return 0;
111}
112
113module_init(init_budget_enforcement);
diff --git a/litmus/clustered.c b/litmus/clustered.c
new file mode 100644
index 00000000000..6fe1b512f62
--- /dev/null
+++ b/litmus/clustered.c
@@ -0,0 +1,111 @@
1#include <linux/gfp.h>
2#include <linux/cpumask.h>
3#include <linux/list.h>
4
5#include <litmus/clustered.h>
6
7#ifndef CONFIG_X86
8/* fake get_shared_cpu_map() on non-x86 architectures */
9
10int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
11{
12 if (index != 1)
13 return 1;
14 else {
15 /* Fake L1: CPU is all by itself. */
16 cpumask_clear(mask);
17 cpumask_set_cpu(cpu, mask);
18 return 0;
19 }
20}
21
22#endif
23
24int get_cluster_size(enum cache_level level)
25{
26 cpumask_var_t mask;
27 int ok;
28 int num_cpus;
29
30 if (level == GLOBAL_CLUSTER)
31 return num_online_cpus();
32 else {
33 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
34 return -ENOMEM;
35 /* assumes CPU 0 is representative of all CPUs */
36 ok = get_shared_cpu_map(mask, 0, level);
37 /* ok == 0 means we got the map; otherwise it's an invalid cache level */
38 if (ok == 0)
39 num_cpus = cpumask_weight(mask);
40 free_cpumask_var(mask);
41
42 if (ok == 0)
43 return num_cpus;
44 else
45 return -EINVAL;
46 }
47}
48
49int assign_cpus_to_clusters(enum cache_level level,
50 struct scheduling_cluster* clusters[],
51 unsigned int num_clusters,
52 struct cluster_cpu* cpus[],
53 unsigned int num_cpus)
54{
55 cpumask_var_t mask;
56 unsigned int i, free_cluster = 0, low_cpu;
57 int err = 0;
58
59 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
60 return -ENOMEM;
61
62 /* clear cluster pointers */
63 for (i = 0; i < num_cpus; i++) {
64 cpus[i]->id = i;
65 cpus[i]->cluster = NULL;
66 }
67
68 /* initialize clusters */
69 for (i = 0; i < num_clusters; i++) {
70 clusters[i]->id = i;
71 INIT_LIST_HEAD(&clusters[i]->cpus);
72 }
73
74 /* Assign each CPU. Two assumtions are made:
75 * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
76 * 2) All cpus that belong to some cluster are online.
77 */
78 for_each_online_cpu(i) {
79 /* get lowest-id CPU in cluster */
80 if (level != GLOBAL_CLUSTER) {
81 err = get_shared_cpu_map(mask, cpus[i]->id, level);
82 if (err != 0) {
83 /* ugh... wrong cache level? Either caller screwed up
84 * or the CPU topology is weird. */
85 printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
86 level, err);
87 err = -EINVAL;
88 goto out;
89 }
90 low_cpu = cpumask_first(mask);
91 } else
92 low_cpu = 0;
93 if (low_cpu == i) {
94 /* caller must provide an appropriate number of clusters */
95 BUG_ON(free_cluster >= num_clusters);
96
97 /* create new cluster */
98 cpus[i]->cluster = clusters[free_cluster++];
99 } else {
100 /* low_cpu points to the right cluster
101 * Assumption: low_cpu is actually online and was processed earlier. */
102 cpus[i]->cluster = cpus[low_cpu]->cluster;
103 }
104 /* enqueue in cpus list */
105 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
106 printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id);
107 }
108out:
109 free_cpumask_var(mask);
110 return err;
111}
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
new file mode 100644
index 00000000000..9969ab17c19
--- /dev/null
+++ b/litmus/ctrldev.c
@@ -0,0 +1,149 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/module.h>
6
7#include <litmus/litmus.h>
8
9/* only one page for now, but we might want to add a RO version at some point */
10
11#define CTRL_NAME "litmus/ctrl"
12
13/* allocate t->rt_param.ctrl_page*/
14static int alloc_ctrl_page(struct task_struct *t)
15{
16 int err = 0;
17
18 /* only allocate if the task doesn't have one yet */
19 if (!tsk_rt(t)->ctrl_page) {
20 tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
21 if (!tsk_rt(t)->ctrl_page)
22 err = -ENOMEM;
23 /* will get de-allocated in task teardown */
24 TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
25 tsk_rt(t)->ctrl_page);
26 }
27 return err;
28}
29
30static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
31{
32 int err;
33
34 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
35
36 TRACE_CUR(CTRL_NAME
37 ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n",
38 tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start,
39 vma->vm_page_prot);
40
41 /* Map it into the vma. */
42 err = vm_insert_page(vma, vma->vm_start, ctrl);
43
44 if (err)
45 TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
46
47 return err;
48}
49
50static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
51{
52 TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
53 vma->vm_flags, vma->vm_page_prot);
54
55 TRACE_CUR(CTRL_NAME
56 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
57 (void*) vma->vm_start, (void*) vma->vm_end, vma,
58 vma->vm_private_data);
59}
60
61static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
62 struct vm_fault* vmf)
63{
64 TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__,
65 vma->vm_flags, vmf->pgoff);
66
67 /* This function should never be called, since all pages should have
68 * been mapped by mmap() already. */
69 WARN_ONCE(1, "Page faults should be impossible in the control page\n");
70
71 return VM_FAULT_SIGBUS;
72}
73
74static struct vm_operations_struct litmus_ctrl_vm_ops = {
75 .close = litmus_ctrl_vm_close,
76 .fault = litmus_ctrl_vm_fault,
77};
78
79static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
80{
81 int err = 0;
82
83 /* first make sure mapper knows what he's doing */
84
85 /* you can only get one page */
86 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
87 return -EINVAL;
88
89 /* you can only map the "first" page */
90 if (vma->vm_pgoff != 0)
91 return -EINVAL;
92
93 /* you can't share it with anyone */
94 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
95 return -EINVAL;
96
97 vma->vm_ops = &litmus_ctrl_vm_ops;
98 /* This mapping should not be kept across forks,
99 * cannot be expanded, and is not a "normal" page. */
100 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO;
101
102 /* We don't want the first write access to trigger a "minor" page fault
103 * to mark the page as dirty. This is transient, private memory, we
104 * don't care if it was touched or not. __S011 means RW access, but not
105 * execute, and avoids copy-on-write behavior.
106 * See protection_map in mmap.c. */
107 vma->vm_page_prot = __S011;
108
109 err = alloc_ctrl_page(current);
110 if (!err)
111 err = map_ctrl_page(current, vma);
112
113 TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
114 __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
115
116 return err;
117}
118
119static struct file_operations litmus_ctrl_fops = {
120 .owner = THIS_MODULE,
121 .mmap = litmus_ctrl_mmap,
122};
123
124static struct miscdevice litmus_ctrl_dev = {
125 .name = CTRL_NAME,
126 .minor = MISC_DYNAMIC_MINOR,
127 .fops = &litmus_ctrl_fops,
128};
129
130static int __init init_litmus_ctrl_dev(void)
131{
132 int err;
133
134 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
135
136 printk("Initializing LITMUS^RT control device.\n");
137 err = misc_register(&litmus_ctrl_dev);
138 if (err)
139 printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
140 return err;
141}
142
143static void __exit exit_litmus_ctrl_dev(void)
144{
145 misc_deregister(&litmus_ctrl_dev);
146}
147
148module_init(init_litmus_ctrl_dev);
149module_exit(exit_litmus_ctrl_dev);
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 00000000000..5aca2934a7b
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,200 @@
1/*
2 * kernel/edf_common.c
3 *
4 * Common functions for EDF based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/edf_common.h>
16
17#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
18#include <litmus/fpmath.h>
19#endif
20
21#ifdef CONFIG_EDF_TIE_BREAK_HASH
22#include <linux/hash.h>
23static inline long edf_hash(struct task_struct *t)
24{
25 /* pid is 32 bits, so normally we would shove that into the
26 * upper 32-bits and and put the job number in the bottom
27 * and hash the 64-bit number with hash_64(). Sadly,
28 * in testing, hash_64() doesn't distribute keys were the
29 * upper bits are close together (as would be the case with
30 * pids) and job numbers are equal (as would be the case with
31 * synchronous task sets with all relative deadlines equal).
32 *
33 * A 2006 Linux patch proposed the following solution
34 * (but for some reason it wasn't accepted...).
35 *
36 * At least this workaround works for 32-bit systems as well.
37 */
38 return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
39}
40#endif
41
42
43/* edf_higher_prio - returns true if first has a higher EDF priority
44 * than second. Deadline ties are broken by PID.
45 *
46 * both first and second may be NULL
47 */
48int edf_higher_prio(struct task_struct* first,
49 struct task_struct* second)
50{
51 struct task_struct *first_task = first;
52 struct task_struct *second_task = second;
53
54 /* There is no point in comparing a task to itself. */
55 if (first && first == second) {
56 TRACE_TASK(first,
57 "WARNING: pointless edf priority comparison.\n");
58 return 0;
59 }
60
61
62 /* check for NULL tasks */
63 if (!first || !second)
64 return first && !second;
65
66#ifdef CONFIG_LITMUS_LOCKING
67
68 /* Check for inherited priorities. Change task
69 * used for comparison in such a case.
70 */
71 if (unlikely(first->rt_param.inh_task))
72 first_task = first->rt_param.inh_task;
73 if (unlikely(second->rt_param.inh_task))
74 second_task = second->rt_param.inh_task;
75
76 /* Check for priority boosting. Tie-break by start of boosting.
77 */
78 if (unlikely(is_priority_boosted(first_task))) {
79 /* first_task is boosted, how about second_task? */
80 if (!is_priority_boosted(second_task) ||
81 lt_before(get_boost_start(first_task),
82 get_boost_start(second_task)))
83 return 1;
84 else
85 return 0;
86 } else if (unlikely(is_priority_boosted(second_task)))
87 /* second_task is boosted, first is not*/
88 return 0;
89
90#endif
91
92 if (earlier_deadline(first_task, second_task)) {
93 return 1;
94 }
95 else if (get_deadline(first_task) == get_deadline(second_task)) {
96 /* Need to tie break. All methods must set pid_break to 0/1 if
97 * first_task does not have priority over second_task.
98 */
99 int pid_break;
100
101
102#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
103 /* Tie break by lateness. Jobs with greater lateness get
104 * priority. This should spread tardiness across all tasks,
105 * especially in task sets where all tasks have the same
106 * period and relative deadlines.
107 */
108 if (get_lateness(first_task) > get_lateness(second_task)) {
109 return 1;
110 }
111 pid_break = (get_lateness(first_task) == get_lateness(second_task));
112
113
114#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
115 /* Tie break by lateness, normalized by relative deadline. Jobs with
116 * greater normalized lateness get priority.
117 *
118 * Note: Considered using the algebraically equivalent
119 * lateness(first)*relative_deadline(second) >
120 lateness(second)*relative_deadline(first)
121 * to avoid fixed-point math, but values are prone to overflow if inputs
122 * are on the order of several seconds, even in 64-bit.
123 */
124 fp_t fnorm = _frac(get_lateness(first_task),
125 get_rt_relative_deadline(first_task));
126 fp_t snorm = _frac(get_lateness(second_task),
127 get_rt_relative_deadline(second_task));
128 if (_gt(fnorm, snorm)) {
129 return 1;
130 }
131 pid_break = _eq(fnorm, snorm);
132
133
134#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
135 /* Tie break by comparing hashs of (pid, job#) tuple. There should be
136 * a 50% chance that first_task has a higher priority than second_task.
137 */
138 long fhash = edf_hash(first_task);
139 long shash = edf_hash(second_task);
140 if (fhash < shash) {
141 return 1;
142 }
143 pid_break = (fhash == shash);
144#else
145
146
147 /* CONFIG_EDF_PID_TIE_BREAK */
148 pid_break = 1; // fall through to tie-break by pid;
149#endif
150
151 /* Tie break by pid */
152 if(pid_break) {
153 if (first_task->pid < second_task->pid) {
154 return 1;
155 }
156 else if (first_task->pid == second_task->pid) {
157 /* If the PIDs are the same then the task with the
158 * inherited priority wins.
159 */
160 if (!second->rt_param.inh_task) {
161 return 1;
162 }
163 }
164 }
165 }
166 return 0; /* fall-through. prio(second_task) > prio(first_task) */
167}
168
/* Heap order of the ready queue: compares the tasks behind two heap nodes
 * with edf_higher_prio().
 */
int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
{
	return edf_higher_prio(bheap2task(a),
			       bheap2task(b));
}
173
174void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
175 release_jobs_t release)
176{
177 rt_domain_init(rt, edf_ready_order, resched, release);
178}
179
180/* need_to_preempt - check whether the task t needs to be preempted
181 * call only with irqs disabled and with ready_lock acquired
182 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
183 */
184int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
185{
186 /* we need the read lock for edf_ready_queue */
187 /* no need to preempt if there is nothing pending */
188 if (!__jobs_pending(rt))
189 return 0;
190 /* we need to reschedule if t doesn't exist */
191 if (!t)
192 return 1;
193
194 /* NOTE: We cannot check for non-preemptibility since we
195 * don't know what address space we're currently in.
196 */
197
198 /* make sure to get non-rt stuff out of the way */
199 return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
200}
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 00000000000..cd85b9cd9a0
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,297 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - objects descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops generic_lock_ops;
22
23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */
26 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */
30};
31
32static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
33{
34 if (fdso_ops[type]->create)
35 return fdso_ops[type]->create(obj_ref, type, config);
36 else
37 return -EINVAL;
38}
39
40static void fdso_destroy(obj_type_t type, void* obj)
41{
42 fdso_ops[type]->destroy(type, obj);
43}
44
45static int fdso_open(struct od_table_entry* entry, void* __user config)
46{
47 if (fdso_ops[entry->obj->type]->open)
48 return fdso_ops[entry->obj->type]->open(entry, config);
49 else
50 return 0;
51}
52
53static int fdso_close(struct od_table_entry* entry)
54{
55 if (fdso_ops[entry->obj->type]->close)
56 return fdso_ops[entry->obj->type]->close(entry);
57 else
58 return 0;
59}
60
61/* inode must be locked already */
62static int alloc_inode_obj(struct inode_obj_id** obj_ref,
63 struct inode* inode,
64 obj_type_t type,
65 unsigned int id,
66 void* __user config)
67{
68 struct inode_obj_id* obj;
69 void* raw_obj;
70 int err;
71
72 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
73 if (!obj) {
74 return -ENOMEM;
75 }
76
77 err = fdso_create(&raw_obj, type, config);
78 if (err != 0) {
79 kfree(obj);
80 return err;
81 }
82
83 INIT_LIST_HEAD(&obj->list);
84 atomic_set(&obj->count, 1);
85 obj->type = type;
86 obj->id = id;
87 obj->obj = raw_obj;
88 obj->inode = inode;
89
90 list_add(&obj->list, &inode->i_obj_list);
91 atomic_inc(&inode->i_count);
92
93 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
94
95 *obj_ref = obj;
96 return 0;
97}
98
99/* inode must be locked already */
100static struct inode_obj_id* get_inode_obj(struct inode* inode,
101 obj_type_t type,
102 unsigned int id)
103{
104 struct list_head* pos;
105 struct inode_obj_id* obj = NULL;
106
107 list_for_each(pos, &inode->i_obj_list) {
108 obj = list_entry(pos, struct inode_obj_id, list);
109 if (obj->id == id && obj->type == type) {
110 atomic_inc(&obj->count);
111 return obj;
112 }
113 }
114 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
115 return NULL;
116}
117
118
119static void put_inode_obj(struct inode_obj_id* obj)
120{
121 struct inode* inode;
122 int let_go = 0;
123
124 inode = obj->inode;
125 if (atomic_dec_and_test(&obj->count)) {
126
127 mutex_lock(&inode->i_obj_mutex);
128 /* no new references can be obtained */
129 if (!atomic_read(&obj->count)) {
130 list_del(&obj->list);
131 fdso_destroy(obj->type, obj->obj);
132 kfree(obj);
133 let_go = 1;
134 }
135 mutex_unlock(&inode->i_obj_mutex);
136 if (let_go)
137 iput(inode);
138 }
139}
140
141static struct od_table_entry* get_od_entry(struct task_struct* t)
142{
143 struct od_table_entry* table;
144 int i;
145
146
147 table = t->od_table;
148 if (!table) {
149 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
150 GFP_KERNEL);
151 t->od_table = table;
152 }
153
154 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
155 if (!table[i].used) {
156 table[i].used = 1;
157 return table + i;
158 }
159 return NULL;
160}
161
162static int put_od_entry(struct od_table_entry* od)
163{
164 put_inode_obj(od->obj);
165 od->used = 0;
166 return 0;
167}
168
169void exit_od_table(struct task_struct* t)
170{
171 int i;
172
173 if (t->od_table) {
174 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
175 if (t->od_table[i].used)
176 put_od_entry(t->od_table + i);
177 kfree(t->od_table);
178 t->od_table = NULL;
179 }
180}
181
182static int do_sys_od_open(struct file* file, obj_type_t type, int id,
183 void* __user config)
184{
185 int idx = 0, err = 0;
186 struct inode* inode;
187 struct inode_obj_id* obj = NULL;
188 struct od_table_entry* entry;
189
190 inode = file->f_dentry->d_inode;
191
192 entry = get_od_entry(current);
193 if (!entry)
194 return -ENOMEM;
195
196 mutex_lock(&inode->i_obj_mutex);
197 obj = get_inode_obj(inode, type, id);
198 if (!obj)
199 err = alloc_inode_obj(&obj, inode, type, id, config);
200 if (err != 0) {
201 obj = NULL;
202 idx = err;
203 entry->used = 0;
204 } else {
205 entry->obj = obj;
206 entry->class = fdso_ops[type];
207 idx = entry - current->od_table;
208 }
209
210 mutex_unlock(&inode->i_obj_mutex);
211
212 /* open only if creation succeeded */
213 if (!err)
214 err = fdso_open(entry, config);
215 if (err < 0) {
216 /* The class rejected the open call.
217 * We need to clean up and tell user space.
218 */
219 if (obj)
220 put_od_entry(entry);
221 idx = err;
222 }
223
224 return idx;
225}
226
227
228struct od_table_entry* get_entry_for_od(int od)
229{
230 struct task_struct *t = current;
231
232 if (!t->od_table)
233 return NULL;
234 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
235 return NULL;
236 if (!t->od_table[od].used)
237 return NULL;
238 return t->od_table + od;
239}
240
241
242asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
243{
244 int ret = 0;
245 struct file* file;
246
247 /*
248 1) get file from fd, get inode from file
249 2) lock inode
250 3) try to lookup object
251 4) if not present create and enqueue object, inc inode refcnt
252 5) increment refcnt of object
253 6) alloc od_table_entry, setup ptrs
254 7) unlock inode
255 8) return offset in od_table as OD
256 */
257
258 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
259 ret = -EINVAL;
260 goto out;
261 }
262
263 file = fget(fd);
264 if (!file) {
265 ret = -EBADF;
266 goto out;
267 }
268
269 ret = do_sys_od_open(file, type, obj_id, config);
270
271 fput(file);
272
273out:
274 return ret;
275}
276
277
278asmlinkage long sys_od_close(int od)
279{
280 int ret = -EINVAL;
281 struct task_struct *t = current;
282
283 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
284 return ret;
285
286 if (!t->od_table || !t->od_table[od].used)
287 return ret;
288
289
290 /* give the class a chance to reject the close
291 */
292 ret = fdso_close(t->od_table + od);
293 if (ret == 0)
294 ret = put_od_entry(t->od_table + od);
295
296 return ret;
297}
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 00000000000..31fc2db20ad
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,119 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fp_common.h>
16
17/* fp_higher_prio - returns true if first has a higher static priority
18 * than second. Deadline ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int fp_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (unlikely(first && first == second)) {
30 TRACE_TASK(first,
31 "WARNING: pointless FP priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40#ifdef CONFIG_LITMUS_LOCKING
41
42 /* Check for inherited priorities. Change task
43 * used for comparison in such a case.
44 */
45 if (unlikely(first->rt_param.inh_task))
46 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task))
48 second_task = second->rt_param.inh_task;
49
50 /* Check for priority boosting. Tie-break by start of boosting.
51 */
52 if (unlikely(is_priority_boosted(first_task))) {
53 /* first_task is boosted, how about second_task? */
54 if (!is_priority_boosted(second_task) ||
55 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task)))
57 return 1;
58 else
59 return 0;
60 } else if (unlikely(is_priority_boosted(second_task)))
61 /* second_task is boosted, first is not*/
62 return 0;
63
64#endif
65
66
67 return !is_realtime(second_task) ||
68
69 get_priority(first_task) < get_priority(second_task) ||
70
71 /* Break by PID.
72 */
73 (get_priority(first_task) == get_priority(second_task) &&
74 (first_task->pid < second_task->pid ||
75
76 /* If the PIDs are the same then the task with the inherited
77 * priority wins.
78 */
79 (first_task->pid == second_task->pid &&
80 !second->rt_param.inh_task)));
81}
82
/* Heap order of a ready queue: compares the tasks behind two heap nodes
 * with fp_higher_prio().
 */
int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
{
	return fp_higher_prio(bheap2task(a),
			      bheap2task(b));
}
87
88void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
89 release_jobs_t release)
90{
91 rt_domain_init(rt, fp_ready_order, resched, release);
92}
93
/* fp_preemption_needed - check whether the task t needs to be preempted
 *
 * Returns 1 if the task at the head of @q should replace @t, 0 otherwise.
 */
int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
{
	struct task_struct *head = fp_prio_peek(q);

	/* nothing is waiting: nothing to preempt for */
	if (!head)
		return 0;

	/* no task at all: always reschedule */
	if (!t)
		return 1;

	/* make sure to get non-rt stuff out of the way */
	if (!is_realtime(t))
		return 1;

	return fp_higher_prio(head, t) != 0;
}
110
111void fp_prio_queue_init(struct fp_prio_queue* q)
112{
113 int i;
114
115 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
116 q->bitmask[i] = 0;
117 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
118 bheap_init(&q->queue[i]);
119}
diff --git a/litmus/ft_event.c b/litmus/ft_event.c
new file mode 100644
index 00000000000..399a07becca
--- /dev/null
+++ b/litmus/ft_event.c
@@ -0,0 +1,43 @@
1#include <linux/types.h>
2
3#include <litmus/feather_trace.h>
4
5#if !defined(CONFIG_ARCH_HAS_FEATHER_TRACE) || defined(CONFIG_DEBUG_RODATA)
6/* provide dummy implementation */
7
8int ft_events[MAX_EVENTS];
9
10int ft_enable_event(unsigned long id)
11{
12 if (id < MAX_EVENTS) {
13 ft_events[id]++;
14 return 1;
15 } else
16 return 0;
17}
18
19int ft_disable_event(unsigned long id)
20{
21 if (id < MAX_EVENTS && ft_events[id]) {
22 ft_events[id]--;
23 return 1;
24 } else
25 return 0;
26}
27
28int ft_disable_all_events(void)
29{
30 int i;
31
32 for (i = 0; i < MAX_EVENTS; i++)
33 ft_events[i] = 0;
34
35 return MAX_EVENTS;
36}
37
38int ft_is_event_enabled(unsigned long id)
39{
40 return id < MAX_EVENTS && ft_events[id];
41}
42
43#endif
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
new file mode 100644
index 00000000000..06fcf4cf77d
--- /dev/null
+++ b/litmus/ftdev.c
@@ -0,0 +1,439 @@
1#include <linux/sched.h>
2#include <linux/fs.h>
3#include <linux/slab.h>
4#include <linux/cdev.h>
5#include <asm/uaccess.h>
6#include <linux/module.h>
7#include <linux/device.h>
8
9#include <litmus/litmus.h>
10#include <litmus/feather_trace.h>
11#include <litmus/ftdev.h>
12
13struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
14{
15 struct ft_buffer* buf;
16 size_t total = (size + 1) * count;
17 char* mem;
18 int order = 0, pages = 1;
19
20 buf = kmalloc(sizeof(*buf), GFP_KERNEL);
21 if (!buf)
22 return NULL;
23
24 total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
25 while (pages < total) {
26 order++;
27 pages *= 2;
28 }
29
30 mem = (char*) __get_free_pages(GFP_KERNEL, order);
31 if (!mem) {
32 kfree(buf);
33 return NULL;
34 }
35
36 if (!init_ft_buffer(buf, count, size,
37 mem + (count * size), /* markers at the end */
38 mem)) { /* buffer objects */
39 free_pages((unsigned long) mem, order);
40 kfree(buf);
41 return NULL;
42 }
43 return buf;
44}
45
46void free_ft_buffer(struct ft_buffer* buf)
47{
48 int order = 0, pages = 1;
49 size_t total;
50
51 if (buf) {
52 total = (buf->slot_size + 1) * buf->slot_count;
53 total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
54 while (pages < total) {
55 order++;
56 pages *= 2;
57 }
58 free_pages((unsigned long) buf->buffer_mem, order);
59 kfree(buf);
60 }
61}
62
/* Node of a singly-linked list of events enabled through a minor device. */
struct ftdev_event {
	int id;				/* Feather-Trace event id */
	struct ftdev_event *next;	/* next enabled event, or NULL */
};
67
68static int activate(struct ftdev_event** chain, int id)
69{
70 struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL);
71 if (ev) {
72 printk(KERN_INFO
73 "Enabling feather-trace event %d.\n", (int) id);
74 ft_enable_event(id);
75 ev->id = id;
76 ev->next = *chain;
77 *chain = ev;
78 }
79 return ev ? 0 : -ENOMEM;
80}
81
82static void deactivate(struct ftdev_event** chain, int id)
83{
84 struct ftdev_event **cur = chain;
85 struct ftdev_event *nxt;
86 while (*cur) {
87 if ((*cur)->id == id) {
88 nxt = (*cur)->next;
89 kfree(*cur);
90 *cur = nxt;
91 printk(KERN_INFO
92 "Disabling feather-trace event %d.\n", (int) id);
93 ft_disable_event(id);
94 break;
95 }
96 cur = &(*cur)->next;
97 }
98}
99
100static int ftdev_open(struct inode *in, struct file *filp)
101{
102 struct ftdev* ftdev;
103 struct ftdev_minor* ftdm;
104 unsigned int buf_idx = iminor(in);
105 int err = 0;
106
107 ftdev = container_of(in->i_cdev, struct ftdev, cdev);
108
109 if (buf_idx >= ftdev->minor_cnt) {
110 err = -ENODEV;
111 goto out;
112 }
113 if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx)))
114 goto out;
115
116 ftdm = ftdev->minor + buf_idx;
117 ftdm->ftdev = ftdev;
118 filp->private_data = ftdm;
119
120 if (mutex_lock_interruptible(&ftdm->lock)) {
121 err = -ERESTARTSYS;
122 goto out;
123 }
124
125 if (!ftdm->readers && ftdev->alloc)
126 err = ftdev->alloc(ftdev, buf_idx);
127 if (0 == err)
128 ftdm->readers++;
129
130 mutex_unlock(&ftdm->lock);
131out:
132 return err;
133}
134
135static int ftdev_release(struct inode *in, struct file *filp)
136{
137 struct ftdev* ftdev;
138 struct ftdev_minor* ftdm;
139 unsigned int buf_idx = iminor(in);
140 int err = 0;
141
142 ftdev = container_of(in->i_cdev, struct ftdev, cdev);
143
144 if (buf_idx >= ftdev->minor_cnt) {
145 err = -ENODEV;
146 goto out;
147 }
148 ftdm = ftdev->minor + buf_idx;
149
150 if (mutex_lock_interruptible(&ftdm->lock)) {
151 err = -ERESTARTSYS;
152 goto out;
153 }
154
155 if (ftdm->readers == 1) {
156 while (ftdm->events)
157 deactivate(&ftdm->events, ftdm->events->id);
158
159 /* wait for any pending events to complete */
160 set_current_state(TASK_UNINTERRUPTIBLE);
161 schedule_timeout(HZ);
162
163 printk(KERN_ALERT "Failed trace writes: %u\n",
164 ftdm->buf->failed_writes);
165
166 if (ftdev->free)
167 ftdev->free(ftdev, buf_idx);
168 }
169
170 ftdm->readers--;
171 mutex_unlock(&ftdm->lock);
172out:
173 return err;
174}
175
176/* based on ft_buffer_read
177 * @returns < 0 : page fault
178 * = 0 : no data available
179 * = 1 : one slot copied
180 */
181static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest)
182{
183 unsigned int idx;
184 int err = 0;
185 if (buf->free_count != buf->slot_count) {
186 /* data available */
187 idx = buf->read_idx % buf->slot_count;
188 if (buf->slots[idx] == SLOT_READY) {
189 err = copy_to_user(dest, ((char*) buf->buffer_mem) +
190 idx * buf->slot_size,
191 buf->slot_size);
192 if (err == 0) {
193 /* copy ok */
194 buf->slots[idx] = SLOT_FREE;
195 buf->read_idx++;
196 fetch_and_inc(&buf->free_count);
197 err = 1;
198 }
199 }
200 }
201 return err;
202}
203
204static ssize_t ftdev_read(struct file *filp,
205 char __user *to, size_t len, loff_t *f_pos)
206{
207 /* we ignore f_pos, this is strictly sequential */
208
209 ssize_t err = 0;
210 size_t chunk;
211 int copied;
212 struct ftdev_minor* ftdm = filp->private_data;
213
214 if (mutex_lock_interruptible(&ftdm->lock)) {
215 err = -ERESTARTSYS;
216 goto out;
217 }
218
219
220 chunk = ftdm->buf->slot_size;
221 while (len >= chunk) {
222 copied = ft_buffer_copy_to_user(ftdm->buf, to);
223 if (copied == 1) {
224 len -= chunk;
225 to += chunk;
226 err += chunk;
227 } else if (err == 0 && copied == 0 && ftdm->events) {
228 /* Only wait if there are any events enabled and only
229 * if we haven't copied some data yet. We cannot wait
230 * here with copied data because that data would get
231 * lost if the task is interrupted (e.g., killed).
232 */
233 set_current_state(TASK_INTERRUPTIBLE);
234 schedule_timeout(50);
235 if (signal_pending(current)) {
236 if (err == 0)
237 /* nothing read yet, signal problem */
238 err = -ERESTARTSYS;
239 break;
240 }
241 } else if (copied < 0) {
242 /* page fault */
243 err = copied;
244 break;
245 } else
246 /* nothing left to get, return to user space */
247 break;
248 }
249 mutex_unlock(&ftdm->lock);
250out:
251 return err;
252}
253
254static long ftdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
255{
256 long err = -ENOIOCTLCMD;
257 struct ftdev_minor* ftdm = filp->private_data;
258
259 if (mutex_lock_interruptible(&ftdm->lock)) {
260 err = -ERESTARTSYS;
261 goto out;
262 }
263
264 /* FIXME: check id against list of acceptable events */
265
266 switch (cmd) {
267 case FTDEV_ENABLE_CMD:
268 if (activate(&ftdm->events, arg))
269 err = -ENOMEM;
270 else
271 err = 0;
272 break;
273
274 case FTDEV_DISABLE_CMD:
275 deactivate(&ftdm->events, arg);
276 err = 0;
277 break;
278
279 default:
280 printk(KERN_DEBUG "ftdev: strange ioctl (%u, %lu)\n", cmd, arg);
281 };
282
283 mutex_unlock(&ftdm->lock);
284out:
285 return err;
286}
287
288static ssize_t ftdev_write(struct file *filp, const char __user *from,
289 size_t len, loff_t *f_pos)
290{
291 struct ftdev_minor* ftdm = filp->private_data;
292 ssize_t err = -EINVAL;
293 struct ftdev* ftdev = ftdm->ftdev;
294
295 /* dispatch write to buffer-specific code, if available */
296 if (ftdev->write)
297 err = ftdev->write(ftdm->buf, len, from);
298
299 return err;
300}
301
302struct file_operations ftdev_fops = {
303 .owner = THIS_MODULE,
304 .open = ftdev_open,
305 .release = ftdev_release,
306 .write = ftdev_write,
307 .read = ftdev_read,
308 .unlocked_ioctl = ftdev_ioctl,
309};
310
311int ftdev_init( struct ftdev* ftdev, struct module* owner,
312 const int minor_cnt, const char* name)
313{
314 int i, err;
315
316 BUG_ON(minor_cnt < 1);
317
318 cdev_init(&ftdev->cdev, &ftdev_fops);
319 ftdev->name = name;
320 ftdev->minor_cnt = minor_cnt;
321 ftdev->cdev.owner = owner;
322 ftdev->cdev.ops = &ftdev_fops;
323 ftdev->alloc = NULL;
324 ftdev->free = NULL;
325 ftdev->can_open = NULL;
326 ftdev->write = NULL;
327
328 ftdev->minor = kcalloc(ftdev->minor_cnt, sizeof(*ftdev->minor),
329 GFP_KERNEL);
330 if (!ftdev->minor) {
331 printk(KERN_WARNING "ftdev(%s): Could not allocate memory\n",
332 ftdev->name);
333 err = -ENOMEM;
334 goto err_out;
335 }
336
337 for (i = 0; i < ftdev->minor_cnt; i++) {
338 mutex_init(&ftdev->minor[i].lock);
339 ftdev->minor[i].readers = 0;
340 ftdev->minor[i].buf = NULL;
341 ftdev->minor[i].events = NULL;
342 }
343
344 ftdev->class = class_create(owner, ftdev->name);
345 if (IS_ERR(ftdev->class)) {
346 err = PTR_ERR(ftdev->class);
347 printk(KERN_WARNING "ftdev(%s): "
348 "Could not create device class.\n", ftdev->name);
349 goto err_dealloc;
350 }
351
352 return 0;
353
354err_dealloc:
355 kfree(ftdev->minor);
356err_out:
357 return err;
358}
359
360/*
361 * Destroy minor devices up to, but not including, up_to.
362 */
363static void ftdev_device_destroy(struct ftdev* ftdev, unsigned int up_to)
364{
365 dev_t minor_cntr;
366
367 if (up_to < 1)
368 up_to = (ftdev->minor_cnt < 1) ? 0 : ftdev->minor_cnt;
369
370 for (minor_cntr = 0; minor_cntr < up_to; ++minor_cntr)
371 device_destroy(ftdev->class, MKDEV(ftdev->major, minor_cntr));
372}
373
374void ftdev_exit(struct ftdev* ftdev)
375{
376 printk("ftdev(%s): Exiting\n", ftdev->name);
377 ftdev_device_destroy(ftdev, -1);
378 cdev_del(&ftdev->cdev);
379 unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
380 class_destroy(ftdev->class);
381 kfree(ftdev->minor);
382}
383
384int register_ftdev(struct ftdev* ftdev)
385{
386 struct device **device;
387 dev_t trace_dev_tmp, minor_cntr;
388 int err;
389
390 err = alloc_chrdev_region(&trace_dev_tmp, 0, ftdev->minor_cnt,
391 ftdev->name);
392 if (err) {
393 printk(KERN_WARNING "ftdev(%s): "
394 "Could not allocate char. device region (%d minors)\n",
395 ftdev->name, ftdev->minor_cnt);
396 goto err_out;
397 }
398
399 ftdev->major = MAJOR(trace_dev_tmp);
400
401 err = cdev_add(&ftdev->cdev, trace_dev_tmp, ftdev->minor_cnt);
402 if (err) {
403 printk(KERN_WARNING "ftdev(%s): "
404 "Could not add cdev for major %u with %u minor(s).\n",
405 ftdev->name, ftdev->major, ftdev->minor_cnt);
406 goto err_unregister;
407 }
408
409 /* create the minor device(s) */
410 for (minor_cntr = 0; minor_cntr < ftdev->minor_cnt; ++minor_cntr)
411 {
412 trace_dev_tmp = MKDEV(ftdev->major, minor_cntr);
413 device = &ftdev->minor[minor_cntr].device;
414
415 *device = device_create(ftdev->class, NULL, trace_dev_tmp, NULL,
416 "litmus/%s%d", ftdev->name, minor_cntr);
417 if (IS_ERR(*device)) {
418 err = PTR_ERR(*device);
419 printk(KERN_WARNING "ftdev(%s): "
420 "Could not create device major/minor number "
421 "%u/%u\n", ftdev->name, ftdev->major,
422 minor_cntr);
423 printk(KERN_WARNING "ftdev(%s): "
424 "will attempt deletion of allocated devices.\n",
425 ftdev->name);
426 goto err_minors;
427 }
428 }
429
430 return 0;
431
432err_minors:
433 ftdev_device_destroy(ftdev, minor_cntr);
434 cdev_del(&ftdev->cdev);
435err_unregister:
436 unregister_chrdev_region(MKDEV(ftdev->major, 0), ftdev->minor_cnt);
437err_out:
438 return err;
439}
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 00000000000..fb093c03d53
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,57 @@
1/* litmus/jobs.c - common job control code
2 */
3
4#include <linux/sched.h>
5
6#include <litmus/litmus.h>
7#include <litmus/jobs.h>
8
9static inline void setup_release(struct task_struct *t, lt_t release)
10{
11 /* prepare next release */
12 t->rt_param.job_params.release = release;
13 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
14 t->rt_param.job_params.exec_time = 0;
15
16 /* update job sequence number */
17 t->rt_param.job_params.job_no++;
18
19 /* don't confuse Linux */
20 t->rt.time_slice = 1;
21}
22
23void prepare_for_next_period(struct task_struct *t)
24{
25 BUG_ON(!t);
26
27 /* Record lateness before we set up the next job's
28 * release and deadline. Lateness may be negative.
29 */
30 t->rt_param.job_params.lateness =
31 (long long)litmus_clock() -
32 (long long)t->rt_param.job_params.deadline;
33
34 setup_release(t, get_release(t) + get_rt_period(t));
35}
36
37void release_at(struct task_struct *t, lt_t start)
38{
39 BUG_ON(!t);
40 setup_release(t, start);
41 set_rt_flags(t, RT_F_RUNNING);
42}
43
44
45/*
46 * Deactivate current task until the beginning of the next period.
47 */
48long complete_job(void)
49{
50 /* Mark that we do not excute anymore */
51 set_rt_flags(current, RT_F_SLEEP);
52 /* call schedule, this will return when a new job arrives
53 * it also takes care of preparing for the next release
54 */
55 schedule();
56 return 0;
57}
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 00000000000..81384327e85
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,571 @@
1/*
2 * litmus.c -- Implementation of the LITMUS syscalls,
3 * the LITMUS initialization code,
4 * and the procfs interface.
5 */
6#include <asm/uaccess.h>
7#include <linux/uaccess.h>
8#include <linux/sysrq.h>
9#include <linux/sched.h>
10#include <linux/module.h>
11#include <linux/slab.h>
12
13#include <litmus/litmus.h>
14#include <litmus/bheap.h>
15#include <litmus/trace.h>
16#include <litmus/rt_domain.h>
17#include <litmus/litmus_proc.h>
18#include <litmus/sched_trace.h>
19
20#ifdef CONFIG_SCHED_CPU_AFFINITY
21#include <litmus/affinity.h>
22#endif
23
24/* Number of RT tasks that exist in the system */
25atomic_t rt_task_count = ATOMIC_INIT(0);
26static DEFINE_RAW_SPINLOCK(task_transition_lock);
27/* synchronize plugin switching */
28atomic_t cannot_use_plugin = ATOMIC_INIT(0);
29
30/* Give log messages sequential IDs. */
31atomic_t __log_seq_no = ATOMIC_INIT(0);
32
33#ifdef CONFIG_RELEASE_MASTER
34/* current master CPU for handling timer IRQs */
35atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
36#endif
37
38static struct kmem_cache * bheap_node_cache;
39extern struct kmem_cache * release_heap_cache;
40
41struct bheap_node* bheap_node_alloc(int gfp_flags)
42{
43 return kmem_cache_alloc(bheap_node_cache, gfp_flags);
44}
45
46void bheap_node_free(struct bheap_node* hn)
47{
48 kmem_cache_free(bheap_node_cache, hn);
49}
50
51struct release_heap* release_heap_alloc(int gfp_flags);
52void release_heap_free(struct release_heap* rh);
53
54/*
55 * sys_set_task_rt_param
56 * @pid: Pid of the task which scheduling parameters must be changed
57 * @param: New real-time extension parameters such as the execution cost and
58 * period
59 * Syscall for manipulating with task rt extension params
60 * Returns EFAULT if param is NULL.
61 * ESRCH if pid is not corrsponding
62 * to a valid task.
63 * EINVAL if either period or execution cost is <=0
64 * EPERM if pid is a real-time task
65 * 0 if success
66 *
67 * Only non-real-time tasks may be configured with this system call
68 * to avoid races with the scheduler. In practice, this means that a
69 * task's parameters must be set _before_ calling sys_prepare_rt_task()
70 *
71 * find_task_by_vpid() assumes that we are in the same namespace of the
72 * target.
73 */
74asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
75{
76 struct rt_task tp;
77 struct task_struct *target;
78 int retval = -EINVAL;
79
80 printk("Setting up rt task parameters for process %d.\n", pid);
81
82 if (pid < 0 || param == 0) {
83 goto out;
84 }
85 if (copy_from_user(&tp, param, sizeof(tp))) {
86 retval = -EFAULT;
87 goto out;
88 }
89
90 /* Task search and manipulation must be protected */
91 read_lock_irq(&tasklist_lock);
92 if (!(target = find_task_by_vpid(pid))) {
93 retval = -ESRCH;
94 goto out_unlock;
95 }
96
97 if (is_realtime(target)) {
98 /* The task is already a real-time task.
99 * We cannot not allow parameter changes at this point.
100 */
101 retval = -EBUSY;
102 goto out_unlock;
103 }
104
105 /* set relative deadline to be implicit if left unspecified */
106 if (tp.relative_deadline == 0)
107 tp.relative_deadline = tp.period;
108
109 if (tp.exec_cost <= 0)
110 goto out_unlock;
111 if (tp.period <= 0)
112 goto out_unlock;
113 if (!cpu_online(tp.cpu))
114 goto out_unlock;
115 if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
116 {
117 printk(KERN_INFO "litmus: real-time task %d rejected "
118 "because task density > 1.0\n", pid);
119 goto out_unlock;
120 }
121 if (tp.cls != RT_CLASS_HARD &&
122 tp.cls != RT_CLASS_SOFT &&
123 tp.cls != RT_CLASS_BEST_EFFORT)
124 {
125 printk(KERN_INFO "litmus: real-time task %d rejected "
126 "because its class is invalid\n", pid);
127 goto out_unlock;
128 }
129 if (tp.budget_policy != NO_ENFORCEMENT &&
130 tp.budget_policy != QUANTUM_ENFORCEMENT &&
131 tp.budget_policy != PRECISE_ENFORCEMENT)
132 {
133 printk(KERN_INFO "litmus: real-time task %d rejected "
134 "because unsupported budget enforcement policy "
135 "specified (%d)\n",
136 pid, tp.budget_policy);
137 goto out_unlock;
138 }
139
140 target->rt_param.task_params = tp;
141
142 retval = 0;
143 out_unlock:
144 read_unlock_irq(&tasklist_lock);
145 out:
146 return retval;
147}
148
149/*
150 * Getter of task's RT params
151 * returns EINVAL if param or pid is NULL
152 * returns ESRCH if pid does not correspond to a valid task
153 * returns EFAULT if copying of parameters has failed.
154 *
155 * find_task_by_vpid() assumes that we are in the same namespace of the
156 * target.
157 */
158asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
159{
160 int retval = -EINVAL;
161 struct task_struct *source;
162 struct rt_task lp;
163 if (param == 0 || pid < 0)
164 goto out;
165 read_lock(&tasklist_lock);
166 if (!(source = find_task_by_vpid(pid))) {
167 retval = -ESRCH;
168 goto out_unlock;
169 }
170 lp = source->rt_param.task_params;
171 read_unlock(&tasklist_lock);
172 /* Do copying outside the lock */
173 retval =
174 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
175 return retval;
176 out_unlock:
177 read_unlock(&tasklist_lock);
178 out:
179 return retval;
180
181}
182
183/*
184 * This is the crucial function for periodic task implementation,
185 * It checks if a task is periodic, checks if such kind of sleep
186 * is permitted and calls plugin-specific sleep, which puts the
187 * task into a wait array.
188 * returns 0 on successful wakeup
189 * returns EPERM if current conditions do not permit such sleep
190 * returns EINVAL if current task is not able to go to sleep
191 */
192asmlinkage long sys_complete_job(void)
193{
194 int retval = -EPERM;
195 if (!is_realtime(current)) {
196 retval = -EINVAL;
197 goto out;
198 }
199 /* Task with negative or zero period cannot sleep */
200 if (get_rt_period(current) <= 0) {
201 retval = -EINVAL;
202 goto out;
203 }
204 /* The plugin has to put the task into an
205 * appropriate queue and call schedule
206 */
207 retval = litmus->complete_job();
208 out:
209 return retval;
210}
211
212/* This is an "improved" version of sys_complete_job that
213 * addresses the problem of unintentionally missing a job after
214 * an overrun.
215 *
216 * returns 0 on successful wakeup
217 * returns EPERM if current conditions do not permit such sleep
218 * returns EINVAL if current task is not able to go to sleep
219 */
220asmlinkage long sys_wait_for_job_release(unsigned int job)
221{
222 int retval = -EPERM;
223 if (!is_realtime(current)) {
224 retval = -EINVAL;
225 goto out;
226 }
227
228 /* Task with negative or zero period cannot sleep */
229 if (get_rt_period(current) <= 0) {
230 retval = -EINVAL;
231 goto out;
232 }
233
234 retval = 0;
235
236 /* first wait until we have "reached" the desired job
237 *
238 * This implementation has at least two problems:
239 *
240 * 1) It doesn't gracefully handle the wrap around of
241 * job_no. Since LITMUS is a prototype, this is not much
242 * of a problem right now.
243 *
244 * 2) It is theoretically racy if a job release occurs
245 * between checking job_no and calling sleep_next_period().
246 * A proper solution would requiring adding another callback
247 * in the plugin structure and testing the condition with
248 * interrupts disabled.
249 *
250 * FIXME: At least problem 2 should be taken care of eventually.
251 */
252 while (!retval && job > current->rt_param.job_params.job_no)
253 /* If the last job overran then job <= job_no and we
254 * don't send the task to sleep.
255 */
256 retval = litmus->complete_job();
257 out:
258 return retval;
259}
260
261/* This is a helper syscall to query the current job sequence number.
262 *
263 * returns 0 on successful query
264 * returns EPERM if task is not a real-time task.
265 * returns EFAULT if &job is not a valid pointer.
266 */
267asmlinkage long sys_query_job_no(unsigned int __user *job)
268{
269 int retval = -EPERM;
270 if (is_realtime(current))
271 retval = put_user(current->rt_param.job_params.job_no, job);
272
273 return retval;
274}
275
276/* sys_null_call() is only used for determining raw system call
277 * overheads (kernel entry, kernel exit). It has no useful side effects.
278 * If ts is non-NULL, then the current Feather-Trace time is recorded.
279 */
280asmlinkage long sys_null_call(cycles_t __user *ts)
281{
282 long ret = 0;
283 cycles_t now;
284
285 if (ts) {
286 now = get_cycles();
287 ret = put_user(now, ts);
288 }
289
290 return ret;
291}
292
293/* p is a real-time task. Re-init its state as a best-effort task. */
294static void reinit_litmus_state(struct task_struct* p, int restore)
295{
296 struct rt_task user_config = {};
297 void* ctrl_page = NULL;
298
299 if (restore) {
300 /* Safe user-space provided configuration data.
301 * and allocated page. */
302 user_config = p->rt_param.task_params;
303 ctrl_page = p->rt_param.ctrl_page;
304 }
305
306 /* We probably should not be inheriting any task's priority
307 * at this point in time.
308 */
309 WARN_ON(p->rt_param.inh_task);
310
311 /* Cleanup everything else. */
312 memset(&p->rt_param, 0, sizeof(p->rt_param));
313
314 /* Restore preserved fields. */
315 if (restore) {
316 p->rt_param.task_params = user_config;
317 p->rt_param.ctrl_page = ctrl_page;
318 }
319}
320
321long litmus_admit_task(struct task_struct* tsk)
322{
323 long retval = 0;
324 unsigned long flags;
325
326 BUG_ON(is_realtime(tsk));
327
328 if (get_rt_relative_deadline(tsk) == 0 ||
329 get_exec_cost(tsk) >
330 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
331 TRACE_TASK(tsk,
332 "litmus admit: invalid task parameters "
333 "(e = %lu, p = %lu, d = %lu)\n",
334 get_exec_cost(tsk), get_rt_period(tsk),
335 get_rt_relative_deadline(tsk));
336 retval = -EINVAL;
337 goto out;
338 }
339
340 if (!cpu_online(get_partition(tsk))) {
341 TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
342 get_partition(tsk));
343 retval = -EINVAL;
344 goto out;
345 }
346
347 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
348
349 /* avoid scheduler plugin changing underneath us */
350 raw_spin_lock_irqsave(&task_transition_lock, flags);
351
352 /* allocate heap node for this task */
353 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
354 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
355
356 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
357 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
358
359 bheap_node_free(tsk_rt(tsk)->heap_node);
360 release_heap_free(tsk_rt(tsk)->rel_heap);
361
362 retval = -ENOMEM;
363 goto out_unlock;
364 } else {
365 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
366 }
367
368 retval = litmus->admit_task(tsk);
369
370 if (!retval) {
371 sched_trace_task_name(tsk);
372 sched_trace_task_param(tsk);
373 atomic_inc(&rt_task_count);
374 }
375
376out_unlock:
377 raw_spin_unlock_irqrestore(&task_transition_lock, flags);
378out:
379 return retval;
380}
381
382void litmus_exit_task(struct task_struct* tsk)
383{
384 if (is_realtime(tsk)) {
385 sched_trace_task_completion(tsk, 1);
386
387 litmus->task_exit(tsk);
388
389 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
390 bheap_node_free(tsk_rt(tsk)->heap_node);
391 release_heap_free(tsk_rt(tsk)->rel_heap);
392
393 atomic_dec(&rt_task_count);
394 reinit_litmus_state(tsk, 1);
395 }
396}
397
398/* IPI callback to synchronize plugin switching */
399static void synch_on_plugin_switch(void* info)
400{
401 atomic_inc(&cannot_use_plugin);
402 while (atomic_read(&cannot_use_plugin) > 0)
403 cpu_relax();
404}
405
406/* Switching a plugin in use is tricky.
407 * We must watch out that no real-time tasks exists
408 * (and that none is created in parallel) and that the plugin is not
409 * currently in use on any processor (in theory).
410 */
411int switch_sched_plugin(struct sched_plugin* plugin)
412{
413 unsigned long flags;
414 int ret = 0;
415
416 BUG_ON(!plugin);
417
418 /* forbid other cpus to use the plugin */
419 atomic_set(&cannot_use_plugin, 1);
420 /* send IPI to force other CPUs to synch with us */
421 smp_call_function(synch_on_plugin_switch, NULL, 0);
422
423 /* wait until all other CPUs have started synch */
424 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
425 cpu_relax();
426
427 /* stop task transitions */
428 raw_spin_lock_irqsave(&task_transition_lock, flags);
429
430 /* don't switch if there are active real-time tasks */
431 if (atomic_read(&rt_task_count) == 0) {
432 ret = litmus->deactivate_plugin();
433 if (0 != ret)
434 goto out;
435 ret = plugin->activate_plugin();
436 if (0 != ret) {
437 printk(KERN_INFO "Can't activate %s (%d).\n",
438 plugin->plugin_name, ret);
439 plugin = &linux_sched_plugin;
440 }
441 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
442 litmus = plugin;
443 } else
444 ret = -EBUSY;
445out:
446 raw_spin_unlock_irqrestore(&task_transition_lock, flags);
447 atomic_set(&cannot_use_plugin, 0);
448 return ret;
449}
450
451/* Called upon fork.
452 * p is the newly forked task.
453 */
454void litmus_fork(struct task_struct* p)
455{
456 if (is_realtime(p)) {
457 /* clean out any litmus related state, don't preserve anything */
458 reinit_litmus_state(p, 0);
459 /* Don't let the child be a real-time task. */
460 p->sched_reset_on_fork = 1;
461 } else
462 /* non-rt tasks might have ctrl_page set */
463 tsk_rt(p)->ctrl_page = NULL;
464
465 /* od tables are never inherited across a fork */
466 p->od_table = NULL;
467}
468
469/* Called upon execve().
470 * current is doing the exec.
471 * Don't let address space specific stuff leak.
472 */
473void litmus_exec(void)
474{
475 struct task_struct* p = current;
476
477 if (is_realtime(p)) {
478 WARN_ON(p->rt_param.inh_task);
479 if (tsk_rt(p)->ctrl_page) {
480 free_page((unsigned long) tsk_rt(p)->ctrl_page);
481 tsk_rt(p)->ctrl_page = NULL;
482 }
483 }
484}
485
486void exit_litmus(struct task_struct *dead_tsk)
487{
488 /* We also allow non-RT tasks to
489 * allocate control pages to allow
490 * measurements with non-RT tasks.
491 * So check if we need to free the page
492 * in any case.
493 */
494 if (tsk_rt(dead_tsk)->ctrl_page) {
495 TRACE_TASK(dead_tsk,
496 "freeing ctrl_page %p\n",
497 tsk_rt(dead_tsk)->ctrl_page);
498 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
499 }
500
501 /* main cleanup only for RT tasks */
502 if (is_realtime(dead_tsk))
503 litmus_exit_task(dead_tsk);
504}
505
506
507#ifdef CONFIG_MAGIC_SYSRQ
508int sys_kill(int pid, int sig);
509
510static void sysrq_handle_kill_rt_tasks(int key)
511{
512 struct task_struct *t;
513 read_lock(&tasklist_lock);
514 for_each_process(t) {
515 if (is_realtime(t)) {
516 sys_kill(t->pid, SIGKILL);
517 }
518 }
519 read_unlock(&tasklist_lock);
520}
521
522static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
523 .handler = sysrq_handle_kill_rt_tasks,
524 .help_msg = "quit-rt-tasks(X)",
525 .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
526};
527#endif
528
529extern struct sched_plugin linux_sched_plugin;
530
531static int __init _init_litmus(void)
532{
533 /* Common initializers,
534 * mode change lock is used to enforce single mode change
535 * operation.
536 */
537 printk("Starting LITMUS^RT kernel\n");
538
539 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
540
541 register_sched_plugin(&linux_sched_plugin);
542
543 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
544 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
545
546#ifdef CONFIG_MAGIC_SYSRQ
547 /* offer some debugging help */
548 if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
549 printk("Registered kill rt tasks magic sysrq.\n");
550 else
551 printk("Could not register kill rt tasks magic sysrq.\n");
552#endif
553
554 init_litmus_proc();
555
556#ifdef CONFIG_SCHED_CPU_AFFINITY
557 init_topology();
558#endif
559
560 return 0;
561}
562
563static void _exit_litmus(void)
564{
565 exit_litmus_proc();
566 kmem_cache_destroy(bheap_node_cache);
567 kmem_cache_destroy(release_heap_cache);
568}
569
570module_init(_init_litmus);
571module_exit(_exit_litmus);
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
new file mode 100644
index 00000000000..4bf725a36c9
--- /dev/null
+++ b/litmus/litmus_proc.c
@@ -0,0 +1,347 @@
1/*
2 * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
3 */
4
5#include <linux/sched.h>
6#include <linux/uaccess.h>
7
8#include <litmus/litmus.h>
9#include <litmus/litmus_proc.h>
10
11#include <litmus/clustered.h>
12
13/* in litmus/litmus.c */
14extern atomic_t rt_task_count;
15
/*
 * Handles to the procfs entries managed by init_litmus_proc() and
 * exit_litmus_proc().  File-scope statics start out as NULL.
 */
static struct proc_dir_entry *litmus_dir;
static struct proc_dir_entry *curr_file;
static struct proc_dir_entry *stat_file;
static struct proc_dir_entry *plugs_dir;
#ifdef CONFIG_RELEASE_MASTER
static struct proc_dir_entry *release_master_file;
#endif
static struct proc_dir_entry *plugs_file;
24
25/* in litmus/sync.c */
26int count_tasks_waiting_for_release(void);
27
28static int proc_read_stats(char *page, char **start,
29 off_t off, int count,
30 int *eof, void *data)
31{
32 int len;
33
34 len = snprintf(page, PAGE_SIZE,
35 "real-time tasks = %d\n"
36 "ready for release = %d\n",
37 atomic_read(&rt_task_count),
38 count_tasks_waiting_for_release());
39 return len;
40}
41
42static int proc_read_plugins(char *page, char **start,
43 off_t off, int count,
44 int *eof, void *data)
45{
46 int len;
47
48 len = print_sched_plugins(page, PAGE_SIZE);
49 return len;
50}
51
52static int proc_read_curr(char *page, char **start,
53 off_t off, int count,
54 int *eof, void *data)
55{
56 int len;
57
58 len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name);
59 return len;
60}
61
62/* in litmus/litmus.c */
63int switch_sched_plugin(struct sched_plugin*);
64
65static int proc_write_curr(struct file *file,
66 const char *buffer,
67 unsigned long count,
68 void *data)
69{
70 int len, ret;
71 char name[65];
72 struct sched_plugin* found;
73
74 len = copy_and_chomp(name, sizeof(name), buffer, count);
75 if (len < 0)
76 return len;
77
78 found = find_sched_plugin(name);
79
80 if (found) {
81 ret = switch_sched_plugin(found);
82 if (ret != 0)
83 printk(KERN_INFO "Could not switch plugin: %d\n", ret);
84 } else
85 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
86
87 return len;
88}
89
90#ifdef CONFIG_RELEASE_MASTER
91static int proc_read_release_master(char *page, char **start,
92 off_t off, int count,
93 int *eof, void *data)
94{
95 int len, master;
96 master = atomic_read(&release_master_cpu);
97 if (master == NO_CPU)
98 len = snprintf(page, PAGE_SIZE, "NO_CPU\n");
99 else
100 len = snprintf(page, PAGE_SIZE, "%d\n", master);
101 return len;
102}
103
104static int proc_write_release_master(struct file *file,
105 const char *buffer,
106 unsigned long count,
107 void *data)
108{
109 int cpu, err, len, online = 0;
110 char msg[64];
111
112 len = copy_and_chomp(msg, sizeof(msg), buffer, count);
113
114 if (len < 0)
115 return len;
116
117 if (strcmp(msg, "NO_CPU") == 0)
118 atomic_set(&release_master_cpu, NO_CPU);
119 else {
120 err = sscanf(msg, "%d", &cpu);
121 if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
122 atomic_set(&release_master_cpu, cpu);
123 } else {
124 TRACE("invalid release master: '%s' "
125 "(err:%d cpu:%d online:%d)\n",
126 msg, err, cpu, online);
127 len = -EINVAL;
128 }
129 }
130 return len;
131}
132#endif
133
134int __init init_litmus_proc(void)
135{
136 litmus_dir = proc_mkdir("litmus", NULL);
137 if (!litmus_dir) {
138 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
139 return -ENOMEM;
140 }
141
142 curr_file = create_proc_entry("active_plugin",
143 0644, litmus_dir);
144 if (!curr_file) {
145 printk(KERN_ERR "Could not allocate active_plugin "
146 "procfs entry.\n");
147 return -ENOMEM;
148 }
149 curr_file->read_proc = proc_read_curr;
150 curr_file->write_proc = proc_write_curr;
151
152#ifdef CONFIG_RELEASE_MASTER
153 release_master_file = create_proc_entry("release_master",
154 0644, litmus_dir);
155 if (!release_master_file) {
156 printk(KERN_ERR "Could not allocate release_master "
157 "procfs entry.\n");
158 return -ENOMEM;
159 }
160 release_master_file->read_proc = proc_read_release_master;
161 release_master_file->write_proc = proc_write_release_master;
162#endif
163
164 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
165 proc_read_stats, NULL);
166
167 plugs_dir = proc_mkdir("plugins", litmus_dir);
168 if (!plugs_dir){
169 printk(KERN_ERR "Could not allocate plugins directory "
170 "procfs entry.\n");
171 return -ENOMEM;
172 }
173
174 plugs_file = create_proc_read_entry("loaded", 0444, plugs_dir,
175 proc_read_plugins, NULL);
176
177 return 0;
178}
179
180void exit_litmus_proc(void)
181{
182 if (plugs_file)
183 remove_proc_entry("loaded", plugs_dir);
184 if (plugs_dir)
185 remove_proc_entry("plugins", litmus_dir);
186 if (stat_file)
187 remove_proc_entry("stats", litmus_dir);
188 if (curr_file)
189 remove_proc_entry("active_plugin", litmus_dir);
190#ifdef CONFIG_RELEASE_MASTER
191 if (release_master_file)
192 remove_proc_entry("release_master", litmus_dir);
193#endif
194 if (litmus_dir)
195 remove_proc_entry("litmus", NULL);
196}
197
198long make_plugin_proc_dir(struct sched_plugin* plugin,
199 struct proc_dir_entry** pde_in)
200{
201 struct proc_dir_entry *pde_new = NULL;
202 long rv;
203
204 if (!plugin || !plugin->plugin_name){
205 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
206 __func__);
207 rv = -EINVAL;
208 goto out_no_pde;
209 }
210
211 if (!plugs_dir){
212 printk(KERN_ERR "Could not make plugin sub-directory, because "
213 "/proc/litmus/plugins does not exist.\n");
214 rv = -ENOENT;
215 goto out_no_pde;
216 }
217
218 pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
219 if (!pde_new){
220 printk(KERN_ERR "Could not make plugin sub-directory: "
221 "out of memory?.\n");
222 rv = -ENOMEM;
223 goto out_no_pde;
224 }
225
226 rv = 0;
227 *pde_in = pde_new;
228 goto out_ok;
229
230out_no_pde:
231 *pde_in = NULL;
232out_ok:
233 return rv;
234}
235
236void remove_plugin_proc_dir(struct sched_plugin* plugin)
237{
238 if (!plugin || !plugin->plugin_name){
239 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
240 __func__);
241 return;
242 }
243 remove_proc_entry(plugin->plugin_name, plugs_dir);
244}
245
246
247
248/* misc. I/O helper functions */
249
250int copy_and_chomp(char *kbuf, unsigned long ksize,
251 __user const char* ubuf, unsigned long ulength)
252{
253 /* caller must provide buffer space */
254 BUG_ON(!ksize);
255
256 ksize--; /* leave space for null byte */
257
258 if (ksize > ulength)
259 ksize = ulength;
260
261 if(copy_from_user(kbuf, ubuf, ksize))
262 return -EFAULT;
263
264 kbuf[ksize] = '\0';
265
266 /* chomp kbuf */
267 if (ksize > 0 && kbuf[ksize - 1] == '\n')
268 kbuf[ksize - 1] = '\0';
269
270 return ksize;
271}
272
273/* helper functions for clustered plugins */
/*
 * Textual names of the cluster levels; parse_cache_level() and
 * cache_level_name() index this table with values in the range
 * GLOBAL_CLUSTER..L3_CLUSTER.
 */
static const char* cache_level_names[] = {
	"ALL", "L1", "L2", "L3",
};
280
281int parse_cache_level(const char *cache_name, enum cache_level *level)
282{
283 int err = -EINVAL;
284 int i;
285 /* do a quick and dirty comparison to find the cluster size */
286 for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
287 if (!strcmp(cache_name, cache_level_names[i])) {
288 *level = (enum cache_level) i;
289 err = 0;
290 break;
291 }
292 return err;
293}
294
295const char* cache_level_name(enum cache_level level)
296{
297 int idx = level;
298
299 if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
300 return cache_level_names[idx];
301 else
302 return "INVALID";
303}
304
305
306/* proc file interface to configure the cluster size */
307static int proc_read_cluster_size(char *page, char **start,
308 off_t off, int count,
309 int *eof, void *data)
310{
311 return snprintf(page, PAGE_SIZE, "%s\n",
312 cache_level_name(*((enum cache_level*) data)));;
313}
314
315static int proc_write_cluster_size(struct file *file,
316 const char *buffer,
317 unsigned long count,
318 void *data)
319{
320 int len;
321 char cache_name[8];
322
323 len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
324
325 if (len > 0 && parse_cache_level(cache_name, (enum cache_level*) data))
326 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
327
328 return len;
329}
330
331struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
332 enum cache_level* level)
333{
334 struct proc_dir_entry* cluster_file;
335
336 cluster_file = create_proc_entry("cluster", 0644, parent);
337 if (!cluster_file) {
338 printk(KERN_ERR "Could not allocate %s/cluster "
339 "procfs entry.\n", parent->name);
340 } else {
341 cluster_file->read_proc = proc_read_cluster_size;
342 cluster_file->write_proc = proc_write_cluster_size;
343 cluster_file->data = level;
344 }
345 return cluster_file;
346}
347
diff --git a/litmus/locking.c b/litmus/locking.c
new file mode 100644
index 00000000000..ca5a073a989
--- /dev/null
+++ b/litmus/locking.c
@@ -0,0 +1,171 @@
1#include <litmus/fdso.h>
2
3#ifdef CONFIG_LITMUS_LOCKING
4
5#include <litmus/sched_plugin.h>
6#include <litmus/trace.h>
7#include <litmus/wait.h>
8
9static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
10static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
11static int close_generic_lock(struct od_table_entry* entry);
12static void destroy_generic_lock(obj_type_t type, void* sem);
13
14struct fdso_ops generic_lock_ops = {
15 .create = create_generic_lock,
16 .open = open_generic_lock,
17 .close = close_generic_lock,
18 .destroy = destroy_generic_lock
19};
20
21static inline bool is_lock(struct od_table_entry* entry)
22{
23 return entry->class == &generic_lock_ops;
24}
25
26static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
27{
28 BUG_ON(!is_lock(entry));
29 return (struct litmus_lock*) entry->obj->obj;
30}
31
32static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
33{
34 struct litmus_lock* lock;
35 int err;
36
37 err = litmus->allocate_lock(&lock, type, arg);
38 if (err == 0)
39 *obj_ref = lock;
40 return err;
41}
42
43static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
44{
45 struct litmus_lock* lock = get_lock(entry);
46 if (lock->ops->open)
47 return lock->ops->open(lock, arg);
48 else
49 return 0; /* default: any task can open it */
50}
51
52static int close_generic_lock(struct od_table_entry* entry)
53{
54 struct litmus_lock* lock = get_lock(entry);
55 if (lock->ops->close)
56 return lock->ops->close(lock);
57 else
58 return 0; /* default: closing succeeds */
59}
60
61static void destroy_generic_lock(obj_type_t type, void* obj)
62{
63 struct litmus_lock* lock = (struct litmus_lock*) obj;
64 lock->ops->deallocate(lock);
65}
66
67asmlinkage long sys_litmus_lock(int lock_od)
68{
69 long err = -EINVAL;
70 struct od_table_entry* entry;
71 struct litmus_lock* l;
72
73 TS_LOCK_START;
74
75 entry = get_entry_for_od(lock_od);
76 if (entry && is_lock(entry)) {
77 l = get_lock(entry);
78 TRACE_CUR("attempts to lock 0x%p\n", l);
79 err = l->ops->lock(l);
80 }
81
82 /* Note: task my have been suspended or preempted in between! Take
83 * this into account when computing overheads. */
84 TS_LOCK_END;
85
86 return err;
87}
88
89asmlinkage long sys_litmus_unlock(int lock_od)
90{
91 long err = -EINVAL;
92 struct od_table_entry* entry;
93 struct litmus_lock* l;
94
95 TS_UNLOCK_START;
96
97 entry = get_entry_for_od(lock_od);
98 if (entry && is_lock(entry)) {
99 l = get_lock(entry);
100 TRACE_CUR("attempts to unlock 0x%p\n", l);
101 err = l->ops->unlock(l);
102 }
103
104 /* Note: task my have been preempted in between! Take this into
105 * account when computing overheads. */
106 TS_UNLOCK_END;
107
108 return err;
109}
110
111struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
112{
113 wait_queue_t* q;
114 struct task_struct* t = NULL;
115
116 if (waitqueue_active(wq)) {
117 q = list_entry(wq->task_list.next,
118 wait_queue_t, task_list);
119 t = (struct task_struct*) q->private;
120 __remove_wait_queue(wq, q);
121 }
122 return(t);
123}
124
125unsigned int __add_wait_queue_prio_exclusive(
126 wait_queue_head_t* head,
127 prio_wait_queue_t *new)
128{
129 struct list_head *pos;
130 unsigned int passed = 0;
131
132 new->wq.flags |= WQ_FLAG_EXCLUSIVE;
133
134 /* find a spot where the new entry is less than the next */
135 list_for_each(pos, &head->task_list) {
136 prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
137 wq.task_list);
138
139 if (unlikely(lt_before(new->priority, queued->priority) ||
140 (new->priority == queued->priority &&
141 new->tie_breaker < queued->tie_breaker))) {
142 /* pos is not less than new, thus insert here */
143 __list_add(&new->wq.task_list, pos->prev, pos);
144 goto out;
145 }
146 passed++;
147 }
148
149 /* if we get to this point either the list is empty or every entry
150 * queued element is less than new.
151 * Let's add new to the end. */
152 list_add_tail(&new->wq.task_list, &head->task_list);
153out:
154 return passed;
155}
156
157#else
158
159struct fdso_ops generic_lock_ops = {};
160
161asmlinkage long sys_litmus_lock(int sem_od)
162{
163 return -ENOSYS;
164}
165
166asmlinkage long sys_litmus_unlock(int sem_od)
167{
168 return -ENOSYS;
169}
170
171#endif
diff --git a/litmus/preempt.c b/litmus/preempt.c
new file mode 100644
index 00000000000..5704d0bf4c0
--- /dev/null
+++ b/litmus/preempt.c
@@ -0,0 +1,133 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/preempt.h>
5
6/* The rescheduling state of each processor.
7 */
8DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
9
10void sched_state_will_schedule(struct task_struct* tsk)
11{
12 /* Litmus hack: we only care about processor-local invocations of
13 * set_tsk_need_resched(). We can't reliably set the flag remotely
14 * since it might race with other updates to the scheduling state. We
15 * can't rely on the runqueue lock protecting updates to the sched
16 * state since processors do not acquire the runqueue locks for all
17 * updates to the sched state (to avoid acquiring two runqueue locks at
18 * the same time). Further, if tsk is residing on a remote processor,
19 * then that processor doesn't actually know yet that it is going to
20 * reschedule; it still must receive an IPI (unless a local invocation
21 * races).
22 */
23 if (likely(task_cpu(tsk) == smp_processor_id())) {
24 VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
25 if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
26 set_sched_state(PICKED_WRONG_TASK);
27 else
28 set_sched_state(WILL_SCHEDULE);
29 } else
30 /* Litmus tasks should never be subject to a remote
31 * set_tsk_need_resched(). */
32 BUG_ON(is_realtime(tsk));
33#ifdef CONFIG_PREEMPT_STATE_TRACE
34 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
35 __builtin_return_address(0));
36#endif
37}
38
39/* Called by the IPI handler after another CPU called smp_send_resched(). */
40void sched_state_ipi(void)
41{
42 /* If the IPI was slow, we might be in any state right now. The IPI is
43 * only meaningful if we are in SHOULD_SCHEDULE. */
44 if (is_in_sched_state(SHOULD_SCHEDULE)) {
45 /* Cause scheduler to be invoked.
46 * This will cause a transition to WILL_SCHEDULE. */
47 set_tsk_need_resched(current);
48 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
49 current->comm, current->pid);
50 } else {
51 /* ignore */
52 TRACE_STATE("ignoring IPI in state %x (%s)\n",
53 get_sched_state(),
54 sched_state_name(get_sched_state()));
55 }
56}
57
58/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
59 * hold the lock that is used to serialize scheduling decisions. */
60void litmus_reschedule(int cpu)
61{
62 int picked_transition_ok = 0;
63 int scheduled_transition_ok = 0;
64
65 /* The (remote) CPU could be in any state. */
66
67 /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
68 * is not aware of the need to reschedule at this point. */
69
70 /* is a context switch in progress? */
71 if (cpu_is_in_sched_state(cpu, TASK_PICKED))
72 picked_transition_ok = sched_state_transition_on(
73 cpu, TASK_PICKED, PICKED_WRONG_TASK);
74
75 if (!picked_transition_ok &&
76 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
77 /* We either raced with the end of the context switch, or the
78 * CPU was in TASK_SCHEDULED anyway. */
79 scheduled_transition_ok = sched_state_transition_on(
80 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
81 }
82
83 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
84 * scheduler to be invoked. */
85 if (scheduled_transition_ok) {
86 if (smp_processor_id() == cpu)
87 set_tsk_need_resched(current);
88 else
89 smp_send_reschedule(cpu);
90 }
91
92 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
93 __FUNCTION__,
94 picked_transition_ok,
95 scheduled_transition_ok);
96}
97
98void litmus_reschedule_local(void)
99{
100 if (is_in_sched_state(TASK_PICKED))
101 set_sched_state(PICKED_WRONG_TASK);
102 else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) {
103 set_sched_state(WILL_SCHEDULE);
104 set_tsk_need_resched(current);
105 }
106}
107
108#ifdef CONFIG_DEBUG_KERNEL
109
110void sched_state_plugin_check(void)
111{
112 if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
113 TRACE("!!!! plugin did not call sched_state_task_picked()!"
114 "Calling sched_state_task_picked() is mandatory---fix this.\n");
115 set_sched_state(TASK_PICKED);
116 }
117}
118
119#define NAME_CHECK(x) case x: return #x
120const char* sched_state_name(int s)
121{
122 switch (s) {
123 NAME_CHECK(TASK_SCHEDULED);
124 NAME_CHECK(SHOULD_SCHEDULE);
125 NAME_CHECK(WILL_SCHEDULE);
126 NAME_CHECK(TASK_PICKED);
127 NAME_CHECK(PICKED_WRONG_TASK);
128 default:
129 return "UNKNOWN";
130 };
131}
132
133#endif
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 00000000000..d0b796611be
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,359 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h>
17
18#include <litmus/rt_domain.h>
19
20#include <litmus/trace.h>
21
22#include <litmus/bheap.h>
23
24/* Uncomment when debugging timer races... */
25#if 0
26#define VTRACE_TASK TRACE_TASK
27#define VTRACE TRACE
28#else
29#define VTRACE_TASK(t, fmt, args...) /* shut up */
30#define VTRACE(fmt, args...) /* be quiet already */
31#endif
32
33static int dummy_resched(rt_domain_t *rt)
34{
35 return 0;
36}
37
/* Default heap order installed by rt_domain_init(): always returns 0. */
static int dummy_order(struct bheap_node* a, struct bheap_node* b)
{
	int result = 0;

	return result;
}
42
43/* default implementation: use default lock */
44static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
45{
46 merge_ready(rt, tasks);
47}
48
49static unsigned int time2slot(lt_t time)
50{
51 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
52}
53
54static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
55{
56 unsigned long flags;
57 struct release_heap* rh;
58 rh = container_of(timer, struct release_heap, timer);
59
60 TS_RELEASE_LATENCY(rh->release_time);
61
62 VTRACE("on_release_timer(0x%p) starts.\n", timer);
63
64 TS_RELEASE_START;
65
66
67 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
68 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
69 /* remove from release queue */
70 list_del(&rh->list);
71 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
72 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
73
74 /* call release callback */
75 rh->dom->release_jobs(rh->dom, &rh->heap);
76 /* WARNING: rh can be referenced from other CPUs from now on. */
77
78 TS_RELEASE_END;
79
80 VTRACE("on_release_timer(0x%p) ends.\n", timer);
81
82 return HRTIMER_NORESTART;
83}
84
85/* allocated in litmus.c */
86struct kmem_cache * release_heap_cache;
87
88struct release_heap* release_heap_alloc(int gfp_flags)
89{
90 struct release_heap* rh;
91 rh= kmem_cache_alloc(release_heap_cache, gfp_flags);
92 if (rh) {
93 /* initialize timer */
94 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
95 rh->timer.function = on_release_timer;
96 }
97 return rh;
98}
99
100void release_heap_free(struct release_heap* rh)
101{
102 /* make sure timer is no longer in use */
103 hrtimer_cancel(&rh->timer);
104 kmem_cache_free(release_heap_cache, rh);
105}
106
107/* Caller must hold release lock.
108 * Will return heap for given time. If no such heap exists prior to
109 * the invocation it will be created.
110 */
111static struct release_heap* get_release_heap(rt_domain_t *rt,
112 struct task_struct* t,
113 int use_task_heap)
114{
115 struct list_head* pos;
116 struct release_heap* heap = NULL;
117 struct release_heap* rh;
118 lt_t release_time = get_release(t);
119 unsigned int slot = time2slot(release_time);
120
121 /* initialize pos for the case that the list is empty */
122 pos = rt->release_queue.slot[slot].next;
123 list_for_each(pos, &rt->release_queue.slot[slot]) {
124 rh = list_entry(pos, struct release_heap, list);
125 if (release_time == rh->release_time) {
126 /* perfect match -- this happens on hyperperiod
127 * boundaries
128 */
129 heap = rh;
130 break;
131 } else if (lt_before(release_time, rh->release_time)) {
132 /* we need to insert a new node since rh is
133 * already in the future
134 */
135 break;
136 }
137 }
138 if (!heap && use_task_heap) {
139 /* use pre-allocated release heap */
140 rh = tsk_rt(t)->rel_heap;
141
142 rh->dom = rt;
143 rh->release_time = release_time;
144
145 /* add to release queue */
146 list_add(&rh->list, pos->prev);
147 heap = rh;
148 }
149 return heap;
150}
151
152static void reinit_release_heap(struct task_struct* t)
153{
154 struct release_heap* rh;
155
156 /* use pre-allocated release heap */
157 rh = tsk_rt(t)->rel_heap;
158
159 /* Make sure it is safe to use. The timer callback could still
160 * be executing on another CPU; hrtimer_cancel() will wait
161 * until the timer callback has completed. However, under no
162 * circumstances should the timer be active (= yet to be
163 * triggered).
164 *
165 * WARNING: If the CPU still holds the release_lock at this point,
166 * deadlock may occur!
167 */
168 BUG_ON(hrtimer_cancel(&rh->timer));
169
170 /* initialize */
171 bheap_init(&rh->heap);
172#ifdef CONFIG_RELEASE_MASTER
173 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
174#endif
175}
176/* arm_release_timer() - start local release timer or trigger
177 * remote timer (pull timer)
178 *
179 * Called by add_release() with:
180 * - tobe_lock taken
181 * - IRQ disabled
182 */
183#ifdef CONFIG_RELEASE_MASTER
184#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
185static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
186#else
187static void arm_release_timer(rt_domain_t *_rt)
188#endif
189{
190 rt_domain_t *rt = _rt;
191 struct list_head list;
192 struct list_head *pos, *safe;
193 struct task_struct* t;
194 struct release_heap* rh;
195
196 VTRACE("arm_release_timer() at %llu\n", litmus_clock());
197 list_replace_init(&rt->tobe_released, &list);
198
199 list_for_each_safe(pos, safe, &list) {
200 /* pick task of work list */
201 t = list_entry(pos, struct task_struct, rt_param.list);
202 sched_trace_task_release(t);
203 list_del(pos);
204
205 /* put into release heap while holding release_lock */
206 raw_spin_lock(&rt->release_lock);
207 VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
208
209 rh = get_release_heap(rt, t, 0);
210 if (!rh) {
211 /* need to use our own, but drop lock first */
212 raw_spin_unlock(&rt->release_lock);
213 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
214 &rt->release_lock);
215
216 reinit_release_heap(t);
217 VTRACE_TASK(t, "release_heap ready\n");
218
219 raw_spin_lock(&rt->release_lock);
220 VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
221 &rt->release_lock);
222
223 rh = get_release_heap(rt, t, 1);
224 }
225 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
226 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
227
228 raw_spin_unlock(&rt->release_lock);
229 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
230
231 /* To avoid arming the timer multiple times, we only let the
232 * owner do the arming (which is the "first" task to reference
233 * this release_heap anyway).
234 */
235 if (rh == tsk_rt(t)->rel_heap) {
236 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
237 /* we cannot arm the timer using hrtimer_start()
238 * as it may deadlock on rq->lock
239 *
240 * PINNED mode is ok on both local and remote CPU
241 */
242#ifdef CONFIG_RELEASE_MASTER
243 if (rt->release_master == NO_CPU &&
244 target_cpu == NO_CPU)
245#endif
246 __hrtimer_start_range_ns(&rh->timer,
247 ns_to_ktime(rh->release_time),
248 0, HRTIMER_MODE_ABS_PINNED, 0);
249#ifdef CONFIG_RELEASE_MASTER
250 else
251 hrtimer_start_on(
252 /* target_cpu overrides release master */
253 (target_cpu != NO_CPU ?
254 target_cpu : rt->release_master),
255 &rh->info, &rh->timer,
256 ns_to_ktime(rh->release_time),
257 HRTIMER_MODE_ABS_PINNED);
258#endif
259 } else
260 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
261 }
262}
263
264void rt_domain_init(rt_domain_t *rt,
265 bheap_prio_t order,
266 check_resched_needed_t check,
267 release_jobs_t release
268 )
269{
270 int i;
271
272 BUG_ON(!rt);
273 if (!check)
274 check = dummy_resched;
275 if (!release)
276 release = default_release_jobs;
277 if (!order)
278 order = dummy_order;
279
280#ifdef CONFIG_RELEASE_MASTER
281 rt->release_master = NO_CPU;
282#endif
283
284 bheap_init(&rt->ready_queue);
285 INIT_LIST_HEAD(&rt->tobe_released);
286 for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
287 INIT_LIST_HEAD(&rt->release_queue.slot[i]);
288
289 raw_spin_lock_init(&rt->ready_lock);
290 raw_spin_lock_init(&rt->release_lock);
291 raw_spin_lock_init(&rt->tobe_lock);
292
293 rt->check_resched = check;
294 rt->release_jobs = release;
295 rt->order = order;
296}
297
298/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
299 * @new: the newly released task
300 */
301void __add_ready(rt_domain_t* rt, struct task_struct *new)
302{
303 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu "
304 "to ready queue at %llu\n",
305 new->comm, new->pid,
306 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
307 get_release(new), litmus_clock());
308
309 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
310
311 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
312 rt->check_resched(rt);
313}
314
315/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
316 * @tasks - the newly released tasks
317 */
318void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
319{
320 bheap_union(rt->order, &rt->ready_queue, tasks);
321 rt->check_resched(rt);
322}
323
324
325#ifdef CONFIG_RELEASE_MASTER
326void __add_release_on(rt_domain_t* rt, struct task_struct *task,
327 int target_cpu)
328{
329 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
330 get_release(task), target_cpu);
331 list_add(&tsk_rt(task)->list, &rt->tobe_released);
332 task->rt_param.domain = rt;
333
334 /* start release timer */
335 TS_SCHED2_START(task);
336
337 arm_release_timer_on(rt, target_cpu);
338
339 TS_SCHED2_END(task);
340}
341#endif
342
343/* add_release - add a real-time task to the rt release queue.
344 * @task: the sleeping task
345 */
346void __add_release(rt_domain_t* rt, struct task_struct *task)
347{
348 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
349 list_add(&tsk_rt(task)->list, &rt->tobe_released);
350 task->rt_param.domain = rt;
351
352 /* start release timer */
353 TS_SCHED2_START(task);
354
355 arm_release_timer(rt);
356
357 TS_SCHED2_END(task);
358}
359
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
new file mode 100644
index 00000000000..b0c16e34d2c
--- /dev/null
+++ b/litmus/sched_cedf.c
@@ -0,0 +1,864 @@
1/*
2 * litmus/sched_cedf.c
3 *
4 * Implementation of the C-EDF scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
 * - CPUs are clustered around L2 or L3 caches.
 * - The cluster topology is automatically detected (this is arch dependent
 *   and works only on x86 at the moment --- and only with modern
 *   CPUs that export cpuid4 information).
 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
 *   the programmer needs to be aware of the topology to place tasks
 *   in the desired cluster.
14 * - default clustering is around L2 cache (cache index = 2)
15 * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
16 * online_cpus are placed in a single cluster).
17 *
18 * For details on functions, take a look at sched_gsn_edf.c
19 *
20 * Currently, we do not support changes in the number of online cpus.
21 * If the num_online_cpus() dynamically changes, the plugin is broken.
22 *
23 * This version uses the simple approach and serializes all scheduling
24 * decisions by the use of a queue lock. This is probably not the
25 * best way to do it, but it should suffice for now.
26 */
27
28#include <linux/spinlock.h>
29#include <linux/percpu.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32
33#include <linux/module.h>
34
35#include <litmus/litmus.h>
36#include <litmus/jobs.h>
37#include <litmus/preempt.h>
38#include <litmus/budget.h>
39#include <litmus/sched_plugin.h>
40#include <litmus/edf_common.h>
41#include <litmus/sched_trace.h>
42
43#include <litmus/clustered.h>
44
45#include <litmus/bheap.h>
46
47#ifdef CONFIG_SCHED_CPU_AFFINITY
48#include <litmus/affinity.h>
49#endif
50
51/* to configure the cluster size */
52#include <litmus/litmus_proc.h>
53#include <linux/uaccess.h>
54
55/* Reference configuration variable. Determines which cache level is used to
56 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
57 * all CPUs form a single cluster (just like GSN-EDF).
58 */
59static enum cache_level cluster_config = GLOBAL_CLUSTER;
60
61struct clusterdomain;
62
63/* cpu_entry_t - maintain the linked and scheduled state
64 *
65 * A cpu also contains a pointer to the cedf_domain_t cluster
66 * that owns it (struct clusterdomain*)
67 */
68typedef struct {
69 int cpu;
70 struct clusterdomain* cluster; /* owning cluster */
71 struct task_struct* linked; /* only RT tasks */
72 struct task_struct* scheduled; /* only RT tasks */
73 atomic_t will_schedule; /* prevent unneeded IPIs */
74 struct bheap_node* hn;
75} cpu_entry_t;
76
77/* one cpu_entry_t per CPU */
78DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
79
80#define set_will_schedule() \
81 (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
82#define clear_will_schedule() \
83 (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
84#define test_will_schedule(cpu) \
85 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
86
87/*
88 * In C-EDF there is a cedf domain _per_ cluster
89 * The number of clusters is dynamically determined accordingly to the
90 * total cpu number and the cluster size
91 */
92typedef struct clusterdomain {
93 /* rt_domain for this cluster */
94 rt_domain_t domain;
95 /* cpus in this cluster */
96 cpu_entry_t* *cpus;
97 /* map of this cluster cpus */
98 cpumask_var_t cpu_map;
99 /* the cpus queue themselves according to priority in here */
100 struct bheap_node *heap_node;
101 struct bheap cpu_heap;
102 /* lock for this cluster */
103#define cluster_lock domain.ready_lock
104} cedf_domain_t;
105
106/* a cedf_domain per cluster; allocation is done at init/activation time */
107cedf_domain_t *cedf;
108
109#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
110#define task_cpu_cluster(task) remote_cluster(get_partition(task))
111
112/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
113 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
114 * information during the initialization of the plugin (e.g., topology)
115#define WANT_ALL_SCHED_EVENTS
116 */
117#define VERBOSE_INIT
118
119static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
120{
121 cpu_entry_t *a, *b;
122 a = _a->value;
123 b = _b->value;
124 /* Note that a and b are inverted: we want the lowest-priority CPU at
125 * the top of the heap.
126 */
127 return edf_higher_prio(b->linked, a->linked);
128}
129
130/* update_cpu_position - Move the cpu entry to the correct place to maintain
131 * order in the cpu queue. Caller must hold cedf lock.
132 */
133static void update_cpu_position(cpu_entry_t *entry)
134{
135 cedf_domain_t *cluster = entry->cluster;
136
137 if (likely(bheap_node_in_heap(entry->hn)))
138 bheap_delete(cpu_lower_prio,
139 &cluster->cpu_heap,
140 entry->hn);
141
142 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
143}
144
145/* caller must hold cedf lock */
146static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
147{
148 struct bheap_node* hn;
149 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
150 return hn->value;
151}
152
153
154/* link_task_to_cpu - Update the link of a CPU.
155 * Handles the case where the to-be-linked task is already
156 * scheduled on a different CPU.
157 */
158static noinline void link_task_to_cpu(struct task_struct* linked,
159 cpu_entry_t *entry)
160{
161 cpu_entry_t *sched;
162 struct task_struct* tmp;
163 int on_cpu;
164
165 BUG_ON(linked && !is_realtime(linked));
166
167 /* Currently linked task is set to be unlinked. */
168 if (entry->linked) {
169 entry->linked->rt_param.linked_on = NO_CPU;
170 }
171
172 /* Link new task to CPU. */
173 if (linked) {
174 set_rt_flags(linked, RT_F_RUNNING);
175 /* handle task is already scheduled somewhere! */
176 on_cpu = linked->rt_param.scheduled_on;
177 if (on_cpu != NO_CPU) {
178 sched = &per_cpu(cedf_cpu_entries, on_cpu);
179 /* this should only happen if not linked already */
180 BUG_ON(sched->linked == linked);
181
182 /* If we are already scheduled on the CPU to which we
183 * wanted to link, we don't need to do the swap --
184 * we just link ourselves to the CPU and depend on
185 * the caller to get things right.
186 */
187 if (entry != sched) {
188 TRACE_TASK(linked,
189 "already scheduled on %d, updating link.\n",
190 sched->cpu);
191 tmp = sched->linked;
192 linked->rt_param.linked_on = sched->cpu;
193 sched->linked = linked;
194 update_cpu_position(sched);
195 linked = tmp;
196 }
197 }
198 if (linked) /* might be NULL due to swap */
199 linked->rt_param.linked_on = entry->cpu;
200 }
201 entry->linked = linked;
202#ifdef WANT_ALL_SCHED_EVENTS
203 if (linked)
204 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
205 else
206 TRACE("NULL linked to %d.\n", entry->cpu);
207#endif
208 update_cpu_position(entry);
209}
210
211/* unlink - Make sure a task is not linked any longer to an entry
212 * where it was linked before. Must hold cedf_lock.
213 */
214static noinline void unlink(struct task_struct* t)
215{
216 cpu_entry_t *entry;
217
218 if (t->rt_param.linked_on != NO_CPU) {
219 /* unlink */
220 entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
221 t->rt_param.linked_on = NO_CPU;
222 link_task_to_cpu(NULL, entry);
223 } else if (is_queued(t)) {
224 /* This is an interesting situation: t is scheduled,
225 * but was just recently unlinked. It cannot be
226 * linked anywhere else (because then it would have
227 * been relinked to this CPU), thus it must be in some
228 * queue. We must remove it from the list in this
229 * case.
230 *
231 * in C-EDF case is should be somewhere in the queue for
232 * its domain, therefore and we can get the domain using
233 * task_cpu_cluster
234 */
235 remove(&(task_cpu_cluster(t))->domain, t);
236 }
237}
238
239
240/* preempt - force a CPU to reschedule
241 */
242static void preempt(cpu_entry_t *entry)
243{
244 preempt_if_preemptable(entry->scheduled, entry->cpu);
245}
246
247/* requeue - Put an unlinked task into gsn-edf domain.
248 * Caller must hold cedf_lock.
249 */
250static noinline void requeue(struct task_struct* task)
251{
252 cedf_domain_t *cluster = task_cpu_cluster(task);
253 BUG_ON(!task);
254 /* sanity check before insertion */
255 BUG_ON(is_queued(task));
256
257 if (is_released(task, litmus_clock()))
258 __add_ready(&cluster->domain, task);
259 else {
260 /* it has got to wait */
261 add_release(&cluster->domain, task);
262 }
263}
264
#ifdef CONFIG_SCHED_CPU_AFFINITY
/* cedf_get_nearest_available_cpu - pick the CPU selected by
 * get_nearest_available_cpu() starting from @start, but only accept it if
 * it belongs to @cluster.
 *
 * Returns the cpu entry, or NULL if none was found or the candidate lies
 * outside the cluster's cpu_map.
 */
static cpu_entry_t* cedf_get_nearest_available_cpu(
				cedf_domain_t *cluster, cpu_entry_t *start)
{
	cpu_entry_t *candidate;

	/* the macro stores its result in its first argument */
	get_nearest_available_cpu(candidate, start, cedf_cpu_entries,
#ifdef CONFIG_RELEASE_MASTER
		cluster->domain.release_master
#else
		NO_CPU
#endif
		);

	if (!candidate)
		return NULL;

	/* make sure CPU is in our cluster */
	if (!cpu_isset(candidate->cpu, *cluster->cpu_map))
		return NULL;

	return candidate;
}
#endif
286
287
288/* check for any necessary preemptions */
289static void check_for_preemptions(cedf_domain_t *cluster)
290{
291 struct task_struct *task;
292 cpu_entry_t *last;
293
294 for(last = lowest_prio_cpu(cluster);
295 edf_preemption_needed(&cluster->domain, last->linked);
296 last = lowest_prio_cpu(cluster)) {
297 /* preemption necessary */
298 task = __take_ready(&cluster->domain);
299 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
300 task->pid, last->cpu);
301#ifdef CONFIG_SCHED_CPU_AFFINITY
302 {
303 cpu_entry_t *affinity =
304 cedf_get_nearest_available_cpu(cluster,
305 &per_cpu(cedf_cpu_entries, task_cpu(task)));
306 if(affinity)
307 last = affinity;
308 else if(requeue_preempted_job(last->linked))
309 requeue(last->linked);
310 }
311#else
312 if (requeue_preempted_job(last->linked))
313 requeue(last->linked);
314#endif
315 link_task_to_cpu(task, last);
316 preempt(last);
317 }
318}
319
320/* cedf_job_arrival: task is either resumed or released */
321static noinline void cedf_job_arrival(struct task_struct* task)
322{
323 cedf_domain_t *cluster = task_cpu_cluster(task);
324 BUG_ON(!task);
325
326 requeue(task);
327 check_for_preemptions(cluster);
328}
329
330static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
331{
332 cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
333 unsigned long flags;
334
335 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
336
337 __merge_ready(&cluster->domain, tasks);
338 check_for_preemptions(cluster);
339
340 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
341}
342
343/* caller holds cedf_lock */
344static noinline void job_completion(struct task_struct *t, int forced)
345{
346 BUG_ON(!t);
347
348 sched_trace_task_completion(t, forced);
349
350 TRACE_TASK(t, "job_completion().\n");
351
352 /* set flags */
353 set_rt_flags(t, RT_F_SLEEP);
354 /* prepare for next period */
355 prepare_for_next_period(t);
356 if (is_released(t, litmus_clock()))
357 sched_trace_task_release(t);
358 /* unlink */
359 unlink(t);
360 /* requeue
361 * But don't requeue a blocking task. */
362 if (is_running(t))
363 cedf_job_arrival(t);
364}
365
366/* cedf_tick - this function is called for every local timer
367 * interrupt.
368 *
369 * checks whether the current task has expired and checks
370 * whether we need to preempt it if it has not expired
371 */
372static void cedf_tick(struct task_struct* t)
373{
374 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
375 if (!is_np(t)) {
376 /* np tasks will be preempted when they become
377 * preemptable again
378 */
379 litmus_reschedule_local();
380 set_will_schedule();
381 TRACE("cedf_scheduler_tick: "
382 "%d is preemptable "
383 " => FORCE_RESCHED\n", t->pid);
384 } else if (is_user_np(t)) {
385 TRACE("cedf_scheduler_tick: "
386 "%d is non-preemptable, "
387 "preemption delayed.\n", t->pid);
388 request_exit_np(t);
389 }
390 }
391}
392
393/* Getting schedule() right is a bit tricky. schedule() may not make any
394 * assumptions on the state of the current task since it may be called for a
395 * number of reasons. The reasons include a scheduler_tick() determined that it
396 * was necessary, because sys_exit_np() was called, because some Linux
397 * subsystem determined so, or even (in the worst case) because there is a bug
398 * hidden somewhere. Thus, we must take extreme care to determine what the
399 * current state is.
400 *
401 * The CPU could currently be scheduling a task (or not), be linked (or not).
402 *
403 * The following assertions for the scheduled task could hold:
404 *
405 * - !is_running(scheduled) // the job blocks
406 * - scheduled->timeslice == 0 // the job completed (forcefully)
407 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
408 * - linked != scheduled // we need to reschedule (for any reason)
409 * - is_np(scheduled) // rescheduling must be delayed,
410 * sys_exit_np must be requested
411 *
412 * Any of these can occur together.
413 */
414static struct task_struct* cedf_schedule(struct task_struct * prev)
415{
416 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
417 cedf_domain_t *cluster = entry->cluster;
418 int out_of_time, sleep, preempt, np, exists, blocks;
419 struct task_struct* next = NULL;
420
421#ifdef CONFIG_RELEASE_MASTER
422 /* Bail out early if we are the release master.
423 * The release master never schedules any real-time tasks.
424 */
425 if (unlikely(cluster->domain.release_master == entry->cpu)) {
426 sched_state_task_picked();
427 return NULL;
428 }
429#endif
430
431 raw_spin_lock(&cluster->cluster_lock);
432 clear_will_schedule();
433
434 /* sanity checking */
435 BUG_ON(entry->scheduled && entry->scheduled != prev);
436 BUG_ON(entry->scheduled && !is_realtime(prev));
437 BUG_ON(is_realtime(prev) && !entry->scheduled);
438
439 /* (0) Determine state */
440 exists = entry->scheduled != NULL;
441 blocks = exists && !is_running(entry->scheduled);
442 out_of_time = exists &&
443 budget_enforced(entry->scheduled) &&
444 budget_exhausted(entry->scheduled);
445 np = exists && is_np(entry->scheduled);
446 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
447 preempt = entry->scheduled != entry->linked;
448
449#ifdef WANT_ALL_SCHED_EVENTS
450 TRACE_TASK(prev, "invoked cedf_schedule.\n");
451#endif
452
453 if (exists)
454 TRACE_TASK(prev,
455 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
456 "state:%d sig:%d\n",
457 blocks, out_of_time, np, sleep, preempt,
458 prev->state, signal_pending(prev));
459 if (entry->linked && preempt)
460 TRACE_TASK(prev, "will be preempted by %s/%d\n",
461 entry->linked->comm, entry->linked->pid);
462
463
464 /* If a task blocks we have no choice but to reschedule.
465 */
466 if (blocks)
467 unlink(entry->scheduled);
468
469 /* Request a sys_exit_np() call if we would like to preempt but cannot.
470 * We need to make sure to update the link structure anyway in case
471 * that we are still linked. Multiple calls to request_exit_np() don't
472 * hurt.
473 */
474 if (np && (out_of_time || preempt || sleep)) {
475 unlink(entry->scheduled);
476 request_exit_np(entry->scheduled);
477 }
478
479 /* Any task that is preemptable and either exhausts its execution
480 * budget or wants to sleep completes. We may have to reschedule after
481 * this. Don't do a job completion if we block (can't have timers running
482 * for blocked jobs).
483 */
484 if (!np && (out_of_time || sleep) && !blocks)
485 job_completion(entry->scheduled, !sleep);
486
487 /* Link pending task if we became unlinked.
488 */
489 if (!entry->linked)
490 link_task_to_cpu(__take_ready(&cluster->domain), entry);
491
492 /* The final scheduling decision. Do we need to switch for some reason?
493 * If linked is different from scheduled, then select linked as next.
494 */
495 if ((!np || blocks) &&
496 entry->linked != entry->scheduled) {
497 /* Schedule a linked job? */
498 if (entry->linked) {
499 entry->linked->rt_param.scheduled_on = entry->cpu;
500 next = entry->linked;
501 }
502 if (entry->scheduled) {
503 /* not gonna be scheduled soon */
504 entry->scheduled->rt_param.scheduled_on = NO_CPU;
505 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
506 }
507 } else
508 /* Only override Linux scheduler if we have a real-time task
509 * scheduled that needs to continue.
510 */
511 if (exists)
512 next = prev;
513
514 sched_state_task_picked();
515 raw_spin_unlock(&cluster->cluster_lock);
516
517#ifdef WANT_ALL_SCHED_EVENTS
518 TRACE("cedf_lock released, next=0x%p\n", next);
519
520 if (next)
521 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
522 else if (exists && !next)
523 TRACE("becomes idle at %llu.\n", litmus_clock());
524#endif
525
526
527 return next;
528}
529
530
531/* _finish_switch - we just finished the switch away from prev
532 */
533static void cedf_finish_switch(struct task_struct *prev)
534{
535 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
536
537 entry->scheduled = is_realtime(current) ? current : NULL;
538#ifdef WANT_ALL_SCHED_EVENTS
539 TRACE_TASK(prev, "switched away from\n");
540#endif
541}
542
543
544/* Prepare a task for running in RT mode
545 */
546static void cedf_task_new(struct task_struct * t, int on_rq, int running)
547{
548 unsigned long flags;
549 cpu_entry_t* entry;
550 cedf_domain_t* cluster;
551
552 TRACE("gsn edf: task new %d\n", t->pid);
553
554 /* the cluster doesn't change even if t is running */
555 cluster = task_cpu_cluster(t);
556
557 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
558
559 /* setup job params */
560 release_at(t, litmus_clock());
561
562 if (running) {
563 entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
564 BUG_ON(entry->scheduled);
565
566#ifdef CONFIG_RELEASE_MASTER
567 if (entry->cpu != cluster->domain.release_master) {
568#endif
569 entry->scheduled = t;
570 tsk_rt(t)->scheduled_on = task_cpu(t);
571#ifdef CONFIG_RELEASE_MASTER
572 } else {
573 /* do not schedule on release master */
574 preempt(entry); /* force resched */
575 tsk_rt(t)->scheduled_on = NO_CPU;
576 }
577#endif
578 } else {
579 t->rt_param.scheduled_on = NO_CPU;
580 }
581 t->rt_param.linked_on = NO_CPU;
582
583 cedf_job_arrival(t);
584 raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags);
585}
586
587static void cedf_task_wake_up(struct task_struct *task)
588{
589 unsigned long flags;
590 lt_t now;
591 cedf_domain_t *cluster;
592
593 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
594
595 cluster = task_cpu_cluster(task);
596
597 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
598 /* We need to take suspensions because of semaphores into
599 * account! If a job resumes after being suspended due to acquiring
600 * a semaphore, it should never be treated as a new job release.
601 */
602 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
603 set_rt_flags(task, RT_F_RUNNING);
604 } else {
605 now = litmus_clock();
606 if (is_tardy(task, now)) {
607 /* new sporadic release */
608 release_at(task, now);
609 sched_trace_task_release(task);
610 }
611 else {
612 if (task->rt.time_slice) {
613 /* came back in time before deadline
614 */
615 set_rt_flags(task, RT_F_RUNNING);
616 }
617 }
618 }
619 cedf_job_arrival(task);
620 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
621}
622
623static void cedf_task_block(struct task_struct *t)
624{
625 unsigned long flags;
626 cedf_domain_t *cluster;
627
628 TRACE_TASK(t, "block at %llu\n", litmus_clock());
629
630 cluster = task_cpu_cluster(t);
631
632 /* unlink if necessary */
633 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
634 unlink(t);
635 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
636
637 BUG_ON(!is_realtime(t));
638}
639
640
641static void cedf_task_exit(struct task_struct * t)
642{
643 unsigned long flags;
644 cedf_domain_t *cluster = task_cpu_cluster(t);
645
646 /* unlink if necessary */
647 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
648 unlink(t);
649 if (tsk_rt(t)->scheduled_on != NO_CPU) {
650 cpu_entry_t *cpu;
651 cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on);
652 cpu->scheduled = NULL;
653 tsk_rt(t)->scheduled_on = NO_CPU;
654 }
655 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
656
657 BUG_ON(!is_realtime(t));
658 TRACE_TASK(t, "RIP\n");
659}
660
661static long cedf_admit_task(struct task_struct* tsk)
662{
663 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
664}
665
666/* total number of cluster */
667static int num_clusters;
668/* we do not support cluster of different sizes */
669static unsigned int cluster_size;
670
#ifdef VERBOSE_INIT
/* print_cluster_topology - log the list of CPUs in @mask as the CPUs
 * sharing a cluster with @cpu.
 */
static void print_cluster_topology(cpumask_var_t mask, int cpu)
{
	char buf[255];
	int len;

	/* format into all but the last byte, then terminate explicitly */
	len = cpulist_scnprintf(buf, sizeof(buf) - 1, mask);
	buf[len] = '\0';

	printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
}
#endif
683
684static int clusters_allocated = 0;
685
686static void cleanup_cedf(void)
687{
688 int i;
689
690 if (clusters_allocated) {
691 for (i = 0; i < num_clusters; i++) {
692 kfree(cedf[i].cpus);
693 kfree(cedf[i].heap_node);
694 free_cpumask_var(cedf[i].cpu_map);
695 }
696
697 kfree(cedf);
698 }
699}
700
701static long cedf_activate_plugin(void)
702{
703 int i, j, cpu, ccpu, cpu_count;
704 cpu_entry_t *entry;
705
706 cpumask_var_t mask;
707 int chk = 0;
708
709 /* de-allocate old clusters, if any */
710 cleanup_cedf();
711
712 printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n",
713 cluster_config);
714
715 /* need to get cluster_size first */
716 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
717 return -ENOMEM;
718
719 if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
720 cluster_size = num_online_cpus();
721 } else {
722 chk = get_shared_cpu_map(mask, 0, cluster_config);
723 if (chk) {
724 /* if chk != 0 then it is the max allowed index */
725 printk(KERN_INFO "C-EDF: Cluster configuration = %d "
726 "is not supported on this hardware.\n",
727 cluster_config);
728 /* User should notice that the configuration failed, so
729 * let's bail out. */
730 return -EINVAL;
731 }
732
733 cluster_size = cpumask_weight(mask);
734 }
735
736 if ((num_online_cpus() % cluster_size) != 0) {
737 /* this can't be right, some cpus are left out */
738 printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
739 num_online_cpus(), cluster_size);
740 return -1;
741 }
742
743 num_clusters = num_online_cpus() / cluster_size;
744 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
745 num_clusters, cluster_size);
746
747 /* initialize clusters */
748 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
749 for (i = 0; i < num_clusters; i++) {
750
751 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
752 GFP_ATOMIC);
753 cedf[i].heap_node = kmalloc(
754 cluster_size * sizeof(struct bheap_node),
755 GFP_ATOMIC);
756 bheap_init(&(cedf[i].cpu_heap));
757 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
758
759 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
760 return -ENOMEM;
761#ifdef CONFIG_RELEASE_MASTER
762 cedf[i].domain.release_master = atomic_read(&release_master_cpu);
763#endif
764 }
765
766 /* cycle through cluster and add cpus to them */
767 for (i = 0; i < num_clusters; i++) {
768
769 for_each_online_cpu(cpu) {
770 /* check if the cpu is already in a cluster */
771 for (j = 0; j < num_clusters; j++)
772 if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
773 break;
774 /* if it is in a cluster go to next cpu */
775 if (j < num_clusters &&
776 cpumask_test_cpu(cpu, cedf[j].cpu_map))
777 continue;
778
779 /* this cpu isn't in any cluster */
780 /* get the shared cpus */
781 if (unlikely(cluster_config == GLOBAL_CLUSTER))
782 cpumask_copy(mask, cpu_online_mask);
783 else
784 get_shared_cpu_map(mask, cpu, cluster_config);
785
786 cpumask_copy(cedf[i].cpu_map, mask);
787#ifdef VERBOSE_INIT
788 print_cluster_topology(mask, cpu);
789#endif
790 /* add cpus to current cluster and init cpu_entry_t */
791 cpu_count = 0;
792 for_each_cpu(ccpu, cedf[i].cpu_map) {
793
794 entry = &per_cpu(cedf_cpu_entries, ccpu);
795 cedf[i].cpus[cpu_count] = entry;
796 atomic_set(&entry->will_schedule, 0);
797 entry->cpu = ccpu;
798 entry->cluster = &cedf[i];
799 entry->hn = &(cedf[i].heap_node[cpu_count]);
800 bheap_node_init(&entry->hn, entry);
801
802 cpu_count++;
803
804 entry->linked = NULL;
805 entry->scheduled = NULL;
806#ifdef CONFIG_RELEASE_MASTER
807 /* only add CPUs that should schedule jobs */
808 if (entry->cpu != entry->cluster->domain.release_master)
809#endif
810 update_cpu_position(entry);
811 }
812 /* done with this cluster */
813 break;
814 }
815 }
816
817 free_cpumask_var(mask);
818 clusters_allocated = 1;
819 return 0;
820}
821
822/* Plugin object */
823static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
824 .plugin_name = "C-EDF",
825 .finish_switch = cedf_finish_switch,
826 .tick = cedf_tick,
827 .task_new = cedf_task_new,
828 .complete_job = complete_job,
829 .task_exit = cedf_task_exit,
830 .schedule = cedf_schedule,
831 .task_wake_up = cedf_task_wake_up,
832 .task_block = cedf_task_block,
833 .admit_task = cedf_admit_task,
834 .activate_plugin = cedf_activate_plugin,
835};
836
837static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
838
839static int __init init_cedf(void)
840{
841 int err, fs;
842
843 err = register_sched_plugin(&cedf_plugin);
844 if (!err) {
845 fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir);
846 if (!fs)
847 cluster_file = create_cluster_file(cedf_dir, &cluster_config);
848 else
849 printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n");
850 }
851 return err;
852}
853
854static void clean_cedf(void)
855{
856 cleanup_cedf();
857 if (cluster_file)
858 remove_proc_entry("cluster", cedf_dir);
859 if (cedf_dir)
860 remove_plugin_proc_dir(&cedf_plugin);
861}
862
863module_init(init_cedf);
864module_exit(clean_cedf);
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
new file mode 100644
index 00000000000..c3344b9d288
--- /dev/null
+++ b/litmus/sched_gsn_edf.c
@@ -0,0 +1,1030 @@
1/*
2 * litmus/sched_gsn_edf.c
3 *
4 * Implementation of the GSN-EDF scheduling algorithm.
5 *
6 * This version uses the simple approach and serializes all scheduling
7 * decisions by the use of a queue lock. This is probably not the
8 * best way to do it, but it should suffice for now.
9 */
10
11#include <linux/spinlock.h>
12#include <linux/percpu.h>
13#include <linux/sched.h>
14#include <linux/slab.h>
15
16#include <litmus/litmus.h>
17#include <litmus/jobs.h>
18#include <litmus/sched_plugin.h>
19#include <litmus/edf_common.h>
20#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22
23#include <litmus/preempt.h>
24#include <litmus/budget.h>
25
26#include <litmus/bheap.h>
27
28#ifdef CONFIG_SCHED_CPU_AFFINITY
29#include <litmus/affinity.h>
30#endif
31
32#include <linux/module.h>
33
34/* Overview of GSN-EDF operations.
35 *
36 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
37 * description only covers how the individual operations are implemented in
38 * LITMUS.
39 *
40 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
41 * structure (NOT the actually scheduled
42 * task). If there is another linked task To
43 * already it will set To->linked_on = NO_CPU
44 * (thereby removing its association with this
45 * CPU). However, it will not requeue the
46 * previously linked task (if any). It will set
47 * T's state to RT_F_RUNNING and check whether
48 * it is already running somewhere else. If T
49 * is scheduled somewhere else it will link
50 * it to that CPU instead (and pull the linked
51 * task to cpu). T may be NULL.
52 *
53 * unlink(T) - Unlink removes T from all scheduler data
54 * structures. If it is linked to some CPU it
55 * will link NULL to that CPU. If it is
56 * currently queued in the gsnedf queue it will
57 * be removed from the rt_domain. It is safe to
58 * call unlink(T) if T is not linked. T may not
59 * be NULL.
60 *
61 * requeue(T) - Requeue will insert T into the appropriate
62 * queue. If the system is in real-time mode and
63 * the T is released already, it will go into the
64 * ready queue. If the system is not in
65 * real-time mode, then T will go into the
66 * release queue. If T's release time is in the
67 * future, it will go into the release
68 * queue. That means that T's release time/job
69 * no/etc. has to be updated before requeue(T) is
70 * called. It is not safe to call requeue(T)
71 * when T is already queued. T may not be NULL.
72 *
73 * gsnedf_job_arrival(T) - This is the catch all function when T enters
74 * the system after either a suspension or at a
75 * job release. It will queue T (which means it
76 * is not safe to call gsnedf_job_arrival(T) if
77 * T is already queued) and then check whether a
78 * preemption is necessary. If a preemption is
79 * necessary it will update the linkage
80 * accordingly and cause scheduled to be called
81 * (either with an IPI or need_resched). It is
82 * safe to call gsnedf_job_arrival(T) if T's
83 * next job has not been actually released yet
84 * (release time in the future). T will be put
85 * on the release queue in that case.
86 *
87 * job_completion(T) - Take care of everything that needs to be done
88 * to prepare T for its next release and place
89 * it in the right queue with
90 * gsnedf_job_arrival().
91 *
92 *
93 * When we know that T is linked to CPU then link_task_to_cpu(NULL, CPU) is
94 * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
95 * the functions will automatically propagate pending task from the ready queue
96 * to a linked task. This is the job of the calling function ( by means of
97 * __take_ready).
98 */
99
100
101/* cpu_entry_t - maintain the linked and scheduled state
102 */
103typedef struct {
104 int cpu;
105 struct task_struct* linked; /* only RT tasks */
106 struct task_struct* scheduled; /* only RT tasks */
107 struct bheap_node* hn;
108} cpu_entry_t;
109DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
110
111cpu_entry_t* gsnedf_cpus[NR_CPUS];
112
113/* the cpus queue themselves according to priority in here */
114static struct bheap_node gsnedf_heap_node[NR_CPUS];
115static struct bheap gsnedf_cpu_heap;
116
117static rt_domain_t gsnedf;
118#define gsnedf_lock (gsnedf.ready_lock)
119
120
121/* Uncomment this if you want to see all scheduling decisions in the
122 * TRACE() log.
123#define WANT_ALL_SCHED_EVENTS
124 */
125
126static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
127{
128 cpu_entry_t *a, *b;
129 a = _a->value;
130 b = _b->value;
131 /* Note that a and b are inverted: we want the lowest-priority CPU at
132 * the top of the heap.
133 */
134 return edf_higher_prio(b->linked, a->linked);
135}
136
137/* update_cpu_position - Move the cpu entry to the correct place to maintain
138 * order in the cpu queue. Caller must hold gsnedf lock.
139 */
140static void update_cpu_position(cpu_entry_t *entry)
141{
142 if (likely(bheap_node_in_heap(entry->hn)))
143 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
144 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
145}
146
147/* caller must hold gsnedf lock */
148static cpu_entry_t* lowest_prio_cpu(void)
149{
150 struct bheap_node* hn;
151 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
152 return hn->value;
153}
154
155
156/* link_task_to_cpu - Update the link of a CPU.
157 * Handles the case where the to-be-linked task is already
158 * scheduled on a different CPU.
159 */
160static noinline void link_task_to_cpu(struct task_struct* linked,
161 cpu_entry_t *entry)
162{
163 cpu_entry_t *sched;
164 struct task_struct* tmp;
165 int on_cpu;
166
167 BUG_ON(linked && !is_realtime(linked));
168
169 /* Currently linked task is set to be unlinked. */
170 if (entry->linked) {
171 entry->linked->rt_param.linked_on = NO_CPU;
172 }
173
174 /* Link new task to CPU. */
175 if (linked) {
176 set_rt_flags(linked, RT_F_RUNNING);
177 /* handle task is already scheduled somewhere! */
178 on_cpu = linked->rt_param.scheduled_on;
179 if (on_cpu != NO_CPU) {
180 sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
181 /* this should only happen if not linked already */
182 BUG_ON(sched->linked == linked);
183
184 /* If we are already scheduled on the CPU to which we
185 * wanted to link, we don't need to do the swap --
186 * we just link ourselves to the CPU and depend on
187 * the caller to get things right.
188 */
189 if (entry != sched) {
190 TRACE_TASK(linked,
191 "already scheduled on %d, updating link.\n",
192 sched->cpu);
193 tmp = sched->linked;
194 linked->rt_param.linked_on = sched->cpu;
195 sched->linked = linked;
196 update_cpu_position(sched);
197 linked = tmp;
198 }
199 }
200 if (linked) /* might be NULL due to swap */
201 linked->rt_param.linked_on = entry->cpu;
202 }
203 entry->linked = linked;
204#ifdef WANT_ALL_SCHED_EVENTS
205 if (linked)
206 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
207 else
208 TRACE("NULL linked to %d.\n", entry->cpu);
209#endif
210 update_cpu_position(entry);
211}
212
213/* unlink - Make sure a task is not linked any longer to an entry
214 * where it was linked before. Must hold gsnedf_lock.
215 */
216static noinline void unlink(struct task_struct* t)
217{
218 cpu_entry_t *entry;
219
220 if (t->rt_param.linked_on != NO_CPU) {
221 /* unlink */
222 entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
223 t->rt_param.linked_on = NO_CPU;
224 link_task_to_cpu(NULL, entry);
225 } else if (is_queued(t)) {
226 /* This is an interesting situation: t is scheduled,
227 * but was just recently unlinked. It cannot be
228 * linked anywhere else (because then it would have
229 * been relinked to this CPU), thus it must be in some
230 * queue. We must remove it from the list in this
231 * case.
232 */
233 remove(&gsnedf, t);
234 }
235}
236
237
238/* preempt - force a CPU to reschedule
239 */
240static void preempt(cpu_entry_t *entry)
241{
242 preempt_if_preemptable(entry->scheduled, entry->cpu);
243}
244
245/* requeue - Put an unlinked task into gsn-edf domain.
246 * Caller must hold gsnedf_lock.
247 */
248static noinline void requeue(struct task_struct* task)
249{
250 BUG_ON(!task);
251 /* sanity check before insertion */
252 BUG_ON(is_queued(task));
253
254 if (is_released(task, litmus_clock()))
255 __add_ready(&gsnedf, task);
256 else {
257 /* it has got to wait */
258 add_release(&gsnedf, task);
259 }
260}
261
#ifdef CONFIG_SCHED_CPU_AFFINITY

/* CPU that get_nearest_available_cpu() is told to treat as the release
 * master. This is selected up front because placing #ifdef/#else/#endif
 * inside the argument list of a function-like macro is undefined behaviour
 * in ISO C (it only works as a GNU CPP extension).
 */
#ifdef CONFIG_RELEASE_MASTER
#define GSNEDF_AFFINITY_RELEASE_MASTER gsnedf.release_master
#else
#define GSNEDF_AFFINITY_RELEASE_MASTER NO_CPU
#endif

/* gsnedf_get_nearest_available_cpu - wrapper around the
 * get_nearest_available_cpu() macro for the GSN-EDF per-CPU entries.
 *
 * @start: CPU entry from which the search starts
 *
 * Returns whatever entry the macro stores in its first argument; the caller
 * in this file treats NULL as "no CPU found".
 */
static cpu_entry_t* gsnedf_get_nearest_available_cpu(cpu_entry_t *start)
{
	cpu_entry_t *affinity;

	get_nearest_available_cpu(affinity, start, gsnedf_cpu_entries,
				  GSNEDF_AFFINITY_RELEASE_MASTER);

	return affinity;
}
#endif
278
279/* check for any necessary preemptions */
280static void check_for_preemptions(void)
281{
282 struct task_struct *task;
283 cpu_entry_t *last;
284
285 for (last = lowest_prio_cpu();
286 edf_preemption_needed(&gsnedf, last->linked);
287 last = lowest_prio_cpu()) {
288 /* preemption necessary */
289 task = __take_ready(&gsnedf);
290 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
291 task->pid, last->cpu);
292
293#ifdef CONFIG_SCHED_CPU_AFFINITY
294 {
295 cpu_entry_t *affinity =
296 gsnedf_get_nearest_available_cpu(
297 &per_cpu(gsnedf_cpu_entries, task_cpu(task)));
298 if (affinity)
299 last = affinity;
300 else if (requeue_preempted_job(last->linked))
301 requeue(last->linked);
302 }
303#else
304 if (requeue_preempted_job(last->linked))
305 requeue(last->linked);
306#endif
307
308 link_task_to_cpu(task, last);
309 preempt(last);
310 }
311}
312
313/* gsnedf_job_arrival: task is either resumed or released */
314static noinline void gsnedf_job_arrival(struct task_struct* task)
315{
316 BUG_ON(!task);
317
318 requeue(task);
319 check_for_preemptions();
320}
321
322static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
323{
324 unsigned long flags;
325
326 raw_spin_lock_irqsave(&gsnedf_lock, flags);
327
328 __merge_ready(rt, tasks);
329 check_for_preemptions();
330
331 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
332}
333
334/* caller holds gsnedf_lock */
335static noinline void job_completion(struct task_struct *t, int forced)
336{
337 BUG_ON(!t);
338
339 sched_trace_task_completion(t, forced);
340
341 TRACE_TASK(t, "job_completion().\n");
342
343 /* set flags */
344 set_rt_flags(t, RT_F_SLEEP);
345 /* prepare for next period */
346 prepare_for_next_period(t);
347 if (is_released(t, litmus_clock()))
348 sched_trace_task_release(t);
349 /* unlink */
350 unlink(t);
351 /* requeue
352 * But don't requeue a blocking task. */
353 if (is_running(t))
354 gsnedf_job_arrival(t);
355}
356
357/* gsnedf_tick - this function is called for every local timer
358 * interrupt.
359 *
360 * checks whether the current task has expired and checks
361 * whether we need to preempt it if it has not expired
362 */
363static void gsnedf_tick(struct task_struct* t)
364{
365 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
366 if (!is_np(t)) {
367 /* np tasks will be preempted when they become
368 * preemptable again
369 */
370 litmus_reschedule_local();
371 TRACE("gsnedf_scheduler_tick: "
372 "%d is preemptable "
373 " => FORCE_RESCHED\n", t->pid);
374 } else if (is_user_np(t)) {
375 TRACE("gsnedf_scheduler_tick: "
376 "%d is non-preemptable, "
377 "preemption delayed.\n", t->pid);
378 request_exit_np(t);
379 }
380 }
381}
382
383/* Getting schedule() right is a bit tricky. schedule() may not make any
384 * assumptions on the state of the current task since it may be called for a
385 * number of reasons. The reasons include a scheduler_tick() determined that it
386 * was necessary, because sys_exit_np() was called, because some Linux
387 * subsystem determined so, or even (in the worst case) because there is a bug
388 * hidden somewhere. Thus, we must take extreme care to determine what the
389 * current state is.
390 *
391 * The CPU could currently be scheduling a task (or not), be linked (or not).
392 *
393 * The following assertions for the scheduled task could hold:
394 *
395 * - !is_running(scheduled) // the job blocks
396 * - scheduled->timeslice == 0 // the job completed (forcefully)
397 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
398 * - linked != scheduled // we need to reschedule (for any reason)
399 * - is_np(scheduled) // rescheduling must be delayed,
400 * sys_exit_np must be requested
401 *
402 * Any of these can occur together.
403 */
404static struct task_struct* gsnedf_schedule(struct task_struct * prev)
405{
406 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
407 int out_of_time, sleep, preempt, np, exists, blocks;
408 struct task_struct* next = NULL;
409
410#ifdef CONFIG_RELEASE_MASTER
411 /* Bail out early if we are the release master.
412 * The release master never schedules any real-time tasks.
413 */
414 if (unlikely(gsnedf.release_master == entry->cpu)) {
415 sched_state_task_picked();
416 return NULL;
417 }
418#endif
419
420 raw_spin_lock(&gsnedf_lock);
421
422 /* sanity checking */
423 BUG_ON(entry->scheduled && entry->scheduled != prev);
424 BUG_ON(entry->scheduled && !is_realtime(prev));
425 BUG_ON(is_realtime(prev) && !entry->scheduled);
426
427 /* (0) Determine state */
428 exists = entry->scheduled != NULL;
429 blocks = exists && !is_running(entry->scheduled);
430 out_of_time = exists && budget_enforced(entry->scheduled)
431 && budget_exhausted(entry->scheduled);
432 np = exists && is_np(entry->scheduled);
433 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
434 preempt = entry->scheduled != entry->linked;
435
436#ifdef WANT_ALL_SCHED_EVENTS
437 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
438#endif
439
440 if (exists)
441 TRACE_TASK(prev,
442 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
443 "state:%d sig:%d\n",
444 blocks, out_of_time, np, sleep, preempt,
445 prev->state, signal_pending(prev));
446 if (entry->linked && preempt)
447 TRACE_TASK(prev, "will be preempted by %s/%d\n",
448 entry->linked->comm, entry->linked->pid);
449
450
451 /* If a task blocks we have no choice but to reschedule.
452 */
453 if (blocks)
454 unlink(entry->scheduled);
455
456 /* Request a sys_exit_np() call if we would like to preempt but cannot.
457 * We need to make sure to update the link structure anyway in case
458 * that we are still linked. Multiple calls to request_exit_np() don't
459 * hurt.
460 */
461 if (np && (out_of_time || preempt || sleep)) {
462 unlink(entry->scheduled);
463 request_exit_np(entry->scheduled);
464 }
465
466 /* Any task that is preemptable and either exhausts its execution
467 * budget or wants to sleep completes. We may have to reschedule after
468 * this. Don't do a job completion if we block (can't have timers running
469 * for blocked jobs).
470 */
471 if (!np && (out_of_time || sleep) && !blocks)
472 job_completion(entry->scheduled, !sleep);
473
474 /* Link pending task if we became unlinked.
475 */
476 if (!entry->linked)
477 link_task_to_cpu(__take_ready(&gsnedf), entry);
478
479 /* The final scheduling decision. Do we need to switch for some reason?
480 * If linked is different from scheduled, then select linked as next.
481 */
482 if ((!np || blocks) &&
483 entry->linked != entry->scheduled) {
484 /* Schedule a linked job? */
485 if (entry->linked) {
486 entry->linked->rt_param.scheduled_on = entry->cpu;
487 next = entry->linked;
488 TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
489 }
490 if (entry->scheduled) {
491 /* not gonna be scheduled soon */
492 entry->scheduled->rt_param.scheduled_on = NO_CPU;
493 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
494 }
495 } else
496 /* Only override Linux scheduler if we have a real-time task
497 * scheduled that needs to continue.
498 */
499 if (exists)
500 next = prev;
501
502 sched_state_task_picked();
503
504 raw_spin_unlock(&gsnedf_lock);
505
506#ifdef WANT_ALL_SCHED_EVENTS
507 TRACE("gsnedf_lock released, next=0x%p\n", next);
508
509 if (next)
510 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
511 else if (exists && !next)
512 TRACE("becomes idle at %llu.\n", litmus_clock());
513#endif
514
515
516 return next;
517}
518
519
520/* _finish_switch - we just finished the switch away from prev
521 */
522static void gsnedf_finish_switch(struct task_struct *prev)
523{
524 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
525
526 entry->scheduled = is_realtime(current) ? current : NULL;
527#ifdef WANT_ALL_SCHED_EVENTS
528 TRACE_TASK(prev, "switched away from\n");
529#endif
530}
531
532
533/* Prepare a task for running in RT mode
534 */
535static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
536{
537 unsigned long flags;
538 cpu_entry_t* entry;
539
540 TRACE("gsn edf: task new %d\n", t->pid);
541
542 raw_spin_lock_irqsave(&gsnedf_lock, flags);
543
544 /* setup job params */
545 release_at(t, litmus_clock());
546
547 if (running) {
548 entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
549 BUG_ON(entry->scheduled);
550
551#ifdef CONFIG_RELEASE_MASTER
552 if (entry->cpu != gsnedf.release_master) {
553#endif
554 entry->scheduled = t;
555 tsk_rt(t)->scheduled_on = task_cpu(t);
556#ifdef CONFIG_RELEASE_MASTER
557 } else {
558 /* do not schedule on release master */
559 preempt(entry); /* force resched */
560 tsk_rt(t)->scheduled_on = NO_CPU;
561 }
562#endif
563 } else {
564 t->rt_param.scheduled_on = NO_CPU;
565 }
566 t->rt_param.linked_on = NO_CPU;
567
568 gsnedf_job_arrival(t);
569 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
570}
571
572static void gsnedf_task_wake_up(struct task_struct *task)
573{
574 unsigned long flags;
575 lt_t now;
576
577 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
578
579 raw_spin_lock_irqsave(&gsnedf_lock, flags);
580 /* We need to take suspensions because of semaphores into
581 * account! If a job resumes after being suspended due to acquiring
582 * a semaphore, it should never be treated as a new job release.
583 */
584 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
585 set_rt_flags(task, RT_F_RUNNING);
586 } else {
587 now = litmus_clock();
588 if (is_tardy(task, now)) {
589 /* new sporadic release */
590 release_at(task, now);
591 sched_trace_task_release(task);
592 }
593 else {
594 if (task->rt.time_slice) {
595 /* came back in time before deadline
596 */
597 set_rt_flags(task, RT_F_RUNNING);
598 }
599 }
600 }
601 gsnedf_job_arrival(task);
602 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
603}
604
605static void gsnedf_task_block(struct task_struct *t)
606{
607 unsigned long flags;
608
609 TRACE_TASK(t, "block at %llu\n", litmus_clock());
610
611 /* unlink if necessary */
612 raw_spin_lock_irqsave(&gsnedf_lock, flags);
613 unlink(t);
614 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
615
616 BUG_ON(!is_realtime(t));
617}
618
619
620static void gsnedf_task_exit(struct task_struct * t)
621{
622 unsigned long flags;
623
624 /* unlink if necessary */
625 raw_spin_lock_irqsave(&gsnedf_lock, flags);
626 unlink(t);
627 if (tsk_rt(t)->scheduled_on != NO_CPU) {
628 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
629 tsk_rt(t)->scheduled_on = NO_CPU;
630 }
631 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
632
633 BUG_ON(!is_realtime(t));
634 TRACE_TASK(t, "RIP\n");
635}
636
637
/* gsnedf_admit_task - admission test hook.
 *
 * Performs no checks: every task is admitted (always returns 0).
 */
static long gsnedf_admit_task(struct task_struct* tsk)
{
	return 0;
}
642
643#ifdef CONFIG_LITMUS_LOCKING
644
645#include <litmus/fdso.h>
646
647/* called with IRQs off */
648static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
649{
650 int linked_on;
651 int check_preempt = 0;
652
653 raw_spin_lock(&gsnedf_lock);
654
655 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
656 tsk_rt(t)->inh_task = prio_inh;
657
658 linked_on = tsk_rt(t)->linked_on;
659
660 /* If it is scheduled, then we need to reorder the CPU heap. */
661 if (linked_on != NO_CPU) {
662 TRACE_TASK(t, "%s: linked on %d\n",
663 __FUNCTION__, linked_on);
664 /* Holder is scheduled; need to re-order CPUs.
665 * We can't use heap_decrease() here since
666 * the cpu_heap is ordered in reverse direction, so
667 * it is actually an increase. */
668 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
669 gsnedf_cpus[linked_on]->hn);
670 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
671 gsnedf_cpus[linked_on]->hn);
672 } else {
673 /* holder may be queued: first stop queue changes */
674 raw_spin_lock(&gsnedf.release_lock);
675 if (is_queued(t)) {
676 TRACE_TASK(t, "%s: is queued\n",
677 __FUNCTION__);
678 /* We need to update the position of holder in some
679 * heap. Note that this could be a release heap if we
680 * budget enforcement is used and this job overran. */
681 check_preempt =
682 !bheap_decrease(edf_ready_order,
683 tsk_rt(t)->heap_node);
684 } else {
685 /* Nothing to do: if it is not queued and not linked
686 * then it is either sleeping or currently being moved
687 * by other code (e.g., a timer interrupt handler) that
688 * will use the correct priority when enqueuing the
689 * task. */
690 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
691 __FUNCTION__);
692 }
693 raw_spin_unlock(&gsnedf.release_lock);
694
695 /* If holder was enqueued in a release heap, then the following
696 * preemption check is pointless, but we can't easily detect
697 * that case. If you want to fix this, then consider that
698 * simply adding a state flag requires O(n) time to update when
699 * releasing n tasks, which conflicts with the goal to have
700 * O(log n) merges. */
701 if (check_preempt) {
702 /* heap_decrease() hit the top level of the heap: make
703 * sure preemption checks get the right task, not the
704 * potentially stale cache. */
705 bheap_uncache_min(edf_ready_order,
706 &gsnedf.ready_queue);
707 check_for_preemptions();
708 }
709 }
710
711 raw_spin_unlock(&gsnedf_lock);
712}
713
714/* called with IRQs off */
715static void clear_priority_inheritance(struct task_struct* t)
716{
717 raw_spin_lock(&gsnedf_lock);
718
719 /* A job only stops inheriting a priority when it releases a
720 * resource. Thus we can make the following assumption.*/
721 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
722
723 TRACE_TASK(t, "priority restored\n");
724 tsk_rt(t)->inh_task = NULL;
725
726 /* Check if rescheduling is necessary. We can't use heap_decrease()
727 * since the priority was effectively lowered. */
728 unlink(t);
729 gsnedf_job_arrival(t);
730
731 raw_spin_unlock(&gsnedf_lock);
732}
733
734
735/* ******************** FMLP support ********************** */
736
737/* struct for semaphore with priority inheritance */
738struct fmlp_semaphore {
739 struct litmus_lock litmus_lock;
740
741 /* current resource holder */
742 struct task_struct *owner;
743
744 /* highest-priority waiter */
745 struct task_struct *hp_waiter;
746
747 /* FIFO queue of waiting tasks */
748 wait_queue_head_t wait;
749};
750
751static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
752{
753 return container_of(lock, struct fmlp_semaphore, litmus_lock);
754}
755
756/* caller is responsible for locking */
757struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem,
758 struct task_struct* skip)
759{
760 struct list_head *pos;
761 struct task_struct *queued, *found = NULL;
762
763 list_for_each(pos, &sem->wait.task_list) {
764 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
765 task_list)->private;
766
767 /* Compare task prios, find high prio task. */
768 if (queued != skip && edf_higher_prio(queued, found))
769 found = queued;
770 }
771 return found;
772}
773
774int gsnedf_fmlp_lock(struct litmus_lock* l)
775{
776 struct task_struct* t = current;
777 struct fmlp_semaphore *sem = fmlp_from_lock(l);
778 wait_queue_t wait;
779 unsigned long flags;
780
781 if (!is_realtime(t))
782 return -EPERM;
783
784 spin_lock_irqsave(&sem->wait.lock, flags);
785
786 if (sem->owner) {
787 /* resource is not free => must suspend and wait */
788
789 init_waitqueue_entry(&wait, t);
790
791 /* FIXME: interruptible would be nice some day */
792 set_task_state(t, TASK_UNINTERRUPTIBLE);
793
794 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
795
796 /* check if we need to activate priority inheritance */
797 if (edf_higher_prio(t, sem->hp_waiter)) {
798 sem->hp_waiter = t;
799 if (edf_higher_prio(t, sem->owner))
800 set_priority_inheritance(sem->owner, sem->hp_waiter);
801 }
802
803 TS_LOCK_SUSPEND;
804
805 /* release lock before sleeping */
806 spin_unlock_irqrestore(&sem->wait.lock, flags);
807
808 /* We depend on the FIFO order. Thus, we don't need to recheck
809 * when we wake up; we are guaranteed to have the lock since
810 * there is only one wake up per release.
811 */
812
813 schedule();
814
815 TS_LOCK_RESUME;
816
817 /* Since we hold the lock, no other task will change
818 * ->owner. We can thus check it without acquiring the spin
819 * lock. */
820 BUG_ON(sem->owner != t);
821 } else {
822 /* it's ours now */
823 sem->owner = t;
824
825 spin_unlock_irqrestore(&sem->wait.lock, flags);
826 }
827
828 return 0;
829}
830
831int gsnedf_fmlp_unlock(struct litmus_lock* l)
832{
833 struct task_struct *t = current, *next;
834 struct fmlp_semaphore *sem = fmlp_from_lock(l);
835 unsigned long flags;
836 int err = 0;
837
838 spin_lock_irqsave(&sem->wait.lock, flags);
839
840 if (sem->owner != t) {
841 err = -EINVAL;
842 goto out;
843 }
844
845 /* check if there are jobs waiting for this resource */
846 next = __waitqueue_remove_first(&sem->wait);
847 if (next) {
848 /* next becomes the resouce holder */
849 sem->owner = next;
850 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
851
852 /* determine new hp_waiter if necessary */
853 if (next == sem->hp_waiter) {
854 TRACE_TASK(next, "was highest-prio waiter\n");
855 /* next has the highest priority --- it doesn't need to
856 * inherit. However, we need to make sure that the
857 * next-highest priority in the queue is reflected in
858 * hp_waiter. */
859 sem->hp_waiter = find_hp_waiter(sem, next);
860 if (sem->hp_waiter)
861 TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
862 else
863 TRACE("no further waiters\n");
864 } else {
865 /* Well, if next is not the highest-priority waiter,
866 * then it ought to inherit the highest-priority
867 * waiter's priority. */
868 set_priority_inheritance(next, sem->hp_waiter);
869 }
870
871 /* wake up next */
872 wake_up_process(next);
873 } else
874 /* becomes available */
875 sem->owner = NULL;
876
877 /* we lose the benefit of priority inheritance (if any) */
878 if (tsk_rt(t)->inh_task)
879 clear_priority_inheritance(t);
880
881out:
882 spin_unlock_irqrestore(&sem->wait.lock, flags);
883
884 return err;
885}
886
887int gsnedf_fmlp_close(struct litmus_lock* l)
888{
889 struct task_struct *t = current;
890 struct fmlp_semaphore *sem = fmlp_from_lock(l);
891 unsigned long flags;
892
893 int owner;
894
895 spin_lock_irqsave(&sem->wait.lock, flags);
896
897 owner = sem->owner == t;
898
899 spin_unlock_irqrestore(&sem->wait.lock, flags);
900
901 if (owner)
902 gsnedf_fmlp_unlock(l);
903
904 return 0;
905}
906
/* gsnedf_fmlp_free - deallocate callback: free the enclosing semaphore */
void gsnedf_fmlp_free(struct litmus_lock* lock)
{
	struct fmlp_semaphore *sem = fmlp_from_lock(lock);

	kfree(sem);
}
911
912static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
913 .close = gsnedf_fmlp_close,
914 .lock = gsnedf_fmlp_lock,
915 .unlock = gsnedf_fmlp_unlock,
916 .deallocate = gsnedf_fmlp_free,
917};
918
919static struct litmus_lock* gsnedf_new_fmlp(void)
920{
921 struct fmlp_semaphore* sem;
922
923 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
924 if (!sem)
925 return NULL;
926
927 sem->owner = NULL;
928 sem->hp_waiter = NULL;
929 init_waitqueue_head(&sem->wait);
930 sem->litmus_lock.ops = &gsnedf_fmlp_lock_ops;
931
932 return &sem->litmus_lock;
933}
934
935/* **** lock constructor **** */
936
937
938static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
939 void* __user unused)
940{
941 int err = -ENXIO;
942
943 /* GSN-EDF currently only supports the FMLP for global resources. */
944 switch (type) {
945
946 case FMLP_SEM:
947 /* Flexible Multiprocessor Locking Protocol */
948 *lock = gsnedf_new_fmlp();
949 if (*lock)
950 err = 0;
951 else
952 err = -ENOMEM;
953 break;
954
955 };
956
957 return err;
958}
959
960#endif
961
962
963static long gsnedf_activate_plugin(void)
964{
965 int cpu;
966 cpu_entry_t *entry;
967
968 bheap_init(&gsnedf_cpu_heap);
969#ifdef CONFIG_RELEASE_MASTER
970 gsnedf.release_master = atomic_read(&release_master_cpu);
971#endif
972
973 for_each_online_cpu(cpu) {
974 entry = &per_cpu(gsnedf_cpu_entries, cpu);
975 bheap_node_init(&entry->hn, entry);
976 entry->linked = NULL;
977 entry->scheduled = NULL;
978#ifdef CONFIG_RELEASE_MASTER
979 if (cpu != gsnedf.release_master) {
980#endif
981 TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
982 update_cpu_position(entry);
983#ifdef CONFIG_RELEASE_MASTER
984 } else {
985 TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
986 }
987#endif
988 }
989 return 0;
990}
991
992/* Plugin object */
993static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
994 .plugin_name = "GSN-EDF",
995 .finish_switch = gsnedf_finish_switch,
996 .tick = gsnedf_tick,
997 .task_new = gsnedf_task_new,
998 .complete_job = complete_job,
999 .task_exit = gsnedf_task_exit,
1000 .schedule = gsnedf_schedule,
1001 .task_wake_up = gsnedf_task_wake_up,
1002 .task_block = gsnedf_task_block,
1003 .admit_task = gsnedf_admit_task,
1004 .activate_plugin = gsnedf_activate_plugin,
1005#ifdef CONFIG_LITMUS_LOCKING
1006 .allocate_lock = gsnedf_allocate_lock,
1007#endif
1008};
1009
1010
1011static int __init init_gsn_edf(void)
1012{
1013 int cpu;
1014 cpu_entry_t *entry;
1015
1016 bheap_init(&gsnedf_cpu_heap);
1017 /* initialize CPU state */
1018 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1019 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1020 gsnedf_cpus[cpu] = entry;
1021 entry->cpu = cpu;
1022 entry->hn = &gsnedf_heap_node[cpu];
1023 bheap_node_init(&entry->hn, entry);
1024 }
1025 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
1026 return register_sched_plugin(&gsn_edf_plugin);
1027}
1028
1029
1030module_init(init_gsn_edf);
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
new file mode 100644
index 00000000000..6553948407d
--- /dev/null
+++ b/litmus/sched_litmus.c
@@ -0,0 +1,325 @@
1/* This file is included from kernel/sched.c */
2
3#include <litmus/litmus.h>
4#include <litmus/budget.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/preempt.h>
7
8static void update_time_litmus(struct rq *rq, struct task_struct *p)
9{
10 u64 delta = rq->clock - p->se.exec_start;
11 if (unlikely((s64)delta < 0))
12 delta = 0;
13 /* per job counter */
14 p->rt_param.job_params.exec_time += delta;
15 /* task counter */
16 p->se.sum_exec_runtime += delta;
17 /* sched_clock() */
18 p->se.exec_start = rq->clock;
19 cpuacct_charge(p, delta);
20}
21
/* Forward declarations of the runqueue pair-locking helpers; they are not
 * defined in this file (which is included from kernel/sched.c).
 * litmus_schedule() uses double_rq_lock() when it migrates a task.
 */
static void double_rq_lock(struct rq *rq1, struct rq *rq2);
static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
24
25/*
26 * litmus_tick gets called by scheduler_tick() with HZ freq
27 * Interrupts are disabled
28 */
29static void litmus_tick(struct rq *rq, struct task_struct *p)
30{
31 TS_PLUGIN_TICK_START;
32
33 if (is_realtime(p))
34 update_time_litmus(rq, p);
35
36 /* plugin tick */
37 litmus->tick(p);
38
39 TS_PLUGIN_TICK_END;
40
41 return;
42}
43
44static struct task_struct *
45litmus_schedule(struct rq *rq, struct task_struct *prev)
46{
47 struct rq* other_rq;
48 struct task_struct *next;
49
50 long was_running;
51 lt_t _maybe_deadlock = 0;
52
53 /* let the plugin schedule */
54 next = litmus->schedule(prev);
55
56 sched_state_plugin_check();
57
58 /* check if a global plugin pulled a task from a different RQ */
59 if (next && task_rq(next) != rq) {
60 /* we need to migrate the task */
61 other_rq = task_rq(next);
62 TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);
63
64 /* while we drop the lock, the prev task could change its
65 * state
66 */
67 was_running = is_running(prev);
68 mb();
69 raw_spin_unlock(&rq->lock);
70
71 /* Don't race with a concurrent switch. This could deadlock in
72 * the case of cross or circular migrations. It's the job of
73 * the plugin to make sure that doesn't happen.
74 */
75 TRACE_TASK(next, "stack_in_use=%d\n",
76 next->rt_param.stack_in_use);
77 if (next->rt_param.stack_in_use != NO_CPU) {
78 TRACE_TASK(next, "waiting to deschedule\n");
79 _maybe_deadlock = litmus_clock();
80 }
81 while (next->rt_param.stack_in_use != NO_CPU) {
82 cpu_relax();
83 mb();
84 if (next->rt_param.stack_in_use == NO_CPU)
85 TRACE_TASK(next,"descheduled. Proceeding.\n");
86
87 if (lt_before(_maybe_deadlock + 10000000,
88 litmus_clock())) {
89 /* We've been spinning for 10ms.
90 * Something can't be right!
91 * Let's abandon the task and bail out; at least
92 * we will have debug info instead of a hard
93 * deadlock.
94 */
95 TRACE_TASK(next,"stack too long in use. "
96 "Deadlock?\n");
97 next = NULL;
98
99 /* bail out */
100 raw_spin_lock(&rq->lock);
101 return next;
102 }
103 }
104#ifdef __ARCH_WANT_UNLOCKED_CTXSW
105 if (next->on_cpu)
106 TRACE_TASK(next, "waiting for !oncpu");
107 while (next->on_cpu) {
108 cpu_relax();
109 mb();
110 }
111#endif
112 double_rq_lock(rq, other_rq);
113 mb();
114 if (is_realtime(prev) && is_running(prev) != was_running) {
115 TRACE_TASK(prev,
116 "state changed while we dropped"
117 " the lock: is_running=%d, was_running=%d\n",
118 is_running(prev), was_running);
119 if (is_running(prev) && !was_running) {
120 /* prev task became unblocked
121 * we need to simulate normal sequence of events
122 * to scheduler plugins.
123 */
124 litmus->task_block(prev);
125 litmus->task_wake_up(prev);
126 }
127 }
128
129 set_task_cpu(next, smp_processor_id());
130
131 /* DEBUG: now that we have the lock we need to make sure a
132 * couple of things still hold:
133 * - it is still a real-time task
134 * - it is still runnable (could have been stopped)
135 * If either is violated, then the active plugin is
136 * doing something wrong.
137 */
138 if (!is_realtime(next) || !is_running(next)) {
139 /* BAD BAD BAD */
140 TRACE_TASK(next,"BAD: migration invariant FAILED: "
141 "rt=%d running=%d\n",
142 is_realtime(next),
143 is_running(next));
144 /* drop the task */
145 next = NULL;
146 }
147 /* release the other CPU's runqueue, but keep ours */
148 raw_spin_unlock(&other_rq->lock);
149 }
150 if (next) {
151 next->rt_param.stack_in_use = rq->cpu;
152 next->se.exec_start = rq->clock;
153 }
154
155 update_enforcement_timer(next);
156 return next;
157}
158
159static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
160 int flags)
161{
162 if (flags & ENQUEUE_WAKEUP) {
163 sched_trace_task_resume(p);
164 tsk_rt(p)->present = 1;
165 /* LITMUS^RT plugins need to update the state
166 * _before_ making it available in global structures.
167 * Linux gets away with being lazy about the task state
168 * update. We can't do that, hence we update the task
169 * state already here.
170 *
171 * WARNING: this needs to be re-evaluated when porting
172 * to newer kernel versions.
173 */
174 p->state = TASK_RUNNING;
175 litmus->task_wake_up(p);
176
177 rq->litmus.nr_running++;
178 } else
179 TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
180}
181
182static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
183 int flags)
184{
185 if (flags & DEQUEUE_SLEEP) {
186 litmus->task_block(p);
187 tsk_rt(p)->present = 0;
188 sched_trace_task_block(p);
189
190 rq->litmus.nr_running--;
191 } else
192 TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
193}
194
/* yield_task_litmus - sched_class yield hook.
 *
 * Must be called for the task that is currently running on rq (enforced
 * with BUG_ON). Calls clear_exit_np() on the current task and requests a
 * local reschedule.
 */
static void yield_task_litmus(struct rq *rq)
{
	BUG_ON(rq->curr != current);
	/* sched_yield() is called to trigger delayed preemptions.
	 * Thus, mark the current task as needing to be rescheduled.
	 * This will cause the scheduler plugin to be invoked, which can
	 * then determine if a preemption is still required.
	 */
	clear_exit_np(current);
	litmus_reschedule_local();
}
206
/* check_preempt_curr_litmus - sched_class preemption check hook.
 *
 * Intentionally empty: plugins are responsible for preemption decisions.
 */
static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
{
}
212
/* put_prev_task_litmus - sched_class put_prev_task hook; does nothing. */
static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
{
}
216
217static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev)
218{
219 update_time_litmus(rq, prev);
220 if (!is_running(prev))
221 tsk_rt(prev)->present = 0;
222}
223
224/* pick_next_task_litmus() - litmus_schedule() function
225 *
226 * return the next task to be scheduled
227 */
228static struct task_struct *pick_next_task_litmus(struct rq *rq)
229{
230 /* get the to-be-switched-out task (prev) */
231 struct task_struct *prev = rq->litmus.prev;
232 struct task_struct *next;
233
234 /* if not called from schedule() but from somewhere
235 * else (e.g., migration), return now!
236 */
237 if(!rq->litmus.prev)
238 return NULL;
239
240 rq->litmus.prev = NULL;
241
242 TS_PLUGIN_SCHED_START;
243 next = litmus_schedule(rq, prev);
244 TS_PLUGIN_SCHED_END;
245
246 return next;
247}
248
/* task_tick_litmus - sched_class task_tick hook.
 *
 * Deliberately a no-op: all tick related work is done by litmus_tick().
 */
static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
{
}
254
/* switched_to_litmus - sched_class switched_to hook; does nothing. */
static void switched_to_litmus(struct rq *rq, struct task_struct *p)
{
}
258
/* prio_changed_litmus - sched_class prio_changed hook; does nothing. */
static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
				int oldprio)
{
}
263
/* get_rr_interval_litmus - sched_class get_rr_interval hook.
 *
 * Always returns 0, which this code uses to mean "infinity".
 * NOTE(review): presumably 0 is the sched_class convention for "no
 * round-robin time slice" - confirm against the core scheduler.
 */
unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
{
	/* return infinity */
	return 0;
}
269
270/* This is called when a task became a real-time task, either due to a SCHED_*
271 * class transition or due to PI mutex inheritance. We don't handle Linux PI
272 * mutex inheritance yet (and probably never will). Use LITMUS provided
273 * synchronization primitives instead.
274 */
/* set_curr_task_litmus - sched_class set_curr_task hook.
 *
 * Restarts the execution time measurement of the runqueue's current task by
 * setting its exec_start to the runqueue clock.
 */
static void set_curr_task_litmus(struct rq *rq)
{
	rq->curr->se.exec_start = rq->clock;
}
279
280
281#ifdef CONFIG_SMP
/* select_task_rq_litmus - sched_class select_task_rq hook.
 *
 * execve tries to rebalance the task in this scheduling domain.
 * We don't care about the scheduling domain; this can get called from
 * exec, fork, and wakeup.
 *
 * Always returns the CPU the task is currently assigned to.
 */
static int
select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags)
{
	/* preemption is already disabled.
	 * We don't want to change cpu here
	 */
	return task_cpu(p);
}
294#endif
295
/* The LITMUS^RT scheduling class: binds the *_litmus hooks defined in this
 * file to the sched_class interface. The class that follows it in the
 * class list is stop_sched_class. select_task_rq and pre_schedule are only
 * provided on SMP builds.
 */
static const struct sched_class litmus_sched_class = {
	/* From 34f971f6 the stop/migrate worker threads have a class on
	 * their own, which is the highest prio class. We don't support
	 * cpu-hotplug or cpu throttling. Allows Litmus to use up to 1.0
	 * CPU capacity.
	 */
	.next = &stop_sched_class,
	.enqueue_task = enqueue_task_litmus,
	.dequeue_task = dequeue_task_litmus,
	.yield_task = yield_task_litmus,

	.check_preempt_curr = check_preempt_curr_litmus,

	.pick_next_task = pick_next_task_litmus,
	.put_prev_task = put_prev_task_litmus,

#ifdef CONFIG_SMP
	.select_task_rq = select_task_rq_litmus,

	.pre_schedule = pre_schedule_litmus,
#endif

	.set_curr_task = set_curr_task_litmus,
	.task_tick = task_tick_litmus,

	.get_rr_interval = get_rr_interval_litmus,

	.prio_changed = prio_changed_litmus,
	.switched_to = switched_to_litmus,
};
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
new file mode 100644
index 00000000000..72c06a492ef
--- /dev/null
+++ b/litmus/sched_pfair.c
@@ -0,0 +1,1074 @@
1/*
2 * kernel/sched_pfair.c
3 *
4 * Implementation of the PD^2 pfair scheduling algorithm. This
5 * implementation realizes "early releasing," i.e., it is work-conserving.
6 *
7 */
8
9#include <asm/div64.h>
10#include <linux/delay.h>
11#include <linux/module.h>
12#include <linux/spinlock.h>
13#include <linux/percpu.h>
14#include <linux/sched.h>
15#include <linux/list.h>
16#include <linux/slab.h>
17
18#include <litmus/litmus.h>
19#include <litmus/jobs.h>
20#include <litmus/preempt.h>
21#include <litmus/rt_domain.h>
22#include <litmus/sched_plugin.h>
23#include <litmus/sched_trace.h>
24
25#include <litmus/bheap.h>
26
27/* to configure the cluster size */
28#include <litmus/litmus_proc.h>
29
30#include <litmus/clustered.h>
31
/* Cluster configuration of the plugin; defaults to one global cluster. */
static enum cache_level pfair_cluster_level = GLOBAL_CLUSTER;
33
/* Per-subtask scheduling parameters, precomputed by init_subtask(). */
struct subtask {
	/* measured in quanta relative to job release */
	quanta_t release;
	quanta_t deadline;
	quanta_t overlap; /* called "b bit" by PD^2 */
	quanta_t group_deadline; /* 0 if the task is not "heavy" */
};
41
42struct pfair_param {
43 quanta_t quanta; /* number of subtasks */
44 quanta_t cur; /* index of current subtask */
45
46 quanta_t release; /* in quanta */
47 quanta_t period; /* in quanta */
48
49 quanta_t last_quantum; /* when scheduled last */
50 int last_cpu; /* where scheduled last */
51
52 struct pfair_cluster* cluster; /* where this task is scheduled */
53
54 struct subtask subtasks[0]; /* allocate together with pfair_param */
55};
56
/* Access the PFAIR parameters attached to a task; expands to an lvalue. */
#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)
58
/* Per-CPU PFAIR state (one instance per CPU, see the pfair_state per-CPU
 * variable).
 */
struct pfair_state {
	struct cluster_cpu topology;

	volatile quanta_t cur_tick; /* updated by the CPU that is advancing
				     * the time */
	volatile quanta_t local_tick; /* What tick is the local CPU currently
				       * executing? Updated only by the local
				       * CPU. In QEMU, this may lag behind the
				       * current tick. In a real system, with
				       * proper timers and aligned quanta,
				       * that should only be the case for a
				       * very short time after the time
				       * advanced. With staggered quanta, it
				       * will lag for the duration of the
				       * offset.
				       */

	struct task_struct* linked; /* the task that should be executing */
	struct task_struct* local; /* the local copy of linked */
	struct task_struct* scheduled; /* what is actually scheduled */

	lt_t offset; /* stagger offset */
	/* incremented by advance_subtasks() when linked != local */
	unsigned int missed_updates;
	/* incremented by schedule_next_quantum() when local_tick != cur_tick */
	unsigned int missed_quanta;
};
84
/* State shared by all CPUs of one PFAIR cluster. */
struct pfair_cluster {
	struct scheduling_cluster topology;

	/* The "global" time in this cluster. Advanced with cmpxchg() by
	 * pfair_tick() and catchup_quanta().
	 */
	quanta_t pfair_time; /* the "official" PFAIR clock */

	/* The ready queue for this cluster. Its ready_lock serves as the
	 * cluster lock (see cluster_lock()).
	 */
	rt_domain_t pfair;

	/* The set of jobs that should have their release enacted at the next
	 * quantum boundary. Filled by pfair_release_jobs() and merged into
	 * the ready queue by poll_releases(); protected by release_lock.
	 */
	struct bheap release_queue;
	raw_spinlock_t release_lock;
};
100
/* Task flag set by advance_subtask() when a task that is not present was
 * removed from the system; pfair_task_wake_up() re-adds such a task to the
 * ready queue.
 */
#define RT_F_REQUEUE 0x2
102
/* Map a per-CPU state to the pfair_cluster that embeds the scheduling
 * cluster this CPU points to.
 */
static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
{
	return container_of(state->topology.cluster, struct pfair_cluster, topology);
}
107
/* Return the id recorded in the topology information of this per-CPU state. */
static inline int cpu_id(struct pfair_state* state)
{
	return state->topology.id;
}
112
/* Convert a node of a cluster's CPU list back into the per-CPU state that
 * embeds it (via topology.cluster_list).
 */
static inline struct pfair_state* from_cluster_list(struct list_head* pos)
{
	return list_entry(pos, struct pfair_state, topology.cluster_list);
}
117
/* Convert a pointer to a cluster's embedded rt_domain_t ('pfair' member)
 * back into the enclosing pfair_cluster.
 */
static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
{
	return container_of(rt, struct pfair_cluster, pfair);
}
122
123static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
124{
125 /* The ready_lock is used to serialize all scheduling events. */
126 return &cluster->pfair.ready_lock;
127}
128
129static inline raw_spinlock_t* cpu_lock(struct pfair_state* state)
130{
131 return cluster_lock(cpu_cluster(state));
132}
133
/* One pfair_state per CPU. */
DEFINE_PER_CPU(struct pfair_state, pfair_state);
/* Array of pointers to per-CPU states, indexed by CPU number. */
struct pfair_state* *pstate; /* short cut */

/* The clusters managed by this plugin and how many there are. */
static struct pfair_cluster* pfair_clusters;
static int num_pfair_clusters;
139
/* Enable for lots of trace info.
 * #define PFAIR_DEBUG
 *
 * With PFAIR_DEBUG defined, PTRACE_TASK()/PTRACE() forward to
 * TRACE_TASK()/TRACE(); otherwise they expand to nothing, so their
 * arguments are not evaluated.
 */

#ifdef PFAIR_DEBUG
#define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args)
#define PTRACE(f, args...) TRACE(f, ## args)
#else
#define PTRACE_TASK(t, f, args...)
#define PTRACE(f, args...)
#endif
151
152/* gcc will inline all of these accessor functions... */
153static struct subtask* cur_subtask(struct task_struct* t)
154{
155 return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
156}
157
158static quanta_t cur_deadline(struct task_struct* t)
159{
160 return cur_subtask(t)->deadline + tsk_pfair(t)->release;
161}
162
163static quanta_t cur_release(struct task_struct* t)
164{
165 /* This is early releasing: only the release of the first subtask
166 * counts. */
167 return tsk_pfair(t)->release;
168}
169
170static quanta_t cur_overlap(struct task_struct* t)
171{
172 return cur_subtask(t)->overlap;
173}
174
175static quanta_t cur_group_deadline(struct task_struct* t)
176{
177 quanta_t gdl = cur_subtask(t)->group_deadline;
178 if (gdl)
179 return gdl + tsk_pfair(t)->release;
180 else
181 return gdl;
182}
183
184
185static int pfair_higher_prio(struct task_struct* first,
186 struct task_struct* second)
187{
188 return /* first task must exist */
189 first && (
190 /* Does the second task exist and is it a real-time task? If
191 * not, the first task (which is a RT task) has higher
192 * priority.
193 */
194 !second || !is_realtime(second) ||
195
196 /* Is the (subtask) deadline of the first task earlier?
197 * Then it has higher priority.
198 */
199 time_before(cur_deadline(first), cur_deadline(second)) ||
200
201 /* Do we have a deadline tie?
202 * Then break by B-bit.
203 */
204 (cur_deadline(first) == cur_deadline(second) &&
205 (cur_overlap(first) > cur_overlap(second) ||
206
207 /* Do we have a B-bit tie?
208 * Then break by group deadline.
209 */
210 (cur_overlap(first) == cur_overlap(second) &&
211 (time_after(cur_group_deadline(first),
212 cur_group_deadline(second)) ||
213
214 /* Do we have a group deadline tie?
215 * Then break by PID, which are unique.
216 */
217 (cur_group_deadline(first) ==
218 cur_group_deadline(second) &&
219 first->pid < second->pid))))));
220}
221
/* Heap ordering callback: non-zero if the task behind node a has higher
 * PD^2 priority than the task behind node b.
 */
int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
{
	struct task_struct* task_a = bheap2task(a);
	struct task_struct* task_b = bheap2task(b);

	return pfair_higher_prio(task_a, task_b);
}
226
227static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
228{
229 struct pfair_cluster* cluster = from_domain(rt);
230 unsigned long flags;
231
232 raw_spin_lock_irqsave(&cluster->release_lock, flags);
233
234 bheap_union(pfair_ready_order, &cluster->release_queue, tasks);
235
236 raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
237}
238
239static void prepare_release(struct task_struct* t, quanta_t at)
240{
241 tsk_pfair(t)->release = at;
242 tsk_pfair(t)->cur = 0;
243}
244
245/* pull released tasks from the release queue */
246static void poll_releases(struct pfair_cluster* cluster)
247{
248 raw_spin_lock(&cluster->release_lock);
249 __merge_ready(&cluster->pfair, &cluster->release_queue);
250 raw_spin_unlock(&cluster->release_lock);
251}
252
253static void check_preempt(struct task_struct* t)
254{
255 int cpu = NO_CPU;
256 if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
257 tsk_rt(t)->present) {
258 /* the task can be scheduled and
259 * is not scheduled where it ought to be scheduled
260 */
261 cpu = tsk_rt(t)->linked_on != NO_CPU ?
262 tsk_rt(t)->linked_on :
263 tsk_rt(t)->scheduled_on;
264 PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
265 tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
266 /* preempt */
267 litmus_reschedule(cpu);
268 }
269}
270
271/* caller must hold pfair.ready_lock */
272static void drop_all_references(struct task_struct *t)
273{
274 int cpu;
275 struct pfair_state* s;
276 struct pfair_cluster* cluster;
277 if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
278 /* It must be in the ready queue; drop references isn't called
279 * when the job is in a release queue. */
280 cluster = tsk_pfair(t)->cluster;
281 bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
282 tsk_rt(t)->heap_node);
283 }
284 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
285 s = &per_cpu(pfair_state, cpu);
286 if (s->linked == t)
287 s->linked = NULL;
288 if (s->local == t)
289 s->local = NULL;
290 if (s->scheduled == t)
291 s->scheduled = NULL;
292 }
293 /* make sure we don't have a stale linked_on field */
294 tsk_rt(t)->linked_on = NO_CPU;
295}
296
297static void pfair_prepare_next_period(struct task_struct* t)
298{
299 struct pfair_param* p = tsk_pfair(t);
300
301 prepare_for_next_period(t);
302 get_rt_flags(t) = RT_F_RUNNING;
303 p->release += p->period;
304}
305
306/* returns 1 if the task needs to go the release queue */
307static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
308{
309 struct pfair_param* p = tsk_pfair(t);
310 int to_relq;
311 p->cur = (p->cur + 1) % p->quanta;
312 if (!p->cur) {
313 if (tsk_rt(t)->present) {
314 /* The job overran; we start a new budget allocation. */
315 pfair_prepare_next_period(t);
316 } else {
317 /* remove task from system until it wakes */
318 drop_all_references(t);
319 tsk_rt(t)->flags = RT_F_REQUEUE;
320 TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
321 cpu, p->cur);
322 return 0;
323 }
324 }
325 to_relq = time_after(cur_release(t), time);
326 TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n",
327 cpu, p->cur, to_relq, cur_release(t), time);
328 return to_relq;
329}
330
331static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
332{
333 struct task_struct* l;
334 struct pfair_param* p;
335 struct list_head* pos;
336 struct pfair_state* cpu;
337
338 list_for_each(pos, &cluster->topology.cpus) {
339 cpu = from_cluster_list(pos);
340 l = cpu->linked;
341 cpu->missed_updates += cpu->linked != cpu->local;
342 if (l) {
343 p = tsk_pfair(l);
344 p->last_quantum = time;
345 p->last_cpu = cpu_id(cpu);
346 if (advance_subtask(time, l, cpu_id(cpu))) {
347 //cpu->linked = NULL;
348 PTRACE_TASK(l, "should go to release queue. "
349 "scheduled_on=%d present=%d\n",
350 tsk_rt(l)->scheduled_on,
351 tsk_rt(l)->present);
352 }
353 }
354 }
355}
356
357static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
358{
359 int cpu;
360 if (tsk_rt(t)->scheduled_on != NO_CPU) {
361 /* always observe scheduled_on linkage */
362 default_cpu = tsk_rt(t)->scheduled_on;
363 } else if (tsk_pfair(t)->last_quantum == time - 1) {
364 /* back2back quanta */
365 /* Only observe last_quantum if no scheduled_on is in the way.
366 * This should only kick in if a CPU missed quanta, and that
367 * *should* only happen in QEMU.
368 */
369 cpu = tsk_pfair(t)->last_cpu;
370 if (!pstate[cpu]->linked ||
371 tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
372 default_cpu = cpu;
373 }
374 }
375 return default_cpu;
376}
377
378/* returns one if linking was redirected */
379static int pfair_link(quanta_t time, int cpu,
380 struct task_struct* t)
381{
382 int target = target_cpu(time, t, cpu);
383 struct task_struct* prev = pstate[cpu]->linked;
384 struct task_struct* other;
385 struct pfair_cluster* cluster = cpu_cluster(pstate[cpu]);
386
387 if (target != cpu) {
388 BUG_ON(pstate[target]->topology.cluster != pstate[cpu]->topology.cluster);
389 other = pstate[target]->linked;
390 pstate[target]->linked = t;
391 tsk_rt(t)->linked_on = target;
392 if (!other)
393 /* linked ok, but reschedule this CPU */
394 return 1;
395 if (target < cpu) {
396 /* link other to cpu instead */
397 tsk_rt(other)->linked_on = cpu;
398 pstate[cpu]->linked = other;
399 if (prev) {
400 /* prev got pushed back into the ready queue */
401 tsk_rt(prev)->linked_on = NO_CPU;
402 __add_ready(&cluster->pfair, prev);
403 }
404 /* we are done with this cpu */
405 return 0;
406 } else {
407 /* re-add other, it's original CPU was not considered yet */
408 tsk_rt(other)->linked_on = NO_CPU;
409 __add_ready(&cluster->pfair, other);
410 /* reschedule this CPU */
411 return 1;
412 }
413 } else {
414 pstate[cpu]->linked = t;
415 tsk_rt(t)->linked_on = cpu;
416 if (prev) {
417 /* prev got pushed back into the ready queue */
418 tsk_rt(prev)->linked_on = NO_CPU;
419 __add_ready(&cluster->pfair, prev);
420 }
421 /* we are done with this CPU */
422 return 0;
423 }
424}
425
426static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
427{
428 int retry;
429 struct list_head *pos;
430 struct pfair_state *cpu_state;
431
432 list_for_each(pos, &cluster->topology.cpus) {
433 cpu_state = from_cluster_list(pos);
434 retry = 1;
435#ifdef CONFIG_RELEASE_MASTER
436 /* skip release master */
437 if (cluster->pfair.release_master == cpu_id(cpu_state))
438 continue;
439#endif
440 while (retry) {
441 if (pfair_higher_prio(__peek_ready(&cluster->pfair),
442 cpu_state->linked))
443 retry = pfair_link(time, cpu_id(cpu_state),
444 __take_ready(&cluster->pfair));
445 else
446 retry = 0;
447 }
448 }
449}
450
/* schedule_next_quantum - compute the task-to-CPU assignment for quantum
 * 'time' and publish it to all CPUs of the cluster.
 *
 * Under the cluster lock: advances the subtasks of linked tasks, merges
 * pending releases into the ready queue, and re-links tasks to CPUs.
 * Finally, each CPU's cur_tick is set to 'time'; CPUs whose local_tick has
 * not caught up with their cur_tick are reported and counted in
 * missed_quanta.
 */
static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
{
	struct pfair_state *cpu;
	struct list_head* pos;

	/* called with interrupts disabled */
	PTRACE("--- Q %lu at %llu PRE-SPIN\n",
	       time, litmus_clock());
	raw_spin_lock(cluster_lock(cluster));
	PTRACE("<<< Q %lu at %llu\n",
	       time, litmus_clock());

	sched_trace_quantum_boundary();

	advance_subtasks(cluster, time);
	poll_releases(cluster);
	schedule_subtasks(cluster, time);

	/* debug output only: dump the new assignment */
	list_for_each(pos, &cluster->topology.cpus) {
		cpu = from_cluster_list(pos);
		if (cpu->linked)
			PTRACE_TASK(cpu->linked,
				    " linked on %d.\n", cpu_id(cpu));
		else
			PTRACE("(null) linked on %d.\n", cpu_id(cpu));
	}
	/* We are done. Advance time. */
	mb();
	list_for_each(pos, &cluster->topology.cpus) {
		cpu = from_cluster_list(pos);
		/* the CPU has not acknowledged its previous cur_tick yet */
		if (cpu->local_tick != cpu->cur_tick) {
			TRACE("BAD Quantum not acked on %d "
			      "(l:%lu c:%lu p:%lu)\n",
			      cpu_id(cpu),
			      cpu->local_tick,
			      cpu->cur_tick,
			      cluster->pfair_time);
			cpu->missed_quanta++;
		}
		/* wait_for_quantum() spins on this field */
		cpu->cur_tick = time;
	}
	PTRACE(">>> Q %lu at %llu\n",
	       time, litmus_clock());
	raw_spin_unlock(cluster_lock(cluster));
}
496
497static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state)
498{
499 quanta_t loc;
500
501 goto first; /* skip mb() on first iteration */
502 do {
503 cpu_relax();
504 mb();
505 first: loc = state->cur_tick;
506 /* FIXME: what if loc > cur? */
507 } while (time_before(loc, q));
508 PTRACE("observed cur_tick:%lu >= q:%lu\n",
509 loc, q);
510}
511
512static quanta_t current_quantum(struct pfair_state* state)
513{
514 lt_t t = litmus_clock() - state->offset;
515 return time2quanta(t, FLOOR);
516}
517
518static void catchup_quanta(quanta_t from, quanta_t target,
519 struct pfair_state* state)
520{
521 quanta_t cur = from, time;
522 TRACE("+++< BAD catching up quanta from %lu to %lu\n",
523 from, target);
524 while (time_before(cur, target)) {
525 wait_for_quantum(cur, state);
526 cur++;
527 time = cmpxchg(&cpu_cluster(state)->pfair_time,
528 cur - 1, /* expected */
529 cur /* next */
530 );
531 if (time == cur - 1)
532 schedule_next_quantum(cpu_cluster(state), cur);
533 }
534 TRACE("+++> catching up done\n");
535}
536
/* pfair_tick - this function is called for every local timer
 * interrupt.
 *
 * Tries to advance the cluster time to the current quantum (the CPU that
 * wins the cmpxchg() computes the new schedule), waits until the quantum
 * has been published, copies the CPU's link into its local copy and
 * acknowledges the quantum by updating local_tick. A local reschedule is
 * requested if the task that should run differs from the current one.
 */
static void pfair_tick(struct task_struct* t)
{
	struct pfair_state* state = &__get_cpu_var(pfair_state);
	quanta_t time, cur;
	int retry = 10; /* bounds the number of catch-up/lagging retries */

	do {
		cur = current_quantum(state);
		PTRACE("q %lu at %llu\n", cur, litmus_clock());

		/* Attempt to advance time. First CPU to get here
		 * will prepare the next quantum.
		 */
		time = cmpxchg(&cpu_cluster(state)->pfair_time,
			       cur - 1, /* expected */
			       cur /* next */
			       );
		if (time == cur - 1) {
			/* exchange succeeded */
			wait_for_quantum(cur - 1, state);
			schedule_next_quantum(cpu_cluster(state), cur);
			retry = 0;
		} else if (time_before(time, cur - 1)) {
			/* the whole system missed a tick !? */
			catchup_quanta(time, cur, state);
			retry--;
		} else if (time_after(time, cur)) {
			/* our timer lagging behind!? */
			TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
			retry--;
		} else {
			/* Some other CPU already started scheduling
			 * this quantum. Let it do its job and then update.
			 */
			retry = 0;
		}
	} while (retry);

	/* Spin locally until time advances. */
	wait_for_quantum(cur, state);

	/* copy assignment */
	/* FIXME: what if we race with a future update? Corrupted state? */
	state->local = state->linked;
	/* signal that we are done */
	mb();
	state->local_tick = state->cur_tick;

	/* reschedule if the assignment changed and either a real-time task
	 * is running here or the newly assigned task is present
	 */
	if (state->local != current
	    && (is_realtime(current) || is_present(state->local)))
		litmus_reschedule_local();
}
592
593static int safe_to_schedule(struct task_struct* t, int cpu)
594{
595 int where = tsk_rt(t)->scheduled_on;
596 if (where != NO_CPU && where != cpu) {
597 TRACE_TASK(t, "BAD: can't be scheduled on %d, "
598 "scheduled already on %d.\n", cpu, where);
599 return 0;
600 } else
601 return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING;
602}
603
/* pfair_schedule - the plugin's schedule() callback.
 *
 * Handles job completion and exhausted allocations of prev (which is then
 * moved to the release queue) and picks the CPU's local copy of the linked
 * task if it is safe to schedule it here.
 *
 * Returns the task to run next, or NULL if there is none (always NULL on
 * the release master).
 */
static struct task_struct* pfair_schedule(struct task_struct * prev)
{
	struct pfair_state* state = &__get_cpu_var(pfair_state);
	struct pfair_cluster* cluster = cpu_cluster(state);
	int blocks, completion, out_of_time;
	struct task_struct* next = NULL;

#ifdef CONFIG_RELEASE_MASTER
	/* Bail out early if we are the release master.
	 * The release master never schedules any real-time tasks.
	 */
	if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
		sched_state_task_picked();
		return NULL;
	}
#endif

	raw_spin_lock(cpu_lock(state));

	/* prev is a real-time task that is no longer running */
	blocks = is_realtime(prev) && !is_running(prev);
	/* prev is flagged RT_F_SLEEP, i.e., its job is complete */
	completion = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP;
	/* prev's release lies after the quantum this CPU is executing */
	out_of_time = is_realtime(prev) && time_after(cur_release(prev),
						      state->local_tick);

	if (is_realtime(prev))
		PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
			    blocks, completion, out_of_time);

	if (completion) {
		sched_trace_task_completion(prev, 0);
		pfair_prepare_next_period(prev);
		/* restart at the first subtask of the new job */
		prepare_release(prev, cur_release(prev));
	}

	if (!blocks && (completion || out_of_time)) {
		drop_all_references(prev);
		sched_trace_task_release(prev);
		add_release(&cluster->pfair, prev);
	}

	if (state->local && safe_to_schedule(state->local, cpu_id(state)))
		next = state->local;

	if (prev != next) {
		tsk_rt(prev)->scheduled_on = NO_CPU;
		if (next)
			tsk_rt(next)->scheduled_on = cpu_id(state);
	}
	sched_state_task_picked();
	raw_spin_unlock(cpu_lock(state));

	if (next)
		TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
			   tsk_pfair(next)->release, cpu_cluster(state)->pfair_time, litmus_clock());
	else if (is_realtime(prev))
		TRACE("Becomes idle at %lu (%llu)\n", cpu_cluster(state)->pfair_time, litmus_clock());

	return next;
}
663
664static void pfair_task_new(struct task_struct * t, int on_rq, int running)
665{
666 unsigned long flags;
667 struct pfair_cluster* cluster;
668
669 TRACE("pfair: task new %d state:%d\n", t->pid, t->state);
670
671 cluster = tsk_pfair(t)->cluster;
672
673 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
674
675 prepare_release(t, cluster->pfair_time + 1);
676
677 t->rt_param.scheduled_on = NO_CPU;
678
679 if (running) {
680#ifdef CONFIG_RELEASE_MASTER
681 if (task_cpu(t) != cluster->pfair.release_master)
682#endif
683 t->rt_param.scheduled_on = task_cpu(t);
684 __add_ready(&cluster->pfair, t);
685 }
686
687 check_preempt(t);
688
689 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
690}
691
692static void pfair_task_wake_up(struct task_struct *t)
693{
694 unsigned long flags;
695 lt_t now;
696 int requeue = 0;
697 struct pfair_cluster* cluster;
698
699 cluster = tsk_pfair(t)->cluster;
700
701 TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
702 litmus_clock(), cur_release(t), cluster->pfair_time);
703
704 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
705
706 /* If a task blocks and wakes before its next job release,
707 * then it may resume if it is currently linked somewhere
708 * (as if it never blocked at all). Otherwise, we have a
709 * new sporadic job release.
710 */
711 requeue = tsk_rt(t)->flags == RT_F_REQUEUE;
712 now = litmus_clock();
713 if (lt_before(get_deadline(t), now)) {
714 TRACE_TASK(t, "sporadic release!\n");
715 release_at(t, now);
716 prepare_release(t, time2quanta(now, CEIL));
717 sched_trace_task_release(t);
718 }
719
720 /* only add to ready queue if the task isn't still linked somewhere */
721 if (requeue) {
722 TRACE_TASK(t, "requeueing required\n");
723 tsk_rt(t)->flags = RT_F_RUNNING;
724 __add_ready(&cluster->pfair, t);
725 }
726
727 check_preempt(t);
728
729 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
730 TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
731}
732
/* pfair_task_block - the plugin's task_block() callback.
 *
 * Only asserts that t is a real-time task and traces the event; no
 * scheduler state is changed here.
 */
static void pfair_task_block(struct task_struct *t)
{
	BUG_ON(!is_realtime(t));
	TRACE_TASK(t, "blocks at %llu, state:%d\n",
		   litmus_clock(), t->state);
}
739
740static void pfair_task_exit(struct task_struct * t)
741{
742 unsigned long flags;
743 struct pfair_cluster *cluster;
744
745 BUG_ON(!is_realtime(t));
746
747 cluster = tsk_pfair(t)->cluster;
748
749 /* Remote task from release or ready queue, and ensure
750 * that it is not the scheduled task for ANY CPU. We
751 * do this blanket check because occassionally when
752 * tasks exit while blocked, the task_cpu of the task
753 * might not be the same as the CPU that the PFAIR scheduler
754 * has chosen for it.
755 */
756 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
757
758 TRACE_TASK(t, "RIP, state:%d\n", t->state);
759 drop_all_references(t);
760
761 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
762
763 kfree(t->rt_param.pfair);
764 t->rt_param.pfair = NULL;
765}
766
767
768static void pfair_release_at(struct task_struct* task, lt_t start)
769{
770 unsigned long flags;
771 quanta_t release;
772
773 struct pfair_cluster *cluster;
774
775 cluster = tsk_pfair(task)->cluster;
776
777 BUG_ON(!is_realtime(task));
778
779 raw_spin_lock_irqsave(cluster_lock(cluster), flags);
780 release_at(task, start);
781 release = time2quanta(start, CEIL);
782
783 TRACE_TASK(task, "sys release at %lu\n", release);
784
785 drop_all_references(task);
786 prepare_release(task, release);
787 add_release(&cluster->pfair, task);
788
789 raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
790}
791
792static void init_subtask(struct subtask* sub, unsigned long i,
793 lt_t quanta, lt_t period)
794{
795 /* since i is zero-based, the formulas are shifted by one */
796 lt_t tmp;
797
798 /* release */
799 tmp = period * i;
800 do_div(tmp, quanta); /* floor */
801 sub->release = (quanta_t) tmp;
802
803 /* deadline */
804 tmp = period * (i + 1);
805 if (do_div(tmp, quanta)) /* ceil */
806 tmp++;
807 sub->deadline = (quanta_t) tmp;
808
809 /* next release */
810 tmp = period * (i + 1);
811 do_div(tmp, quanta); /* floor */
812 sub->overlap = sub->deadline - (quanta_t) tmp;
813
814 /* Group deadline.
815 * Based on the formula given in Uma's thesis.
816 */
817 if (2 * quanta >= period) {
818 /* heavy */
819 tmp = (sub->deadline - (i + 1)) * period;
820 if (period > quanta &&
821 do_div(tmp, (period - quanta))) /* ceil */
822 tmp++;
823 sub->group_deadline = (quanta_t) tmp;
824 } else
825 sub->group_deadline = 0;
826}
827
828static void dump_subtasks(struct task_struct* t)
829{
830 unsigned long i;
831 for (i = 0; i < t->rt_param.pfair->quanta; i++)
832 TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n",
833 i + 1,
834 t->rt_param.pfair->subtasks[i].release,
835 t->rt_param.pfair->subtasks[i].deadline,
836 t->rt_param.pfair->subtasks[i].overlap,
837 t->rt_param.pfair->subtasks[i].group_deadline);
838}
839
840static long pfair_admit_task(struct task_struct* t)
841{
842 lt_t quanta;
843 lt_t period;
844 s64 quantum_length = ktime_to_ns(tick_period);
845 struct pfair_param* param;
846 unsigned long i;
847
848 /* first check that the task is in the right cluster */
849 if (cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]) !=
850 cpu_cluster(pstate[task_cpu(t)]))
851 return -EINVAL;
852
853 if (get_rt_period(t) != get_rt_relative_deadline(t)) {
854 printk(KERN_INFO "%s: Admission rejected. "
855 "Only implicit deadlines are currently supported.\n",
856 litmus->plugin_name);
857 return -EINVAL;
858 }
859
860 /* Pfair is a tick-based method, so the time
861 * of interest is jiffies. Calculate tick-based
862 * times for everything.
863 * (Ceiling of exec cost, floor of period.)
864 */
865
866 quanta = get_exec_cost(t);
867 period = get_rt_period(t);
868
869 quanta = time2quanta(get_exec_cost(t), CEIL);
870
871 if (do_div(period, quantum_length))
872 printk(KERN_WARNING
873 "The period of %s/%d is not a multiple of %llu.\n",
874 t->comm, t->pid, (unsigned long long) quantum_length);
875
876 if (quanta == period) {
877 /* special case: task has weight 1.0 */
878 printk(KERN_INFO
879 "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n",
880 t->comm, t->pid, quanta, period);
881 quanta = 1;
882 period = 1;
883 }
884
885 param = kmalloc(sizeof(*param) +
886 quanta * sizeof(struct subtask), GFP_ATOMIC);
887
888 if (!param)
889 return -ENOMEM;
890
891 param->quanta = quanta;
892 param->cur = 0;
893 param->release = 0;
894 param->period = period;
895
896 param->cluster = cpu_cluster(pstate[tsk_rt(t)->task_params.cpu]);
897
898 for (i = 0; i < quanta; i++)
899 init_subtask(param->subtasks + i, i, quanta, period);
900
901 if (t->rt_param.pfair)
902 /* get rid of stale allocation */
903 kfree(t->rt_param.pfair);
904
905 t->rt_param.pfair = param;
906
907 /* spew out some debug info */
908 dump_subtasks(t);
909
910 return 0;
911}
912
913static void pfair_init_cluster(struct pfair_cluster* cluster)
914{
915 rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
916 bheap_init(&cluster->release_queue);
917 raw_spin_lock_init(&cluster->release_lock);
918 INIT_LIST_HEAD(&cluster->topology.cpus);
919}
920
921static void cleanup_clusters(void)
922{
923 int i;
924
925 if (num_pfair_clusters)
926 kfree(pfair_clusters);
927 pfair_clusters = NULL;
928 num_pfair_clusters = 0;
929
930 /* avoid stale pointers */
931 for (i = 0; i < num_online_cpus(); i++) {
932 pstate[i]->topology.cluster = NULL;
933 printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]),
934 pstate[i]->missed_updates, pstate[i]->missed_quanta);
935 }
936}
937
938static long pfair_activate_plugin(void)
939{
940 int err, i;
941 struct pfair_state* state;
942 struct pfair_cluster* cluster ;
943 quanta_t now;
944 int cluster_size;
945 struct cluster_cpu* cpus[NR_CPUS];
946 struct scheduling_cluster* clust[NR_CPUS];
947
948 cluster_size = get_cluster_size(pfair_cluster_level);
949
950 if (cluster_size <= 0 || num_online_cpus() % cluster_size != 0)
951 return -EINVAL;
952
953 num_pfair_clusters = num_online_cpus() / cluster_size;
954
955 pfair_clusters = kzalloc(num_pfair_clusters * sizeof(struct pfair_cluster), GFP_ATOMIC);
956 if (!pfair_clusters) {
957 num_pfair_clusters = 0;
958 printk(KERN_ERR "Could not allocate Pfair clusters!\n");
959 return -ENOMEM;
960 }
961
962 state = &__get_cpu_var(pfair_state);
963 now = current_quantum(state);
964 TRACE("Activating PFAIR at q=%lu\n", now);
965
966 for (i = 0; i < num_pfair_clusters; i++) {
967 cluster = &pfair_clusters[i];
968 pfair_init_cluster(cluster);
969 cluster->pfair_time = now;
970 clust[i] = &cluster->topology;
971#ifdef CONFIG_RELEASE_MASTER
972 cluster->pfair.release_master = atomic_read(&release_master_cpu);
973#endif
974 }
975
976 for (i = 0; i < num_online_cpus(); i++) {
977 state = &per_cpu(pfair_state, i);
978 state->cur_tick = now;
979 state->local_tick = now;
980 state->missed_quanta = 0;
981 state->missed_updates = 0;
982 state->offset = cpu_stagger_offset(i);
983 printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus());
984 cpus[i] = &state->topology;
985 }
986
987 err = assign_cpus_to_clusters(pfair_cluster_level, clust, num_pfair_clusters,
988 cpus, num_online_cpus());
989
990 if (err < 0)
991 cleanup_clusters();
992
993 return err;
994}
995
/* deactivate_plugin callback: tears down all clusters; always returns 0. */
static long pfair_deactivate_plugin(void)
{
	long status = 0;

	cleanup_clusters();

	return status;
}
1001
1002/* Plugin object */
1003static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
1004 .plugin_name = "PFAIR",
1005 .tick = pfair_tick,
1006 .task_new = pfair_task_new,
1007 .task_exit = pfair_task_exit,
1008 .schedule = pfair_schedule,
1009 .task_wake_up = pfair_task_wake_up,
1010 .task_block = pfair_task_block,
1011 .admit_task = pfair_admit_task,
1012 .release_at = pfair_release_at,
1013 .complete_job = complete_job,
1014 .activate_plugin = pfair_activate_plugin,
1015 .deactivate_plugin = pfair_deactivate_plugin,
1016};
1017
1018
1019static struct proc_dir_entry *cluster_file = NULL, *pfair_dir = NULL;
1020
1021static int __init init_pfair(void)
1022{
1023 int cpu, err, fs;
1024 struct pfair_state *state;
1025
1026 /*
1027 * initialize short_cut for per-cpu pfair state;
1028 * there may be a problem here if someone removes a cpu
1029 * while we are doing this initialization... and if cpus
1030 * are added / removed later... but we don't support CPU hotplug atm anyway.
1031 */
1032 pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL);
1033
1034 /* initialize CPU state */
1035 for (cpu = 0; cpu < num_online_cpus(); cpu++) {
1036 state = &per_cpu(pfair_state, cpu);
1037 state->topology.id = cpu;
1038 state->cur_tick = 0;
1039 state->local_tick = 0;
1040 state->linked = NULL;
1041 state->local = NULL;
1042 state->scheduled = NULL;
1043 state->missed_quanta = 0;
1044 state->offset = cpu_stagger_offset(cpu);
1045 pstate[cpu] = state;
1046 }
1047
1048 pfair_clusters = NULL;
1049 num_pfair_clusters = 0;
1050
1051 err = register_sched_plugin(&pfair_plugin);
1052 if (!err) {
1053 fs = make_plugin_proc_dir(&pfair_plugin, &pfair_dir);
1054 if (!fs)
1055 cluster_file = create_cluster_file(pfair_dir, &pfair_cluster_level);
1056 else
1057 printk(KERN_ERR "Could not allocate PFAIR procfs dir.\n");
1058 }
1059
1060 return err;
1061}
1062
1063static void __exit clean_pfair(void)
1064{
1065 kfree(pstate);
1066
1067 if (cluster_file)
1068 remove_proc_entry("cluster", pfair_dir);
1069 if (pfair_dir)
1070 remove_plugin_proc_dir(&pfair_plugin);
1071}
1072
1073module_init(init_pfair);
1074module_exit(clean_pfair);
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
new file mode 100644
index 00000000000..b1d5b4326a0
--- /dev/null
+++ b/litmus/sched_pfp.c
@@ -0,0 +1,1693 @@
1/*
2 * litmus/sched_pfp.c
3 *
4 * Implementation of partitioned fixed-priority scheduling.
5 * Based on PSN-EDF.
6 */
7
8#include <linux/percpu.h>
9#include <linux/sched.h>
10#include <linux/list.h>
11#include <linux/spinlock.h>
12#include <linux/module.h>
13
14#include <litmus/litmus.h>
15#include <litmus/wait.h>
16#include <litmus/jobs.h>
17#include <litmus/preempt.h>
18#include <litmus/fp_common.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22#include <litmus/budget.h>
23
24#include <linux/uaccess.h>
25
26
27typedef struct {
28 rt_domain_t domain;
29 struct fp_prio_queue ready_queue;
30 int cpu;
31 struct task_struct* scheduled; /* only RT tasks */
32/*
33 * scheduling lock slock
34 * protects the domain and serializes scheduling decisions
35 */
36#define slock domain.ready_lock
37
38} pfp_domain_t;
39
40DEFINE_PER_CPU(pfp_domain_t, pfp_domains);
41
42pfp_domain_t* pfp_doms[NR_CPUS];
43
44#define local_pfp (&__get_cpu_var(pfp_domains))
45#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain)
46#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu))
47#define task_dom(task) remote_dom(get_partition(task))
48#define task_pfp(task) remote_pfp(get_partition(task))
49
50/* we assume the lock is being held */
51static void preempt(pfp_domain_t *pfp)
52{
53 preempt_if_preemptable(pfp->scheduled, pfp->cpu);
54}
55
/* Map a task to the index it is queued under in the ready queue.
 *
 * With locking support compiled in, a task that inherits a priority is
 * ranked by the task it inherits from, and a priority-boosted task always
 * maps to index 0. Otherwise the task's own priority is returned.
 *
 * The guard must be CONFIG_LITMUS_LOCKING, the symbol every other
 * locking-dependent section of this file tests. With the previous
 * CONFIG_LOCKING spelling the block was never compiled, so inheritance
 * and boosting were silently ignored when picking the queue index.
 */
static unsigned int priority_index(struct task_struct* t)
{
#ifdef CONFIG_LITMUS_LOCKING
	if (unlikely(t->rt_param.inh_task))
		/* use effective priority */
		t = t->rt_param.inh_task;

	if (is_priority_boosted(t)) {
		/* zero is reserved for priority-boosted tasks */
		return 0;
	} else
#endif
		return get_priority(t);
}
70
71
72static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks)
73{
74 pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain);
75 unsigned long flags;
76 struct task_struct* t;
77 struct bheap_node* hn;
78
79 raw_spin_lock_irqsave(&pfp->slock, flags);
80
81 while (!bheap_empty(tasks)) {
82 hn = bheap_take(fp_ready_order, tasks);
83 t = bheap2task(hn);
84 TRACE_TASK(t, "released (part:%d prio:%d)\n",
85 get_partition(t), get_priority(t));
86 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
87 }
88
89 /* do we need to preempt? */
90 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) {
91 TRACE_CUR("preempted by new release\n");
92 preempt(pfp);
93 }
94
95 raw_spin_unlock_irqrestore(&pfp->slock, flags);
96}
97
98static void pfp_preempt_check(pfp_domain_t *pfp)
99{
100 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
101 preempt(pfp);
102}
103
104static void pfp_domain_init(pfp_domain_t* pfp,
105 int cpu)
106{
107 fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
108 pfp->cpu = cpu;
109 pfp->scheduled = NULL;
110 fp_prio_queue_init(&pfp->ready_queue);
111}
112
113static void requeue(struct task_struct* t, pfp_domain_t *pfp)
114{
115 if (t->state != TASK_RUNNING)
116 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
117
118 set_rt_flags(t, RT_F_RUNNING);
119 if (is_released(t, litmus_clock()))
120 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
121 else
122 add_release(&pfp->domain, t); /* it has got to wait */
123}
124
125static void job_completion(struct task_struct* t, int forced)
126{
127 sched_trace_task_completion(t,forced);
128 TRACE_TASK(t, "job_completion().\n");
129
130 set_rt_flags(t, RT_F_SLEEP);
131 prepare_for_next_period(t);
132}
133
134static void pfp_tick(struct task_struct *t)
135{
136 pfp_domain_t *pfp = local_pfp;
137
138 /* Check for inconsistency. We don't need the lock for this since
139 * ->scheduled is only changed in schedule, which obviously is not
140 * executing in parallel on this CPU
141 */
142 BUG_ON(is_realtime(t) && t != pfp->scheduled);
143
144 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
145 if (!is_np(t)) {
146 litmus_reschedule_local();
147 TRACE("pfp_scheduler_tick: "
148 "%d is preemptable "
149 " => FORCE_RESCHED\n", t->pid);
150 } else if (is_user_np(t)) {
151 TRACE("pfp_scheduler_tick: "
152 "%d is non-preemptable, "
153 "preemption delayed.\n", t->pid);
154 request_exit_np(t);
155 }
156 }
157}
158
159static struct task_struct* pfp_schedule(struct task_struct * prev)
160{
161 pfp_domain_t* pfp = local_pfp;
162 struct task_struct* next;
163
164 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;
165
166 raw_spin_lock(&pfp->slock);
167
168 /* sanity checking
169 * differently from gedf, when a task exits (dead)
170 * pfp->schedule may be null and prev _is_ realtime
171 */
172 BUG_ON(pfp->scheduled && pfp->scheduled != prev);
173 BUG_ON(pfp->scheduled && !is_realtime(prev));
174
175 /* (0) Determine state */
176 exists = pfp->scheduled != NULL;
177 blocks = exists && !is_running(pfp->scheduled);
178 out_of_time = exists &&
179 budget_enforced(pfp->scheduled) &&
180 budget_exhausted(pfp->scheduled);
181 np = exists && is_np(pfp->scheduled);
182 sleep = exists && get_rt_flags(pfp->scheduled) == RT_F_SLEEP;
183 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
184 preempt = migrate || fp_preemption_needed(&pfp->ready_queue, prev);
185
186 /* If we need to preempt do so.
187 * The following checks set resched to 1 in case of special
188 * circumstances.
189 */
190 resched = preempt;
191
192 /* If a task blocks we have no choice but to reschedule.
193 */
194 if (blocks)
195 resched = 1;
196
197 /* Request a sys_exit_np() call if we would like to preempt but cannot.
198 * Multiple calls to request_exit_np() don't hurt.
199 */
200 if (np && (out_of_time || preempt || sleep))
201 request_exit_np(pfp->scheduled);
202
203 /* Any task that is preemptable and either exhausts its execution
204 * budget or wants to sleep completes. We may have to reschedule after
205 * this.
206 */
207 if (!np && (out_of_time || sleep) && !blocks && !migrate) {
208 job_completion(pfp->scheduled, !sleep);
209 resched = 1;
210 }
211
212 /* The final scheduling decision. Do we need to switch for some reason?
213 * Switch if we are in RT mode and have no task or if we need to
214 * resched.
215 */
216 next = NULL;
217 if ((!np || blocks) && (resched || !exists)) {
218 /* When preempting a task that does not block, then
219 * re-insert it into either the ready queue or the
220 * release queue (if it completed). requeue() picks
221 * the appropriate queue.
222 */
223 if (pfp->scheduled && !blocks && !migrate)
224 requeue(pfp->scheduled, pfp);
225 next = fp_prio_take(&pfp->ready_queue);
226 } else
227 /* Only override Linux scheduler if we have a real-time task
228 * scheduled that needs to continue.
229 */
230 if (exists)
231 next = prev;
232
233 if (next) {
234 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
235 set_rt_flags(next, RT_F_RUNNING);
236 } else {
237 TRACE("becoming idle at %llu\n", litmus_clock());
238 }
239
240 pfp->scheduled = next;
241 sched_state_task_picked();
242 raw_spin_unlock(&pfp->slock);
243
244 return next;
245}
246
247#ifdef CONFIG_LITMUS_LOCKING
248
249/* prev is no longer scheduled --- see if it needs to migrate */
250static void pfp_finish_switch(struct task_struct *prev)
251{
252 pfp_domain_t *to;
253
254 if (is_realtime(prev) &&
255 is_running(prev) &&
256 get_partition(prev) != smp_processor_id()) {
257 TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
258 smp_processor_id(), get_partition(prev));
259
260 to = task_pfp(prev);
261
262 raw_spin_lock(&to->slock);
263
264 TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
265 requeue(prev, to);
266 if (fp_preemption_needed(&to->ready_queue, to->scheduled))
267 preempt(to);
268
269 raw_spin_unlock(&to->slock);
270
271 }
272}
273
274#endif
275
276/* Prepare a task for running in RT mode
277 */
278static void pfp_task_new(struct task_struct * t, int on_rq, int running)
279{
280 pfp_domain_t* pfp = task_pfp(t);
281 unsigned long flags;
282
283 TRACE_TASK(t, "P-FP: task new, cpu = %d\n",
284 t->rt_param.task_params.cpu);
285
286 /* setup job parameters */
287 release_at(t, litmus_clock());
288
289 /* The task should be running in the queue, otherwise signal
290 * code will try to wake it up with fatal consequences.
291 */
292 raw_spin_lock_irqsave(&pfp->slock, flags);
293 if (running) {
294 /* there shouldn't be anything else running at the time */
295 BUG_ON(pfp->scheduled);
296 pfp->scheduled = t;
297 } else {
298 requeue(t, pfp);
299 /* maybe we have to reschedule */
300 pfp_preempt_check(pfp);
301 }
302 raw_spin_unlock_irqrestore(&pfp->slock, flags);
303}
304
305static void pfp_task_wake_up(struct task_struct *task)
306{
307 unsigned long flags;
308 pfp_domain_t* pfp = task_pfp(task);
309 lt_t now;
310
311 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
312 raw_spin_lock_irqsave(&pfp->slock, flags);
313
314#ifdef CONFIG_LITMUS_LOCKING
315 /* Should only be queued when processing a fake-wake up due to a
316 * migration-related state change. */
317 if (unlikely(is_queued(task))) {
318 TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
319 goto out_unlock;
320 }
321#else
322 BUG_ON(is_queued(task));
323#endif
324 now = litmus_clock();
325 if (is_tardy(task, now)
326#ifdef CONFIG_LITMUS_LOCKING
327 /* We need to take suspensions because of semaphores into
328 * account! If a job resumes after being suspended due to acquiring
329 * a semaphore, it should never be treated as a new job release.
330 */
331 && !is_priority_boosted(task)
332#endif
333 ) {
334 /* new sporadic release */
335 release_at(task, now);
336 sched_trace_task_release(task);
337 }
338
339 /* Only add to ready queue if it is not the currently-scheduled
340 * task. This could be the case if a task was woken up concurrently
341 * on a remote CPU before the executing CPU got around to actually
342 * de-scheduling the task, i.e., wake_up() raced with schedule()
343 * and won. Also, don't requeue if it is still queued, which can
344 * happen under the DPCP due wake-ups racing with migrations.
345 */
346 if (pfp->scheduled != task) {
347 requeue(task, pfp);
348 pfp_preempt_check(pfp);
349 }
350
351out_unlock:
352 raw_spin_unlock_irqrestore(&pfp->slock, flags);
353 TRACE_TASK(task, "wake up done\n");
354}
355
356static void pfp_task_block(struct task_struct *t)
357{
358 /* only running tasks can block, thus t is in no queue */
359 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
360
361 BUG_ON(!is_realtime(t));
362
363 /* If this task blocked normally, it shouldn't be queued. The exception is
364 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
365 * This should only happen if the DPCP is being used.
366 */
367#ifdef CONFIG_LITMUS_LOCKING
368 if (unlikely(is_queued(t)))
369 TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
370#else
371 BUG_ON(is_queued(t));
372#endif
373}
374
375static void pfp_task_exit(struct task_struct * t)
376{
377 unsigned long flags;
378 pfp_domain_t* pfp = task_pfp(t);
379 rt_domain_t* dom;
380
381 raw_spin_lock_irqsave(&pfp->slock, flags);
382 if (is_queued(t)) {
383 BUG(); /* This currently doesn't work. */
384 /* dequeue */
385 dom = task_dom(t);
386 remove(dom, t);
387 }
388 if (pfp->scheduled == t) {
389 pfp->scheduled = NULL;
390 preempt(pfp);
391 }
392 TRACE_TASK(t, "RIP, now reschedule\n");
393
394 raw_spin_unlock_irqrestore(&pfp->slock, flags);
395}
396
397#ifdef CONFIG_LITMUS_LOCKING
398
399#include <litmus/fdso.h>
400#include <litmus/srp.h>
401
402static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
403{
404 BUG_ON(pfp->scheduled == t && is_queued(t));
405 if (is_queued(t))
406 fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
407}
408
409static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t,
410 struct task_struct* prio_inh)
411{
412 int requeue;
413
414 if (!t || t->rt_param.inh_task == prio_inh) {
415 /* no update required */
416 if (t)
417 TRACE_TASK(t, "no prio-inh update required\n");
418 return;
419 }
420
421 requeue = is_queued(t);
422 TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue);
423
424 if (requeue)
425 /* first remove */
426 fp_dequeue(pfp, t);
427
428 t->rt_param.inh_task = prio_inh;
429
430 if (requeue)
431 /* add again to the right queue */
432 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
433}
434
435static int effective_agent_priority(int prio)
436{
437 /* make sure agents have higher priority */
438 return prio - LITMUS_MAX_PRIORITY;
439}
440
441static lt_t prio_point(int eprio)
442{
443 /* make sure we have non-negative prio points */
444 return eprio + LITMUS_MAX_PRIORITY;
445}
446
447static int prio_from_point(lt_t prio_point)
448{
449 return ((int) prio_point) - LITMUS_MAX_PRIORITY;
450}
451
452static void boost_priority(struct task_struct* t, lt_t priority_point)
453{
454 unsigned long flags;
455 pfp_domain_t* pfp = task_pfp(t);
456
457 raw_spin_lock_irqsave(&pfp->slock, flags);
458
459
460 TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());
461
462 tsk_rt(t)->priority_boosted = 1;
463 /* tie-break by protocol-specific priority point */
464 tsk_rt(t)->boost_start_time = priority_point;
465
466 if (pfp->scheduled != t) {
467 /* holder may be queued: first stop queue changes */
468 raw_spin_lock(&pfp->domain.release_lock);
469 if (is_queued(t) &&
470 /* If it is queued, then we need to re-order. */
471 bheap_decrease(fp_ready_order, tsk_rt(t)->heap_node) &&
472 /* If we bubbled to the top, then we need to check for preemptions. */
473 fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
474 preempt(pfp);
475 raw_spin_unlock(&pfp->domain.release_lock);
476 } /* else: nothing to do since the job is not queued while scheduled */
477
478 raw_spin_unlock_irqrestore(&pfp->slock, flags);
479}
480
481static void unboost_priority(struct task_struct* t)
482{
483 unsigned long flags;
484 pfp_domain_t* pfp = task_pfp(t);
485 lt_t now;
486
487 raw_spin_lock_irqsave(&pfp->slock, flags);
488 now = litmus_clock();
489
490 /* assumption: this only happens when the job is scheduled */
491 BUG_ON(pfp->scheduled != t);
492
493 TRACE_TASK(t, "priority restored at %llu\n", now);
494
495 /* priority boosted jobs must be scheduled */
496 BUG_ON(pfp->scheduled != t);
497
498 tsk_rt(t)->priority_boosted = 0;
499 tsk_rt(t)->boost_start_time = 0;
500
501 /* check if this changes anything */
502 if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
503 preempt(pfp);
504
505 raw_spin_unlock_irqrestore(&pfp->slock, flags);
506}
507
508/* ******************** SRP support ************************ */
509
/* SRP hook: the SRP priority of a task is simply its priority. */
static unsigned int pfp_get_srp_prio(struct task_struct* t)
{
	unsigned int prio = get_priority(t);

	return prio;
}
514
515/* ******************** FMLP support ********************** */
516
517struct fmlp_semaphore {
518 struct litmus_lock litmus_lock;
519
520 /* current resource holder */
521 struct task_struct *owner;
522
523 /* FIFO queue of waiting tasks */
524 wait_queue_head_t wait;
525};
526
527static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
528{
529 return container_of(lock, struct fmlp_semaphore, litmus_lock);
530}
531int pfp_fmlp_lock(struct litmus_lock* l)
532{
533 struct task_struct* t = current;
534 struct fmlp_semaphore *sem = fmlp_from_lock(l);
535 wait_queue_t wait;
536 unsigned long flags;
537 lt_t time_of_request;
538
539 if (!is_realtime(t))
540 return -EPERM;
541
542 spin_lock_irqsave(&sem->wait.lock, flags);
543
544 /* tie-break by this point in time */
545 time_of_request = litmus_clock();
546
547 /* Priority-boost ourself *before* we suspend so that
548 * our priority is boosted when we resume. */
549 boost_priority(t, time_of_request);
550
551 if (sem->owner) {
552 /* resource is not free => must suspend and wait */
553
554 init_waitqueue_entry(&wait, t);
555
556 /* FIXME: interruptible would be nice some day */
557 set_task_state(t, TASK_UNINTERRUPTIBLE);
558
559 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
560
561 TS_LOCK_SUSPEND;
562
563 /* release lock before sleeping */
564 spin_unlock_irqrestore(&sem->wait.lock, flags);
565
566 /* We depend on the FIFO order. Thus, we don't need to recheck
567 * when we wake up; we are guaranteed to have the lock since
568 * there is only one wake up per release.
569 */
570
571 schedule();
572
573 TS_LOCK_RESUME;
574
575 /* Since we hold the lock, no other task will change
576 * ->owner. We can thus check it without acquiring the spin
577 * lock. */
578 BUG_ON(sem->owner != t);
579 } else {
580 /* it's ours now */
581 sem->owner = t;
582
583 spin_unlock_irqrestore(&sem->wait.lock, flags);
584 }
585
586 return 0;
587}
588
589int pfp_fmlp_unlock(struct litmus_lock* l)
590{
591 struct task_struct *t = current, *next;
592 struct fmlp_semaphore *sem = fmlp_from_lock(l);
593 unsigned long flags;
594 int err = 0;
595
596 spin_lock_irqsave(&sem->wait.lock, flags);
597
598 if (sem->owner != t) {
599 err = -EINVAL;
600 goto out;
601 }
602
603 /* we lose the benefit of priority boosting */
604
605 unboost_priority(t);
606
607 /* check if there are jobs waiting for this resource */
608 next = __waitqueue_remove_first(&sem->wait);
609 if (next) {
610 /* next becomes the resouce holder */
611 sem->owner = next;
612
613 /* Wake up next. The waiting job is already priority-boosted. */
614 wake_up_process(next);
615 } else
616 /* resource becomes available */
617 sem->owner = NULL;
618
619out:
620 spin_unlock_irqrestore(&sem->wait.lock, flags);
621 return err;
622}
623
624int pfp_fmlp_close(struct litmus_lock* l)
625{
626 struct task_struct *t = current;
627 struct fmlp_semaphore *sem = fmlp_from_lock(l);
628 unsigned long flags;
629
630 int owner;
631
632 spin_lock_irqsave(&sem->wait.lock, flags);
633
634 owner = sem->owner == t;
635
636 spin_unlock_irqrestore(&sem->wait.lock, flags);
637
638 if (owner)
639 pfp_fmlp_unlock(l);
640
641 return 0;
642}
643
/* Deallocate handler: frees the FMLP semaphore that embeds @lock. */
void pfp_fmlp_free(struct litmus_lock* lock)
{
	struct fmlp_semaphore *sem = fmlp_from_lock(lock);

	kfree(sem);
}
648
649static struct litmus_lock_ops pfp_fmlp_lock_ops = {
650 .close = pfp_fmlp_close,
651 .lock = pfp_fmlp_lock,
652 .unlock = pfp_fmlp_unlock,
653 .deallocate = pfp_fmlp_free,
654};
655
656static struct litmus_lock* pfp_new_fmlp(void)
657{
658 struct fmlp_semaphore* sem;
659
660 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
661 if (!sem)
662 return NULL;
663
664 sem->owner = NULL;
665 init_waitqueue_head(&sem->wait);
666 sem->litmus_lock.ops = &pfp_fmlp_lock_ops;
667
668 return &sem->litmus_lock;
669}
670
671/* ******************** MPCP support ********************** */
672
673struct mpcp_semaphore {
674 struct litmus_lock litmus_lock;
675
676 /* current resource holder */
677 struct task_struct *owner;
678
679 /* priority queue of waiting tasks */
680 wait_queue_head_t wait;
681
682 /* priority ceiling per cpu */
683 unsigned int prio_ceiling[NR_CPUS];
684
685 /* should jobs spin "virtually" for this resource? */
686 int vspin;
687};
688
689#define OMEGA_CEILING UINT_MAX
690
691/* Since jobs spin "virtually" while waiting to acquire a lock,
692 * they first must aquire a local per-cpu resource.
693 */
694static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
695static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
696
697/* called with preemptions off <=> no local modifications */
698static void mpcp_vspin_enter(void)
699{
700 struct task_struct* t = current;
701
702 while (1) {
703 if (__get_cpu_var(mpcpvs_vspin) == NULL) {
704 /* good, we get to issue our request */
705 __get_cpu_var(mpcpvs_vspin) = t;
706 break;
707 } else {
708 /* some job is spinning => enqueue in request queue */
709 prio_wait_queue_t wait;
710 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
711 unsigned long flags;
712
713 /* ordered by regular priority */
714 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
715
716 spin_lock_irqsave(&vspin->lock, flags);
717
718 set_task_state(t, TASK_UNINTERRUPTIBLE);
719
720 __add_wait_queue_prio_exclusive(vspin, &wait);
721
722 spin_unlock_irqrestore(&vspin->lock, flags);
723
724 TS_LOCK_SUSPEND;
725
726 preempt_enable_no_resched();
727
728 schedule();
729
730 preempt_disable();
731
732 TS_LOCK_RESUME;
733 /* Recheck if we got it --- some higher-priority process might
734 * have swooped in. */
735 }
736 }
737 /* ok, now it is ours */
738}
739
740/* called with preemptions off */
741static void mpcp_vspin_exit(void)
742{
743 struct task_struct* t = current, *next;
744 unsigned long flags;
745 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
746
747 BUG_ON(__get_cpu_var(mpcpvs_vspin) != t);
748
749 /* no spinning job */
750 __get_cpu_var(mpcpvs_vspin) = NULL;
751
752 /* see if anyone is waiting for us to stop "spinning" */
753 spin_lock_irqsave(&vspin->lock, flags);
754 next = __waitqueue_remove_first(vspin);
755
756 if (next)
757 wake_up_process(next);
758
759 spin_unlock_irqrestore(&vspin->lock, flags);
760}
761
762static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
763{
764 return container_of(lock, struct mpcp_semaphore, litmus_lock);
765}
766
767int pfp_mpcp_lock(struct litmus_lock* l)
768{
769 struct task_struct* t = current;
770 struct mpcp_semaphore *sem = mpcp_from_lock(l);
771 prio_wait_queue_t wait;
772 unsigned long flags;
773
774 if (!is_realtime(t))
775 return -EPERM;
776
777 preempt_disable();
778
779 if (sem->vspin)
780 mpcp_vspin_enter();
781
782 /* Priority-boost ourself *before* we suspend so that
783 * our priority is boosted when we resume. Use the priority
784 * ceiling for the local partition. */
785 boost_priority(t, sem->prio_ceiling[get_partition(t)]);
786
787 spin_lock_irqsave(&sem->wait.lock, flags);
788
789 preempt_enable_no_resched();
790
791 if (sem->owner) {
792 /* resource is not free => must suspend and wait */
793
794 /* ordered by regular priority */
795 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
796
797 /* FIXME: interruptible would be nice some day */
798 set_task_state(t, TASK_UNINTERRUPTIBLE);
799
800 __add_wait_queue_prio_exclusive(&sem->wait, &wait);
801
802 TS_LOCK_SUSPEND;
803
804 /* release lock before sleeping */
805 spin_unlock_irqrestore(&sem->wait.lock, flags);
806
807 /* We depend on the FIFO order. Thus, we don't need to recheck
808 * when we wake up; we are guaranteed to have the lock since
809 * there is only one wake up per release.
810 */
811
812 schedule();
813
814 TS_LOCK_RESUME;
815
816 /* Since we hold the lock, no other task will change
817 * ->owner. We can thus check it without acquiring the spin
818 * lock. */
819 BUG_ON(sem->owner != t);
820 } else {
821 /* it's ours now */
822 sem->owner = t;
823
824 spin_unlock_irqrestore(&sem->wait.lock, flags);
825 }
826
827 return 0;
828}
829
830int pfp_mpcp_unlock(struct litmus_lock* l)
831{
832 struct task_struct *t = current, *next;
833 struct mpcp_semaphore *sem = mpcp_from_lock(l);
834 unsigned long flags;
835 int err = 0;
836
837 spin_lock_irqsave(&sem->wait.lock, flags);
838
839 if (sem->owner != t) {
840 err = -EINVAL;
841 goto out;
842 }
843
844 /* we lose the benefit of priority boosting */
845
846 unboost_priority(t);
847
848 /* check if there are jobs waiting for this resource */
849 next = __waitqueue_remove_first(&sem->wait);
850 if (next) {
851 /* next becomes the resouce holder */
852 sem->owner = next;
853
854 /* Wake up next. The waiting job is already priority-boosted. */
855 wake_up_process(next);
856 } else
857 /* resource becomes available */
858 sem->owner = NULL;
859
860out:
861 spin_unlock_irqrestore(&sem->wait.lock, flags);
862
863 if (sem->vspin && err == 0) {
864 preempt_disable();
865 mpcp_vspin_exit();
866 preempt_enable();
867 }
868
869 return err;
870}
871
872int pfp_mpcp_open(struct litmus_lock* l, void* config)
873{
874 struct task_struct *t = current;
875 struct mpcp_semaphore *sem = mpcp_from_lock(l);
876 int cpu, local_cpu;
877 unsigned long flags;
878
879 if (!is_realtime(t))
880 /* we need to know the real-time priority */
881 return -EPERM;
882
883 local_cpu = get_partition(t);
884
885 spin_lock_irqsave(&sem->wait.lock, flags);
886
887 for (cpu = 0; cpu < NR_CPUS; cpu++)
888 if (cpu != local_cpu)
889 {
890 sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu],
891 get_priority(t));
892 TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n",
893 sem, sem->prio_ceiling[cpu], cpu);
894 }
895
896 spin_unlock_irqrestore(&sem->wait.lock, flags);
897
898 return 0;
899}
900
901int pfp_mpcp_close(struct litmus_lock* l)
902{
903 struct task_struct *t = current;
904 struct mpcp_semaphore *sem = mpcp_from_lock(l);
905 unsigned long flags;
906
907 int owner;
908
909 spin_lock_irqsave(&sem->wait.lock, flags);
910
911 owner = sem->owner == t;
912
913 spin_unlock_irqrestore(&sem->wait.lock, flags);
914
915 if (owner)
916 pfp_mpcp_unlock(l);
917
918 return 0;
919}
920
/* Deallocate an MPCP semaphore previously created by pfp_new_mpcp(). */
void pfp_mpcp_free(struct litmus_lock* lock)
{
	struct mpcp_semaphore *mpcp = mpcp_from_lock(lock);

	kfree(mpcp);
}
925
926static struct litmus_lock_ops pfp_mpcp_lock_ops = {
927 .close = pfp_mpcp_close,
928 .lock = pfp_mpcp_lock,
929 .open = pfp_mpcp_open,
930 .unlock = pfp_mpcp_unlock,
931 .deallocate = pfp_mpcp_free,
932};
933
934static struct litmus_lock* pfp_new_mpcp(int vspin)
935{
936 struct mpcp_semaphore* sem;
937 int cpu;
938
939 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
940 if (!sem)
941 return NULL;
942
943 sem->owner = NULL;
944 init_waitqueue_head(&sem->wait);
945 sem->litmus_lock.ops = &pfp_mpcp_lock_ops;
946
947 for (cpu = 0; cpu < NR_CPUS; cpu++)
948 sem->prio_ceiling[cpu] = OMEGA_CEILING;
949
950 /* mark as virtual spinning */
951 sem->vspin = vspin;
952
953 return &sem->litmus_lock;
954}
955
956
957/* ******************** PCP support ********************** */
958
959
/* State of one PCP semaphore. Each semaphore is bound to a single
 * processor (on_cpu) and participates in that processor's ceiling list
 * while it is held. */
struct pcp_semaphore {
	/* generic lock header; pcp_from_lock() maps it back to this struct */
	struct litmus_lock litmus_lock;

	/* link in the per-CPU system_ceiling list; inserted by
	 * pcp_add_ceiling() and removed by pcp_lower_ceiling() */
	struct list_head ceiling;

	/* current resource holder */
	struct task_struct *owner;

	/* priority ceiling --- can be negative due to DPCP support */
	int prio_ceiling;

	/* on which processor is this PCP semaphore allocated? */
	int on_cpu;
};
974
975static inline struct pcp_semaphore* pcp_from_lock(struct litmus_lock* lock)
976{
977 return container_of(lock, struct pcp_semaphore, litmus_lock);
978}
979
980
/* Per-processor PCP bookkeeping (one instance per CPU, see pcp_state). */
struct pcp_state {
	/* list of held PCP semaphores on this CPU, linked via
	 * pcp_semaphore.ceiling; pcp_add_ceiling() keeps it ordered by
	 * ascending prio_ceiling value */
	struct list_head system_ceiling;

	/* highest-priority waiting task */
	struct task_struct* hp_waiter;

	/* list of jobs waiting to get past the system ceiling */
	wait_queue_head_t ceiling_blocked;
};
990
991static void pcp_init_state(struct pcp_state* s)
992{
993 INIT_LIST_HEAD(&s->system_ceiling);
994 s->hp_waiter = NULL;
995 init_waitqueue_head(&s->ceiling_blocked);
996}
997
/* One PCP state per processor; (re)initialized by pcp_init_state() for
 * every online CPU when the plugin is activated. */
static DEFINE_PER_CPU(struct pcp_state, pcp_state);
999
1000/* assumes preemptions are off */
1001static struct pcp_semaphore* pcp_get_ceiling(void)
1002{
1003 struct list_head* top = __get_cpu_var(pcp_state).system_ceiling.next;
1004
1005 if (top)
1006 return list_entry(top, struct pcp_semaphore, ceiling);
1007 else
1008 return NULL;
1009}
1010
1011/* assumes preempt off */
1012static void pcp_add_ceiling(struct pcp_semaphore* sem)
1013{
1014 struct list_head *pos;
1015 struct list_head *in_use = &__get_cpu_var(pcp_state).system_ceiling;
1016 struct pcp_semaphore* held;
1017
1018 BUG_ON(sem->on_cpu != smp_processor_id());
1019 BUG_ON(in_list(&sem->ceiling));
1020
1021 list_for_each(pos, in_use) {
1022 held = list_entry(pos, struct pcp_semaphore, ceiling);
1023 if (held->prio_ceiling >= sem->prio_ceiling) {
1024 __list_add(&sem->ceiling, pos->prev, pos);
1025 return;
1026 }
1027 }
1028
1029 /* we hit the end of the list */
1030
1031 list_add_tail(&sem->ceiling, in_use);
1032}
1033
1034/* assumes preempt off */
1035static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling,
1036 struct task_struct* task,
1037 int effective_prio)
1038{
1039 return ceiling == NULL ||
1040 ceiling->prio_ceiling > effective_prio ||
1041 ceiling->owner == task;
1042}
1043
1044/* assumes preempt off */
1045static void pcp_priority_inheritance(void)
1046{
1047 unsigned long flags;
1048 pfp_domain_t* pfp = local_pfp;
1049
1050 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1051 struct task_struct *blocker, *blocked;
1052
1053 blocker = ceiling ? ceiling->owner : NULL;
1054 blocked = __get_cpu_var(pcp_state).hp_waiter;
1055
1056 raw_spin_lock_irqsave(&pfp->slock, flags);
1057
1058 /* Current is no longer inheriting anything by default. This should be
1059 * the currently scheduled job, and hence not currently queued. */
1060 BUG_ON(current != pfp->scheduled);
1061
1062 fp_set_prio_inh(pfp, current, NULL);
1063 fp_set_prio_inh(pfp, blocked, NULL);
1064 fp_set_prio_inh(pfp, blocker, NULL);
1065
1066
1067 /* Let blocking job inherit priority of blocked job, if required. */
1068 if (blocker && blocked &&
1069 fp_higher_prio(blocked, blocker)) {
1070 TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
1071 blocked->comm, blocked->pid,
1072 get_priority(blocker), get_priority(blocked));
1073 fp_set_prio_inh(pfp, blocker, blocked);
1074 }
1075
1076 /* check if anything changed */
1077 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
1078 preempt(pfp);
1079
1080 raw_spin_unlock_irqrestore(&pfp->slock, flags);
1081}
1082
1083/* called with preemptions off */
1084static void pcp_raise_ceiling(struct pcp_semaphore* sem,
1085 int effective_prio)
1086{
1087 struct task_struct* t = current;
1088 struct pcp_semaphore* ceiling;
1089 prio_wait_queue_t wait;
1090 unsigned int waiting_higher_prio;
1091
1092 do {
1093 ceiling = pcp_get_ceiling();
1094 if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
1095 break;
1096
1097 TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
1098 sem, ceiling->owner->comm, ceiling->owner->pid);
1099
1100 /* we need to wait until the ceiling is lowered */
1101
1102 /* enqueue in priority order */
1103 init_prio_waitqueue_entry(&wait, t, prio_point(effective_prio));
1104 set_task_state(t, TASK_UNINTERRUPTIBLE);
1105 waiting_higher_prio = add_wait_queue_prio_exclusive(
1106 &__get_cpu_var(pcp_state).ceiling_blocked, &wait);
1107
1108 if (waiting_higher_prio == 0) {
1109 TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");
1110
1111 /* we are the new highest-priority waiting job
1112 * => update inheritance */
1113 __get_cpu_var(pcp_state).hp_waiter = t;
1114 pcp_priority_inheritance();
1115 }
1116
1117 TS_LOCK_SUSPEND;
1118
1119 preempt_enable_no_resched();
1120 schedule();
1121 preempt_disable();
1122
1123 /* pcp_resume_unblocked() removed us from wait queue */
1124
1125 TS_LOCK_RESUME;
1126 } while(1);
1127
1128 TRACE_CUR("PCP got the ceiling and sem %p\n", sem);
1129
1130 /* We are good to go. The semaphore should be available. */
1131 BUG_ON(sem->owner != NULL);
1132
1133 sem->owner = t;
1134
1135 pcp_add_ceiling(sem);
1136}
1137
1138static void pcp_resume_unblocked(void)
1139{
1140 wait_queue_head_t *blocked = &__get_cpu_var(pcp_state).ceiling_blocked;
1141 unsigned long flags;
1142 prio_wait_queue_t* q;
1143 struct task_struct* t = NULL;
1144
1145 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1146
1147 spin_lock_irqsave(&blocked->lock, flags);
1148
1149 while (waitqueue_active(blocked)) {
1150 /* check first == highest-priority waiting job */
1151 q = list_entry(blocked->task_list.next,
1152 prio_wait_queue_t, wq.task_list);
1153 t = (struct task_struct*) q->wq.private;
1154
1155 /* can it proceed now? => let it go */
1156 if (pcp_exceeds_ceiling(ceiling, t,
1157 prio_from_point(q->priority))) {
1158 __remove_wait_queue(blocked, &q->wq);
1159 wake_up_process(t);
1160 } else {
1161 /* We are done. Update highest-priority waiter. */
1162 __get_cpu_var(pcp_state).hp_waiter = t;
1163 goto out;
1164 }
1165 }
1166 /* If we get here, then there are no more waiting
1167 * jobs. */
1168 __get_cpu_var(pcp_state).hp_waiter = NULL;
1169out:
1170 spin_unlock_irqrestore(&blocked->lock, flags);
1171}
1172
/* Release sem, which must be held by current on this processor, and let
 * any jobs that are no longer ceiling-blocked proceed.
 *
 * assumes preempt off
 */
static void pcp_lower_ceiling(struct pcp_semaphore* sem)
{
	/* sanity checks: sem is held, by us, on the local CPU */
	BUG_ON(!in_list(&sem->ceiling));
	BUG_ON(sem->owner != current);
	BUG_ON(sem->on_cpu != smp_processor_id());

	/* remove from ceiling list */
	list_del(&sem->ceiling);

	/* release */
	sem->owner = NULL;

	TRACE_CUR("PCP released sem %p\n", sem);

	/* Wake up all ceiling-blocked jobs that now pass the ceiling. */
	pcp_resume_unblocked();

	/* the set of waiters and the ceiling owner may both have changed */
	pcp_priority_inheritance();
}
1193
1194static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
1195 int effective_prio)
1196{
1197 /* This needs to be synchronized on something.
1198 * Might as well use waitqueue lock for the processor.
1199 * We assume this happens only before the task set starts execution,
1200 * (i.e., during initialization), but it may happen on multiple processors
1201 * at the same time.
1202 */
1203 unsigned long flags;
1204
1205 struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu);
1206
1207 spin_lock_irqsave(&s->ceiling_blocked.lock, flags);
1208
1209 sem->prio_ceiling = min(sem->prio_ceiling, effective_prio);
1210
1211 spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags);
1212}
1213
1214static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
1215{
1216 sem->owner = NULL;
1217 INIT_LIST_HEAD(&sem->ceiling);
1218 sem->prio_ceiling = INT_MAX;
1219 sem->on_cpu = cpu;
1220}
1221
1222int pfp_pcp_lock(struct litmus_lock* l)
1223{
1224 struct task_struct* t = current;
1225 struct pcp_semaphore *sem = pcp_from_lock(l);
1226
1227 int eprio = effective_agent_priority(get_priority(t));
1228 int from = get_partition(t);
1229 int to = sem->on_cpu;
1230
1231 if (!is_realtime(t) || from != to)
1232 return -EPERM;
1233
1234 preempt_disable();
1235
1236 pcp_raise_ceiling(sem, eprio);
1237
1238 preempt_enable();
1239
1240 return 0;
1241}
1242
1243int pfp_pcp_unlock(struct litmus_lock* l)
1244{
1245 struct task_struct *t = current;
1246 struct pcp_semaphore *sem = pcp_from_lock(l);
1247
1248 int err = 0;
1249
1250 preempt_disable();
1251
1252 if (sem->on_cpu != smp_processor_id() || sem->owner != t) {
1253 err = -EINVAL;
1254 goto out;
1255 }
1256
1257 /* give it back */
1258 pcp_lower_ceiling(sem);
1259
1260out:
1261 preempt_enable();
1262
1263 return err;
1264}
1265
1266int pfp_pcp_open(struct litmus_lock* l, void* __user config)
1267{
1268 struct task_struct *t = current;
1269 struct pcp_semaphore *sem = pcp_from_lock(l);
1270
1271 int cpu, eprio;
1272
1273 if (!is_realtime(t))
1274 /* we need to know the real-time priority */
1275 return -EPERM;
1276
1277 if (get_user(cpu, (int*) config))
1278 return -EFAULT;
1279
1280 /* make sure the resource location matches */
1281 if (cpu != sem->on_cpu)
1282 return -EINVAL;
1283
1284 eprio = effective_agent_priority(get_priority(t));
1285
1286 pcp_update_prio_ceiling(sem, eprio);
1287
1288 return 0;
1289}
1290
1291int pfp_pcp_close(struct litmus_lock* l)
1292{
1293 struct task_struct *t = current;
1294 struct pcp_semaphore *sem = pcp_from_lock(l);
1295
1296 int owner = 0;
1297
1298 preempt_disable();
1299
1300 if (sem->on_cpu == smp_processor_id())
1301 owner = sem->owner == t;
1302
1303 preempt_enable();
1304
1305 if (owner)
1306 pfp_pcp_unlock(l);
1307
1308 return 0;
1309}
1310
/* Deallocate a PCP semaphore previously created by pfp_new_pcp(). */
void pfp_pcp_free(struct litmus_lock* lock)
{
	struct pcp_semaphore *pcp = pcp_from_lock(lock);

	kfree(pcp);
}
1315
1316
1317static struct litmus_lock_ops pfp_pcp_lock_ops = {
1318 .close = pfp_pcp_close,
1319 .lock = pfp_pcp_lock,
1320 .open = pfp_pcp_open,
1321 .unlock = pfp_pcp_unlock,
1322 .deallocate = pfp_pcp_free,
1323};
1324
1325
1326static struct litmus_lock* pfp_new_pcp(int on_cpu)
1327{
1328 struct pcp_semaphore* sem;
1329
1330 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1331 if (!sem)
1332 return NULL;
1333
1334 sem->litmus_lock.ops = &pfp_pcp_lock_ops;
1335 pcp_init_semaphore(sem, on_cpu);
1336
1337 return &sem->litmus_lock;
1338}
1339
1340/* ******************** DPCP support ********************** */
1341
/* A DPCP semaphore: a PCP semaphore that tasks access by migrating to the
 * processor it is allocated on. */
struct dpcp_semaphore {
	/* generic lock header; dpcp_from_lock() maps it back to this struct */
	struct litmus_lock litmus_lock;
	/* the underlying PCP semaphore; pcp.on_cpu is the resource's CPU */
	struct pcp_semaphore pcp;
	/* partition of the current holder before it migrated; set by
	 * pfp_dpcp_lock() and used by pfp_dpcp_unlock() to migrate back */
	int owner_cpu;
};
1347
1348static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
1349{
1350 return container_of(lock, struct dpcp_semaphore, litmus_lock);
1351}
1352
1353/* called with preemptions disabled */
1354static void pfp_migrate_to(int target_cpu)
1355{
1356 struct task_struct* t = current;
1357 pfp_domain_t *from;
1358
1359 if (get_partition(t) == target_cpu)
1360 return;
1361
1362 /* make sure target_cpu makes sense */
1363 BUG_ON(!cpu_online(target_cpu));
1364
1365 local_irq_disable();
1366
1367 /* scheduled task should not be in any ready or release queue */
1368 BUG_ON(is_queued(t));
1369
1370 /* lock both pfp domains in order of address */
1371 from = task_pfp(t);
1372
1373 raw_spin_lock(&from->slock);
1374
1375 /* switch partitions */
1376 tsk_rt(t)->task_params.cpu = target_cpu;
1377
1378 raw_spin_unlock(&from->slock);
1379
1380 /* Don't trace scheduler costs as part of
1381 * locking overhead. Scheduling costs are accounted for
1382 * explicitly. */
1383 TS_LOCK_SUSPEND;
1384
1385 local_irq_enable();
1386 preempt_enable_no_resched();
1387
1388 /* deschedule to be migrated */
1389 schedule();
1390
1391 /* we are now on the target processor */
1392 preempt_disable();
1393
1394 /* start recording costs again */
1395 TS_LOCK_RESUME;
1396
1397 BUG_ON(smp_processor_id() != target_cpu);
1398}
1399
1400int pfp_dpcp_lock(struct litmus_lock* l)
1401{
1402 struct task_struct* t = current;
1403 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1404 int eprio = effective_agent_priority(get_priority(t));
1405 int from = get_partition(t);
1406 int to = sem->pcp.on_cpu;
1407
1408 if (!is_realtime(t))
1409 return -EPERM;
1410
1411 preempt_disable();
1412
1413 /* Priority-boost ourself *before* we suspend so that
1414 * our priority is boosted when we resume. */
1415
1416 boost_priority(t, get_priority(t));
1417
1418 pfp_migrate_to(to);
1419
1420 pcp_raise_ceiling(&sem->pcp, eprio);
1421
1422 /* yep, we got it => execute request */
1423 sem->owner_cpu = from;
1424
1425 preempt_enable();
1426
1427 return 0;
1428}
1429
1430int pfp_dpcp_unlock(struct litmus_lock* l)
1431{
1432 struct task_struct *t = current;
1433 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1434 int err = 0;
1435 int home;
1436
1437 preempt_disable();
1438
1439 if (sem->pcp.on_cpu != smp_processor_id() || sem->pcp.owner != t) {
1440 err = -EINVAL;
1441 goto out;
1442 }
1443
1444 home = sem->owner_cpu;
1445
1446 /* give it back */
1447 pcp_lower_ceiling(&sem->pcp);
1448
1449 /* we lose the benefit of priority boosting */
1450 unboost_priority(t);
1451
1452 pfp_migrate_to(home);
1453
1454out:
1455 preempt_enable();
1456
1457 return err;
1458}
1459
1460int pfp_dpcp_open(struct litmus_lock* l, void* __user config)
1461{
1462 struct task_struct *t = current;
1463 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1464 int cpu, eprio;
1465
1466 if (!is_realtime(t))
1467 /* we need to know the real-time priority */
1468 return -EPERM;
1469
1470 if (get_user(cpu, (int*) config))
1471 return -EFAULT;
1472
1473 /* make sure the resource location matches */
1474 if (cpu != sem->pcp.on_cpu)
1475 return -EINVAL;
1476
1477 eprio = effective_agent_priority(get_priority(t));
1478
1479 pcp_update_prio_ceiling(&sem->pcp, eprio);
1480
1481 return 0;
1482}
1483
1484int pfp_dpcp_close(struct litmus_lock* l)
1485{
1486 struct task_struct *t = current;
1487 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1488 int owner = 0;
1489
1490 preempt_disable();
1491
1492 if (sem->pcp.on_cpu == smp_processor_id())
1493 owner = sem->pcp.owner == t;
1494
1495 preempt_enable();
1496
1497 if (owner)
1498 pfp_dpcp_unlock(l);
1499
1500 return 0;
1501}
1502
/* Deallocate a DPCP semaphore previously created by pfp_new_dpcp(). */
void pfp_dpcp_free(struct litmus_lock* lock)
{
	struct dpcp_semaphore *dpcp = dpcp_from_lock(lock);

	kfree(dpcp);
}
1507
1508static struct litmus_lock_ops pfp_dpcp_lock_ops = {
1509 .close = pfp_dpcp_close,
1510 .lock = pfp_dpcp_lock,
1511 .open = pfp_dpcp_open,
1512 .unlock = pfp_dpcp_unlock,
1513 .deallocate = pfp_dpcp_free,
1514};
1515
1516static struct litmus_lock* pfp_new_dpcp(int on_cpu)
1517{
1518 struct dpcp_semaphore* sem;
1519
1520 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1521 if (!sem)
1522 return NULL;
1523
1524 sem->litmus_lock.ops = &pfp_dpcp_lock_ops;
1525 sem->owner_cpu = NO_CPU;
1526 pcp_init_semaphore(&sem->pcp, on_cpu);
1527
1528 return &sem->litmus_lock;
1529}
1530
1531
1532/* **** lock constructor **** */
1533
1534
1535static long pfp_allocate_lock(struct litmus_lock **lock, int type,
1536 void* __user config)
1537{
1538 int err = -ENXIO, cpu;
1539 struct srp_semaphore* srp;
1540
1541 /* P-FP currently supports the SRP for local resources and the FMLP
1542 * for global resources. */
1543 switch (type) {
1544 case FMLP_SEM:
1545 /* FIFO Mutex Locking Protocol */
1546 *lock = pfp_new_fmlp();
1547 if (*lock)
1548 err = 0;
1549 else
1550 err = -ENOMEM;
1551 break;
1552
1553 case MPCP_SEM:
1554 /* Multiprocesor Priority Ceiling Protocol */
1555 *lock = pfp_new_mpcp(0);
1556 if (*lock)
1557 err = 0;
1558 else
1559 err = -ENOMEM;
1560 break;
1561
1562 case MPCP_VS_SEM:
1563 /* Multiprocesor Priority Ceiling Protocol with virtual spinning */
1564 *lock = pfp_new_mpcp(1);
1565 if (*lock)
1566 err = 0;
1567 else
1568 err = -ENOMEM;
1569 break;
1570
1571 case DPCP_SEM:
1572 /* Distributed Priority Ceiling Protocol */
1573 if (get_user(cpu, (int*) config))
1574 return -EFAULT;
1575
1576 if (!cpu_online(cpu))
1577 return -EINVAL;
1578
1579 *lock = pfp_new_dpcp(cpu);
1580 if (*lock)
1581 err = 0;
1582 else
1583 err = -ENOMEM;
1584 break;
1585
1586 case SRP_SEM:
1587 /* Baker's Stack Resource Policy */
1588 srp = allocate_srp_semaphore();
1589 if (srp) {
1590 *lock = &srp->litmus_lock;
1591 err = 0;
1592 } else
1593 err = -ENOMEM;
1594 break;
1595
1596 case PCP_SEM:
1597 /* Priority Ceiling Protocol */
1598 if (get_user(cpu, (int*) config))
1599 return -EFAULT;
1600
1601 if (!cpu_online(cpu))
1602 return -EINVAL;
1603
1604 *lock = pfp_new_pcp(cpu);
1605 if (*lock)
1606 err = 0;
1607 else
1608 err = -ENOMEM;
1609 break;
1610 };
1611
1612 return err;
1613}
1614
1615#endif
1616
1617static long pfp_admit_task(struct task_struct* tsk)
1618{
1619 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
1620#ifdef CONFIG_RELEASE_MASTER
1621 /* don't allow tasks on release master CPU */
1622 task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
1623#endif
1624 litmus_is_valid_fixed_prio(get_priority(tsk)))
1625 return 0;
1626 else
1627 return -EINVAL;
1628}
1629
/* Plugin callback: prepare per-CPU state when P-FP becomes the active
 * plugin. Always returns 0.
 */
static long pfp_activate_plugin(void)
{
#if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
	int cpu;
#endif

#ifdef CONFIG_RELEASE_MASTER
	/* tell every domain which CPU acts as release master */
	for_each_online_cpu(cpu)
		remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
#endif

#ifdef CONFIG_LITMUS_LOCKING
	get_srp_prio = pfp_get_srp_prio;

	/* reset the locking-protocol state of every online CPU */
	for_each_online_cpu(cpu) {
		pfp_doms[cpu] = remote_pfp(cpu);
		pcp_init_state(&per_cpu(pcp_state, cpu));

		per_cpu(mpcpvs_vspin, cpu) = NULL;
		init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
	}
#endif

	return 0;
}
1657
1658
1659/* Plugin object */
1660static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
1661 .plugin_name = "P-FP",
1662 .tick = pfp_tick,
1663 .task_new = pfp_task_new,
1664 .complete_job = complete_job,
1665 .task_exit = pfp_task_exit,
1666 .schedule = pfp_schedule,
1667 .task_wake_up = pfp_task_wake_up,
1668 .task_block = pfp_task_block,
1669 .admit_task = pfp_admit_task,
1670 .activate_plugin = pfp_activate_plugin,
1671#ifdef CONFIG_LITMUS_LOCKING
1672 .allocate_lock = pfp_allocate_lock,
1673 .finish_switch = pfp_finish_switch,
1674#endif
1675};
1676
1677
1678static int __init init_pfp(void)
1679{
1680 int i;
1681
1682 /* We do not really want to support cpu hotplug, do we? ;)
1683 * However, if we are so crazy to do so,
1684 * we cannot use num_online_cpu()
1685 */
1686 for (i = 0; i < num_online_cpus(); i++) {
1687 pfp_domain_init(remote_pfp(i), i);
1688 }
1689 return register_sched_plugin(&pfp_plugin);
1690}
1691
1692module_init(init_pfp);
1693
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 00000000000..00a1900d645
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,227 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin, some dummy functions, and some helper functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9#include <linux/sched.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/preempt.h>
14#include <litmus/jobs.h>
15
/*
 * Generic helper for scheduler plugins to trigger a preemption on either
 * the local or a remote cpu. It is aware of non-preemptive sections: the
 * scheduler is not invoked / no IPI is sent if the to-be-preempted task
 * is currently non-preemptive.
 *
 * t is the real-time task executing on cpu. If t is NULL, then cpu is
 * currently scheduling background work.
 */
void preempt_if_preemptable(struct task_struct* t, int cpu)
{
	int need_resched;

	if (!t) {
		/* move non-real-time task out of the way */
		need_resched = 1;
	} else if (smp_processor_id() != cpu) {
		/* Remote CPU case. Only notify if it's not a kernel
		 * NP section and if we didn't set the userspace
		 * flag. */
		need_resched = !is_kernel_np(t) && !request_exit_np_atomic(t);
	} else if (is_user_np(t)) {
		/* Local CPU case, task is in a userspace NP section:
		 * poke it instead of preempting. This doesn't have to be
		 * atomic since the task is definitely not executing. */
		request_exit_np(t);
		need_resched = 0;
	} else {
		/* Local CPU case: only if we are allowed to preempt the
		 * currently-executing task */
		need_resched = !is_kernel_np(t);
	}

	if (likely(need_resched))
		litmus_reschedule(cpu);
}
55
56
57/*************************************************************
58 * Dummy plugin functions *
59 *************************************************************/
60
/* The litmus_dummy_* functions below are the callbacks of the default
 * "Linux" plugin. register_sched_plugin() also installs them (via CHECK)
 * for any callback a plugin leaves unset. */

/* finish_switch callback: nothing to do. */
static void litmus_dummy_finish_switch(struct task_struct * prev)
{
}

/* schedule callback: never picks a real-time task (returns NULL), but
 * still reports to the preemption state machine that a pick was made. */
static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
{
	sched_state_task_picked();
	return NULL;
}

/* tick callback: nothing to do. */
static void litmus_dummy_tick(struct task_struct* tsk)
{
}

/* admit_task callback: rejects every task with -EINVAL and logs it. */
static long litmus_dummy_admit_task(struct task_struct* tsk)
{
	printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
		tsk->comm, tsk->pid);
	return -EINVAL;
}

/* task_new callback: nothing to do. */
static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
{
}

/* task_wake_up callback: nothing to do. */
static void litmus_dummy_task_wake_up(struct task_struct *task)
{
}

/* task_block callback: nothing to do. */
static void litmus_dummy_task_block(struct task_struct *task)
{
}

/* task_exit callback: nothing to do. */
static void litmus_dummy_task_exit(struct task_struct *task)
{
}

/* complete_job callback: not implemented, returns -ENOSYS. */
static long litmus_dummy_complete_job(void)
{
	return -ENOSYS;
}

/* activate_plugin callback: nothing to set up, always succeeds. */
static long litmus_dummy_activate_plugin(void)
{
	return 0;
}

/* deactivate_plugin callback: nothing to tear down, always succeeds. */
static long litmus_dummy_deactivate_plugin(void)
{
	return 0;
}

#ifdef CONFIG_LITMUS_LOCKING

/* allocate_lock callback: no lock types are supported, returns -ENXIO. */
static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
				       void* __user config)
{
	return -ENXIO;
}

#endif
122
123
124/* The default scheduler plugin. It doesn't do anything and lets Linux do its
125 * job.
126 */
127struct sched_plugin linux_sched_plugin = {
128 .plugin_name = "Linux",
129 .tick = litmus_dummy_tick,
130 .task_new = litmus_dummy_task_new,
131 .task_exit = litmus_dummy_task_exit,
132 .task_wake_up = litmus_dummy_task_wake_up,
133 .task_block = litmus_dummy_task_block,
134 .complete_job = litmus_dummy_complete_job,
135 .schedule = litmus_dummy_schedule,
136 .finish_switch = litmus_dummy_finish_switch,
137 .activate_plugin = litmus_dummy_activate_plugin,
138 .deactivate_plugin = litmus_dummy_deactivate_plugin,
139#ifdef CONFIG_LITMUS_LOCKING
140 .allocate_lock = litmus_dummy_allocate_lock,
141#endif
142 .admit_task = litmus_dummy_admit_task
143};
144
/*
 *	The reference to the current plugin that is used to schedule tasks
 *	within the system. It stores references to the actual function
 *	implementations and starts out pointing at the no-op Linux plugin.
 *	Should be initialized by calling "init_***_plugin()"
 */
struct sched_plugin *litmus = &linux_sched_plugin;
151
/* the list of registered scheduling plugins (linked via sched_plugin.list) */
static LIST_HEAD(sched_plugins);
/* taken around every access to sched_plugins in this file */
static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
155
/* Install the litmus_dummy_<func> fallback if the plugin in scope (a
 * variable named "plugin") left callback <func> unset.
 *
 * Wrapped in do { } while (0) so that "CHECK(x);" is a single statement
 * and stays correct inside unbraced if/else bodies.
 */
#define CHECK(func) do {\
	if (!plugin->func) \
		plugin->func = litmus_dummy_ ## func; \
	} while (0)
159
160/* FIXME: get reference to module */
161int register_sched_plugin(struct sched_plugin* plugin)
162{
163 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
164 plugin->plugin_name);
165
166 /* make sure we don't trip over null pointers later */
167 CHECK(finish_switch);
168 CHECK(schedule);
169 CHECK(tick);
170 CHECK(task_wake_up);
171 CHECK(task_exit);
172 CHECK(task_block);
173 CHECK(task_new);
174 CHECK(complete_job);
175 CHECK(activate_plugin);
176 CHECK(deactivate_plugin);
177#ifdef CONFIG_LITMUS_LOCKING
178 CHECK(allocate_lock);
179#endif
180 CHECK(admit_task);
181
182 if (!plugin->release_at)
183 plugin->release_at = release_at;
184
185 raw_spin_lock(&sched_plugins_lock);
186 list_add(&plugin->list, &sched_plugins);
187 raw_spin_unlock(&sched_plugins_lock);
188
189 return 0;
190}
191
192
193/* FIXME: reference counting, etc. */
194struct sched_plugin* find_sched_plugin(const char* name)
195{
196 struct list_head *pos;
197 struct sched_plugin *plugin;
198
199 raw_spin_lock(&sched_plugins_lock);
200 list_for_each(pos, &sched_plugins) {
201 plugin = list_entry(pos, struct sched_plugin, list);
202 if (!strcmp(plugin->plugin_name, name))
203 goto out_unlock;
204 }
205 plugin = NULL;
206
207out_unlock:
208 raw_spin_unlock(&sched_plugins_lock);
209 return plugin;
210}
211
212int print_sched_plugins(char* buf, int max)
213{
214 int count = 0;
215 struct list_head *pos;
216 struct sched_plugin *plugin;
217
218 raw_spin_lock(&sched_plugins_lock);
219 list_for_each(pos, &sched_plugins) {
220 plugin = list_entry(pos, struct sched_plugin, list);
221 count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name);
222 if (max - count <= 0)
223 break;
224 }
225 raw_spin_unlock(&sched_plugins_lock);
226 return count;
227}
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 00000000000..8933e15605a
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,653 @@
1/*
2 * kernel/sched_psn_edf.c
3 *
4 * Implementation of the PSN-EDF scheduler plugin.
5 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
6 *
7 * Suspensions and non-preemptable sections are supported.
8 * Priority inheritance is not supported.
9 */
10
11#include <linux/percpu.h>
12#include <linux/sched.h>
13#include <linux/list.h>
14#include <linux/spinlock.h>
15#include <linux/module.h>
16
17#include <litmus/litmus.h>
18#include <litmus/jobs.h>
19#include <litmus/preempt.h>
20#include <litmus/budget.h>
21#include <litmus/sched_plugin.h>
22#include <litmus/edf_common.h>
23#include <litmus/sched_trace.h>
24#include <litmus/trace.h>
25
26typedef struct {
27 rt_domain_t domain;
28 int cpu;
29 struct task_struct* scheduled; /* only RT tasks */
30/*
31 * scheduling lock slock
32 * protects the domain and serializes scheduling decisions
33 */
34#define slock domain.ready_lock
35
36} psnedf_domain_t;
37
38DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
39
40#define local_edf (&__get_cpu_var(psnedf_domains).domain)
41#define local_pedf (&__get_cpu_var(psnedf_domains))
42#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
43#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
44#define task_edf(task) remote_edf(get_partition(task))
45#define task_pedf(task) remote_pedf(get_partition(task))
46
47
48static void psnedf_domain_init(psnedf_domain_t* pedf,
49 check_resched_needed_t check,
50 release_jobs_t release,
51 int cpu)
52{
53 edf_domain_init(&pedf->domain, check, release);
54 pedf->cpu = cpu;
55 pedf->scheduled = NULL;
56}
57
58static void requeue(struct task_struct* t, rt_domain_t *edf)
59{
60 if (t->state != TASK_RUNNING)
61 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
62
63 set_rt_flags(t, RT_F_RUNNING);
64 if (is_released(t, litmus_clock()))
65 __add_ready(edf, t);
66 else
67 add_release(edf, t); /* it has got to wait */
68}
69
70/* we assume the lock is being held */
71static void preempt(psnedf_domain_t *pedf)
72{
73 preempt_if_preemptable(pedf->scheduled, pedf->cpu);
74}
75
76#ifdef CONFIG_LITMUS_LOCKING
77
78static void boost_priority(struct task_struct* t)
79{
80 unsigned long flags;
81 psnedf_domain_t* pedf = task_pedf(t);
82 lt_t now;
83
84 raw_spin_lock_irqsave(&pedf->slock, flags);
85 now = litmus_clock();
86
87 TRACE_TASK(t, "priority boosted at %llu\n", now);
88
89 tsk_rt(t)->priority_boosted = 1;
90 tsk_rt(t)->boost_start_time = now;
91
92 if (pedf->scheduled != t) {
93 /* holder may be queued: first stop queue changes */
94 raw_spin_lock(&pedf->domain.release_lock);
95 if (is_queued(t) &&
96 /* If it is queued, then we need to re-order. */
97 bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
98 /* If we bubbled to the top, then we need to check for preemptions. */
99 edf_preemption_needed(&pedf->domain, pedf->scheduled))
100 preempt(pedf);
101 raw_spin_unlock(&pedf->domain.release_lock);
102 } /* else: nothing to do since the job is not queued while scheduled */
103
104 raw_spin_unlock_irqrestore(&pedf->slock, flags);
105}
106
107static void unboost_priority(struct task_struct* t)
108{
109 unsigned long flags;
110 psnedf_domain_t* pedf = task_pedf(t);
111 lt_t now;
112
113 raw_spin_lock_irqsave(&pedf->slock, flags);
114 now = litmus_clock();
115
116 /* assumption: this only happens when the job is scheduled */
117 BUG_ON(pedf->scheduled != t);
118
119 TRACE_TASK(t, "priority restored at %llu\n", now);
120
121 /* priority boosted jobs must be scheduled */
122 BUG_ON(pedf->scheduled != t);
123
124 tsk_rt(t)->priority_boosted = 0;
125 tsk_rt(t)->boost_start_time = 0;
126
127 /* check if this changes anything */
128 if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
129 preempt(pedf);
130
131 raw_spin_unlock_irqrestore(&pedf->slock, flags);
132}
133
134#endif
135
136static int psnedf_preempt_check(psnedf_domain_t *pedf)
137{
138 if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) {
139 preempt(pedf);
140 return 1;
141 } else
142 return 0;
143}
144
145/* This check is trivial in partioned systems as we only have to consider
146 * the CPU of the partition.
147 */
148static int psnedf_check_resched(rt_domain_t *edf)
149{
150 psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
151
152 /* because this is a callback from rt_domain_t we already hold
153 * the necessary lock for the ready queue
154 */
155 return psnedf_preempt_check(pedf);
156}
157
158static void job_completion(struct task_struct* t, int forced)
159{
160 sched_trace_task_completion(t,forced);
161 TRACE_TASK(t, "job_completion().\n");
162
163 set_rt_flags(t, RT_F_SLEEP);
164 prepare_for_next_period(t);
165}
166
167static void psnedf_tick(struct task_struct *t)
168{
169 psnedf_domain_t *pedf = local_pedf;
170
171 /* Check for inconsistency. We don't need the lock for this since
172 * ->scheduled is only changed in schedule, which obviously is not
173 * executing in parallel on this CPU
174 */
175 BUG_ON(is_realtime(t) && t != pedf->scheduled);
176
177 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
178 if (!is_np(t)) {
179 litmus_reschedule_local();
180 TRACE("psnedf_scheduler_tick: "
181 "%d is preemptable "
182 " => FORCE_RESCHED\n", t->pid);
183 } else if (is_user_np(t)) {
184 TRACE("psnedf_scheduler_tick: "
185 "%d is non-preemptable, "
186 "preemption delayed.\n", t->pid);
187 request_exit_np(t);
188 }
189 }
190}
191
192static struct task_struct* psnedf_schedule(struct task_struct * prev)
193{
194 psnedf_domain_t* pedf = local_pedf;
195 rt_domain_t* edf = &pedf->domain;
196 struct task_struct* next;
197
198 int out_of_time, sleep, preempt,
199 np, exists, blocks, resched;
200
201 raw_spin_lock(&pedf->slock);
202
203 /* sanity checking
204 * differently from gedf, when a task exits (dead)
205 * pedf->schedule may be null and prev _is_ realtime
206 */
207 BUG_ON(pedf->scheduled && pedf->scheduled != prev);
208 BUG_ON(pedf->scheduled && !is_realtime(prev));
209
210 /* (0) Determine state */
211 exists = pedf->scheduled != NULL;
212 blocks = exists && !is_running(pedf->scheduled);
213 out_of_time = exists &&
214 budget_enforced(pedf->scheduled) &&
215 budget_exhausted(pedf->scheduled);
216 np = exists && is_np(pedf->scheduled);
217 sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
218 preempt = edf_preemption_needed(edf, prev);
219
220 /* If we need to preempt do so.
221 * The following checks set resched to 1 in case of special
222 * circumstances.
223 */
224 resched = preempt;
225
226 /* If a task blocks we have no choice but to reschedule.
227 */
228 if (blocks)
229 resched = 1;
230
231 /* Request a sys_exit_np() call if we would like to preempt but cannot.
232 * Multiple calls to request_exit_np() don't hurt.
233 */
234 if (np && (out_of_time || preempt || sleep))
235 request_exit_np(pedf->scheduled);
236
237 /* Any task that is preemptable and either exhausts its execution
238 * budget or wants to sleep completes. We may have to reschedule after
239 * this.
240 */
241 if (!np && (out_of_time || sleep) && !blocks) {
242 job_completion(pedf->scheduled, !sleep);
243 resched = 1;
244 }
245
246 /* The final scheduling decision. Do we need to switch for some reason?
247 * Switch if we are in RT mode and have no task or if we need to
248 * resched.
249 */
250 next = NULL;
251 if ((!np || blocks) && (resched || !exists)) {
252 /* When preempting a task that does not block, then
253 * re-insert it into either the ready queue or the
254 * release queue (if it completed). requeue() picks
255 * the appropriate queue.
256 */
257 if (pedf->scheduled && !blocks)
258 requeue(pedf->scheduled, edf);
259 next = __take_ready(edf);
260 } else
261 /* Only override Linux scheduler if we have a real-time task
262 * scheduled that needs to continue.
263 */
264 if (exists)
265 next = prev;
266
267 if (next) {
268 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
269 set_rt_flags(next, RT_F_RUNNING);
270 } else {
271 TRACE("becoming idle at %llu\n", litmus_clock());
272 }
273
274 pedf->scheduled = next;
275 sched_state_task_picked();
276 raw_spin_unlock(&pedf->slock);
277
278 return next;
279}
280
281
282/* Prepare a task for running in RT mode
283 */
284static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
285{
286 rt_domain_t* edf = task_edf(t);
287 psnedf_domain_t* pedf = task_pedf(t);
288 unsigned long flags;
289
290 TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
291 t->rt_param.task_params.cpu);
292
293 /* setup job parameters */
294 release_at(t, litmus_clock());
295
296 /* The task should be running in the queue, otherwise signal
297 * code will try to wake it up with fatal consequences.
298 */
299 raw_spin_lock_irqsave(&pedf->slock, flags);
300 if (running) {
301 /* there shouldn't be anything else running at the time */
302 BUG_ON(pedf->scheduled);
303 pedf->scheduled = t;
304 } else {
305 requeue(t, edf);
306 /* maybe we have to reschedule */
307 psnedf_preempt_check(pedf);
308 }
309 raw_spin_unlock_irqrestore(&pedf->slock, flags);
310}
311
312static void psnedf_task_wake_up(struct task_struct *task)
313{
314 unsigned long flags;
315 psnedf_domain_t* pedf = task_pedf(task);
316 rt_domain_t* edf = task_edf(task);
317 lt_t now;
318
319 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
320 raw_spin_lock_irqsave(&pedf->slock, flags);
321 BUG_ON(is_queued(task));
322 now = litmus_clock();
323 if (is_tardy(task, now)
324#ifdef CONFIG_LITMUS_LOCKING
325 /* We need to take suspensions because of semaphores into
326 * account! If a job resumes after being suspended due to acquiring
327 * a semaphore, it should never be treated as a new job release.
328 */
329 && !is_priority_boosted(task)
330#endif
331 ) {
332 /* new sporadic release */
333 release_at(task, now);
334 sched_trace_task_release(task);
335 }
336
337 /* Only add to ready queue if it is not the currently-scheduled
338 * task. This could be the case if a task was woken up concurrently
339 * on a remote CPU before the executing CPU got around to actually
340 * de-scheduling the task, i.e., wake_up() raced with schedule()
341 * and won.
342 */
343 if (pedf->scheduled != task) {
344 requeue(task, edf);
345 psnedf_preempt_check(pedf);
346 }
347
348 raw_spin_unlock_irqrestore(&pedf->slock, flags);
349 TRACE_TASK(task, "wake up done\n");
350}
351
352static void psnedf_task_block(struct task_struct *t)
353{
354 /* only running tasks can block, thus t is in no queue */
355 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
356
357 BUG_ON(!is_realtime(t));
358 BUG_ON(is_queued(t));
359}
360
361static void psnedf_task_exit(struct task_struct * t)
362{
363 unsigned long flags;
364 psnedf_domain_t* pedf = task_pedf(t);
365 rt_domain_t* edf;
366
367 raw_spin_lock_irqsave(&pedf->slock, flags);
368 if (is_queued(t)) {
369 /* dequeue */
370 edf = task_edf(t);
371 remove(edf, t);
372 }
373 if (pedf->scheduled == t)
374 pedf->scheduled = NULL;
375
376 TRACE_TASK(t, "RIP, now reschedule\n");
377
378 preempt(pedf);
379 raw_spin_unlock_irqrestore(&pedf->slock, flags);
380}
381
382#ifdef CONFIG_LITMUS_LOCKING
383
384#include <litmus/fdso.h>
385#include <litmus/srp.h>
386
387/* ******************** SRP support ************************ */
388
/*
 * SRP priority of @t under PSN-EDF: its period.
 * This assumes implicit deadlines.
 */
static unsigned int psnedf_get_srp_prio(struct task_struct* t)
{
	unsigned int prio = get_rt_period(t);

	return prio;
}
394
395/* ******************** FMLP support ********************** */
396
397/* struct for semaphore with priority inheritance */
398struct fmlp_semaphore {
399 struct litmus_lock litmus_lock;
400
401 /* current resource holder */
402 struct task_struct *owner;
403
404 /* FIFO queue of waiting tasks */
405 wait_queue_head_t wait;
406};
407
408static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
409{
410 return container_of(lock, struct fmlp_semaphore, litmus_lock);
411}
412int psnedf_fmlp_lock(struct litmus_lock* l)
413{
414 struct task_struct* t = current;
415 struct fmlp_semaphore *sem = fmlp_from_lock(l);
416 wait_queue_t wait;
417 unsigned long flags;
418
419 if (!is_realtime(t))
420 return -EPERM;
421
422 spin_lock_irqsave(&sem->wait.lock, flags);
423
424 if (sem->owner) {
425 /* resource is not free => must suspend and wait */
426
427 init_waitqueue_entry(&wait, t);
428
429 /* FIXME: interruptible would be nice some day */
430 set_task_state(t, TASK_UNINTERRUPTIBLE);
431
432 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
433
434 TS_LOCK_SUSPEND;
435
436 /* release lock before sleeping */
437 spin_unlock_irqrestore(&sem->wait.lock, flags);
438
439 /* We depend on the FIFO order. Thus, we don't need to recheck
440 * when we wake up; we are guaranteed to have the lock since
441 * there is only one wake up per release.
442 */
443
444 schedule();
445
446 TS_LOCK_RESUME;
447
448 /* Since we hold the lock, no other task will change
449 * ->owner. We can thus check it without acquiring the spin
450 * lock. */
451 BUG_ON(sem->owner != t);
452 } else {
453 /* it's ours now */
454 sem->owner = t;
455
456 /* mark the task as priority-boosted. */
457 boost_priority(t);
458
459 spin_unlock_irqrestore(&sem->wait.lock, flags);
460 }
461
462 return 0;
463}
464
465int psnedf_fmlp_unlock(struct litmus_lock* l)
466{
467 struct task_struct *t = current, *next;
468 struct fmlp_semaphore *sem = fmlp_from_lock(l);
469 unsigned long flags;
470 int err = 0;
471
472 spin_lock_irqsave(&sem->wait.lock, flags);
473
474 if (sem->owner != t) {
475 err = -EINVAL;
476 goto out;
477 }
478
479 /* we lose the benefit of priority boosting */
480
481 unboost_priority(t);
482
483 /* check if there are jobs waiting for this resource */
484 next = __waitqueue_remove_first(&sem->wait);
485 if (next) {
486 /* boost next job */
487 boost_priority(next);
488
489 /* next becomes the resouce holder */
490 sem->owner = next;
491
492 /* wake up next */
493 wake_up_process(next);
494 } else
495 /* resource becomes available */
496 sem->owner = NULL;
497
498out:
499 spin_unlock_irqrestore(&sem->wait.lock, flags);
500 return err;
501}
502
503int psnedf_fmlp_close(struct litmus_lock* l)
504{
505 struct task_struct *t = current;
506 struct fmlp_semaphore *sem = fmlp_from_lock(l);
507 unsigned long flags;
508
509 int owner;
510
511 spin_lock_irqsave(&sem->wait.lock, flags);
512
513 owner = sem->owner == t;
514
515 spin_unlock_irqrestore(&sem->wait.lock, flags);
516
517 if (owner)
518 psnedf_fmlp_unlock(l);
519
520 return 0;
521}
522
/* Deallocate hook: free the fmlp_semaphore that embeds @lock. */
void psnedf_fmlp_free(struct litmus_lock* lock)
{
	struct fmlp_semaphore *sem = fmlp_from_lock(lock);

	kfree(sem);
}
527
528static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
529 .close = psnedf_fmlp_close,
530 .lock = psnedf_fmlp_lock,
531 .unlock = psnedf_fmlp_unlock,
532 .deallocate = psnedf_fmlp_free,
533};
534
535static struct litmus_lock* psnedf_new_fmlp(void)
536{
537 struct fmlp_semaphore* sem;
538
539 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
540 if (!sem)
541 return NULL;
542
543 sem->owner = NULL;
544 init_waitqueue_head(&sem->wait);
545 sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;
546
547 return &sem->litmus_lock;
548}
549
550/* **** lock constructor **** */
551
552
553static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
554 void* __user unused)
555{
556 int err = -ENXIO;
557 struct srp_semaphore* srp;
558
559 /* PSN-EDF currently supports the SRP for local resources and the FMLP
560 * for global resources. */
561 switch (type) {
562 case FMLP_SEM:
563 /* Flexible Multiprocessor Locking Protocol */
564 *lock = psnedf_new_fmlp();
565 if (*lock)
566 err = 0;
567 else
568 err = -ENOMEM;
569 break;
570
571 case SRP_SEM:
572 /* Baker's Stack Resource Policy */
573 srp = allocate_srp_semaphore();
574 if (srp) {
575 *lock = &srp->litmus_lock;
576 err = 0;
577 } else
578 err = -ENOMEM;
579 break;
580 };
581
582 return err;
583}
584
585#endif
586
587
/*
 * Plugin activation hook. With CONFIG_RELEASE_MASTER, copies the current
 * release master CPU into every online CPU's domain; with
 * CONFIG_LITMUS_LOCKING, installs the PSN-EDF SRP prioritization.
 * Always returns 0.
 */
static long psnedf_activate_plugin(void)
{
#ifdef CONFIG_RELEASE_MASTER
	int cpu;

	for_each_online_cpu(cpu)
		remote_edf(cpu)->release_master =
			atomic_read(&release_master_cpu);
#endif

#ifdef CONFIG_LITMUS_LOCKING
	get_srp_prio = psnedf_get_srp_prio;
#endif

	return 0;
}
604
605static long psnedf_admit_task(struct task_struct* tsk)
606{
607 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
608#ifdef CONFIG_RELEASE_MASTER
609 /* don't allow tasks on release master CPU */
610 && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
611#endif
612 )
613 return 0;
614 else
615 return -EINVAL;
616}
617
618/* Plugin object */
619static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
620 .plugin_name = "PSN-EDF",
621 .tick = psnedf_tick,
622 .task_new = psnedf_task_new,
623 .complete_job = complete_job,
624 .task_exit = psnedf_task_exit,
625 .schedule = psnedf_schedule,
626 .task_wake_up = psnedf_task_wake_up,
627 .task_block = psnedf_task_block,
628 .admit_task = psnedf_admit_task,
629 .activate_plugin = psnedf_activate_plugin,
630#ifdef CONFIG_LITMUS_LOCKING
631 .allocate_lock = psnedf_allocate_lock,
632#endif
633};
634
635
636static int __init init_psn_edf(void)
637{
638 int i;
639
640 /* We do not really want to support cpu hotplug, do we? ;)
641 * However, if we are so crazy to do so,
642 * we cannot use num_online_cpu()
643 */
644 for (i = 0; i < num_online_cpus(); i++) {
645 psnedf_domain_init(remote_pedf(i),
646 psnedf_check_resched,
647 NULL, i);
648 }
649 return register_sched_plugin(&psn_edf_plugin);
650}
651
652module_init(init_psn_edf);
653
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
new file mode 100644
index 00000000000..5ef8d09ab41
--- /dev/null
+++ b/litmus/sched_task_trace.c
@@ -0,0 +1,241 @@
1/*
2 * sched_task_trace.c -- record scheduling events to a byte stream
3 */
4
5#define NO_TASK_TRACE_DECLS
6
7#include <linux/module.h>
8#include <linux/sched.h>
9#include <linux/percpu.h>
10
11#include <litmus/ftdev.h>
12#include <litmus/litmus.h>
13
14#include <litmus/sched_trace.h>
15#include <litmus/feather_trace.h>
16#include <litmus/ftdev.h>
17
18
19#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
20
21#define now() litmus_clock()
22
23struct local_buffer {
24 struct st_event_record record[NO_EVENTS];
25 char flag[NO_EVENTS];
26 struct ft_buffer ftbuf;
27};
28
29DEFINE_PER_CPU(struct local_buffer, st_event_buffer);
30
31static struct ftdev st_dev;
32
33static int st_dev_can_open(struct ftdev *dev, unsigned int cpu)
34{
35 return cpu_online(cpu) ? 0 : -ENODEV;
36}
37
38static int __init init_sched_task_trace(void)
39{
40 struct local_buffer* buf;
41 int i, ok = 0, err;
42 printk("Allocated %u sched_trace_xxx() events per CPU "
43 "(buffer size: %d bytes)\n",
44 NO_EVENTS, (int) sizeof(struct local_buffer));
45
46 err = ftdev_init(&st_dev, THIS_MODULE,
47 num_online_cpus(), "sched_trace");
48 if (err)
49 goto err_out;
50
51 for (i = 0; i < st_dev.minor_cnt; i++) {
52 buf = &per_cpu(st_event_buffer, i);
53 ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
54 sizeof(struct st_event_record),
55 buf->flag,
56 buf->record);
57 st_dev.minor[i].buf = &buf->ftbuf;
58 }
59 if (ok == st_dev.minor_cnt) {
60 st_dev.can_open = st_dev_can_open;
61 err = register_ftdev(&st_dev);
62 if (err)
63 goto err_dealloc;
64 } else {
65 err = -EINVAL;
66 goto err_dealloc;
67 }
68
69 return 0;
70
71err_dealloc:
72 ftdev_exit(&st_dev);
73err_out:
74 printk(KERN_WARNING "Could not register sched_trace module\n");
75 return err;
76}
77
78static void __exit exit_sched_task_trace(void)
79{
80 ftdev_exit(&st_dev);
81}
82
83module_init(init_sched_task_trace);
84module_exit(exit_sched_task_trace);
85
86
87static inline struct st_event_record* get_record(u8 type, struct task_struct* t)
88{
89 struct st_event_record* rec = NULL;
90 struct local_buffer* buf;
91
92 buf = &get_cpu_var(st_event_buffer);
93 if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) {
94 rec->hdr.type = type;
95 rec->hdr.cpu = smp_processor_id();
96 rec->hdr.pid = t ? t->pid : 0;
97 rec->hdr.job = t ? t->rt_param.job_params.job_no : 0;
98 } else {
99 put_cpu_var(st_event_buffer);
100 }
101 /* rec will be NULL if it failed */
102 return rec;
103}
104
105static inline void put_record(struct st_event_record* rec)
106{
107 struct local_buffer* buf;
108 buf = &__get_cpu_var(st_event_buffer);
109 ft_buffer_finish_write(&buf->ftbuf, rec);
110 put_cpu_var(st_event_buffer);
111}
112
113feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task)
114{
115 struct task_struct *t = (struct task_struct*) _task;
116 struct st_event_record* rec = get_record(ST_NAME, t);
117 int i;
118 if (rec) {
119 for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++)
120 rec->data.name.cmd[i] = t->comm[i];
121 put_record(rec);
122 }
123}
124
125feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task)
126{
127 struct task_struct *t = (struct task_struct*) _task;
128 struct st_event_record* rec = get_record(ST_PARAM, t);
129 if (rec) {
130 rec->data.param.wcet = get_exec_cost(t);
131 rec->data.param.period = get_rt_period(t);
132 rec->data.param.phase = get_rt_phase(t);
133 rec->data.param.partition = get_partition(t);
134 rec->data.param.class = get_class(t);
135 put_record(rec);
136 }
137}
138
139feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task)
140{
141 struct task_struct *t = (struct task_struct*) _task;
142 struct st_event_record* rec = get_record(ST_RELEASE, t);
143 if (rec) {
144 rec->data.release.release = get_release(t);
145 rec->data.release.deadline = get_deadline(t);
146 put_record(rec);
147 }
148}
149
150/* skipped: st_assigned_data, we don't use it atm */
151
152feather_callback void do_sched_trace_task_switch_to(unsigned long id,
153 unsigned long _task)
154{
155 struct task_struct *t = (struct task_struct*) _task;
156 struct st_event_record* rec;
157 if (is_realtime(t)) {
158 rec = get_record(ST_SWITCH_TO, t);
159 if (rec) {
160 rec->data.switch_to.when = now();
161 rec->data.switch_to.exec_time = get_exec_time(t);
162 put_record(rec);
163 }
164 }
165}
166
167feather_callback void do_sched_trace_task_switch_away(unsigned long id,
168 unsigned long _task)
169{
170 struct task_struct *t = (struct task_struct*) _task;
171 struct st_event_record* rec;
172 if (is_realtime(t)) {
173 rec = get_record(ST_SWITCH_AWAY, t);
174 if (rec) {
175 rec->data.switch_away.when = now();
176 rec->data.switch_away.exec_time = get_exec_time(t);
177 put_record(rec);
178 }
179 }
180}
181
182feather_callback void do_sched_trace_task_completion(unsigned long id,
183 unsigned long _task,
184 unsigned long forced)
185{
186 struct task_struct *t = (struct task_struct*) _task;
187 struct st_event_record* rec = get_record(ST_COMPLETION, t);
188 if (rec) {
189 rec->data.completion.when = now();
190 rec->data.completion.forced = forced;
191 put_record(rec);
192 }
193}
194
195feather_callback void do_sched_trace_task_block(unsigned long id,
196 unsigned long _task)
197{
198 struct task_struct *t = (struct task_struct*) _task;
199 struct st_event_record* rec = get_record(ST_BLOCK, t);
200 if (rec) {
201 rec->data.block.when = now();
202 put_record(rec);
203 }
204}
205
206feather_callback void do_sched_trace_task_resume(unsigned long id,
207 unsigned long _task)
208{
209 struct task_struct *t = (struct task_struct*) _task;
210 struct st_event_record* rec = get_record(ST_RESUME, t);
211 if (rec) {
212 rec->data.resume.when = now();
213 put_record(rec);
214 }
215}
216
217feather_callback void do_sched_trace_sys_release(unsigned long id,
218 unsigned long _start)
219{
220 lt_t *start = (lt_t*) _start;
221 struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL);
222 if (rec) {
223 rec->data.sys_release.when = now();
224 rec->data.sys_release.release = *start;
225 put_record(rec);
226 }
227}
228
229feather_callback void do_sched_trace_action(unsigned long id,
230 unsigned long _task,
231 unsigned long action)
232{
233 struct task_struct *t = (struct task_struct*) _task;
234 struct st_event_record* rec = get_record(ST_ACTION, t);
235
236 if (rec) {
237 rec->data.action.when = now();
238 rec->data.action.action = action;
239 put_record(rec);
240 }
241}
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
new file mode 100644
index 00000000000..f4171fddbbb
--- /dev/null
+++ b/litmus/sched_trace.c
@@ -0,0 +1,252 @@
1/*
2 * sched_trace.c -- record scheduling events to a byte stream.
3 */
4#include <linux/spinlock.h>
5#include <linux/mutex.h>
6
7#include <linux/fs.h>
8#include <linux/slab.h>
9#include <linux/miscdevice.h>
10#include <asm/uaccess.h>
11#include <linux/module.h>
12#include <linux/sysrq.h>
13
14#include <linux/kfifo.h>
15
16#include <litmus/sched_trace.h>
17#include <litmus/litmus.h>
18
19#define SCHED_TRACE_NAME "litmus/log"
20
21/* Compute size of TRACE() buffer */
22#define LITMUS_TRACE_BUF_SIZE (1 << CONFIG_SCHED_DEBUG_TRACE_SHIFT)
23
24/* Max length of one read from the buffer */
25#define MAX_READ_LEN (64 * 1024)
26
27/* Max length for one write --- by TRACE() --- to the buffer. This is used to
28 * allocate a per-cpu buffer for printf() formatting. */
29#define MSG_SIZE 255
30
31
32static DEFINE_MUTEX(reader_mutex);
33static atomic_t reader_cnt = ATOMIC_INIT(0);
34static DEFINE_KFIFO(debug_buffer, char, LITMUS_TRACE_BUF_SIZE);
35
36
37static DEFINE_RAW_SPINLOCK(log_buffer_lock);
38static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
39
40/*
41 * sched_trace_log_message - Write to the trace buffer (log_buffer)
42 *
43 * This is the only function accessing the log_buffer from inside the
44 * kernel for writing.
45 * Concurrent access to sched_trace_log_message must be serialized using
46 * log_buffer_lock
47 * The maximum length of a formatted message is 255
48 */
49void sched_trace_log_message(const char* fmt, ...)
50{
51 unsigned long flags;
52 va_list args;
53 size_t len;
54 char* buf;
55
56 if (!atomic_read(&reader_cnt))
57 /* early exit if nobody is listening */
58 return;
59
60 va_start(args, fmt);
61 local_irq_save(flags);
62
63 /* format message */
64 buf = __get_cpu_var(fmt_buffer);
65 len = vscnprintf(buf, MSG_SIZE, fmt, args);
66
67 raw_spin_lock(&log_buffer_lock);
68 /* Don't copy the trailing null byte, we don't want null bytes in a
69 * text file.
70 */
71 kfifo_in(&debug_buffer, buf, len);
72 raw_spin_unlock(&log_buffer_lock);
73
74 local_irq_restore(flags);
75 va_end(args);
76}
77
78
79/*
80 * log_read - Read the trace buffer
81 *
82 * This function is called as a file operation from userspace.
83 * Readers can sleep. Access is serialized through reader_mutex
84 */
85static ssize_t log_read(struct file *filp,
86 char __user *to, size_t len,
87 loff_t *f_pos)
88{
89 /* we ignore f_pos, this is strictly sequential */
90
91 ssize_t error = -EINVAL;
92 char* mem;
93
94 if (mutex_lock_interruptible(&reader_mutex)) {
95 error = -ERESTARTSYS;
96 goto out;
97 }
98
99 if (len > MAX_READ_LEN)
100 len = MAX_READ_LEN;
101
102 mem = kmalloc(len, GFP_KERNEL);
103 if (!mem) {
104 error = -ENOMEM;
105 goto out_unlock;
106 }
107
108 error = kfifo_out(&debug_buffer, mem, len);
109 while (!error) {
110 set_current_state(TASK_INTERRUPTIBLE);
111 schedule_timeout(110);
112 if (signal_pending(current))
113 error = -ERESTARTSYS;
114 else
115 error = kfifo_out(&debug_buffer, mem, len);
116 }
117
118 if (error > 0 && copy_to_user(to, mem, error))
119 error = -EFAULT;
120
121 kfree(mem);
122 out_unlock:
123 mutex_unlock(&reader_mutex);
124 out:
125 return error;
126}
127
128/*
129 * Enable redirection of printk() messages to the trace buffer.
130 * Defined in kernel/printk.c
131 */
132extern int trace_override;
133extern int trace_recurse;
134
135/*
136 * log_open - open the global log message ring buffer.
137 */
138static int log_open(struct inode *in, struct file *filp)
139{
140 int error = -EINVAL;
141
142 if (mutex_lock_interruptible(&reader_mutex)) {
143 error = -ERESTARTSYS;
144 goto out;
145 }
146
147 atomic_inc(&reader_cnt);
148 error = 0;
149
150 printk(KERN_DEBUG
151 "sched_trace kfifo with buffer starting at: 0x%p\n",
152 debug_buffer.buf);
153
154 /* override printk() */
155 trace_override++;
156
157 mutex_unlock(&reader_mutex);
158 out:
159 return error;
160}
161
162static int log_release(struct inode *in, struct file *filp)
163{
164 int error = -EINVAL;
165
166 if (mutex_lock_interruptible(&reader_mutex)) {
167 error = -ERESTARTSYS;
168 goto out;
169 }
170
171 atomic_dec(&reader_cnt);
172
173 /* release printk() overriding */
174 trace_override--;
175
176 printk(KERN_DEBUG "sched_trace kfifo released\n");
177
178 mutex_unlock(&reader_mutex);
179 out:
180 return error;
181}
182
183/*
184 * log_fops - The file operations for accessing the global LITMUS log message
185 * buffer.
186 *
187 * Except for opening the device file it uses the same operations as trace_fops.
188 */
189static struct file_operations log_fops = {
190 .owner = THIS_MODULE,
191 .open = log_open,
192 .release = log_release,
193 .read = log_read,
194};
195
196static struct miscdevice litmus_log_dev = {
197 .name = SCHED_TRACE_NAME,
198 .minor = MISC_DYNAMIC_MINOR,
199 .fops = &log_fops,
200};
201
202#ifdef CONFIG_MAGIC_SYSRQ
203void dump_trace_buffer(int max)
204{
205 char line[80];
206 int len;
207 int count = 0;
208
209 /* potential, but very unlikely, race... */
210 trace_recurse = 1;
211 while ((max == 0 || count++ < max) &&
212 (len = kfifo_out(&debug_buffer, line, sizeof(line - 1))) > 0) {
213 line[len] = '\0';
214 printk("%s", line);
215 }
216 trace_recurse = 0;
217}
218
/* Magic-sysrq handler: print at most 100 chunks of the TRACE() buffer. */
static void sysrq_dump_trace_buffer(int key)
{
	const int max_chunks = 100;

	dump_trace_buffer(max_chunks);
}
223
224static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
225 .handler = sysrq_dump_trace_buffer,
226 .help_msg = "dump-trace-buffer(Y)",
227 .action_msg = "writing content of TRACE() buffer",
228};
229#endif
230
231static int __init init_sched_trace(void)
232{
233 printk("Initializing TRACE() device\n");
234
235#ifdef CONFIG_MAGIC_SYSRQ
236 /* offer some debugging help */
237 if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op))
238 printk("Registered dump-trace-buffer(Y) magic sysrq.\n");
239 else
240 printk("Could not register dump-trace-buffer(Y) magic sysrq.\n");
241#endif
242
243 return misc_register(&litmus_log_dev);
244}
245
246static void __exit exit_sched_trace(void)
247{
248 misc_deregister(&litmus_log_dev);
249}
250
251module_init(init_sched_trace);
252module_exit(exit_sched_trace);
diff --git a/litmus/srp.c b/litmus/srp.c
new file mode 100644
index 00000000000..2ed4ec12a9d
--- /dev/null
+++ b/litmus/srp.c
@@ -0,0 +1,295 @@
1/* ************************************************************************** */
2/* STACK RESOURCE POLICY */
3/* ************************************************************************** */
4
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/wait.h>
8
9#include <litmus/litmus.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/fdso.h>
12#include <litmus/trace.h>
13
14
15#ifdef CONFIG_LITMUS_LOCKING
16
17#include <litmus/srp.h>
18
19srp_prioritization_t get_srp_prio;
20
21struct srp {
22 struct list_head ceiling;
23 wait_queue_head_t ceiling_blocked;
24};
25#define system_ceiling(srp) list2prio(srp->ceiling.next)
26#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
27
28#define UNDEF_SEM -2
29
30atomic_t srp_objects_in_use = ATOMIC_INIT(0);
31
32DEFINE_PER_CPU(struct srp, srp);
33
34/* Initialize SRP semaphores at boot time. */
35static int __init srp_init(void)
36{
37 int i;
38
39 printk("Initializing SRP per-CPU ceilings...");
40 for (i = 0; i < NR_CPUS; i++) {
41 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
42 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
43 }
44 printk(" done!\n");
45
46 return 0;
47}
48module_init(srp_init);
49
50/* SRP task priority comparison function. Smaller numeric values have higher
51 * priority, tie-break is PID. Special case: priority == 0 <=> no priority
52 */
53static int srp_higher_prio(struct srp_priority* first,
54 struct srp_priority* second)
55{
56 if (!first->priority)
57 return 0;
58 else
59 return !second->priority ||
60 first->priority < second->priority || (
61 first->priority == second->priority &&
62 first->pid < second->pid);
63}
64
65
66static int srp_exceeds_ceiling(struct task_struct* first,
67 struct srp* srp)
68{
69 struct srp_priority prio;
70
71 if (list_empty(&srp->ceiling))
72 return 1;
73 else {
74 prio.pid = first->pid;
75 prio.priority = get_srp_prio(first);
76 return srp_higher_prio(&prio, system_ceiling(srp)) ||
77 ceiling2sem(system_ceiling(srp))->owner == first;
78 }
79}
80
81static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
82{
83 struct list_head *pos;
84 if (in_list(&prio->list)) {
85 printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
86 "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
87 return;
88 }
89 list_for_each(pos, &srp->ceiling)
90 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
91 __list_add(&prio->list, pos->prev, pos);
92 return;
93 }
94
95 list_add_tail(&prio->list, &srp->ceiling);
96}
97
98
99static int lock_srp_semaphore(struct litmus_lock* l)
100{
101 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
102
103 if (!is_realtime(current))
104 return -EPERM;
105
106 preempt_disable();
107
108 /* Update ceiling. */
109 srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
110
111 /* SRP invariant: all resources available */
112 BUG_ON(sem->owner != NULL);
113
114 sem->owner = current;
115 TRACE_CUR("acquired srp 0x%p\n", sem);
116
117 preempt_enable();
118
119 return 0;
120}
121
122static int unlock_srp_semaphore(struct litmus_lock* l)
123{
124 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
125 int err = 0;
126
127 preempt_disable();
128
129 if (sem->owner != current) {
130 err = -EINVAL;
131 } else {
132 /* Determine new system priority ceiling for this CPU. */
133 BUG_ON(!in_list(&sem->ceiling.list));
134
135 list_del(&sem->ceiling.list);
136 sem->owner = NULL;
137
138 /* Wake tasks on this CPU, if they exceed current ceiling. */
139 TRACE_CUR("released srp 0x%p\n", sem);
140 wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
141 }
142
143 preempt_enable();
144 return err;
145}
146
147static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
148{
149 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
150 int err = 0;
151 struct task_struct* t = current;
152 struct srp_priority t_prio;
153
154 if (!is_realtime(t))
155 return -EPERM;
156
157 TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
158
159 preempt_disable();
160
161 if (sem->owner != NULL)
162 err = -EBUSY;
163
164 if (err == 0) {
165 if (sem->cpu == UNDEF_SEM)
166 sem->cpu = get_partition(t);
167 else if (sem->cpu != get_partition(t))
168 err = -EPERM;
169 }
170
171 if (err == 0) {
172 t_prio.priority = get_srp_prio(t);
173 t_prio.pid = t->pid;
174 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
175 sem->ceiling.priority = t_prio.priority;
176 sem->ceiling.pid = t_prio.pid;
177 }
178 }
179
180 preempt_enable();
181
182 return err;
183}
184
185static int close_srp_semaphore(struct litmus_lock* l)
186{
187 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
188 int err = 0;
189
190 preempt_disable();
191
192 if (sem->owner == current)
193 unlock_srp_semaphore(l);
194
195 preempt_enable();
196
197 return err;
198}
199
200static void deallocate_srp_semaphore(struct litmus_lock* l)
201{
202 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
203 atomic_dec(&srp_objects_in_use);
204 kfree(sem);
205}
206
207static struct litmus_lock_ops srp_lock_ops = {
208 .open = open_srp_semaphore,
209 .close = close_srp_semaphore,
210 .lock = lock_srp_semaphore,
211 .unlock = unlock_srp_semaphore,
212 .deallocate = deallocate_srp_semaphore,
213};
214
215struct srp_semaphore* allocate_srp_semaphore(void)
216{
217 struct srp_semaphore* sem;
218
219 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
220 if (!sem)
221 return NULL;
222
223 INIT_LIST_HEAD(&sem->ceiling.list);
224 sem->ceiling.priority = 0;
225 sem->cpu = UNDEF_SEM;
226 sem->owner = NULL;
227
228 sem->litmus_lock.ops = &srp_lock_ops;
229
230 atomic_inc(&srp_objects_in_use);
231 return sem;
232}
233
234static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
235 void *key)
236{
237 int cpu = smp_processor_id();
238 struct task_struct *tsk = wait->private;
239 if (cpu != get_partition(tsk))
240 TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\b",
241 get_partition(tsk));
242 else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
243 return default_wake_function(wait, mode, sync, key);
244 return 0;
245}
246
247static void do_ceiling_block(struct task_struct *tsk)
248{
249 wait_queue_t wait = {
250 .private = tsk,
251 .func = srp_wake_up,
252 .task_list = {NULL, NULL}
253 };
254
255 tsk->state = TASK_UNINTERRUPTIBLE;
256 add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
257 tsk->rt_param.srp_non_recurse = 1;
258 preempt_enable_no_resched();
259 schedule();
260 preempt_disable();
261 tsk->rt_param.srp_non_recurse = 0;
262 remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
263}
264
265/* Wait for current task priority to exceed system-wide priority ceiling.
266 * FIXME: the hotpath should be inline.
267 */
268void srp_ceiling_block(void)
269{
270 struct task_struct *tsk = current;
271
272 /* Only applies to real-time tasks, but optimize for RT tasks. */
273 if (unlikely(!is_realtime(tsk)))
274 return;
275
276 /* Avoid recursive ceiling blocking. */
277 if (unlikely(tsk->rt_param.srp_non_recurse))
278 return;
279
280 /* Bail out early if there aren't any SRP resources around. */
281 if (likely(!atomic_read(&srp_objects_in_use)))
282 return;
283
284 preempt_disable();
285 if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
286 TRACE_CUR("is priority ceiling blocked.\n");
287 while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
288 do_ceiling_block(tsk);
289 TRACE_CUR("finally exceeds system ceiling.\n");
290 } else
291 TRACE_CUR("is not priority ceiling blocked\n");
292 preempt_enable();
293}
294
295#endif
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 00000000000..bf75fde5450
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,104 @@
1/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
2 *
3 *
4 */
5
6#include <asm/atomic.h>
7#include <asm/uaccess.h>
8#include <linux/spinlock.h>
9#include <linux/list.h>
10#include <linux/sched.h>
11#include <linux/completion.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/jobs.h>
16
17#include <litmus/sched_trace.h>
18
19static DECLARE_COMPLETION(ts_release);
20
21static long do_wait_for_ts_release(void)
22{
23 long ret = 0;
24
25 /* If the interruption races with a release, the completion object
26 * may have a non-zero counter. To avoid this problem, this should
27 * be replaced by wait_for_completion().
28 *
29 * For debugging purposes, this is interruptible for now.
30 */
31 ret = wait_for_completion_interruptible(&ts_release);
32
33 return ret;
34}
35
36int count_tasks_waiting_for_release(void)
37{
38 unsigned long flags;
39 int task_count = 0;
40 struct list_head *pos;
41
42 spin_lock_irqsave(&ts_release.wait.lock, flags);
43 list_for_each(pos, &ts_release.wait.task_list) {
44 task_count++;
45 }
46 spin_unlock_irqrestore(&ts_release.wait.lock, flags);
47
48 return task_count;
49}
50
51static long do_release_ts(lt_t start)
52{
53 int task_count = 0;
54 unsigned long flags;
55 struct list_head *pos;
56 struct task_struct *t;
57
58
59 spin_lock_irqsave(&ts_release.wait.lock, flags);
60 TRACE("<<<<<< synchronous task system release >>>>>>\n");
61
62 sched_trace_sys_release(&start);
63 list_for_each(pos, &ts_release.wait.task_list) {
64 t = (struct task_struct*) list_entry(pos,
65 struct __wait_queue,
66 task_list)->private;
67 task_count++;
68 litmus->release_at(t, start + t->rt_param.task_params.phase);
69 sched_trace_task_release(t);
70 }
71
72 spin_unlock_irqrestore(&ts_release.wait.lock, flags);
73
74 complete_n(&ts_release, task_count);
75
76 return task_count;
77}
78
79
80asmlinkage long sys_wait_for_ts_release(void)
81{
82 long ret = -EPERM;
83 struct task_struct *t = current;
84
85 if (is_realtime(t))
86 ret = do_wait_for_ts_release();
87
88 return ret;
89}
90
91
92asmlinkage long sys_release_ts(lt_t __user *__delay)
93{
94 long ret;
95 lt_t delay;
96
97 /* FIXME: check capabilities... */
98
99 ret = copy_from_user(&delay, __delay, sizeof(delay));
100 if (ret == 0)
101 ret = do_release_ts(litmus_clock() + delay);
102
103 return ret;
104}
diff --git a/litmus/trace.c b/litmus/trace.c
new file mode 100644
index 00000000000..3c35c527e80
--- /dev/null
+++ b/litmus/trace.c
@@ -0,0 +1,225 @@
1#include <linux/sched.h>
2#include <linux/module.h>
3#include <linux/uaccess.h>
4
5#include <litmus/ftdev.h>
6#include <litmus/litmus.h>
7#include <litmus/trace.h>
8
9/******************************************************************************/
10/* Allocation */
11/******************************************************************************/
12
13static struct ftdev overhead_dev;
14
15#define trace_ts_buf overhead_dev.minor[0].buf
16
17static unsigned int ts_seq_no = 0;
18
19DEFINE_PER_CPU(atomic_t, irq_fired_count);
20
21static inline void clear_irq_fired(void)
22{
23 atomic_set(&__raw_get_cpu_var(irq_fired_count), 0);
24}
25
26static inline unsigned int get_and_clear_irq_fired(void)
27{
28 /* This is potentially not atomic since we might migrate if
29 * preemptions are not disabled. As a tradeoff between
30 * accuracy and tracing overheads, this seems acceptable.
31 * If it proves to be a problem, then one could add a callback
32 * from the migration code to invalidate irq_fired_count.
33 */
34 return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0);
35}
36
37static inline void __save_irq_flags(struct timestamp *ts)
38{
39 unsigned int irq_count;
40
41 irq_count = get_and_clear_irq_fired();
42 /* Store how many interrupts occurred. */
43 ts->irq_count = irq_count;
44 /* Extra flag because ts->irq_count overflows quickly. */
45 ts->irq_flag = irq_count > 0;
46}
47
48static inline void __save_timestamp_cpu(unsigned long event,
49 uint8_t type, uint8_t cpu)
50{
51 unsigned int seq_no;
52 struct timestamp *ts;
53 seq_no = fetch_and_inc((int *) &ts_seq_no);
54 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
55 ts->event = event;
56 ts->seq_no = seq_no;
57 ts->cpu = cpu;
58 ts->task_type = type;
59 __save_irq_flags(ts);
60 barrier();
61 /* prevent re-ordering of ft_timestamp() */
62 ts->timestamp = ft_timestamp();
63 ft_buffer_finish_write(trace_ts_buf, ts);
64 }
65}
66
67static void __add_timestamp_user(struct timestamp *pre_recorded)
68{
69 unsigned int seq_no;
70 struct timestamp *ts;
71 seq_no = fetch_and_inc((int *) &ts_seq_no);
72 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
73 *ts = *pre_recorded;
74 ts->seq_no = seq_no;
75 __save_irq_flags(ts);
76 ft_buffer_finish_write(trace_ts_buf, ts);
77 }
78}
79
80static inline void __save_timestamp(unsigned long event,
81 uint8_t type)
82{
83 __save_timestamp_cpu(event, type, raw_smp_processor_id());
84}
85
86feather_callback void save_timestamp(unsigned long event)
87{
88 __save_timestamp(event, TSK_UNKNOWN);
89}
90
91feather_callback void save_timestamp_def(unsigned long event,
92 unsigned long type)
93{
94 __save_timestamp(event, (uint8_t) type);
95}
96
97feather_callback void save_timestamp_task(unsigned long event,
98 unsigned long t_ptr)
99{
100 int rt = is_realtime((struct task_struct *) t_ptr);
101 __save_timestamp(event, rt ? TSK_RT : TSK_BE);
102}
103
104feather_callback void save_timestamp_cpu(unsigned long event,
105 unsigned long cpu)
106{
107 __save_timestamp_cpu(event, TSK_UNKNOWN, cpu);
108}
109
110feather_callback void save_task_latency(unsigned long event,
111 unsigned long when_ptr)
112{
113 lt_t now = litmus_clock();
114 lt_t *when = (lt_t*) when_ptr;
115 unsigned int seq_no;
116 int cpu = raw_smp_processor_id();
117 struct timestamp *ts;
118
119 seq_no = fetch_and_inc((int *) &ts_seq_no);
120 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
121 ts->event = event;
122 ts->timestamp = now - *when;
123 ts->seq_no = seq_no;
124 ts->cpu = cpu;
125 ts->task_type = TSK_RT;
126 __save_irq_flags(ts);
127 ft_buffer_finish_write(trace_ts_buf, ts);
128 }
129}
130
131/******************************************************************************/
132/* DEVICE FILE DRIVER */
133/******************************************************************************/
134
135/*
136 * should be 8M; it is the max we can ask to buddy system allocator (MAX_ORDER)
137 * and we might not get as much
138 */
139#define NO_TIMESTAMPS (2 << 16)
140
141static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
142{
143 unsigned int count = NO_TIMESTAMPS;
144
145 /* An overhead-tracing timestamp should be exactly 16 bytes long. */
146 BUILD_BUG_ON(sizeof(struct timestamp) != 16);
147
148 while (count && !trace_ts_buf) {
149 printk("time stamp buffer: trying to allocate %u time stamps.\n", count);
150 ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
151 count /= 2;
152 }
153 return ftdev->minor[idx].buf ? 0 : -ENOMEM;
154}
155
156static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
157{
158 free_ft_buffer(ftdev->minor[idx].buf);
159 ftdev->minor[idx].buf = NULL;
160}
161
162static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
163 const char __user *from)
164{
165 ssize_t consumed = 0;
166 struct timestamp ts;
167
168 /* don't give us partial timestamps */
169 if (len % sizeof(ts))
170 return -EINVAL;
171
172 while (len >= sizeof(ts)) {
173 if (copy_from_user(&ts, from, sizeof(ts))) {
174 consumed = -EFAULT;
175 goto out;
176 }
177 len -= sizeof(ts);
178 from += sizeof(ts);
179 consumed += sizeof(ts);
180
181 __add_timestamp_user(&ts);
182 }
183
184out:
185 return consumed;
186}
187
188static int __init init_ft_overhead_trace(void)
189{
190 int err, cpu;
191
192 printk("Initializing Feather-Trace overhead tracing device.\n");
193 err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace");
194 if (err)
195 goto err_out;
196
197 overhead_dev.alloc = alloc_timestamp_buffer;
198 overhead_dev.free = free_timestamp_buffer;
199 overhead_dev.write = write_timestamp_from_user;
200
201 err = register_ftdev(&overhead_dev);
202 if (err)
203 goto err_dealloc;
204
205 /* initialize IRQ flags */
206 for (cpu = 0; cpu < NR_CPUS; cpu++) {
207 clear_irq_fired();
208 }
209
210 return 0;
211
212err_dealloc:
213 ftdev_exit(&overhead_dev);
214err_out:
215 printk(KERN_WARNING "Could not register ft_trace module.\n");
216 return err;
217}
218
219static void __exit exit_ft_overhead_trace(void)
220{
221 ftdev_exit(&overhead_dev);
222}
223
224module_init(init_ft_overhead_trace);
225module_exit(exit_ft_overhead_trace);