author     Nathan O <otternes@cs.unc.edu>    2019-12-09 14:59:56 -0500
committer  Nathan O <otternes@cs.unc.edu>    2019-12-09 14:59:56 -0500
commit     3c4abebc788e9d92d776d7bc8b778f398cdb4010 (patch)
tree       7392a57bb2d5e0e61cd3a03bae0e8ce79991f6d5
parent     2627f203874e04500ea80f6e588cd659bec5866b (diff)
Initial attempt to "connect the wires"
- This is my first attempt to re-add all of the modifications on top of this version of the Linux kernel that were present in the previous version of LITMUS.
- More notes on changes will follow after testing--no guarantees the code as it is now will compile or run correctly.
-rw-r--r--  Makefile                                   1
-rw-r--r--  arch/arm/Kconfig                           9
-rw-r--r--  arch/arm64/Kconfig                         9
-rw-r--r--  arch/x86/Kconfig                           9
-rw-r--r--  arch/x86/include/asm/feather_trace.h      18
-rw-r--r--  arch/x86/include/asm/feather_trace_32.h  115
-rw-r--r--  arch/x86/include/asm/feather_trace_64.h  124
-rw-r--r--  arch/x86/kernel/Makefile                   2
-rw-r--r--  arch/x86/kernel/ft_event.c               170
-rw-r--r--  fs/exec.c                                  3
-rw-r--r--  fs/inode.c                                 2
-rw-r--r--  fs/select.c                                6
-rw-r--r--  include/linux/fs.h                         3
-rw-r--r--  include/linux/hardirq.h                    3
-rw-r--r--  include/linux/hrtimer.h                    3
-rw-r--r--  include/linux/sched.h                     10
-rw-r--r--  include/trace/events/litmus.h            231
-rw-r--r--  include/uapi/linux/sched.h                 1
-rw-r--r--  kernel/exit.c                             14
-rw-r--r--  kernel/fork.c                              6
-rw-r--r--  kernel/locking/rwsem.c                    13
-rw-r--r--  kernel/printk/printk.c                    14
-rw-r--r--  kernel/sched/Makefile                      3
-rw-r--r--  kernel/sched/core.c                      153
-rw-r--r--  kernel/sched/deadline.c                   21
-rw-r--r--  kernel/sched/litmus.c                    386
-rw-r--r--  kernel/sched/rt.c                         12
-rw-r--r--  kernel/sched/sched.h                      22
-rw-r--r--  kernel/sched/stop_task.c                   8
-rw-r--r--  kernel/time/hrtimer.c                     69
-rw-r--r--  mm/page-writeback.c                        7
-rw-r--r--  mm/page_alloc.c                            6
32 files changed, 1413 insertions(+), 40 deletions(-)
diff --git a/Makefile b/Makefile
index 1d5298356ea8..405d18d59837 100644
--- a/Makefile
+++ b/Makefile
@@ -1011,6 +1011,7 @@ export MODORDER := $(extmod-prefix)modules.order
1011 1011
1012ifeq ($(KBUILD_EXTMOD),) 1012ifeq ($(KBUILD_EXTMOD),)
1013core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ 1013core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
1014core-y += litmus/
1014 1015
1015vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ 1016vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
1016 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ 1017 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8a50efb559f3..3aaa81a3ae70 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2094,3 +2094,12 @@ source "arch/arm/crypto/Kconfig"
2094endif 2094endif
2095 2095
2096source "arch/arm/kvm/Kconfig" 2096source "arch/arm/kvm/Kconfig"
2097
2098config ARCH_HAS_FEATHER_TRACE
2099 def_bool n
2100
2101config ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI
2102 def_bool n
2103
2104source "litmus/Kconfig"
2105
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3f047afb982c..a6bf629e708c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1680,3 +1680,12 @@ source "arch/arm64/kvm/Kconfig"
1680if CRYPTO 1680if CRYPTO
1681source "arch/arm64/crypto/Kconfig" 1681source "arch/arm64/crypto/Kconfig"
1682endif 1682endif
1683
1684config ARCH_HAS_FEATHER_TRACE
1685 def_bool n
1686
1687config ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI
1688 def_bool n
1689
1690source "litmus/Kconfig"
1691
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ef85139553f..3765164809c5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2972,3 +2972,12 @@ config X86_DEV_DMA_OPS
2972source "drivers/firmware/Kconfig" 2972source "drivers/firmware/Kconfig"
2973 2973
2974source "arch/x86/kvm/Kconfig" 2974source "arch/x86/kvm/Kconfig"
2975
2976config ARCH_HAS_FEATHER_TRACE
2977 def_bool y
2978
2979config ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI
2980 def_bool y
2981
2982source "litmus/Kconfig"
2983
diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h
new file mode 100644
index 000000000000..4e732d4ea508
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace.h
@@ -0,0 +1,18 @@
1#ifndef _ARCH_FEATHER_TRACE_H
2#define _ARCH_FEATHER_TRACE_H
3
4#include <asm/msr.h>
5#include <asm/timex.h>
6
7static inline unsigned long long ft_timestamp(void)
8{
9 return get_cycles();
10}
11
12#ifdef CONFIG_X86_32
13#include "feather_trace_32.h"
14#else
15#include "feather_trace_64.h"
16#endif
17
18#endif
diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h
new file mode 100644
index 000000000000..75e81a9f9382
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace_32.h
@@ -0,0 +1,115 @@
1/* Copyright (c) 2007-2012 Björn Brandenburg, <bbb@mpi-sws.org>
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24/* Do not directly include this file. Include feather_trace.h instead */
25
26#define feather_callback __attribute__((regparm(3))) __attribute__((used))
27
28/*
29 * Make the compiler reload any register that is not saved in a cdecl function
30 * call (minus the registers that we explicitly clobber as output registers).
31 */
32#define __FT_CLOBBER_LIST0 "memory", "cc", "eax", "edx", "ecx"
33#define __FT_CLOBBER_LIST1 "memory", "cc", "eax", "ecx"
34#define __FT_CLOBBER_LIST2 "memory", "cc", "eax"
35#define __FT_CLOBBER_LIST3 "memory", "cc", "eax"
36
37#define __FT_TMP1(x) "=d" (x)
38#define __FT_ARG1(x) "0" ((long) (x))
39#define __FT_TMP2(x) "=c" (x)
40#define __FT_ARG2(x) "1" ((long) (x))
41
42#define __FT_ARG3(x) "r" ((long) (x))
43
44#define ft_event(id, callback) \
45 __asm__ __volatile__( \
46 "1: jmp 2f \n\t" \
47 " call " #callback " \n\t" \
48 ".section __event_table, \"aw\" \n\t" \
49 ".long " #id ", 0, 1b, 2f \n\t" \
50 ".previous \n\t" \
51 "2: \n\t" \
52 : : : __FT_CLOBBER_LIST0)
53
54#define ft_event0(id, callback) \
55 __asm__ __volatile__( \
56 "1: jmp 2f \n\t" \
57 " movl $" #id ", %%eax \n\t" \
58 " call " #callback " \n\t" \
59 ".section __event_table, \"aw\" \n\t" \
60 ".long " #id ", 0, 1b, 2f \n\t" \
61 ".previous \n\t" \
62 "2: \n\t" \
63 : : : __FT_CLOBBER_LIST0)
64
65#define ft_event1(id, callback, param) \
66 do { \
67 long __ft_tmp1; \
68 __asm__ __volatile__( \
69 "1: jmp 2f \n\t" \
70 " movl $" #id ", %%eax \n\t" \
71 " call " #callback " \n\t" \
72 ".section __event_table, \"aw\" \n\t" \
73 ".long " #id ", 0, 1b, 2f \n\t" \
74 ".previous \n\t" \
75 "2: \n\t" \
76 : __FT_TMP1(__ft_tmp1) \
77 : __FT_ARG1(param) \
78 : __FT_CLOBBER_LIST1); \
79 } while (0);
80
81#define ft_event2(id, callback, param, param2) \
82 do { \
83 long __ft_tmp1, __ft_tmp2; \
84 __asm__ __volatile__( \
85 "1: jmp 2f \n\t" \
86 " movl $" #id ", %%eax \n\t" \
87 " call " #callback " \n\t" \
88 ".section __event_table, \"aw\" \n\t" \
89 ".long " #id ", 0, 1b, 2f \n\t" \
90 ".previous \n\t" \
91 "2: \n\t" \
92 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \
93 : __FT_ARG1(param), __FT_ARG2(param2) \
94 : __FT_CLOBBER_LIST2); \
95 } while (0);
96
97
98#define ft_event3(id, callback, param, param2, param3) \
99 do { \
100 long __ft_tmp1, __ft_tmp2; \
101 __asm__ __volatile__( \
102 "1: jmp 2f \n\t" \
103 " subl $4, %%esp \n\t" \
104 " movl $" #id ", %%eax \n\t" \
105 " movl %2, (%%esp) \n\t" \
106 " call " #callback " \n\t" \
107 " addl $4, %%esp \n\t" \
108 ".section __event_table, \"aw\" \n\t" \
109 ".long " #id ", 0, 1b, 2f \n\t" \
110 ".previous \n\t" \
111 "2: \n\t" \
112 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \
113 : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \
114 : __FT_CLOBBER_LIST3); \
115 } while (0);
diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h
new file mode 100644
index 000000000000..5ce49e2eebba
--- /dev/null
+++ b/arch/x86/include/asm/feather_trace_64.h
@@ -0,0 +1,124 @@
1/* Copyright (c) 2010 Andrea Bastoni, <bastoni@cs.unc.edu>
2 * Copyright (c) 2012 Björn Brandenburg, <bbb@mpi-sws.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25/* Do not directly include this file. Include feather_trace.h instead */
26
27/* regparm is the default on x86_64 */
28#define feather_callback __attribute__((used))
29
30#define __FT_EVENT_TABLE(id,from,to) \
31 ".section __event_table, \"aw\"\n\t" \
32 ".balign 8\n\t" \
33 ".quad " #id ", 0, " #from ", " #to " \n\t" \
34 ".previous \n\t"
35
36/*
37 * x86_64 caller only owns rbp, rbx, r12-r15;
38 * the callee can freely modify the others.
39 */
40#define __FT_CLOBBER_LIST0 "memory", "cc", "rdi", "rsi", "rdx", "rcx", \
41 "r8", "r9", "r10", "r11", "rax"
42
43#define __FT_CLOBBER_LIST1 "memory", "cc", "rdi", "rdx", "rcx", \
44 "r8", "r9", "r10", "r11", "rax"
45
46#define __FT_CLOBBER_LIST2 "memory", "cc", "rdi", "rcx", \
47 "r8", "r9", "r10", "r11", "rax"
48
49#define __FT_CLOBBER_LIST3 "memory", "cc", "rdi", \
50 "r8", "r9", "r10", "r11", "rax"
51
52/* The registers RDI, RSI, RDX, RCX, R8 and R9 are used for integer and pointer
53 * arguments. */
54
55/* RSI */
56#define __FT_TMP1(x) "=S" (x)
57#define __FT_ARG1(x) "0" ((long) (x))
58
59/* RDX */
60#define __FT_TMP2(x) "=d" (x)
61#define __FT_ARG2(x) "1" ((long) (x))
62
63/* RCX */
64#define __FT_TMP3(x) "=c" (x)
65#define __FT_ARG3(x) "2" ((long) (x))
66
67#define ft_event(id, callback) \
68 __asm__ __volatile__( \
69 "1: jmp 2f \n\t" \
70 " call " #callback " \n\t" \
71 __FT_EVENT_TABLE(id,1b,2f) \
72 "2: \n\t" \
73 : : : __FT_CLOBBER_LIST0)
74
75#define ft_event0(id, callback) \
76 __asm__ __volatile__( \
77 "1: jmp 2f \n\t" \
78 " movq $" #id ", %%rdi \n\t" \
79 " call " #callback " \n\t" \
80 __FT_EVENT_TABLE(id,1b,2f) \
81 "2: \n\t" \
82 : : : __FT_CLOBBER_LIST0)
83
84#define ft_event1(id, callback, param) \
85 do { \
86 long __ft_tmp1; \
87 __asm__ __volatile__( \
88 "1: jmp 2f \n\t" \
89 " movq $" #id ", %%rdi \n\t" \
90 " call " #callback " \n\t" \
91 __FT_EVENT_TABLE(id,1b,2f) \
92 "2: \n\t" \
93 : __FT_TMP1(__ft_tmp1) \
94 : __FT_ARG1(param) \
95 : __FT_CLOBBER_LIST1); \
96 } while (0);
97
98#define ft_event2(id, callback, param, param2) \
99 do { \
100 long __ft_tmp1, __ft_tmp2; \
101 __asm__ __volatile__( \
102 "1: jmp 2f \n\t" \
103 " movq $" #id ", %%rdi \n\t" \
104 " call " #callback " \n\t" \
105 __FT_EVENT_TABLE(id,1b,2f) \
106 "2: \n\t" \
107 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \
108 : __FT_ARG1(param), __FT_ARG2(param2) \
109 : __FT_CLOBBER_LIST2); \
110 } while (0);
111
112#define ft_event3(id, callback, param, param2, param3) \
113 do { \
114 long __ft_tmp1, __ft_tmp2, __ft_tmp3; \
115 __asm__ __volatile__( \
116 "1: jmp 2f \n\t" \
117 " movq $" #id ", %%rdi \n\t" \
118 " call " #callback " \n\t" \
119 __FT_EVENT_TABLE(id,1b,2f) \
120 "2: \n\t" \
121 : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2), __FT_TMP3(__ft_tmp3) \
122 : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \
123 : __FT_CLOBBER_LIST3); \
124 } while (0);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3578ad248bc9..5ee68d48e0a4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -140,6 +140,8 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
140obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o 140obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
141obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o 141obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
142 142
143obj-$(CONFIG_FEATHER_TRACE) += ft_event.o
144
143### 145###
144# 64 bit specific files 146# 64 bit specific files
145ifeq ($(CONFIG_X86_64),y) 147ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c
new file mode 100644
index 000000000000..7aa3d0592ff2
--- /dev/null
+++ b/arch/x86/kernel/ft_event.c
@@ -0,0 +1,170 @@
1#include <linux/types.h>
2#include <linux/module.h>
3#include <asm/cacheflush.h>
4#include <asm/sections.h>
5
6#include <litmus/feather_trace.h>
7
8/* the feather trace management functions assume
9 * exclusive access to the event table
10 */
11
12#ifndef CONFIG_RELOCATABLE
13
14#define BYTE_JUMP 0xeb
15#define BYTE_JUMP_LEN 0x02
16
17/* for each event, there is an entry in the event table */
18struct trace_event {
19 long id;
20 long count;
21 long start_addr;
22 long end_addr;
23};
24
25extern struct trace_event __start___event_table[];
26extern struct trace_event __stop___event_table[];
27
28
29/* NOTE: The following two functions have been stolen from ftrace.c */
30
31static inline int
32within(unsigned long addr, unsigned long start, unsigned long end)
33{
34 return addr >= start && addr < end;
35}
36
37static unsigned long text_ip_addr(unsigned long ip)
38{
39 /*
40 * On x86_64, kernel text mappings are mapped read-only, so we use
41 * the kernel identity mapping instead of the kernel text mapping
42 * to modify the kernel text.
43 *
44 * For 32bit kernels, these mappings are same and we can use
45 * kernel identity mapping to modify code.
46 */
47 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
48 ip = (unsigned long)__va(__pa_symbol(ip));
49
50 return ip;
51}
52
53/* Workaround: if no events are defined, then the event_table section does not
54 * exist and the above references cause linker errors. This could probably be
55 * fixed by adjusting the linker script, but it is easier to maintain for us if
56 * we simply create a dummy symbol in the event table section.
57 */
58int __event_table_dummy[0] __attribute__ ((section("__event_table")));
59
60int ft_enable_event(unsigned long id)
61{
62 struct trace_event* te = __start___event_table;
63 int count = 0;
64 char* delta;
65 unsigned char* instr;
66
67 set_kernel_text_rw();
68 set_all_modules_text_rw();
69
70 while (te < __stop___event_table) {
71 if (te->id == id && ++te->count == 1) {
72 instr = (unsigned char*) te->start_addr;
73 /* make sure we don't clobber something wrong */
74 if (*instr == BYTE_JUMP) {
75 delta = (unsigned char*) text_ip_addr(
76 ((unsigned long) te->start_addr)
77 + 1);
78 *delta = 0;
79 }
80 }
81 if (te->id == id)
82 count++;
83 te++;
84 }
85
86 set_all_modules_text_ro();
87 set_kernel_text_ro();
88
89 printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count);
90 return count;
91}
92
93int ft_disable_event(unsigned long id)
94{
95 struct trace_event* te = __start___event_table;
96 int count = 0;
97 char* delta;
98 unsigned char* instr;
99
100 set_kernel_text_rw();
101 set_all_modules_text_rw();
102
103 while (te < __stop___event_table) {
104 if (te->id == id && --te->count == 0) {
105 instr = (unsigned char*) te->start_addr;
106 if (*instr == BYTE_JUMP) {
107 delta = (unsigned char*) text_ip_addr(
108 ((unsigned long) te->start_addr)
109 + 1);
110 *delta = te->end_addr - te->start_addr -
111 BYTE_JUMP_LEN;
112 }
113 }
114 if (te->id == id)
115 count++;
116 te++;
117 }
118
119 set_all_modules_text_ro();
120 set_kernel_text_ro();
121
122 printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count);
123 return count;
124}
125
126int ft_disable_all_events(void)
127{
128 struct trace_event* te = __start___event_table;
129 int count = 0;
130 char* delta;
131 unsigned char* instr;
132
133 set_kernel_text_rw();
134 set_all_modules_text_rw();
135
136 while (te < __stop___event_table) {
137 if (te->count) {
138 instr = (unsigned char*) te->start_addr;
139 if (*instr == BYTE_JUMP) {
140 delta = (unsigned char*) text_ip_addr(
141 ((unsigned long) te->start_addr)
142 + 1);
143 *delta = te->end_addr - te->start_addr -
144 BYTE_JUMP_LEN;
145 te->count = 0;
146 count++;
147 }
148 }
149 te++;
150 }
151
152 set_all_modules_text_ro();
153 set_kernel_text_ro();
154
155 return count;
156}
157
158int ft_is_event_enabled(unsigned long id)
159{
160 struct trace_event* te = __start___event_table;
161
162 while (te < __stop___event_table) {
163 if (te->id == id)
164 return te->count;
165 te++;
166 }
167 return 0;
168}
169
170#endif
diff --git a/fs/exec.c b/fs/exec.c
index 555e93c7dec8..49c8613d2510 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -64,6 +64,8 @@
64#include <linux/compat.h> 64#include <linux/compat.h>
65#include <linux/vmalloc.h> 65#include <linux/vmalloc.h>
66 66
67#include <litmus/litmus.h>
68
67#include <linux/uaccess.h> 69#include <linux/uaccess.h>
68#include <asm/mmu_context.h> 70#include <asm/mmu_context.h>
69#include <asm/tlb.h> 71#include <asm/tlb.h>
@@ -1765,6 +1767,7 @@ static int __do_execve_file(int fd, struct filename *filename,
1765 goto out_unmark; 1767 goto out_unmark;
1766 1768
1767 sched_exec(); 1769 sched_exec();
1770 litmus_exec();
1768 1771
1769 bprm->file = file; 1772 bprm->file = file;
1770 if (!filename) { 1773 if (!filename) {
diff --git a/fs/inode.c b/fs/inode.c
index fef457a42882..abf61717d9db 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -394,6 +394,8 @@ void inode_init_once(struct inode *inode)
394 INIT_LIST_HEAD(&inode->i_lru); 394 INIT_LIST_HEAD(&inode->i_lru);
395 __address_space_init_once(&inode->i_data); 395 __address_space_init_once(&inode->i_data);
396 i_size_ordered_init(inode); 396 i_size_ordered_init(inode);
397 INIT_LIST_HEAD(&inode->i_obj_list);
398 mutex_init(&inode->i_obj_mutex);
397} 399}
398EXPORT_SYMBOL(inode_init_once); 400EXPORT_SYMBOL(inode_init_once);
399 401
diff --git a/fs/select.c b/fs/select.c
index 53a0c149f528..7a3745f8d17f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -32,6 +32,8 @@
32#include <net/busy_poll.h> 32#include <net/busy_poll.h>
33#include <linux/vmalloc.h> 33#include <linux/vmalloc.h>
34 34
35#include <litmus/litmus.h>
36
35#include <linux/uaccess.h> 37#include <linux/uaccess.h>
36 38
37 39
@@ -80,9 +82,9 @@ u64 select_estimate_accuracy(struct timespec64 *tv)
80 /* 82 /*
81 * Realtime tasks get a slack of 0 for obvious reasons. 83 * Realtime tasks get a slack of 0 for obvious reasons.
82 */ 84 */
83 85 if (rt_task(current) || is_realtime(current)) {
84 if (rt_task(current))
85 return 0; 86 return 0;
87 }
86 88
87 ktime_get_ts64(&now); 89 ktime_get_ts64(&now);
88 now = timespec64_sub(*tv, now); 90 now = timespec64_sub(*tv, now);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0d909d35763..d65e17d3d302 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -734,6 +734,9 @@ struct inode {
734 struct fsverity_info *i_verity_info; 734 struct fsverity_info *i_verity_info;
735#endif 735#endif
736 736
737 struct list_head i_obj_list;
738 struct mutex i_obj_mutex;
739
737 void *i_private; /* fs or device private pointer */ 740 void *i_private; /* fs or device private pointer */
738} __randomize_layout; 741} __randomize_layout;
739 742
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index da0af631ded5..35271458e22b 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -8,6 +8,7 @@
8#include <linux/vtime.h> 8#include <linux/vtime.h>
9#include <asm/hardirq.h> 9#include <asm/hardirq.h>
10 10
11#include <litmus/trace_irq.h>
11 12
12extern void synchronize_irq(unsigned int irq); 13extern void synchronize_irq(unsigned int irq);
13extern bool synchronize_hardirq(unsigned int irq); 14extern bool synchronize_hardirq(unsigned int irq);
@@ -38,6 +39,7 @@ extern void rcu_nmi_exit(void);
38 account_irq_enter_time(current); \ 39 account_irq_enter_time(current); \
39 preempt_count_add(HARDIRQ_OFFSET); \ 40 preempt_count_add(HARDIRQ_OFFSET); \
40 trace_hardirq_enter(); \ 41 trace_hardirq_enter(); \
42 ft_irq_fired(); \
41 } while (0) 43 } while (0)
42 44
43/* 45/*
@@ -75,6 +77,7 @@ extern void irq_exit(void);
75 preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ 77 preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
76 rcu_nmi_enter(); \ 78 rcu_nmi_enter(); \
77 trace_hardirq_enter(); \ 79 trace_hardirq_enter(); \
80 ft_irq_fired(); \
78 } while (0) 81 } while (0)
79 82
80#define nmi_exit() \ 83#define nmi_exit() \
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 1b9a51a1bccb..a145e140d532 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -123,6 +123,9 @@ struct hrtimer {
123 u8 is_rel; 123 u8 is_rel;
124 u8 is_soft; 124 u8 is_soft;
125 u8 is_hard; 125 u8 is_hard;
126#if defined(CONFIG_REPORT_TIMER_LATENCY) || defined(CONFIG_SCHED_OVERHEAD_TRACE)
127 ktime_t when_added;
128#endif
126}; 129};
127 130
128/** 131/**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 67a1d86981a9..0a1b09305248 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -32,6 +32,9 @@
32#include <linux/posix-timers.h> 32#include <linux/posix-timers.h>
33#include <linux/rseq.h> 33#include <linux/rseq.h>
34 34
35#include <litmus/rt_param.h>
36#include <litmus/preempt.h>
37
35/* task_struct member predeclarations (sorted alphabetically): */ 38/* task_struct member predeclarations (sorted alphabetically): */
36struct audit_context; 39struct audit_context;
37struct backing_dev_info; 40struct backing_dev_info;
@@ -61,6 +64,8 @@ struct signal_struct;
61struct task_delay_info; 64struct task_delay_info;
62struct task_group; 65struct task_group;
63 66
67struct od_table_entry;
68
64/* 69/*
65 * Task state bitmask. NOTE! These bits are also 70 * Task state bitmask. NOTE! These bits are also
66 * encoded in fs/proc/array.c: get_task_state(). 71 * encoded in fs/proc/array.c: get_task_state().
@@ -1158,6 +1163,10 @@ struct task_struct {
1158 /* Start of a write-and-pause period: */ 1163 /* Start of a write-and-pause period: */
1159 unsigned long dirty_paused_when; 1164 unsigned long dirty_paused_when;
1160 1165
1166 /* LITMUS RT parameters and state */
1167 struct rt_param rt_param;
1168 struct od_table_entry *od_table;
1169
1161#ifdef CONFIG_LATENCYTOP 1170#ifdef CONFIG_LATENCYTOP
1162 int latency_record_count; 1171 int latency_record_count;
1163 struct latency_record latency_record[LT_SAVECOUNT]; 1172 struct latency_record latency_record[LT_SAVECOUNT];
@@ -1741,6 +1750,7 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
1741static inline void set_tsk_need_resched(struct task_struct *tsk) 1750static inline void set_tsk_need_resched(struct task_struct *tsk)
1742{ 1751{
1743 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); 1752 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1753 sched_state_will_schedule(tsk);
1744} 1754}
1745 1755
1746static inline void clear_tsk_need_resched(struct task_struct *tsk) 1756static inline void clear_tsk_need_resched(struct task_struct *tsk)
diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h
new file mode 100644
index 000000000000..0fffcee02be0
--- /dev/null
+++ b/include/trace/events/litmus.h
@@ -0,0 +1,231 @@
1/*
2 * LITMUS^RT kernel style scheduling tracepoints
3 */
4#undef TRACE_SYSTEM
5#define TRACE_SYSTEM litmus
6
7#if !defined(_SCHED_TASK_TRACEPOINT_H) || defined(TRACE_HEADER_MULTI_READ)
8#define _SCHED_TASK_TRACEPOINT_H
9
10#include <linux/tracepoint.h>
11
12#include <litmus/litmus.h>
13#include <litmus/rt_param.h>
14
15/*
16 * Tracing task admission
17 */
18TRACE_EVENT(litmus_task_param,
19
20 TP_PROTO(struct task_struct *t),
21
22 TP_ARGS(t),
23
24 TP_STRUCT__entry(
25 __field( pid_t, pid )
26 __field( unsigned int, job )
27 __field( lt_t, wcet )
28 __field( lt_t, period )
29 __field( lt_t, phase )
30 __field( int, partition )
31 ),
32
33 TP_fast_assign(
34 __entry->pid = t ? t->pid : 0;
35 __entry->job = t ? t->rt_param.job_params.job_no : 0;
36 __entry->wcet = get_exec_cost(t);
37 __entry->period = get_rt_period(t);
38 __entry->phase = get_rt_phase(t);
39 __entry->partition = get_partition(t);
40 ),
41
42 TP_printk("period(%d, %Lu).\nwcet(%d, %Lu).\n",
43 __entry->pid, __entry->period,
44 __entry->pid, __entry->wcet)
45);
46
47/*
48 * Tracing jobs release
49 */
50TRACE_EVENT(litmus_task_release,
51
52 TP_PROTO(struct task_struct *t),
53
54 TP_ARGS(t),
55
56 TP_STRUCT__entry(
57 __field( pid_t, pid )
58 __field( unsigned int, job )
59 __field( lt_t, release )
60 __field( lt_t, deadline )
61 ),
62
63 TP_fast_assign(
64 __entry->pid = t ? t->pid : 0;
65 __entry->job = t ? t->rt_param.job_params.job_no : 0;
66 __entry->release = get_release(t);
67 __entry->deadline = get_deadline(t);
68 ),
69
70 TP_printk("release(job(%u, %u)): %Lu\ndeadline(job(%u, %u)): %Lu\n",
71 __entry->pid, __entry->job, __entry->release,
72 __entry->pid, __entry->job, __entry->deadline)
73);
74
75/*
76 * Tracepoint for switching to new task
77 */
78TRACE_EVENT(litmus_switch_to,
79
80 TP_PROTO(struct task_struct *t),
81
82 TP_ARGS(t),
83
84 TP_STRUCT__entry(
85 __field( pid_t, pid )
86 __field( unsigned int, job )
87 __field( lt_t, when )
88 __field( lt_t, exec_time )
89 ),
90
91 TP_fast_assign(
92 __entry->pid = is_realtime(t) ? t->pid : 0;
93 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
94 __entry->when = litmus_clock();
95 __entry->exec_time = get_exec_time(t);
96 ),
97
98 TP_printk("switch_to(job(%u, %u)): %Lu (exec: %Lu)\n",
99 __entry->pid, __entry->job,
100 __entry->when, __entry->exec_time)
101);
102
103/*
104 * Tracepoint for switching away previous task
105 */
106TRACE_EVENT(litmus_switch_away,
107
108 TP_PROTO(struct task_struct *t),
109
110 TP_ARGS(t),
111
112 TP_STRUCT__entry(
113 __field( pid_t, pid )
114 __field( unsigned int, job )
115 __field( lt_t, when )
116 __field( lt_t, exec_time )
117 ),
118
119 TP_fast_assign(
120 __entry->pid = is_realtime(t) ? t->pid : 0;
121 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
122 __entry->when = litmus_clock();
123 __entry->exec_time = get_exec_time(t);
124 ),
125
126 TP_printk("switch_away(job(%u, %u)): %Lu (exec: %Lu)\n",
127 __entry->pid, __entry->job,
128 __entry->when, __entry->exec_time)
129);
130
131/*
132 * Tracing jobs completion
133 */
134TRACE_EVENT(litmus_task_completion,
135
136 TP_PROTO(struct task_struct *t, unsigned long forced),
137
138 TP_ARGS(t, forced),
139
140 TP_STRUCT__entry(
141 __field( pid_t, pid )
142 __field( unsigned int, job )
143 __field( lt_t, when )
144 __field( unsigned long, forced )
145 ),
146
147 TP_fast_assign(
148 __entry->pid = t ? t->pid : 0;
149 __entry->job = t ? t->rt_param.job_params.job_no : 0;
150 __entry->when = litmus_clock();
151 __entry->forced = forced;
152 ),
153
154 TP_printk("completed(job(%u, %u)): %Lu (forced: %lu)\n",
155 __entry->pid, __entry->job,
156 __entry->when, __entry->forced)
157);
158
159/*
160 * Trace blocking tasks.
161 */
162TRACE_EVENT(litmus_task_block,
163
164 TP_PROTO(struct task_struct *t),
165
166 TP_ARGS(t),
167
168 TP_STRUCT__entry(
169 __field( pid_t, pid )
170 __field( lt_t, when )
171 ),
172
173 TP_fast_assign(
174 __entry->pid = t ? t->pid : 0;
175 __entry->when = litmus_clock();
176 ),
177
178 TP_printk("(%u) blocks: %Lu\n", __entry->pid, __entry->when)
179);
180
181/*
182 * Tracing jobs resume
183 */
184TRACE_EVENT(litmus_task_resume,
185
186 TP_PROTO(struct task_struct *t),
187
188 TP_ARGS(t),
189
190 TP_STRUCT__entry(
191 __field( pid_t, pid )
192 __field( unsigned int, job )
193 __field( lt_t, when )
194 ),
195
196 TP_fast_assign(
197 __entry->pid = t ? t->pid : 0;
198 __entry->job = t ? t->rt_param.job_params.job_no : 0;
199 __entry->when = litmus_clock();
200 ),
201
202 TP_printk("resume(job(%u, %u)): %Lu\n",
203 __entry->pid, __entry->job, __entry->when)
204);
205
206/*
207 * Trace synchronous release
208 */
209TRACE_EVENT(litmus_sys_release,
210
211 TP_PROTO(lt_t *start),
212
213 TP_ARGS(start),
214
215 TP_STRUCT__entry(
216 __field( lt_t, rel )
217 __field( lt_t, when )
218 ),
219
220 TP_fast_assign(
221 __entry->rel = *start;
222 __entry->when = litmus_clock();
223 ),
224
225 TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when)
226);
227
228#endif /* _SCHED_TASK_TRACEPOINT_H */
229
230/* Must stay outside the protection */
231#include <trace/define_trace.h>
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 25b4fa00bad1..f6e838d97ff3 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -87,6 +87,7 @@ struct clone_args {
87/* SCHED_ISO: reserved but not implemented yet */ 87/* SCHED_ISO: reserved but not implemented yet */
88#define SCHED_IDLE 5 88#define SCHED_IDLE 5
89#define SCHED_DEADLINE 6 89#define SCHED_DEADLINE 6
90#define SCHED_LITMUS 7
90 91
91/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ 92/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
92#define SCHED_RESET_ON_FORK 0x40000000 93#define SCHED_RESET_ON_FORK 0x40000000
diff --git a/kernel/exit.c b/kernel/exit.c
index a46a50d67002..6832c614c663 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -69,6 +69,10 @@
69#include <asm/pgtable.h> 69#include <asm/pgtable.h>
70#include <asm/mmu_context.h> 70#include <asm/mmu_context.h>
71 71
72#include <litmus/litmus.h>
73
74extern void exit_od_table(struct task_struct *t);
75
72static void __unhash_process(struct task_struct *p, bool group_dead) 76static void __unhash_process(struct task_struct *p, bool group_dead)
73{ 77{
74 nr_threads--; 78 nr_threads--;
@@ -727,6 +731,14 @@ void __noreturn do_exit(long code)
727 if (unlikely(!tsk->pid)) 731 if (unlikely(!tsk->pid))
728 panic("Attempted to kill the idle task!"); 732 panic("Attempted to kill the idle task!");
729 733
734 if (unlikely(is_realtime(tsk))) {
735 /* We would like the task to be polite and transition out of
736 * RT mode first.
737 */
738 litmus_do_exit(tsk);
 739 BUG_ON(is_realtime(tsk));
740 }
741
730 /* 742 /*
731 * If do_exit is called because this processes oopsed, it's possible 743 * If do_exit is called because this processes oopsed, it's possible
732 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before 744 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
@@ -798,6 +810,8 @@ void __noreturn do_exit(long code)
798 tty_audit_exit(); 810 tty_audit_exit();
799 audit_free(tsk); 811 audit_free(tsk);
800 812
813 exit_od_table(tsk);
814
801 tsk->exit_code = code; 815 tsk->exit_code = code;
802 taskstats_exit(tsk, group_dead); 816 taskstats_exit(tsk, group_dead);
803 817
diff --git a/kernel/fork.c b/kernel/fork.c
index 55af6931c6ec..220211ef8946 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -104,6 +104,9 @@
104 104
105#include <trace/events/sched.h> 105#include <trace/events/sched.h>
106 106
107#include <litmus/litmus.h>
108#include <litmus/sched_plugin.h>
109
107#define CREATE_TRACE_POINTS 110#define CREATE_TRACE_POINTS
108#include <trace/events/task.h> 111#include <trace/events/task.h>
109 112
@@ -740,6 +743,9 @@ void __put_task_struct(struct task_struct *tsk)
740 cgroup_free(tsk); 743 cgroup_free(tsk);
741 task_numa_free(tsk, true); 744 task_numa_free(tsk, true);
742 security_task_free(tsk); 745 security_task_free(tsk);
746
747 exit_litmus(tsk);
748
743 exit_creds(tsk); 749 exit_creds(tsk);
744 delayacct_tsk_free(tsk); 750 delayacct_tsk_free(tsk);
745 put_signal_struct(tsk->signal); 751 put_signal_struct(tsk->signal);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index eef04551eae7..9adb95795f83 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -31,6 +31,8 @@
31#include "rwsem.h" 31#include "rwsem.h"
32#include "lock_events.h" 32#include "lock_events.h"
33 33
34#include <litmus/litmus.h>
35
34/* 36/*
35 * The least significant 3 bits of the owner value has the following 37 * The least significant 3 bits of the owner value has the following
36 * meanings when set. 38 * meanings when set.
@@ -886,11 +888,13 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
886 * a writer, need_resched() check needs to be done here. 888 * a writer, need_resched() check needs to be done here.
887 */ 889 */
888 if (owner_state != OWNER_WRITER) { 890 if (owner_state != OWNER_WRITER) {
889 if (need_resched()) 891 if (need_resched()) {
890 break; 892 break;
891 if (rt_task(current) && 893 }
892 (prev_owner_state != OWNER_WRITER)) 894 if ((rt_task(current) || is_realtime(current)) &&
895 (prev_owner_state != OWNER_WRITER)) {
893 break; 896 break;
897 }
894 } 898 }
895 prev_owner_state = owner_state; 899 prev_owner_state = owner_state;
896 900
@@ -1258,7 +1262,8 @@ wait:
1258 * until rwsem_try_write_lock() is called. 1262 * until rwsem_try_write_lock() is called.
1259 */ 1263 */
1260 if ((wstate == WRITER_FIRST) && (rt_task(current) || 1264 if ((wstate == WRITER_FIRST) && (rt_task(current) ||
1261 time_after(jiffies, waiter.timeout))) { 1265 is_realtime(current) ||
1266 time_after(jiffies, waiter.timeout))) {
1262 wstate = WRITER_HANDOFF; 1267 wstate = WRITER_HANDOFF;
1263 lockevent_inc(rwsem_wlock_handoff); 1268 lockevent_inc(rwsem_wlock_handoff);
1264 break; 1269 break;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ca65327a6de8..4c3d18d2587e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -68,6 +68,13 @@ int console_printk[4] = {
68}; 68};
69EXPORT_SYMBOL_GPL(console_printk); 69EXPORT_SYMBOL_GPL(console_printk);
70 70
71/*
72 * Divert printk() messages when there is a LITMUS^RT debug listener.
73 */
74#include <litmus/debug_trace.h>
75int trace_override = 0;
76int trace_recurse = 0;
77
71atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); 78atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
72EXPORT_SYMBOL(ignore_console_lock_warning); 79EXPORT_SYMBOL(ignore_console_lock_warning);
73 80
@@ -1916,6 +1923,11 @@ int vprintk_store(int facility, int level,
1916 */ 1923 */
1917 text_len = vscnprintf(text, sizeof(textbuf), fmt, args); 1924 text_len = vscnprintf(text, sizeof(textbuf), fmt, args);
1918 1925
1926 /* If the LITMUS^RT tracer is active then divert printk messages. */
1927 if (trace_override && !trace_recurse) {
1928 TRACE("%s", text);
1929 }
1930
1919 /* mark and strip a trailing newline */ 1931 /* mark and strip a trailing newline */
1920 if (text_len && text[text_len-1] == '\n') { 1932 if (text_len && text[text_len-1] == '\n') {
1921 text_len--; 1933 text_len--;
@@ -2967,7 +2979,7 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
2967void wake_up_klogd(void) 2979void wake_up_klogd(void)
2968{ 2980{
2969 preempt_disable(); 2981 preempt_disable();
2970 if (waitqueue_active(&log_wait)) { 2982 if (!trace_override && waitqueue_active(&log_wait)) {
2971 this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); 2983 this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
2972 irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); 2984 irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
2973 } 2985 }
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 21fb5a5662b5..95000e43fce7 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -30,3 +30,6 @@ obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
30obj-$(CONFIG_MEMBARRIER) += membarrier.o 30obj-$(CONFIG_MEMBARRIER) += membarrier.o
31obj-$(CONFIG_CPU_ISOLATION) += isolation.o 31obj-$(CONFIG_CPU_ISOLATION) += isolation.o
32obj-$(CONFIG_PSI) += psi.o 32obj-$(CONFIG_PSI) += psi.o
33
34obj-y += litmus.o
35
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0f2eb3629070..917a374b616f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -20,6 +20,12 @@
20 20
21#include "pelt.h" 21#include "pelt.h"
22 22
23#include <litmus/litmus.h>
24#include <litmus/debug_trace.h>
25#include <litmus/trace.h>
26#include <litmus/sched_trace.h>
27#include <litmus/sched_plugin.h>
28
23#define CREATE_TRACE_POINTS 29#define CREATE_TRACE_POINTS
24#include <trace/events/sched.h> 30#include <trace/events/sched.h>
25 31
@@ -520,6 +526,11 @@ void resched_curr(struct rq *rq)
520 set_tsk_need_resched(curr); 526 set_tsk_need_resched(curr);
521 set_preempt_need_resched(); 527 set_preempt_need_resched();
522 return; 528 return;
529 } else if (is_realtime(curr)) {
530 /* Cannot call set_tsk_need_resched() on LITMUS tasks on a
531 * remote core. Only policy plugins may do this
532 * via litmus_reschedule(). */
533 return;
523 } 534 }
524 535
525 if (set_nr_and_not_polling(curr)) 536 if (set_nr_and_not_polling(curr))
@@ -2317,9 +2328,17 @@ void scheduler_ipi(void)
2317 * this IPI. 2328 * this IPI.
2318 */ 2329 */
2319 preempt_fold_need_resched(); 2330 preempt_fold_need_resched();
2320 2331 /* Let LITMUS' preemption state machine know about this IPI. */
2321 if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) 2332 sched_state_ipi();
2333
2334 if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) {
2335#ifndef CONFIG_ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI
2336 /* If we don't call irq_enter() then we need to trigger the
2337 * IRQ tracing manually. */
2338 ft_irq_fired();
2339#endif
2322 return; 2340 return;
2341 }
2323 2342
2324 /* 2343 /*
2325 * Not all reschedule IPI handlers call irq_enter/irq_exit, since 2344 * Not all reschedule IPI handlers call irq_enter/irq_exit, since
@@ -2397,7 +2416,12 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
2397 struct rq_flags rf; 2416 struct rq_flags rf;
2398 2417
2399#if defined(CONFIG_SMP) 2418#if defined(CONFIG_SMP)
2400 if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { 2419 /*
2420 * In LITMUS, it is up to a plugin to determine whether to send an IPI
2421 * to a remote CPU.
2422 */
2423 if (!is_realtime(p) && sched_feat(TTWU_QUEUE) &&
2424 !cpus_share_cache(smp_processor_id(), cpu)) {
2401 sched_clock_cpu(cpu); /* Sync clocks across CPUs */ 2425 sched_clock_cpu(cpu); /* Sync clocks across CPUs */
2402 ttwu_queue_remote(p, cpu, wake_flags); 2426 ttwu_queue_remote(p, cpu, wake_flags);
2403 return; 2427 return;
@@ -2517,6 +2541,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2517{ 2541{
2518 unsigned long flags; 2542 unsigned long flags;
2519 int cpu, success = 0; 2543 int cpu, success = 0;
2544 if (is_realtime(p)) {
2545 TRACE_TASK(p, "try_to_wake_up() state: %d\n", p->state);
2546 }
2520 2547
2521 preempt_disable(); 2548 preempt_disable();
2522 if (p == current) { 2549 if (p == current) {
@@ -2616,6 +2643,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2616 */ 2643 */
2617 smp_cond_load_acquire(&p->on_cpu, !VAL); 2644 smp_cond_load_acquire(&p->on_cpu, !VAL);
2618 2645
2646 /* LITMUS: Once the task can be safely referenced by this CPU, don't
2647 * mess with further Linux load balancing stuff.
2648 */
2649 if (is_realtime(p)) {
2650 goto litmus_out_activate;
2651 }
2652
2619 p->sched_contributes_to_load = !!task_contributes_to_load(p); 2653 p->sched_contributes_to_load = !!task_contributes_to_load(p);
2620 p->state = TASK_WAKING; 2654 p->state = TASK_WAKING;
2621 2655
@@ -2631,6 +2665,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2631 set_task_cpu(p, cpu); 2665 set_task_cpu(p, cpu);
2632 } 2666 }
2633 2667
2668litmus_out_activate:
2634#else /* CONFIG_SMP */ 2669#else /* CONFIG_SMP */
2635 2670
2636 if (p->in_iowait) { 2671 if (p->in_iowait) {
@@ -2641,6 +2676,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2641#endif /* CONFIG_SMP */ 2676#endif /* CONFIG_SMP */
2642 2677
2643 ttwu_queue(p, cpu, wake_flags); 2678 ttwu_queue(p, cpu, wake_flags);
2679
2680 if (is_realtime(p)) {
2681 TRACE_TASK(p, "try_to_wake_up() done state: %d\n", p->state);
2682 }
2644unlock: 2683unlock:
2645 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 2684 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2646out: 2685out:
@@ -2853,13 +2892,16 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
2853 */ 2892 */
2854 p->prio = current->normal_prio; 2893 p->prio = current->normal_prio;
2855 2894
2895 litmus_fork(p);
2896
2856 uclamp_fork(p); 2897 uclamp_fork(p);
2857 2898
2858 /* 2899 /*
2859 * Revert to default priority/policy on fork if requested. 2900 * Revert to default priority/policy on fork if requested.
2860 */ 2901 */
2861 if (unlikely(p->sched_reset_on_fork)) { 2902 if (unlikely(p->sched_reset_on_fork)) {
2862 if (task_has_dl_policy(p) || task_has_rt_policy(p)) { 2903 if (task_has_dl_policy(p) || task_has_rt_policy(p) ||
2904 p->policy == SCHED_LITMUS) {
2863 p->policy = SCHED_NORMAL; 2905 p->policy = SCHED_NORMAL;
2864 p->static_prio = NICE_TO_PRIO(0); 2906 p->static_prio = NICE_TO_PRIO(0);
2865 p->rt_priority = 0; 2907 p->rt_priority = 0;
@@ -2876,12 +2918,15 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
2876 p->sched_reset_on_fork = 0; 2918 p->sched_reset_on_fork = 0;
2877 } 2919 }
2878 2920
2879 if (dl_prio(p->prio)) 2921 if (is_realtime(p)) {
2922 p->sched_class = &litmus_sched_class;
2923 } else if (dl_prio(p->prio)) {
2880 return -EAGAIN; 2924 return -EAGAIN;
2881 else if (rt_prio(p->prio)) 2925 } else if (rt_prio(p->prio)) {
2882 p->sched_class = &rt_sched_class; 2926 p->sched_class = &rt_sched_class;
2883 else 2927 } else {
2884 p->sched_class = &fair_sched_class; 2928 p->sched_class = &fair_sched_class;
2929 }
2885 2930
2886 init_entity_runnable_average(&p->se); 2931 init_entity_runnable_average(&p->se);
2887 2932
@@ -2945,6 +2990,10 @@ void wake_up_new_task(struct task_struct *p)
2945 struct rq_flags rf; 2990 struct rq_flags rf;
2946 struct rq *rq; 2991 struct rq *rq;
2947 2992
2993 if (is_realtime(p)) {
2994 litmus->task_new(p, 1, 0);
2995 }
2996
2948 raw_spin_lock_irqsave(&p->pi_lock, rf.flags); 2997 raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
2949 p->state = TASK_RUNNING; 2998 p->state = TASK_RUNNING;
2950#ifdef CONFIG_SMP 2999#ifdef CONFIG_SMP
@@ -3218,6 +3267,8 @@ static struct rq *finish_task_switch(struct task_struct *prev)
3218 */ 3267 */
3219 prev_state = prev->state; 3268 prev_state = prev->state;
3220 vtime_task_switch(prev); 3269 vtime_task_switch(prev);
3270 litmus->finish_switch(prev);
3271 prev->rt_param.stack_in_use = NO_CPU;
3221 perf_event_task_sched_in(prev, current); 3272 perf_event_task_sched_in(prev, current);
3222 finish_task(prev); 3273 finish_task(prev);
3223 finish_lock_switch(rq); 3274 finish_lock_switch(rq);
@@ -3317,6 +3368,12 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
3317 */ 3368 */
3318 3369
3319 rq = finish_task_switch(prev); 3370 rq = finish_task_switch(prev);
3371
3372 sched_trace_task_switch_to(current);
3373 if (unlikely(sched_state_validate_switch())) {
3374 litmus_reschedule_local();
3375 }
3376
3320 balance_callback(rq); 3377 balance_callback(rq);
3321 preempt_enable(); 3378 preempt_enable();
3322 3379
@@ -3608,7 +3665,9 @@ void scheduler_tick(void)
3608 3665
3609#ifdef CONFIG_SMP 3666#ifdef CONFIG_SMP
3610 rq->idle_balance = idle_cpu(cpu); 3667 rq->idle_balance = idle_cpu(cpu);
3611 trigger_load_balance(rq); 3668 if (!is_realtime(current)) {
3669 trigger_load_balance(rq);
3670 }
3612#endif 3671#endif
3613} 3672}
3614 3673
@@ -3910,9 +3969,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
3910 /* 3969 /*
3911 * Optimization: we know that if all tasks are in the fair class we can 3970 * Optimization: we know that if all tasks are in the fair class we can
3912 * call that function directly, but only if the @prev task wasn't of a 3971 * call that function directly, but only if the @prev task wasn't of a
3913 * higher scheduling class, because otherwise those loose the 3972 * higher scheduling class, because otherwise those lose the
3914 * opportunity to pull in more work from other CPUs. 3973 * opportunity to pull in more work from other CPUs.
3915 */ 3974 *
3975 * We can't do this in LITMUS!
3976 *
3977 * This breaks many assumptions in the plugins. Do not uncomment
3978 * without considering how this affects global plugins such as GSN-EDF.
3916 if (likely((prev->sched_class == &idle_sched_class || 3979 if (likely((prev->sched_class == &idle_sched_class ||
3917 prev->sched_class == &fair_sched_class) && 3980 prev->sched_class == &fair_sched_class) &&
3918 rq->nr_running == rq->cfs.h_nr_running)) { 3981 rq->nr_running == rq->cfs.h_nr_running)) {
@@ -3921,12 +3984,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
3921 if (unlikely(p == RETRY_TASK)) 3984 if (unlikely(p == RETRY_TASK))
3922 goto restart; 3985 goto restart;
3923 3986
3924 /* Assumes fair_sched_class->next == idle_sched_class */ 3987 // Assumes fair_sched_class->next == idle_sched_class
3925 if (unlikely(!p)) 3988 if (unlikely(!p))
3926 p = idle_sched_class.pick_next_task(rq, prev, rf); 3989 p = idle_sched_class.pick_next_task(rq, prev, rf);
3927 3990
3928 return p; 3991 return p;
3929 } 3992 }
3993 */
3930 3994
3931restart: 3995restart:
3932#ifdef CONFIG_SMP 3996#ifdef CONFIG_SMP
@@ -4003,10 +4067,15 @@ static void __sched notrace __schedule(bool preempt)
4003 struct rq *rq; 4067 struct rq *rq;
4004 int cpu; 4068 int cpu;
4005 4069
4070 TS_SCHED_START;
4071 sched_state_entered_schedule();
4072
4006 cpu = smp_processor_id(); 4073 cpu = smp_processor_id();
4007 rq = cpu_rq(cpu); 4074 rq = cpu_rq(cpu);
4008 prev = rq->curr; 4075 prev = rq->curr;
4009 4076
4077 sched_trace_task_switch_away(prev);
4078
4010 schedule_debug(prev, preempt); 4079 schedule_debug(prev, preempt);
4011 4080
4012 if (sched_feat(HRTICK)) 4081 if (sched_feat(HRTICK))
@@ -4030,6 +4099,8 @@ static void __sched notrace __schedule(bool preempt)
4030 rq->clock_update_flags <<= 1; 4099 rq->clock_update_flags <<= 1;
4031 update_rq_clock(rq); 4100 update_rq_clock(rq);
4032 4101
 4102 this_cpu_write(litmus_preemption_in_progress, preempt);
4103
4033 switch_count = &prev->nivcsw; 4104 switch_count = &prev->nivcsw;
4034 if (!preempt && prev->state) { 4105 if (!preempt && prev->state) {
4035 if (signal_pending_state(prev->state, prev)) { 4106 if (signal_pending_state(prev->state, prev)) {
@@ -4049,6 +4120,8 @@ static void __sched notrace __schedule(bool preempt)
4049 clear_tsk_need_resched(prev); 4120 clear_tsk_need_resched(prev);
4050 clear_preempt_need_resched(); 4121 clear_preempt_need_resched();
4051 4122
4123 this_cpu_write(litmus_preemption_in_progress, false);
4124
4052 if (likely(prev != next)) { 4125 if (likely(prev != next)) {
4053 rq->nr_switches++; 4126 rq->nr_switches++;
4054 /* 4127 /*
@@ -4073,15 +4146,25 @@ static void __sched notrace __schedule(bool preempt)
4073 ++*switch_count; 4146 ++*switch_count;
4074 4147
4075 trace_sched_switch(preempt, prev, next); 4148 trace_sched_switch(preempt, prev, next);
4076 4149 TS_SCHED_END(next);
4150 TS_CXS_START(next);
4077 /* Also unlocks the rq: */ 4151 /* Also unlocks the rq: */
4078 rq = context_switch(rq, prev, next, &rf); 4152 rq = context_switch(rq, prev, next, &rf);
4153 TS_CXS_END(current);
4079 } else { 4154 } else {
4080 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); 4155 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
4156 TS_SCHED_END(prev);
4081 rq_unlock_irq(rq, &rf); 4157 rq_unlock_irq(rq, &rf);
4082 } 4158 }
4083 4159
4160 TS_SCHED2_START(prev);
4161 sched_trace_task_switch_to(current);
4162 if (unlikely(sched_state_validate_switch())) {
4163 litmus_reschedule_local();
4164 }
4165
4084 balance_callback(rq); 4166 balance_callback(rq);
4167 TS_SCHED2_END(prev);
4085} 4168}
4086 4169
4087void __noreturn do_task_dead(void) 4170void __noreturn do_task_dead(void)
@@ -4513,7 +4596,7 @@ void set_user_nice(struct task_struct *p, long nice)
4513 * it wont have any effect on scheduling until the task is 4596 * it wont have any effect on scheduling until the task is
4514 * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR: 4597 * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR:
4515 */ 4598 */
4516 if (task_has_dl_policy(p) || task_has_rt_policy(p)) { 4599 if (task_has_dl_policy(p) || task_has_rt_policy(p) || is_realtime(p)) {
4517 p->static_prio = NICE_TO_PRIO(nice); 4600 p->static_prio = NICE_TO_PRIO(nice);
4518 goto out_unlock; 4601 goto out_unlock;
4519 } 4602 }
@@ -4723,12 +4806,15 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
4723 if (keep_boost) 4806 if (keep_boost)
4724 p->prio = rt_effective_prio(p, p->prio); 4807 p->prio = rt_effective_prio(p, p->prio);
4725 4808
4726 if (dl_prio(p->prio)) 4809 if (p->policy == SCHED_LITMUS) {
4810 p->sched_class = &litmus_sched_class;
4811 } else if (dl_prio(p->prio)) {
4727 p->sched_class = &dl_sched_class; 4812 p->sched_class = &dl_sched_class;
4728 else if (rt_prio(p->prio)) 4813 } else if (rt_prio(p->prio)) {
4729 p->sched_class = &rt_sched_class; 4814 p->sched_class = &rt_sched_class;
4730 else 4815 } else {
4731 p->sched_class = &fair_sched_class; 4816 p->sched_class = &fair_sched_class;
4817 }
4732} 4818}
4733 4819
4734/* 4820/*
@@ -4760,6 +4846,7 @@ static int __sched_setscheduler(struct task_struct *p,
4760 int reset_on_fork; 4846 int reset_on_fork;
4761 int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; 4847 int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
4762 struct rq *rq; 4848 struct rq *rq;
4849 int litmus_task = 0;
4763 4850
4764 /* The pi code expects interrupts enabled */ 4851 /* The pi code expects interrupts enabled */
4765 BUG_ON(pi && in_interrupt()); 4852 BUG_ON(pi && in_interrupt());
@@ -4789,7 +4876,9 @@ recheck:
4789 if ((dl_policy(policy) && !__checkparam_dl(attr)) || 4876 if ((dl_policy(policy) && !__checkparam_dl(attr)) ||
4790 (rt_policy(policy) != (attr->sched_priority != 0))) 4877 (rt_policy(policy) != (attr->sched_priority != 0)))
4791 return -EINVAL; 4878 return -EINVAL;
4792 4879 if ((policy == SCHED_LITMUS) && (policy == p->policy)) {
4880 return -EINVAL;
4881 }
4793 /* 4882 /*
4794 * Allow unprivileged RT tasks to decrease priority: 4883 * Allow unprivileged RT tasks to decrease priority:
4795 */ 4884 */
@@ -4857,6 +4946,13 @@ recheck:
4857 return retval; 4946 return retval;
4858 } 4947 }
4859 4948
4949 if (policy == SCHED_LITMUS) {
4950 retval = litmus_admit_task(p);
4951 if (retval) {
4952 return retval;
4953 }
4954 }
4955
4860 if (pi) 4956 if (pi)
4861 cpuset_read_lock(); 4957 cpuset_read_lock();
4862 4958
@@ -4949,6 +5045,11 @@ change:
4949 goto unlock; 5045 goto unlock;
4950 } 5046 }
4951 5047
5048 if (is_realtime(p)) {
5049 litmus_exit_task(p);
5050 litmus_task = 1;
5051 }
5052
4952 p->sched_reset_on_fork = reset_on_fork; 5053 p->sched_reset_on_fork = reset_on_fork;
4953 oldprio = p->prio; 5054 oldprio = p->prio;
4954 5055
@@ -4977,6 +5078,16 @@ change:
4977 __setscheduler(rq, p, attr, pi); 5078 __setscheduler(rq, p, attr, pi);
4978 __setscheduler_uclamp(p, attr); 5079 __setscheduler_uclamp(p, attr);
4979 5080
5081 if (litmus_policy(policy)) {
5082#ifdef CONFIG_SMP
5083 p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
5084#else
5085 p->rt_param.stack_in_use = running ? 0 : NO_CPU;
5086#endif
5087 p->rt_param.present = running;
5088 litmus->task_new(p, queued, running);
5089 }
5090
4980 if (queued) { 5091 if (queued) {
4981 /* 5092 /*
4982 * We enqueue to tail when the priority of a task is 5093 * We enqueue to tail when the priority of a task is
@@ -5005,6 +5116,10 @@ change:
5005 balance_callback(rq); 5116 balance_callback(rq);
5006 preempt_enable(); 5117 preempt_enable();
5007 5118
5119 if (litmus_task) {
5120 litmus_dealloc(p);
5121 }
5122
5008 return 0; 5123 return 0;
5009 5124
5010unlock: 5125unlock:
@@ -5391,9 +5506,9 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5391 rcu_read_lock(); 5506 rcu_read_lock();
5392 5507
5393 p = find_process_by_pid(pid); 5508 p = find_process_by_pid(pid);
5394 if (!p) { 5509 if (!p || is_realtime(p)) {
5395 rcu_read_unlock(); 5510 rcu_read_unlock();
5396 return -ESRCH; 5511 return p ? -EPERM : -ESRCH;
5397 } 5512 }
5398 5513
5399 /* Prevent p going away */ 5514 /* Prevent p going away */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a8a08030a8f7..1842c3e33476 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -18,6 +18,8 @@
18#include "sched.h" 18#include "sched.h"
19#include "pelt.h" 19#include "pelt.h"
20 20
21#include <litmus/litmus.h>
22
21struct dl_bandwidth def_dl_bandwidth; 23struct dl_bandwidth def_dl_bandwidth;
22 24
23static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) 25static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
@@ -1049,17 +1051,21 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
1049#endif 1051#endif
1050 1052
1051 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); 1053 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
1052 if (dl_task(rq->curr)) 1054 if (dl_task(rq->curr)) {
1053 check_preempt_curr_dl(rq, p, 0); 1055 check_preempt_curr_dl(rq, p, 0);
1054 else 1056 } else if (!is_realtime(rq->curr)) {
1055 resched_curr(rq); 1057 resched_curr(rq);
1058 }
1056 1059
1057#ifdef CONFIG_SMP 1060#ifdef CONFIG_SMP
1058 /* 1061 /*
1059 * Queueing this task back might have overloaded rq, check if we need 1062 * Queueing this task back might have overloaded rq, check if we need
1060 * to kick someone away. 1063 * to kick someone away.
1064 *
1065 * LITMUS note: Don't incur this overhead if we are running a LITMUS
1066 * task.
1061 */ 1067 */
1062 if (has_pushable_dl_tasks(rq)) { 1068 if (has_pushable_dl_tasks(rq) && (!is_realtime(rq->curr))) {
1063 /* 1069 /*
1064 * Nothing relies on rq->lock after this, so its safe to drop 1070 * Nothing relies on rq->lock after this, so its safe to drop
1065 * rq->lock. 1071 * rq->lock.
@@ -2357,9 +2363,13 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
2357 * Since this might be the only -deadline task on the rq, 2363 * Since this might be the only -deadline task on the rq,
2358 * this is the right place to try to pull some other one 2364 * this is the right place to try to pull some other one
2359 * from an overloaded CPU, if any. 2365 * from an overloaded CPU, if any.
2366 *
2367 * LITMUS note: also don't pull a task when we're running LITMUS tasks.
2360 */ 2368 */
2361 if (!task_on_rq_queued(p) || rq->dl.dl_nr_running) 2369 if (!task_on_rq_queued(p) || rq->dl.dl_nr_running ||
2370 is_realtime(rq->curr)) {
2362 return; 2371 return;
2372 }
2363 2373
2364 deadline_queue_pull_task(rq); 2374 deadline_queue_pull_task(rq);
2365} 2375}
@@ -2374,9 +2384,8 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
2374 put_task_struct(p); 2384 put_task_struct(p);
2375 2385
2376 /* If p is not queued we will update its parameters at next wakeup. */ 2386 /* If p is not queued we will update its parameters at next wakeup. */
2377 if (!task_on_rq_queued(p)) { 2387 if (!task_on_rq_queued(p) || is_realtime(rq->curr)) {
2378 add_rq_bw(&p->dl, &rq->dl); 2388 add_rq_bw(&p->dl, &rq->dl);
2379
2380 return; 2389 return;
2381 } 2390 }
2382 2391
diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c
new file mode 100644
index 000000000000..d9c59998155b
--- /dev/null
+++ b/kernel/sched/litmus.c
@@ -0,0 +1,386 @@
1/* This file is included from kernel/sched.c */
2
3#include "sched.h"
4
5#include <litmus/trace.h>
6#include <litmus/sched_trace.h>
7
8#include <litmus/debug_trace.h>
9#include <litmus/litmus.h>
10#include <litmus/budget.h>
11#include <litmus/sched_plugin.h>
12#include <litmus/preempt.h>
13#include <litmus/np.h>
14
15static void update_time_litmus(struct rq *rq, struct task_struct *p)
16{
17 u64 delta = rq->clock - p->se.exec_start;
18 if (unlikely((s64)delta < 0))
19 delta = 0;
20 /* per job counter */
21 p->rt_param.job_params.exec_time += delta;
22 /* task counter */
23 p->se.sum_exec_runtime += delta;
24 if (delta) {
25 TRACE_TASK(p, "charged %llu exec time (total:%llu, rem:%llu)\n",
26 delta, p->rt_param.job_params.exec_time, budget_remaining(p));
27 }
28 /* sched_clock() */
29 p->se.exec_start = rq->clock;
30 cpuacct_charge(p, delta);
31}
32
33static void double_rq_lock(struct rq *rq1, struct rq *rq2);
34static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
35
36static struct task_struct *
37litmus_schedule(struct rq *rq, struct task_struct *prev)
38{
39 struct task_struct *next;
40
41#ifdef CONFIG_SMP
42 struct rq* other_rq;
43 long was_running;
44 int from_where;
45 lt_t _maybe_deadlock = 0;
46#endif
47
48 /* let the plugin schedule */
49 next = litmus->schedule(prev);
50
51 sched_state_plugin_check();
52
53#ifdef CONFIG_SMP
54 /* check if a global plugin pulled a task from a different RQ */
55 if (next && task_rq(next) != rq) {
56 /* we need to migrate the task */
57 other_rq = task_rq(next);
58 from_where = other_rq->cpu;
59 TRACE_TASK(next, "migrate from %d\n", from_where);
60
61 /* while we drop the lock, the prev task could change its
62 * state
63 */
64 BUG_ON(prev != current);
65 was_running = is_current_running();
66
67 /* Don't race with a concurrent switch. This could deadlock in
68 * the case of cross or circular migrations. It's the job of
69 * the plugin to make sure that doesn't happen.
70 */
71 TRACE_TASK(next, "stack_in_use=%d\n",
72 next->rt_param.stack_in_use);
73 if (next->rt_param.stack_in_use != NO_CPU) {
74 TRACE_TASK(next, "waiting to deschedule\n");
75 _maybe_deadlock = litmus_clock();
76 }
77
78 raw_spin_unlock(&rq->lock);
79
80 while (next->rt_param.stack_in_use != NO_CPU) {
81 cpu_relax();
82 mb();
83 if (next->rt_param.stack_in_use == NO_CPU)
84 TRACE_TASK(next,"descheduled. Proceeding.\n");
85
86 if (!litmus->should_wait_for_stack(next)) {
87 /* plugin aborted the wait */
88 TRACE_TASK(next,
89 "plugin gave up waiting for stack\n");
90 next = NULL;
91 /* Make sure plugin is given a chance to
92 * reconsider. */
93 litmus_reschedule_local();
94 /* give up */
95 raw_spin_lock(&rq->lock);
96 goto out;
97 }
98
99 if (from_where != task_rq(next)->cpu) {
100 /* The plugin should not give us something
101 * that other cores are trying to pull, too */
102 TRACE_TASK(next, "next invalid: task keeps "
103 "shifting around!? "
104 "(%d->%d)\n",
105 from_where,
106 task_rq(next)->cpu);
107
108 /* bail out */
109 raw_spin_lock(&rq->lock);
110 litmus->next_became_invalid(next);
111 litmus_reschedule_local();
112 next = NULL;
113 goto out;
114 }
115
116 if (lt_before(_maybe_deadlock + 1000000000L,
117 litmus_clock())) {
118 /* We've been spinning for 1s.
119 * Something can't be right!
120 * Let's abandon the task and bail out; at least
121 * we will have debug info instead of a hard
122 * deadlock.
123 */
124#ifdef CONFIG_BUG_ON_MIGRATION_DEADLOCK
125 BUG();
126#else
127 TRACE_TASK(next,"stack too long in use. "
128 "Deadlock?\n");
129 next = NULL;
130
131 /* bail out */
132 raw_spin_lock(&rq->lock);
133 goto out;
134#endif
135 }
136 }
137#ifdef __ARCH_WANT_UNLOCKED_CTXSW
138 if (next->on_cpu)
139 TRACE_TASK(next, "waiting for !oncpu");
140 while (next->on_cpu) {
141 cpu_relax();
142 mb();
143 }
144#endif
145 double_rq_lock(rq, other_rq);
146 if (other_rq == task_rq(next) &&
147 next->rt_param.stack_in_use == NO_CPU) {
148 /* ok, we can grab it */
149 set_task_cpu(next, rq->cpu);
150 /* release the other CPU's runqueue, but keep ours */
151 raw_spin_unlock(&other_rq->lock);
152 } else {
153 /* Either it moved or the stack was claimed; both is
154 * bad and forces us to abort the migration. */
155 TRACE_TASK(next, "next invalid: no longer available\n");
156 raw_spin_unlock(&other_rq->lock);
157 litmus->next_became_invalid(next);
158 next = NULL;
159 goto out;
160 }
161
162 if (!litmus->post_migration_validate(next)) {
163 TRACE_TASK(next, "plugin deems task now invalid\n");
164 litmus_reschedule_local();
165 next = NULL;
166 }
167 }
168#endif
169
170 /* check if the task became invalid while we dropped the lock */
171 if (next && (!is_realtime(next) || !tsk_rt(next)->present)) {
172 TRACE_TASK(next,
173 "BAD: next (no longer?) valid\n");
174 litmus->next_became_invalid(next);
175 litmus_reschedule_local();
176 next = NULL;
177 }
178
179 if (next) {
180#ifdef CONFIG_SMP
181 next->rt_param.stack_in_use = rq->cpu;
182#else
183 next->rt_param.stack_in_use = 0;
184#endif
185 update_rq_clock(rq);
186 next->se.exec_start = rq->clock;
187 }
188
189out:
190 update_enforcement_timer(next);
191 return next;
192}
193
194static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
195 int flags)
196{
197 tsk_rt(p)->present = 1;
198 if (flags & ENQUEUE_WAKEUP) {
199 sched_trace_task_resume(p);
200 /* LITMUS^RT plugins need to update the state
201 * _before_ making it available in global structures.
202 * Linux gets away with being lazy about the task state
203 * update. We can't do that, hence we update the task
204 * state already here.
205 *
206 * WARNING: this needs to be re-evaluated when porting
207 * to newer kernel versions.
208 */
209 p->state = TASK_RUNNING;
210 litmus->task_wake_up(p);
211
212 rq->litmus.nr_running++;
213 } else {
214 TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
215 p->se.exec_start = rq->clock;
216 }
217}
218
219static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
220 int flags)
221{
222 if (flags & DEQUEUE_SLEEP) {
223#ifdef CONFIG_SCHED_TASK_TRACE
224 tsk_rt(p)->job_params.last_suspension = litmus_clock();
225#endif
226 litmus->task_block(p);
227 tsk_rt(p)->present = 0;
228 sched_trace_task_block(p);
229
230 rq->litmus.nr_running--;
231 } else
232 TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
233}
234
235static void yield_task_litmus(struct rq *rq)
236{
237 TS_SYSCALL_IN_START;
238 TS_SYSCALL_IN_END;
239
240 BUG_ON(rq->curr != current);
241 /* sched_yield() is called to trigger delayed preemptions.
242 * Thus, mark the current task as needing to be rescheduled.
243 * This will cause the scheduler plugin to be invoked, which can
244 * then determine if a preemption is still required.
245 */
246 clear_exit_np(current);
247 litmus_reschedule_local();
248
249 TS_SYSCALL_OUT_START;
250}
251
252/* Plugins are responsible for this.
253 */
254static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
255{
256}
257
258static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
259{
260}
261
262/* pick_next_task_litmus() - thin wrapper around litmus_schedule()
263 *
264 * Returns the next task to be scheduled.
265 */
266static struct task_struct *pick_next_task_litmus(struct rq *rq,
267 struct task_struct *prev, struct rq_flags *rf)
268{
269 struct task_struct *next;
270
271 if (is_realtime(prev))
272 update_time_litmus(rq, prev);
273
274 rq_unpin_lock(rq, rf);
275 TS_PLUGIN_SCHED_START;
276 next = litmus_schedule(rq, prev);
277 TS_PLUGIN_SCHED_END;
278 rq_repin_lock(rq, rf);
279
280 /* This is a bit backwards: the other classes call put_prev_task()
281 * _after_ they've determined that the class has some queued tasks.
282 * We can't determine this easily because each plugin manages its own
283 * ready queues, and because in the case of globally shared queues,
284 * we really don't know whether we'll have something ready even if
285 * we test here. So we do it in reverse: first ask the plugin to
286 * provide a task, and if we find one, call put_prev_task() on the
287 * previously scheduled task.
288 */
289 if (next)
290 put_prev_task(rq, prev);
291
292 return next;
293}
294
295static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
296{
297 if (is_realtime(p) && !queued) {
298 update_time_litmus(rq, p);
299 /* budget check for QUANTUM_ENFORCEMENT tasks */
300 if (budget_enforced(p) && budget_exhausted(p)) {
301 litmus_reschedule_local();
302 }
303 }
304}
305
306static void switched_to_litmus(struct rq *rq, struct task_struct *p)
307{
308}
309
310static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
311 int oldprio)
312{
313}
314
315unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
316{
317 /* return infinity */
318 return 0;
319}
320
321/* This is called when a task becomes a real-time task, either due to a SCHED_*
322 * class transition or due to PI mutex inheritance. We don't handle Linux PI
323 * mutex inheritance yet (and probably never will). Use the LITMUS-provided
324 * synchronization primitives instead.
325 */
326static void set_next_task_litmus(struct rq *rq, struct task_struct *p)
327{
328 p->se.exec_start = rq->clock;
329}
330
331
332#ifdef CONFIG_SMP
333/* execve tries to rebalance task in this scheduling domain.
334 * We don't care about the scheduling domain; can gets called from
335 * exec, fork, wakeup.
336 */
337static int
338select_task_rq_litmus(struct task_struct *p, int cpu, int sd_flag, int flags)
339{
340 /* preemption is already disabled.
341 * We don't want to change cpu here
342 */
343 return task_cpu(p);
344}
345#endif
346
347static void update_curr_litmus(struct rq *rq)
348{
349 struct task_struct *p = rq->curr;
350
351 if (!is_realtime(p))
352 return;
353
354 update_time_litmus(rq, p);
355}
356
357const struct sched_class litmus_sched_class = {
358 /* From 34f971f6 the stop/migrate worker threads have a class on
359 * their own, which is the highest prio class. We don't support
360 * cpu-hotplug or cpu throttling. Allows Litmus to use up to 1.0
361 * CPU capacity.
362 */
363 .next = &stop_sched_class,
364 .enqueue_task = enqueue_task_litmus,
365 .dequeue_task = dequeue_task_litmus,
366 .yield_task = yield_task_litmus,
367
368 .check_preempt_curr = check_preempt_curr_litmus,
369
370 .pick_next_task = pick_next_task_litmus,
371 .put_prev_task = put_prev_task_litmus,
372
373#ifdef CONFIG_SMP
374 .select_task_rq = select_task_rq_litmus,
375#endif
376
377 .set_next_task = set_next_task_litmus,
378 .task_tick = task_tick_litmus,
379
380 .get_rr_interval = get_rr_interval_litmus,
381
382 .prio_changed = prio_changed_litmus,
383 .switched_to = switched_to_litmus,
384
385 .update_curr = update_curr_litmus,
386};
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 9b8adc01be3d..a48c98b950b3 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,8 @@
7 7
8#include "pelt.h" 8#include "pelt.h"
9 9
10#include <litmus/litmus.h>
11
10int sched_rr_timeslice = RR_TIMESLICE; 12int sched_rr_timeslice = RR_TIMESLICE;
11int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; 13int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
12 14
@@ -499,8 +501,12 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
499 else if (!on_rt_rq(rt_se)) 501 else if (!on_rt_rq(rt_se))
500 enqueue_rt_entity(rt_se, 0); 502 enqueue_rt_entity(rt_se, 0);
501 503
502 if (rt_rq->highest_prio.curr < curr->prio) 504 // LITMUS note: Don't subject LITMUS tasks to remote
505 // reschedules.
506 if ((rt_rq->highest_prio.curr < curr->prio) &&
507 !is_realtime(curr)) {
503 resched_curr(rq); 508 resched_curr(rq);
509 }
504 } 510 }
505} 511}
506 512
@@ -589,8 +595,10 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
589{ 595{
590 struct rq *rq = rq_of_rt_rq(rt_rq); 596 struct rq *rq = rq_of_rt_rq(rt_rq);
591 597
592 if (!rt_rq->rt_nr_running) 598 if (!rt_rq->rt_nr_running ||
599 is_realtime(rq->curr)) {
593 return; 600 return;
601 }
594 602
595 enqueue_top_rt_rq(rt_rq); 603 enqueue_top_rt_rq(rt_rq);
596 resched_curr(rq); 604 resched_curr(rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c8870c5bd7df..c4f7afbe90c0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -162,6 +162,11 @@ static inline int rt_policy(int policy)
162 return policy == SCHED_FIFO || policy == SCHED_RR; 162 return policy == SCHED_FIFO || policy == SCHED_RR;
163} 163}
164 164
165static inline int litmus_policy(int policy)
166{
167 return policy == SCHED_LITMUS;
168}
169
165static inline int dl_policy(int policy) 170static inline int dl_policy(int policy)
166{ 171{
167 return policy == SCHED_DEADLINE; 172 return policy == SCHED_DEADLINE;
@@ -169,7 +174,8 @@ static inline int dl_policy(int policy)
169static inline bool valid_policy(int policy) 174static inline bool valid_policy(int policy)
170{ 175{
171 return idle_policy(policy) || fair_policy(policy) || 176 return idle_policy(policy) || fair_policy(policy) ||
172 rt_policy(policy) || dl_policy(policy); 177 rt_policy(policy) || dl_policy(policy) ||
178 litmus_policy(policy);
173} 179}
174 180
175static inline int task_has_idle_policy(struct task_struct *p) 181static inline int task_has_idle_policy(struct task_struct *p)
@@ -685,6 +691,10 @@ struct dl_rq {
685 u64 bw_ratio; 691 u64 bw_ratio;
686}; 692};
687 693
694struct litmus_rq {
695 unsigned long nr_running;
696};
697
688#ifdef CONFIG_FAIR_GROUP_SCHED 698#ifdef CONFIG_FAIR_GROUP_SCHED
689/* An entity is a task if it doesn't "own" a runqueue */ 699/* An entity is a task if it doesn't "own" a runqueue */
690#define entity_is_task(se) (!se->my_q) 700#define entity_is_task(se) (!se->my_q)
@@ -881,6 +891,7 @@ struct rq {
881 struct cfs_rq cfs; 891 struct cfs_rq cfs;
882 struct rt_rq rt; 892 struct rt_rq rt;
883 struct dl_rq dl; 893 struct dl_rq dl;
894 struct litmus_rq litmus;
884 895
885#ifdef CONFIG_FAIR_GROUP_SCHED 896#ifdef CONFIG_FAIR_GROUP_SCHED
886 /* list of leaf cfs_rq on this CPU: */ 897 /* list of leaf cfs_rq on this CPU: */
@@ -1783,11 +1794,19 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
1783 next->sched_class->set_next_task(rq, next); 1794 next->sched_class->set_next_task(rq, next);
1784} 1795}
1785 1796
1797/* FIXME: This is conceptually wrong; this should be below the stop-machine
1798 * class, but existing plugins (that predate the stop-machine class) depend on
1799 * the assumption that LITMUS^RT plugins are the top scheduling class.
1800 */
1801#define sched_class_highest (&litmus_sched_class)
1802
1803/*
1786#ifdef CONFIG_SMP 1804#ifdef CONFIG_SMP
1787#define sched_class_highest (&stop_sched_class) 1805#define sched_class_highest (&stop_sched_class)
1788#else 1806#else
1789#define sched_class_highest (&dl_sched_class) 1807#define sched_class_highest (&dl_sched_class)
1790#endif 1808#endif
1809*/
1791 1810
1792#define for_class_range(class, _from, _to) \ 1811#define for_class_range(class, _from, _to) \
1793 for (class = (_from); class != (_to); class = class->next) 1812 for (class = (_from); class != (_to); class = class->next)
@@ -1795,6 +1814,7 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
1795#define for_each_class(class) \ 1814#define for_each_class(class) \
1796 for_class_range(class, sched_class_highest, NULL) 1815 for_class_range(class, sched_class_highest, NULL)
1797 1816
1817extern const struct sched_class litmus_sched_class;
1798extern const struct sched_class stop_sched_class; 1818extern const struct sched_class stop_sched_class;
1799extern const struct sched_class dl_sched_class; 1819extern const struct sched_class dl_sched_class;
1800extern const struct sched_class rt_sched_class; 1820extern const struct sched_class rt_sched_class;
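To make the ordering change concrete: with sched_class_highest redefined to &litmus_sched_class and litmus_sched_class.next pointing at stop_sched_class (see litmus.c above), for_each_class() now walks litmus, stop, dl, rt, fair, idle in that order. A standalone toy model of that walk, for illustration only and not kernel code:

#include <stdio.h>

/* Userspace toy model of the sched_class ->next chain after this patch;
 * the names mirror the kernel classes, but nothing here is kernel code. */
struct class_model {
	const char *name;
	const struct class_model *next;
};

static const struct class_model idle_c   = { "idle",   NULL };
static const struct class_model fair_c   = { "fair",   &idle_c };
static const struct class_model rt_c     = { "rt",     &fair_c };
static const struct class_model dl_c     = { "dl",     &rt_c };
static const struct class_model stop_c   = { "stop",   &dl_c };
static const struct class_model litmus_c = { "litmus", &stop_c };

int main(void)
{
	/* Mirrors for_each_class(): start at sched_class_highest, follow ->next. */
	for (const struct class_model *c = &litmus_c; c; c = c->next)
		printf("%s\n", c->name);
	return 0;
}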
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index c0640739e05e..3bd42cf27d88 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -8,6 +8,7 @@
8 * See kernel/stop_machine.c 8 * See kernel/stop_machine.c
9 */ 9 */
10#include "sched.h" 10#include "sched.h"
11#include <litmus/preempt.h>
11 12
12#ifdef CONFIG_SMP 13#ifdef CONFIG_SMP
13static int 14static int
@@ -43,6 +44,13 @@ pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
43 return NULL; 44 return NULL;
44 45
45 set_next_task_stop(rq, rq->stop); 46 set_next_task_stop(rq, rq->stop);
47
48 /* Let the LITMUS state machine know that a task was picked. This is
49 * needed because the LITMUS scheduling plugin will not be called if
50 * the stop-task class picks a task.
51 */
52 sched_state_task_picked();
53
46 return rq->stop; 54 return rq->stop;
47} 55}
48 56
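For context on why this hook is needed: the core scheduler's pick path walks the classes from sched_class_highest downward and takes the first task offered, schematically (condensed from kernel/sched/core.c, not part of this patch):

	/* Schematic fragment of the pick_next_task() slow path: the first
	 * class to return a task wins, and lower classes are not consulted. */
	for_each_class(class) {
		p = class->pick_next_task(rq, prev, rf);
		if (p)
			return p;
	}

When the stop class wins the pick, the LITMUS preemption state machine still has to be moved to its "task picked" state, which is exactly what the added sched_state_task_picked() call does.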
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 65605530ee34..ce20111d3fe2 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -42,6 +42,10 @@
42#include <linux/freezer.h> 42#include <linux/freezer.h>
43#include <linux/compat.h> 43#include <linux/compat.h>
44 44
45#include <litmus/litmus.h>
46#include <litmus/debug_trace.h>
47#include <litmus/trace.h>
48
45#include <linux/uaccess.h> 49#include <linux/uaccess.h>
46 50
47#include <trace/events/timer.h> 51#include <trace/events/timer.h>
@@ -1092,6 +1096,10 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1092 1096
1093 tim = hrtimer_update_lowres(timer, tim, mode); 1097 tim = hrtimer_update_lowres(timer, tim, mode);
1094 1098
1099#ifdef CONFIG_REPORT_TIMER_LATENCY
1100 timer->when_added = base->get_time();
1101#endif
1102
1095 hrtimer_set_expires_range_ns(timer, tim, delta_ns); 1103 hrtimer_set_expires_range_ns(timer, tim, delta_ns);
1096 1104
1097 /* Switch the timer base, if necessary: */ 1105 /* Switch the timer base, if necessary: */
@@ -1546,6 +1554,9 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
1546{ 1554{
1547 struct hrtimer_clock_base *base; 1555 struct hrtimer_clock_base *base;
1548 unsigned int active = cpu_base->active_bases & active_mask; 1556 unsigned int active = cpu_base->active_bases & active_mask;
1557#ifdef CONFIG_REPORT_TIMER_LATENCY
1558 ktime_t was_exp_nxt = cpu_base->expires_next;
1559#endif
1549 1560
1550 for_each_active_base(base, cpu_base, active) { 1561 for_each_active_base(base, cpu_base, active) {
1551 struct timerqueue_node *node; 1562 struct timerqueue_node *node;
@@ -1573,6 +1584,26 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
1573 if (basenow < hrtimer_get_softexpires_tv64(timer)) 1584 if (basenow < hrtimer_get_softexpires_tv64(timer))
1574 break; 1585 break;
1575 1586
1587#ifdef CONFIG_REPORT_TIMER_LATENCY
1588 if (cpu_base->hres_active && (basenow >=
1589 hrtimer_get_expires_tv64(timer) +
1590 ((s64) CONFIG_REPORT_TIMER_LATENCY_THRESHOLD))) {
1591 printk_ratelimited(KERN_WARNING "WARNING: "
1592 "P%d timer latency: %lld now: %lld "
1593 "basenow:%lld exp:%lld sexp:%lld "
1594 "nxt:%lld added:%lld "
1595 "timer:%p fn:%p\n",
1596 smp_processor_id(),
1597 basenow - hrtimer_get_expires_tv64(timer),
1598 now, basenow,
1599 hrtimer_get_expires_tv64(timer),
1600 hrtimer_get_softexpires_tv64(timer),
1601 was_exp_nxt,
1602 timer->when_added,
1603 timer, timer->function);
1604 }
1605#endif
1606
1576 __run_hrtimer(cpu_base, base, timer, &basenow, flags); 1607 __run_hrtimer(cpu_base, base, timer, &basenow, flags);
1577 if (active_mask == HRTIMER_ACTIVE_SOFT) 1608 if (active_mask == HRTIMER_ACTIVE_SOFT)
1578 hrtimer_sync_wait_running(cpu_base, flags); 1609 hrtimer_sync_wait_running(cpu_base, flags);
@@ -1679,9 +1710,14 @@ retry:
1679 */ 1710 */
1680 cpu_base->nr_hangs++; 1711 cpu_base->nr_hangs++;
1681 cpu_base->hang_detected = 1; 1712 cpu_base->hang_detected = 1;
1713
1714 TRACE("hrtimer hang detected on P%d: #%u\n", cpu_base->cpu,
1715 cpu_base->nr_hangs);
1716
1682 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1717 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1683 1718
1684 delta = ktime_sub(now, entry_time); 1719 delta = ktime_sub(now, entry_time);
1720 TRACE("hrtimer hang delta.tv64:%u\n", (unsigned int) delta.tv64);
1685 if ((unsigned int)delta > cpu_base->max_hang_time) 1721 if ((unsigned int)delta > cpu_base->max_hang_time)
1686 cpu_base->max_hang_time = (unsigned int) delta; 1722 cpu_base->max_hang_time = (unsigned int) delta;
1687 /* 1723 /*
@@ -1692,6 +1728,9 @@ retry:
1692 expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); 1728 expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
1693 else 1729 else
1694 expires_next = ktime_add(now, delta); 1730 expires_next = ktime_add(now, delta);
1731
1732 TRACE("hrtimer expires_next:%llu\n", expires_next.tv64);
1733
1695 tick_program_event(expires_next, 1); 1734 tick_program_event(expires_next, 1);
1696 pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); 1735 pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
1697} 1736}
@@ -1762,8 +1801,21 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1762 struct task_struct *task = t->task; 1801 struct task_struct *task = t->task;
1763 1802
1764 t->task = NULL; 1803 t->task = NULL;
1765 if (task) 1804 if (task) {
1805#ifdef CONFIG_SCHED_OVERHEAD_TRACE
1806 if (is_realtime(task)) {
1807 ktime_t expires = hrtimer_get_expires(timer);
1808 /* Fix up timers that were added past their due date,
1809 * because that's not really release latency. */
1810 lt_t intended_release = max(expires,
1811 timer->when_added);
1812 TS_RELEASE_LATENCY(intended_release);
1813 }
1814#endif
1815 TS_RELEASE_START;
1766 wake_up_process(task); 1816 wake_up_process(task);
1817 TS_RELEASE_END;
1818 }
1767 1819
1768 return HRTIMER_NORESTART; 1820 return HRTIMER_NORESTART;
1769} 1821}
@@ -1916,9 +1968,19 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
1916 u64 slack; 1968 u64 slack;
1917 1969
1918 slack = current->timer_slack_ns; 1970 slack = current->timer_slack_ns;
1919 if (dl_task(current) || rt_task(current)) 1971 if (dl_task(current) || rt_task(current) || is_realtime(current))
1920 slack = 0; 1972 slack = 0;
1921 1973
1974 if (is_realtime(current) && (clockid == CLOCK_MONOTONIC) &&
1975 (mode == HRTIMER_MODE_ABS)) {
1976 /* Special handling: to handle periodic activations correctly
1977 * despite timer jitter and overheads, the plugin might need to
1978 * know the time at which the task intends to wake up. */
1979 tsk_rt(current)->doing_abs_nanosleep = 1;
1980 tsk_rt(current)->nanosleep_wakeup = ktime_to_ns(
1981 timespec64_to_ktime(*rqtp));
1982 }
1983
1922 hrtimer_init_sleeper_on_stack(&t, clockid, mode); 1984 hrtimer_init_sleeper_on_stack(&t, clockid, mode);
1923 hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); 1985 hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
1924 ret = do_nanosleep(&t, mode); 1986 ret = do_nanosleep(&t, mode);
@@ -1937,6 +1999,9 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
1937 restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); 1999 restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
1938out: 2000out:
1939 destroy_hrtimer_on_stack(&t.timer); 2001 destroy_hrtimer_on_stack(&t.timer);
2002
2003 tsk_rt(current)->doing_abs_nanosleep = 0;
2004
1940 return ret; 2005 return ret;
1941} 2006}
1942 2007
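The doing_abs_nanosleep / nanosleep_wakeup bookkeeping targets the standard periodic-task idiom of sleeping until an absolute CLOCK_MONOTONIC instant, so the plugin can treat the requested wake-up time as the intended job release. A minimal user-space example of that idiom (plain POSIX, nothing LITMUS-specific is assumed):

#include <time.h>

/* Sleep until successive absolute points on CLOCK_MONOTONIC; this is the
 * clock_nanosleep(TIMER_ABSTIME) pattern that the hrtimer_nanosleep() hunk
 * above recognizes for SCHED_LITMUS tasks. */
int main(void)
{
	struct timespec next;
	const long period_ns = 10 * 1000 * 1000;	/* 10 ms period */

	clock_gettime(CLOCK_MONOTONIC, &next);
	for (int job = 0; job < 5; job++) {
		/* ... job body goes here ... */
		next.tv_nsec += period_ns;
		if (next.tv_nsec >= 1000000000L) {
			next.tv_nsec -= 1000000000L;
			next.tv_sec += 1;
		}
		clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &next, NULL);
	}
	return 0;
}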
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 50055d2e4ea8..1ad757848f69 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -42,6 +42,8 @@
42 42
43#include "internal.h" 43#include "internal.h"
44 44
45#include <litmus/litmus.h>
46
45/* 47/*
46 * Sleep at most 200ms at a time in balance_dirty_pages(). 48 * Sleep at most 200ms at a time in balance_dirty_pages().
47 */ 49 */
@@ -436,7 +438,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
436 if (bg_thresh >= thresh) 438 if (bg_thresh >= thresh)
437 bg_thresh = thresh / 2; 439 bg_thresh = thresh / 2;
438 tsk = current; 440 tsk = current;
439 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { 441 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk) ||
442 is_realtime(tsk)) {
440 bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; 443 bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
441 thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; 444 thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
442 } 445 }
@@ -486,7 +489,7 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat)
486 else 489 else
487 dirty = vm_dirty_ratio * node_memory / 100; 490 dirty = vm_dirty_ratio * node_memory / 100;
488 491
489 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) 492 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk) || is_realtime(tsk))
490 dirty += dirty / 4; 493 dirty += dirty / 4;
491 494
492 return dirty; 495 return dirty;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f391c0c4ed1d..6d90a9ed20c4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -75,6 +75,8 @@
75#include "internal.h" 75#include "internal.h"
76#include "shuffle.h" 76#include "shuffle.h"
77 77
78#include <litmus/litmus.h>
79
78/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ 80/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
79static DEFINE_MUTEX(pcp_batch_high_lock); 81static DEFINE_MUTEX(pcp_batch_high_lock);
80#define MIN_PERCPU_PAGELIST_FRACTION (8) 82#define MIN_PERCPU_PAGELIST_FRACTION (8)
@@ -4208,8 +4210,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
4208 * comment for __cpuset_node_allowed(). 4210 * comment for __cpuset_node_allowed().
4209 */ 4211 */
4210 alloc_flags &= ~ALLOC_CPUSET; 4212 alloc_flags &= ~ALLOC_CPUSET;
4211 } else if (unlikely(rt_task(current)) && !in_interrupt()) 4213 } else if (unlikely(rt_task(current) || is_realtime(current)) &&
4214 !in_interrupt()) {
4212 alloc_flags |= ALLOC_HARDER; 4215 alloc_flags |= ALLOC_HARDER;
4216 }
4213 4217
4214 if (gfp_mask & __GFP_KSWAPD_RECLAIM) 4218 if (gfp_mask & __GFP_KSWAPD_RECLAIM)
4215 alloc_flags |= ALLOC_KSWAPD; 4219 alloc_flags |= ALLOC_KSWAPD;