From 3c4abebc788e9d92d776d7bc8b778f398cdb4010 Mon Sep 17 00:00:00 2001 From: Nathan O Date: Mon, 9 Dec 2019 14:59:56 -0500 Subject: Initial attempt to "connect the wires" - This is my first attempt to re-add all of the modifications on top of this version of the Linux kernel that were present in the previous version of LITMUS. - More notes on changes will follow after testing--no guarantees the code as it is now will compile or run correctly. --- Makefile | 1 + arch/arm/Kconfig | 9 + arch/arm64/Kconfig | 9 + arch/x86/Kconfig | 9 + arch/x86/include/asm/feather_trace.h | 18 ++ arch/x86/include/asm/feather_trace_32.h | 115 ++++++++++ arch/x86/include/asm/feather_trace_64.h | 124 ++++++++++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/ft_event.c | 170 ++++++++++++++ fs/exec.c | 3 + fs/inode.c | 2 + fs/select.c | 6 +- include/linux/fs.h | 3 + include/linux/hardirq.h | 3 + include/linux/hrtimer.h | 3 + include/linux/sched.h | 10 + include/trace/events/litmus.h | 231 +++++++++++++++++++ include/uapi/linux/sched.h | 1 + kernel/exit.c | 14 ++ kernel/fork.c | 6 + kernel/locking/rwsem.c | 13 +- kernel/printk/printk.c | 14 +- kernel/sched/Makefile | 3 + kernel/sched/core.c | 153 +++++++++++-- kernel/sched/deadline.c | 21 +- kernel/sched/litmus.c | 386 ++++++++++++++++++++++++++++++++ kernel/sched/rt.c | 12 +- kernel/sched/sched.h | 22 +- kernel/sched/stop_task.c | 8 + kernel/time/hrtimer.c | 69 +++++- mm/page-writeback.c | 7 +- mm/page_alloc.c | 6 +- 32 files changed, 1413 insertions(+), 40 deletions(-) create mode 100644 arch/x86/include/asm/feather_trace.h create mode 100644 arch/x86/include/asm/feather_trace_32.h create mode 100644 arch/x86/include/asm/feather_trace_64.h create mode 100644 arch/x86/kernel/ft_event.c create mode 100644 include/trace/events/litmus.h create mode 100644 kernel/sched/litmus.c diff --git a/Makefile b/Makefile index 1d5298356ea8..405d18d59837 100644 --- a/Makefile +++ b/Makefile @@ -1011,6 +1011,7 @@ export MODORDER := $(extmod-prefix)modules.order ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += litmus/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8a50efb559f3..3aaa81a3ae70 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2094,3 +2094,12 @@ source "arch/arm/crypto/Kconfig" endif source "arch/arm/kvm/Kconfig" + +config ARCH_HAS_FEATHER_TRACE + def_bool n + +config ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI + def_bool n + +source "litmus/Kconfig" + diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3f047afb982c..a6bf629e708c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1680,3 +1680,12 @@ source "arch/arm64/kvm/Kconfig" if CRYPTO source "arch/arm64/crypto/Kconfig" endif + +config ARCH_HAS_FEATHER_TRACE + def_bool n + +config ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI + def_bool n + +source "litmus/Kconfig" + diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ef85139553f..3765164809c5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2972,3 +2972,12 @@ config X86_DEV_DMA_OPS source "drivers/firmware/Kconfig" source "arch/x86/kvm/Kconfig" + +config ARCH_HAS_FEATHER_TRACE + def_bool y + +config ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI + def_bool y + +source "litmus/Kconfig" + diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h new file mode 100644 index 000000000000..4e732d4ea508 --- /dev/null +++ b/arch/x86/include/asm/feather_trace.h @@ 
-0,0 +1,18 @@ +#ifndef _ARCH_FEATHER_TRACE_H +#define _ARCH_FEATHER_TRACE_H + +#include +#include + +static inline unsigned long long ft_timestamp(void) +{ + return get_cycles(); +} + +#ifdef CONFIG_X86_32 +#include "feather_trace_32.h" +#else +#include "feather_trace_64.h" +#endif + +#endif diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h new file mode 100644 index 000000000000..75e81a9f9382 --- /dev/null +++ b/arch/x86/include/asm/feather_trace_32.h @@ -0,0 +1,115 @@ +/* Copyright (c) 2007-2012 Björn Brandenburg, + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not directly include this file. Include feather_trace.h instead */ + +#define feather_callback __attribute__((regparm(3))) __attribute__((used)) + +/* + * Make the compiler reload any register that is not saved in a cdecl function + * call (minus the registers that we explicitly clobber as output registers). 
+ */ +#define __FT_CLOBBER_LIST0 "memory", "cc", "eax", "edx", "ecx" +#define __FT_CLOBBER_LIST1 "memory", "cc", "eax", "ecx" +#define __FT_CLOBBER_LIST2 "memory", "cc", "eax" +#define __FT_CLOBBER_LIST3 "memory", "cc", "eax" + +#define __FT_TMP1(x) "=d" (x) +#define __FT_ARG1(x) "0" ((long) (x)) +#define __FT_TMP2(x) "=c" (x) +#define __FT_ARG2(x) "1" ((long) (x)) + +#define __FT_ARG3(x) "r" ((long) (x)) + +#define ft_event(id, callback) \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " call " #callback " \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : : : __FT_CLOBBER_LIST0) + +#define ft_event0(id, callback) \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movl $" #id ", %%eax \n\t" \ + " call " #callback " \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : : : __FT_CLOBBER_LIST0) + +#define ft_event1(id, callback, param) \ + do { \ + long __ft_tmp1; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movl $" #id ", %%eax \n\t" \ + " call " #callback " \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1) \ + : __FT_ARG1(param) \ + : __FT_CLOBBER_LIST1); \ + } while (0); + +#define ft_event2(id, callback, param, param2) \ + do { \ + long __ft_tmp1, __ft_tmp2; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movl $" #id ", %%eax \n\t" \ + " call " #callback " \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \ + : __FT_ARG1(param), __FT_ARG2(param2) \ + : __FT_CLOBBER_LIST2); \ + } while (0); + + +#define ft_event3(id, callback, param, param2, param3) \ + do { \ + long __ft_tmp1, __ft_tmp2; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " subl $4, %%esp \n\t" \ + " movl $" #id ", %%eax \n\t" \ + " movl %2, (%%esp) \n\t" \ + " call " #callback " \n\t" \ + " addl $4, %%esp \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \ + : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \ + : __FT_CLOBBER_LIST3); \ + } while (0); diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h new file mode 100644 index 000000000000..5ce49e2eebba --- /dev/null +++ b/arch/x86/include/asm/feather_trace_64.h @@ -0,0 +1,124 @@ +/* Copyright (c) 2010 Andrea Bastoni, + * Copyright (c) 2012 Björn Brandenburg, + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not directly include this file. Include feather_trace.h instead */ + +/* regparm is the default on x86_64 */ +#define feather_callback __attribute__((used)) + +#define __FT_EVENT_TABLE(id,from,to) \ + ".section __event_table, \"aw\"\n\t" \ + ".balign 8\n\t" \ + ".quad " #id ", 0, " #from ", " #to " \n\t" \ + ".previous \n\t" + +/* + * x86_64 caller only owns rbp, rbx, r12-r15; + * the callee can freely modify the others. + */ +#define __FT_CLOBBER_LIST0 "memory", "cc", "rdi", "rsi", "rdx", "rcx", \ + "r8", "r9", "r10", "r11", "rax" + +#define __FT_CLOBBER_LIST1 "memory", "cc", "rdi", "rdx", "rcx", \ + "r8", "r9", "r10", "r11", "rax" + +#define __FT_CLOBBER_LIST2 "memory", "cc", "rdi", "rcx", \ + "r8", "r9", "r10", "r11", "rax" + +#define __FT_CLOBBER_LIST3 "memory", "cc", "rdi", \ + "r8", "r9", "r10", "r11", "rax" + +/* The registers RDI, RSI, RDX, RCX, R8 and R9 are used for integer and pointer + * arguments. */ + +/* RSI */ +#define __FT_TMP1(x) "=S" (x) +#define __FT_ARG1(x) "0" ((long) (x)) + +/* RDX */ +#define __FT_TMP2(x) "=d" (x) +#define __FT_ARG2(x) "1" ((long) (x)) + +/* RCX */ +#define __FT_TMP3(x) "=c" (x) +#define __FT_ARG3(x) "2" ((long) (x)) + +#define ft_event(id, callback) \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : : : __FT_CLOBBER_LIST0) + +#define ft_event0(id, callback) \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movq $" #id ", %%rdi \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : : : __FT_CLOBBER_LIST0) + +#define ft_event1(id, callback, param) \ + do { \ + long __ft_tmp1; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movq $" #id ", %%rdi \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1) \ + : __FT_ARG1(param) \ + : __FT_CLOBBER_LIST1); \ + } while (0); + +#define ft_event2(id, callback, param, param2) \ + do { \ + long __ft_tmp1, __ft_tmp2; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movq $" #id ", %%rdi \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \ + : __FT_ARG1(param), __FT_ARG2(param2) \ + : __FT_CLOBBER_LIST2); \ + } while (0); + +#define ft_event3(id, callback, param, param2, param3) \ + do { \ + long __ft_tmp1, __ft_tmp2, __ft_tmp3; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movq $" #id ", %%rdi \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2), __FT_TMP3(__ft_tmp3) \ + : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \ + : __FT_CLOBBER_LIST3); \ + } while (0); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3578ad248bc9..5ee68d48e0a4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -140,6 +140,8 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o +obj-$(CONFIG_FEATHER_TRACE) += ft_event.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c new file mode 100644 index 
000000000000..7aa3d0592ff2 --- /dev/null +++ b/arch/x86/kernel/ft_event.c @@ -0,0 +1,170 @@ +#include +#include +#include +#include + +#include + +/* the feather trace management functions assume + * exclusive access to the event table + */ + +#ifndef CONFIG_RELOCATABLE + +#define BYTE_JUMP 0xeb +#define BYTE_JUMP_LEN 0x02 + +/* for each event, there is an entry in the event table */ +struct trace_event { + long id; + long count; + long start_addr; + long end_addr; +}; + +extern struct trace_event __start___event_table[]; +extern struct trace_event __stop___event_table[]; + + +/* NOTE: The following two functions have been stolen from ftrace.c */ + +static inline int +within(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr < end; +} + +static unsigned long text_ip_addr(unsigned long ip) +{ + /* + * On x86_64, kernel text mappings are mapped read-only, so we use + * the kernel identity mapping instead of the kernel text mapping + * to modify the kernel text. + * + * For 32bit kernels, these mappings are same and we can use + * kernel identity mapping to modify code. + */ + if (within(ip, (unsigned long)_text, (unsigned long)_etext)) + ip = (unsigned long)__va(__pa_symbol(ip)); + + return ip; +} + +/* Workaround: if no events are defined, then the event_table section does not + * exist and the above references cause linker errors. This could probably be + * fixed by adjusting the linker script, but it is easier to maintain for us if + * we simply create a dummy symbol in the event table section. + */ +int __event_table_dummy[0] __attribute__ ((section("__event_table"))); + +int ft_enable_event(unsigned long id) +{ + struct trace_event* te = __start___event_table; + int count = 0; + char* delta; + unsigned char* instr; + + set_kernel_text_rw(); + set_all_modules_text_rw(); + + while (te < __stop___event_table) { + if (te->id == id && ++te->count == 1) { + instr = (unsigned char*) te->start_addr; + /* make sure we don't clobber something wrong */ + if (*instr == BYTE_JUMP) { + delta = (unsigned char*) text_ip_addr( + ((unsigned long) te->start_addr) + + 1); + *delta = 0; + } + } + if (te->id == id) + count++; + te++; + } + + set_all_modules_text_ro(); + set_kernel_text_ro(); + + printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count); + return count; +} + +int ft_disable_event(unsigned long id) +{ + struct trace_event* te = __start___event_table; + int count = 0; + char* delta; + unsigned char* instr; + + set_kernel_text_rw(); + set_all_modules_text_rw(); + + while (te < __stop___event_table) { + if (te->id == id && --te->count == 0) { + instr = (unsigned char*) te->start_addr; + if (*instr == BYTE_JUMP) { + delta = (unsigned char*) text_ip_addr( + ((unsigned long) te->start_addr) + + 1); + *delta = te->end_addr - te->start_addr - + BYTE_JUMP_LEN; + } + } + if (te->id == id) + count++; + te++; + } + + set_all_modules_text_ro(); + set_kernel_text_ro(); + + printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count); + return count; +} + +int ft_disable_all_events(void) +{ + struct trace_event* te = __start___event_table; + int count = 0; + char* delta; + unsigned char* instr; + + set_kernel_text_rw(); + set_all_modules_text_rw(); + + while (te < __stop___event_table) { + if (te->count) { + instr = (unsigned char*) te->start_addr; + if (*instr == BYTE_JUMP) { + delta = (unsigned char*) text_ip_addr( + ((unsigned long) te->start_addr) + + 1); + *delta = te->end_addr - te->start_addr - + BYTE_JUMP_LEN; + te->count = 0; + count++; + } + } 
+ te++; + } + + set_all_modules_text_ro(); + set_kernel_text_ro(); + + return count; +} + +int ft_is_event_enabled(unsigned long id) +{ + struct trace_event* te = __start___event_table; + + while (te < __stop___event_table) { + if (te->id == id) + return te->count; + te++; + } + return 0; +} + +#endif diff --git a/fs/exec.c b/fs/exec.c index 555e93c7dec8..49c8613d2510 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -64,6 +64,8 @@ #include #include +#include + #include #include #include @@ -1765,6 +1767,7 @@ static int __do_execve_file(int fd, struct filename *filename, goto out_unmark; sched_exec(); + litmus_exec(); bprm->file = file; if (!filename) { diff --git a/fs/inode.c b/fs/inode.c index fef457a42882..abf61717d9db 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -394,6 +394,8 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_lru); __address_space_init_once(&inode->i_data); i_size_ordered_init(inode); + INIT_LIST_HEAD(&inode->i_obj_list); + mutex_init(&inode->i_obj_mutex); } EXPORT_SYMBOL(inode_init_once); diff --git a/fs/select.c b/fs/select.c index 53a0c149f528..7a3745f8d17f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -32,6 +32,8 @@ #include #include +#include + #include @@ -80,9 +82,9 @@ u64 select_estimate_accuracy(struct timespec64 *tv) /* * Realtime tasks get a slack of 0 for obvious reasons. */ - - if (rt_task(current)) + if (rt_task(current) || is_realtime(current)) { return 0; + } ktime_get_ts64(&now); now = timespec64_sub(*tv, now); diff --git a/include/linux/fs.h b/include/linux/fs.h index e0d909d35763..d65e17d3d302 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -734,6 +734,9 @@ struct inode { struct fsverity_info *i_verity_info; #endif + struct list_head i_obj_list; + struct mutex i_obj_mutex; + void *i_private; /* fs or device private pointer */ } __randomize_layout; diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index da0af631ded5..35271458e22b 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -8,6 +8,7 @@ #include #include +#include extern void synchronize_irq(unsigned int irq); extern bool synchronize_hardirq(unsigned int irq); @@ -38,6 +39,7 @@ extern void rcu_nmi_exit(void); account_irq_enter_time(current); \ preempt_count_add(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ + ft_irq_fired(); \ } while (0) /* @@ -75,6 +77,7 @@ extern void irq_exit(void); preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ + ft_irq_fired(); \ } while (0) #define nmi_exit() \ diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1b9a51a1bccb..a145e140d532 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -123,6 +123,9 @@ struct hrtimer { u8 is_rel; u8 is_soft; u8 is_hard; +#if defined(CONFIG_REPORT_TIMER_LATENCY) || defined(CONFIG_SCHED_OVERHEAD_TRACE) + ktime_t when_added; +#endif }; /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 67a1d86981a9..0a1b09305248 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -32,6 +32,9 @@ #include #include +#include +#include + /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; struct backing_dev_info; @@ -61,6 +64,8 @@ struct signal_struct; struct task_delay_info; struct task_group; +struct od_table_entry; + /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). 
@@ -1158,6 +1163,10 @@ struct task_struct { /* Start of a write-and-pause period: */ unsigned long dirty_paused_when; + /* LITMUS RT parameters and state */ + struct rt_param rt_param; + struct od_table_entry *od_table; + #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; @@ -1741,6 +1750,7 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) static inline void set_tsk_need_resched(struct task_struct *tsk) { set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); + sched_state_will_schedule(tsk); } static inline void clear_tsk_need_resched(struct task_struct *tsk) diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h new file mode 100644 index 000000000000..0fffcee02be0 --- /dev/null +++ b/include/trace/events/litmus.h @@ -0,0 +1,231 @@ +/* + * LITMUS^RT kernel style scheduling tracepoints + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM litmus + +#if !defined(_SCHED_TASK_TRACEPOINT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _SCHED_TASK_TRACEPOINT_H + +#include + +#include +#include + +/* + * Tracing task admission + */ +TRACE_EVENT(litmus_task_param, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, wcet ) + __field( lt_t, period ) + __field( lt_t, phase ) + __field( int, partition ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->job = t ? t->rt_param.job_params.job_no : 0; + __entry->wcet = get_exec_cost(t); + __entry->period = get_rt_period(t); + __entry->phase = get_rt_phase(t); + __entry->partition = get_partition(t); + ), + + TP_printk("period(%d, %Lu).\nwcet(%d, %Lu).\n", + __entry->pid, __entry->period, + __entry->pid, __entry->wcet) +); + +/* + * Tracing jobs release + */ +TRACE_EVENT(litmus_task_release, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, release ) + __field( lt_t, deadline ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->job = t ? t->rt_param.job_params.job_no : 0; + __entry->release = get_release(t); + __entry->deadline = get_deadline(t); + ), + + TP_printk("release(job(%u, %u)): %Lu\ndeadline(job(%u, %u)): %Lu\n", + __entry->pid, __entry->job, __entry->release, + __entry->pid, __entry->job, __entry->deadline) +); + +/* + * Tracepoint for switching to new task + */ +TRACE_EVENT(litmus_switch_to, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, when ) + __field( lt_t, exec_time ) + ), + + TP_fast_assign( + __entry->pid = is_realtime(t) ? t->pid : 0; + __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0; + __entry->when = litmus_clock(); + __entry->exec_time = get_exec_time(t); + ), + + TP_printk("switch_to(job(%u, %u)): %Lu (exec: %Lu)\n", + __entry->pid, __entry->job, + __entry->when, __entry->exec_time) +); + +/* + * Tracepoint for switching away previous task + */ +TRACE_EVENT(litmus_switch_away, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, when ) + __field( lt_t, exec_time ) + ), + + TP_fast_assign( + __entry->pid = is_realtime(t) ? t->pid : 0; + __entry->job = is_realtime(t) ? 
t->rt_param.job_params.job_no : 0; + __entry->when = litmus_clock(); + __entry->exec_time = get_exec_time(t); + ), + + TP_printk("switch_away(job(%u, %u)): %Lu (exec: %Lu)\n", + __entry->pid, __entry->job, + __entry->when, __entry->exec_time) +); + +/* + * Tracing jobs completion + */ +TRACE_EVENT(litmus_task_completion, + + TP_PROTO(struct task_struct *t, unsigned long forced), + + TP_ARGS(t, forced), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, when ) + __field( unsigned long, forced ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->job = t ? t->rt_param.job_params.job_no : 0; + __entry->when = litmus_clock(); + __entry->forced = forced; + ), + + TP_printk("completed(job(%u, %u)): %Lu (forced: %lu)\n", + __entry->pid, __entry->job, + __entry->when, __entry->forced) +); + +/* + * Trace blocking tasks. + */ +TRACE_EVENT(litmus_task_block, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( lt_t, when ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->when = litmus_clock(); + ), + + TP_printk("(%u) blocks: %Lu\n", __entry->pid, __entry->when) +); + +/* + * Tracing jobs resume + */ +TRACE_EVENT(litmus_task_resume, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, when ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->job = t ? t->rt_param.job_params.job_no : 0; + __entry->when = litmus_clock(); + ), + + TP_printk("resume(job(%u, %u)): %Lu\n", + __entry->pid, __entry->job, __entry->when) +); + +/* + * Trace synchronous release + */ +TRACE_EVENT(litmus_sys_release, + + TP_PROTO(lt_t *start), + + TP_ARGS(start), + + TP_STRUCT__entry( + __field( lt_t, rel ) + __field( lt_t, when ) + ), + + TP_fast_assign( + __entry->rel = *start; + __entry->when = litmus_clock(); + ), + + TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when) +); + +#endif /* _SCHED_TASK_TRACEPOINT_H */ + +/* Must stay outside the protection */ +#include diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 25b4fa00bad1..f6e838d97ff3 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -87,6 +87,7 @@ struct clone_args { /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 #define SCHED_DEADLINE 6 +#define SCHED_LITMUS 7 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ #define SCHED_RESET_ON_FORK 0x40000000 diff --git a/kernel/exit.c b/kernel/exit.c index a46a50d67002..6832c614c663 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -69,6 +69,10 @@ #include #include +#include + +extern void exit_od_table(struct task_struct *t); + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -727,6 +731,14 @@ void __noreturn do_exit(long code) if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); + if (unlikely(is_realtime(tsk))) { + /* We would like the task to be polite and transition out of + * RT mode first. 
+ */ + litmus_do_exit(tsk); + BUG_ON(is_realtime(tsk)); + } + /* * If do_exit is called because this processes oopsed, it's possible * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before @@ -798,6 +810,8 @@ void __noreturn do_exit(long code) tty_audit_exit(); audit_free(tsk); + exit_od_table(tsk); + tsk->exit_code = code; taskstats_exit(tsk, group_dead); diff --git a/kernel/fork.c b/kernel/fork.c index 55af6931c6ec..220211ef8946 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -104,6 +104,9 @@ #include +#include +#include + #define CREATE_TRACE_POINTS #include @@ -740,6 +743,9 @@ void __put_task_struct(struct task_struct *tsk) cgroup_free(tsk); task_numa_free(tsk, true); security_task_free(tsk); + + exit_litmus(tsk); + exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index eef04551eae7..9adb95795f83 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -31,6 +31,8 @@ #include "rwsem.h" #include "lock_events.h" +#include + /* * The least significant 3 bits of the owner value has the following * meanings when set. @@ -886,11 +888,13 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock) * a writer, need_resched() check needs to be done here. */ if (owner_state != OWNER_WRITER) { - if (need_resched()) + if (need_resched()) { break; - if (rt_task(current) && - (prev_owner_state != OWNER_WRITER) + } + if ((rt_task(current) || is_realtime(current)) && + (prev_owner_state != OWNER_WRITER)) { break; + } } prev_owner_state = owner_state; @@ -1258,7 +1262,8 @@ wait: * until rwsem_try_write_lock() is called. */ if ((wstate == WRITER_FIRST) && (rt_task(current) || - time_after(jiffies, waiter.timeout))) { + is_realtime(current) || + time_after(jiffies, waiter.timeout))) { wstate = WRITER_HANDOFF; lockevent_inc(rwsem_wlock_handoff); break; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ca65327a6de8..4c3d18d2587e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -68,6 +68,13 @@ int console_printk[4] = { }; EXPORT_SYMBOL_GPL(console_printk); +/* + * Divert printk() messages when there is a LITMUS^RT debug listener. + */ +#include +int trace_override = 0; +int trace_recurse = 0; + atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); EXPORT_SYMBOL(ignore_console_lock_warning); @@ -1916,6 +1923,11 @@ int vprintk_store(int facility, int level, */ text_len = vscnprintf(text, sizeof(textbuf), fmt, args); + /* If the LITMUS^RT tracer is active then divert printk messages.
*/ + if (trace_override && !trace_recurse) { + TRACE("%s", text); + } + /* mark and strip a trailing newline */ if (text_len && text[text_len-1] == '\n') { text_len--; @@ -2967,7 +2979,7 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { void wake_up_klogd(void) { preempt_disable(); - if (waitqueue_active(&log_wait)) { + if (!trace_override && waitqueue_active(&log_wait)) { this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 21fb5a5662b5..95000e43fce7 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -30,3 +30,6 @@ obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_MEMBARRIER) += membarrier.o obj-$(CONFIG_CPU_ISOLATION) += isolation.o obj-$(CONFIG_PSI) += psi.o + +obj-y += litmus.o + diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0f2eb3629070..917a374b616f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -20,6 +20,12 @@ #include "pelt.h" +#include +#include +#include +#include +#include + #define CREATE_TRACE_POINTS #include @@ -520,6 +526,11 @@ void resched_curr(struct rq *rq) set_tsk_need_resched(curr); set_preempt_need_resched(); return; + } else if (is_realtime(curr)) { + /* Cannot call set_tsk_need_resched() on LITMUS tasks on a + * remote core. Only policy plugins may do this + * via litmus_reschedule(). */ + return; } if (set_nr_and_not_polling(curr)) @@ -2317,9 +2328,17 @@ void scheduler_ipi(void) * this IPI. */ preempt_fold_need_resched(); - - if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) + /* Let LITMUS' preemption state machine know about this IPI. */ + sched_state_ipi(); + + if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) { +#ifndef CONFIG_ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI + /* If we don't call irq_enter() then we need to trigger the + * IRQ tracing manually. */ + ft_irq_fired(); +#endif return; + } /* * Not all reschedule IPI handlers call irq_enter/irq_exit, since @@ -2397,7 +2416,12 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) struct rq_flags rf; #if defined(CONFIG_SMP) - if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { + /* + * In LITMUS, it is up to a plugin to determine whether to send an IPI + * to a remote CPU. + */ + if (!is_realtime(p) && sched_feat(TTWU_QUEUE) && + !cpus_share_cache(smp_processor_id(), cpu)) { sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ttwu_queue_remote(p, cpu, wake_flags); return; @@ -2517,6 +2541,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) { unsigned long flags; int cpu, success = 0; + if (is_realtime(p)) { + TRACE_TASK(p, "try_to_wake_up() state: %d\n", p->state); + } preempt_disable(); if (p == current) { @@ -2616,6 +2643,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ smp_cond_load_acquire(&p->on_cpu, !VAL); + /* LITMUS: Once the task can be safely referenced by this CPU, don't + * mess with further Linux load balancing stuff. 
+ */ + if (is_realtime(p)) { + goto litmus_out_activate; + } + p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; @@ -2631,6 +2665,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) set_task_cpu(p, cpu); } +litmus_out_activate: #else /* CONFIG_SMP */ if (p->in_iowait) { @@ -2641,6 +2676,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) #endif /* CONFIG_SMP */ ttwu_queue(p, cpu, wake_flags); + + if (is_realtime(p)) { + TRACE_TASK(p, "try_to_wake_up() done state: %d\n", p->state); + } unlock: raw_spin_unlock_irqrestore(&p->pi_lock, flags); out: @@ -2853,13 +2892,16 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) */ p->prio = current->normal_prio; + litmus_fork(p); + uclamp_fork(p); /* * Revert to default priority/policy on fork if requested. */ if (unlikely(p->sched_reset_on_fork)) { - if (task_has_dl_policy(p) || task_has_rt_policy(p)) { + if (task_has_dl_policy(p) || task_has_rt_policy(p) || + p->policy == SCHED_LITMUS) { p->policy = SCHED_NORMAL; p->static_prio = NICE_TO_PRIO(0); p->rt_priority = 0; @@ -2876,12 +2918,15 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->sched_reset_on_fork = 0; } - if (dl_prio(p->prio)) + if (is_realtime(p)) { + p->sched_class = &litmus_sched_class; + } else if (dl_prio(p->prio)) { return -EAGAIN; - else if (rt_prio(p->prio)) + } else if (rt_prio(p->prio)) { p->sched_class = &rt_sched_class; - else + } else { p->sched_class = &fair_sched_class; + } init_entity_runnable_average(&p->se); @@ -2945,6 +2990,10 @@ void wake_up_new_task(struct task_struct *p) struct rq_flags rf; struct rq *rq; + if (is_realtime(p)) { + litmus->task_new(p, 1, 0); + } + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); p->state = TASK_RUNNING; #ifdef CONFIG_SMP @@ -3218,6 +3267,8 @@ static struct rq *finish_task_switch(struct task_struct *prev) */ prev_state = prev->state; vtime_task_switch(prev); + litmus->finish_switch(prev); + prev->rt_param.stack_in_use = NO_CPU; perf_event_task_sched_in(prev, current); finish_task(prev); finish_lock_switch(rq); @@ -3317,6 +3368,12 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) */ rq = finish_task_switch(prev); + + sched_trace_task_switch_to(current); + if (unlikely(sched_state_validate_switch())) { + litmus_reschedule_local(); + } + balance_callback(rq); preempt_enable(); @@ -3608,7 +3665,9 @@ void scheduler_tick(void) #ifdef CONFIG_SMP rq->idle_balance = idle_cpu(cpu); - trigger_load_balance(rq); + if (!is_realtime(current)) { + trigger_load_balance(rq); + } #endif } @@ -3910,9 +3969,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) /* * Optimization: we know that if all tasks are in the fair class we can * call that function directly, but only if the @prev task wasn't of a - * higher scheduling class, because otherwise those loose the + * higher scheduling class, because otherwise those lose the * opportunity to pull in more work from other CPUs. - */ + * + * We can't do this in LITMUS! + * + * This breaks many assumptions in the plugins. Do not uncomment + * without considering how this affects global plugins such as GSN-EDF. 
if (likely((prev->sched_class == &idle_sched_class || prev->sched_class == &fair_sched_class) && rq->nr_running == rq->cfs.h_nr_running)) { @@ -3921,12 +3984,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) if (unlikely(p == RETRY_TASK)) goto restart; - /* Assumes fair_sched_class->next == idle_sched_class */ + // Assumes fair_sched_class->next == idle_sched_class if (unlikely(!p)) p = idle_sched_class.pick_next_task(rq, prev, rf); return p; } + */ restart: #ifdef CONFIG_SMP @@ -4003,10 +4067,15 @@ static void __sched notrace __schedule(bool preempt) struct rq *rq; int cpu; + TS_SCHED_START; + sched_state_entered_schedule(); + cpu = smp_processor_id(); rq = cpu_rq(cpu); prev = rq->curr; + sched_trace_task_switch_away(prev); + schedule_debug(prev, preempt); if (sched_feat(HRTICK)) @@ -4030,6 +4099,8 @@ static void __sched notrace __schedule(bool preempt) rq->clock_update_flags <<= 1; update_rq_clock(rq); + this_cpu_write(litmus_preemption_in_progress, preempt); + switch_count = &prev->nivcsw; if (!preempt && prev->state) { if (signal_pending_state(prev->state, prev)) { @@ -4049,6 +4120,8 @@ clear_tsk_need_resched(prev); clear_preempt_need_resched(); + this_cpu_write(litmus_preemption_in_progress, false); + if (likely(prev != next)) { rq->nr_switches++; /* @@ -4073,15 +4146,25 @@ ++*switch_count; trace_sched_switch(preempt, prev, next); - + TS_SCHED_END(next); + TS_CXS_START(next); /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); + TS_CXS_END(current); } else { rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); + TS_SCHED_END(prev); rq_unlock_irq(rq, &rf); } + TS_SCHED2_START(prev); + sched_trace_task_switch_to(current); + if (unlikely(sched_state_validate_switch())) { + litmus_reschedule_local(); + } + balance_callback(rq); + TS_SCHED2_END(prev); } void __noreturn do_task_dead(void) @@ -4513,7 +4596,7 @@ void set_user_nice(struct task_struct *p, long nice) * it wont have any effect on scheduling until the task is * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR: */ - if (task_has_dl_policy(p) || task_has_rt_policy(p)) { + if (task_has_dl_policy(p) || task_has_rt_policy(p) || is_realtime(p)) { p->static_prio = NICE_TO_PRIO(nice); goto out_unlock; } @@ -4723,12 +4806,15 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, if (keep_boost) p->prio = rt_effective_prio(p, p->prio); - if (dl_prio(p->prio)) + if (p->policy == SCHED_LITMUS) { + p->sched_class = &litmus_sched_class; + } else if (dl_prio(p->prio)) { p->sched_class = &dl_sched_class; - else if (rt_prio(p->prio)) + } else if (rt_prio(p->prio)) { p->sched_class = &rt_sched_class; - else + } else { p->sched_class = &fair_sched_class; + } } /* @@ -4760,6 +4846,7 @@ static int __sched_setscheduler(struct task_struct *p, int reset_on_fork; int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; struct rq *rq; + int litmus_task = 0; /* The pi code expects interrupts enabled */ BUG_ON(pi && in_interrupt()); @@ -4789,7 +4876,9 @@ recheck: if ((dl_policy(policy) && !__checkparam_dl(attr)) || (rt_policy(policy) != (attr->sched_priority != 0))) return -EINVAL; - + if ((policy == SCHED_LITMUS) && (policy == p->policy)) { + return -EINVAL; + } /* * Allow unprivileged RT tasks to decrease priority: */ @@ -4857,6 +4946,13 @@ recheck: return retval; } + if (policy == SCHED_LITMUS) { + retval = litmus_admit_task(p); + if (retval) { + return retval; + } + } + if (pi)
cpuset_read_lock(); @@ -4949,6 +5045,11 @@ change: goto unlock; } + if (is_realtime(p)) { + litmus_exit_task(p); + litmus_task = 1; + } + p->sched_reset_on_fork = reset_on_fork; oldprio = p->prio; @@ -4977,6 +5078,16 @@ change: __setscheduler(rq, p, attr, pi); __setscheduler_uclamp(p, attr); + if (litmus_policy(policy)) { +#ifdef CONFIG_SMP + p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU; +#else + p->rt_param.stack_in_use = running ? 0 : NO_CPU; +#endif + p->rt_param.present = running; + litmus->task_new(p, queued, running); + } + if (queued) { /* * We enqueue to tail when the priority of a task is @@ -5005,6 +5116,10 @@ change: balance_callback(rq); preempt_enable(); + if (litmus_task) { + litmus_dealloc(p); + } + return 0; unlock: @@ -5391,9 +5506,9 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) rcu_read_lock(); p = find_process_by_pid(pid); - if (!p) { + if (!p || is_realtime(p)) { rcu_read_unlock(); - return -ESRCH; + return p ? -EPERM : -ESRCH; } /* Prevent p going away */ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index a8a08030a8f7..1842c3e33476 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -18,6 +18,8 @@ #include "sched.h" #include "pelt.h" +#include + struct dl_bandwidth def_dl_bandwidth; static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) @@ -1049,17 +1051,21 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) #endif enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); - if (dl_task(rq->curr)) + if (dl_task(rq->curr)) { check_preempt_curr_dl(rq, p, 0); - else + } else if (!is_realtime(rq->curr)) { resched_curr(rq); + } #ifdef CONFIG_SMP /* * Queueing this task back might have overloaded rq, check if we need * to kick someone away. + * + * LITMUS note: Don't incur this overhead if we are running a LITMUS + * task. */ - if (has_pushable_dl_tasks(rq)) { + if (has_pushable_dl_tasks(rq) && (!is_realtime(rq->curr))) { /* * Nothing relies on rq->lock after this, so its safe to drop * rq->lock. @@ -2357,9 +2363,13 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p) * Since this might be the only -deadline task on the rq, * this is the right place to try to pull some other one * from an overloaded CPU, if any. + * + * LITMUS note: also don't pull a task when we're running LITMUS tasks. */ - if (!task_on_rq_queued(p) || rq->dl.dl_nr_running) + if (!task_on_rq_queued(p) || rq->dl.dl_nr_running || + is_realtime(rq->curr)) { return; + } deadline_queue_pull_task(rq); } @@ -2374,9 +2384,8 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) put_task_struct(p); /* If p is not queued we will update its parameters at next wakeup. 
*/ - if (!task_on_rq_queued(p)) { + if (!task_on_rq_queued(p) || is_realtime(rq->curr)) { add_rq_bw(&p->dl, &rq->dl); - return; } diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c new file mode 100644 index 000000000000..d9c59998155b --- /dev/null +++ b/kernel/sched/litmus.c @@ -0,0 +1,386 @@ +/* This file is included from kernel/sched.c */ + +#include "sched.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +static void update_time_litmus(struct rq *rq, struct task_struct *p) +{ + u64 delta = rq->clock - p->se.exec_start; + if (unlikely((s64)delta < 0)) + delta = 0; + /* per job counter */ + p->rt_param.job_params.exec_time += delta; + /* task counter */ + p->se.sum_exec_runtime += delta; + if (delta) { + TRACE_TASK(p, "charged %llu exec time (total:%llu, rem:%llu)\n", + delta, p->rt_param.job_params.exec_time, budget_remaining(p)); + } + /* sched_clock() */ + p->se.exec_start = rq->clock; + cpuacct_charge(p, delta); +} + +static void double_rq_lock(struct rq *rq1, struct rq *rq2); +static void double_rq_unlock(struct rq *rq1, struct rq *rq2); + +static struct task_struct * +litmus_schedule(struct rq *rq, struct task_struct *prev) +{ + struct task_struct *next; + +#ifdef CONFIG_SMP + struct rq* other_rq; + long was_running; + int from_where; + lt_t _maybe_deadlock = 0; +#endif + + /* let the plugin schedule */ + next = litmus->schedule(prev); + + sched_state_plugin_check(); + +#ifdef CONFIG_SMP + /* check if a global plugin pulled a task from a different RQ */ + if (next && task_rq(next) != rq) { + /* we need to migrate the task */ + other_rq = task_rq(next); + from_where = other_rq->cpu; + TRACE_TASK(next, "migrate from %d\n", from_where); + + /* while we drop the lock, the prev task could change its + * state + */ + BUG_ON(prev != current); + was_running = is_current_running(); + + /* Don't race with a concurrent switch. This could deadlock in + * the case of cross or circular migrations. It's the job of + * the plugin to make sure that doesn't happen. + */ + TRACE_TASK(next, "stack_in_use=%d\n", + next->rt_param.stack_in_use); + if (next->rt_param.stack_in_use != NO_CPU) { + TRACE_TASK(next, "waiting to deschedule\n"); + _maybe_deadlock = litmus_clock(); + } + + raw_spin_unlock(&rq->lock); + + while (next->rt_param.stack_in_use != NO_CPU) { + cpu_relax(); + mb(); + if (next->rt_param.stack_in_use == NO_CPU) + TRACE_TASK(next,"descheduled. Proceeding.\n"); + + if (!litmus->should_wait_for_stack(next)) { + /* plugin aborted the wait */ + TRACE_TASK(next, + "plugin gave up waiting for stack\n"); + next = NULL; + /* Make sure plugin is given a chance to + * reconsider. */ + litmus_reschedule_local(); + /* give up */ + raw_spin_lock(&rq->lock); + goto out; + } + + if (from_where != task_rq(next)->cpu) { + /* The plugin should not give us something + * that other cores are trying to pull, too */ + TRACE_TASK(next, "next invalid: task keeps " + "shifting around!? " + "(%d->%d)\n", + from_where, + task_rq(next)->cpu); + + /* bail out */ + raw_spin_lock(&rq->lock); + litmus->next_became_invalid(next); + litmus_reschedule_local(); + next = NULL; + goto out; + } + + if (lt_before(_maybe_deadlock + 1000000000L, + litmus_clock())) { + /* We've been spinning for 1s. + * Something can't be right! + * Let's abandon the task and bail out; at least + * we will have debug info instead of a hard + * deadlock. + */ +#ifdef CONFIG_BUG_ON_MIGRATION_DEADLOCK + BUG(); +#else + TRACE_TASK(next,"stack too long in use. 
" + "Deadlock?\n"); + next = NULL; + + /* bail out */ + raw_spin_lock(&rq->lock); + goto out; +#endif + } + } +#ifdef __ARCH_WANT_UNLOCKED_CTXSW + if (next->on_cpu) + TRACE_TASK(next, "waiting for !oncpu"); + while (next->on_cpu) { + cpu_relax(); + mb(); + } +#endif + double_rq_lock(rq, other_rq); + if (other_rq == task_rq(next) && + next->rt_param.stack_in_use == NO_CPU) { + /* ok, we can grab it */ + set_task_cpu(next, rq->cpu); + /* release the other CPU's runqueue, but keep ours */ + raw_spin_unlock(&other_rq->lock); + } else { + /* Either it moved or the stack was claimed; both is + * bad and forces us to abort the migration. */ + TRACE_TASK(next, "next invalid: no longer available\n"); + raw_spin_unlock(&other_rq->lock); + litmus->next_became_invalid(next); + next = NULL; + goto out; + } + + if (!litmus->post_migration_validate(next)) { + TRACE_TASK(next, "plugin deems task now invalid\n"); + litmus_reschedule_local(); + next = NULL; + } + } +#endif + + /* check if the task became invalid while we dropped the lock */ + if (next && (!is_realtime(next) || !tsk_rt(next)->present)) { + TRACE_TASK(next, + "BAD: next (no longer?) valid\n"); + litmus->next_became_invalid(next); + litmus_reschedule_local(); + next = NULL; + } + + if (next) { +#ifdef CONFIG_SMP + next->rt_param.stack_in_use = rq->cpu; +#else + next->rt_param.stack_in_use = 0; +#endif + update_rq_clock(rq); + next->se.exec_start = rq->clock; + } + +out: + update_enforcement_timer(next); + return next; +} + +static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, + int flags) +{ + tsk_rt(p)->present = 1; + if (flags & ENQUEUE_WAKEUP) { + sched_trace_task_resume(p); + /* LITMUS^RT plugins need to update the state + * _before_ making it available in global structures. + * Linux gets away with being lazy about the task state + * update. We can't do that, hence we update the task + * state already here. + * + * WARNING: this needs to be re-evaluated when porting + * to newer kernel versions. + */ + p->state = TASK_RUNNING; + litmus->task_wake_up(p); + + rq->litmus.nr_running++; + } else { + TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); + p->se.exec_start = rq->clock; + } +} + +static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, + int flags) +{ + if (flags & DEQUEUE_SLEEP) { +#ifdef CONFIG_SCHED_TASK_TRACE + tsk_rt(p)->job_params.last_suspension = litmus_clock(); +#endif + litmus->task_block(p); + tsk_rt(p)->present = 0; + sched_trace_task_block(p); + + rq->litmus.nr_running--; + } else + TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n"); +} + +static void yield_task_litmus(struct rq *rq) +{ + TS_SYSCALL_IN_START; + TS_SYSCALL_IN_END; + + BUG_ON(rq->curr != current); + /* sched_yield() is called to trigger delayed preemptions. + * Thus, mark the current task as needing to be rescheduled. + * This will cause the scheduler plugin to be invoked, which can + * then determine if a preemption is still required. + */ + clear_exit_np(current); + litmus_reschedule_local(); + + TS_SYSCALL_OUT_START; +} + +/* Plugins are responsible for this. 
+ */ +static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags) +{ +} + +static void put_prev_task_litmus(struct rq *rq, struct task_struct *p) +{ +} + +/* pick_next_task_litmus() - litmus_schedule() function + * + * return the next task to be scheduled + */ +static struct task_struct *pick_next_task_litmus(struct rq *rq, + struct task_struct *prev, struct pin_cookie cookie) +{ + struct task_struct *next; + + if (is_realtime(prev)) + update_time_litmus(rq, prev); + + lockdep_unpin_lock(&rq->lock, cookie); + TS_PLUGIN_SCHED_START; + next = litmus_schedule(rq, prev); + TS_PLUGIN_SCHED_END; + lockdep_repin_lock(&rq->lock, cookie); + + /* This is a bit backwards: the other classes call put_prev_task() + * _after_ they've determined that the class has some queued tasks. + * We can't determine this easily because each plugin manages its own + * ready queues, and because in the case of globally shared queues, + * we really don't know whether we'll have something ready even if + * we test here. So we do it in reverse: first ask the plugin to + * provide a task, and if we find one, call put_prev_task() on the + * previously scheduled task. + */ + if (next) + put_prev_task(rq, prev); + + return next; +} + +static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued) +{ + if (is_realtime(p) && !queued) { + update_time_litmus(rq, p); + /* budget check for QUANTUM_ENFORCEMENT tasks */ + if (budget_enforced(p) && budget_exhausted(p)) { + litmus_reschedule_local(); + } + } +} + +static void switched_to_litmus(struct rq *rq, struct task_struct *p) +{ +} + +static void prio_changed_litmus(struct rq *rq, struct task_struct *p, + int oldprio) +{ +} + +unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p) +{ + /* return infinity */ + return 0; +} + +/* This is called when a task became a real-time task, either due to a SCHED_* + * class transition or due to PI mutex inheritance. We don't handle Linux PI + * mutex inheritance yet (and probably never will). Use LITMUS provided + * synchronization primitives instead. + */ +static void set_curr_task_litmus(struct rq *rq) +{ + rq->curr->se.exec_start = rq->clock; +} + + +#ifdef CONFIG_SMP +/* execve tries to rebalance task in this scheduling domain. + * We don't care about the scheduling domain; can gets called from + * exec, fork, wakeup. + */ +static int +select_task_rq_litmus(struct task_struct *p, int cpu, int sd_flag, int flags) +{ + /* preemption is already disabled. + * We don't want to change cpu here + */ + return task_cpu(p); +} +#endif + +static void update_curr_litmus(struct rq *rq) +{ + struct task_struct *p = rq->curr; + + if (!is_realtime(p)) + return; + + update_time_litmus(rq, p); +} + +const struct sched_class litmus_sched_class = { + /* From 34f971f6 the stop/migrate worker threads have a class on + * their own, which is the highest prio class. We don't support + * cpu-hotplug or cpu throttling. Allows Litmus to use up to 1.0 + * CPU capacity. 
+ */ + .next = &stop_sched_class, + .enqueue_task = enqueue_task_litmus, + .dequeue_task = dequeue_task_litmus, + .yield_task = yield_task_litmus, + + .check_preempt_curr = check_preempt_curr_litmus, + + .pick_next_task = pick_next_task_litmus, + .put_prev_task = put_prev_task_litmus, + +#ifdef CONFIG_SMP + .select_task_rq = select_task_rq_litmus, +#endif + + .set_curr_task = set_curr_task_litmus, + .task_tick = task_tick_litmus, + + .get_rr_interval = get_rr_interval_litmus, + + .prio_changed = prio_changed_litmus, + .switched_to = switched_to_litmus, + + .update_curr = update_curr_litmus, +}; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 9b8adc01be3d..a48c98b950b3 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,6 +7,8 @@ #include "pelt.h" +#include + int sched_rr_timeslice = RR_TIMESLICE; int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; @@ -499,8 +501,12 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) else if (!on_rt_rq(rt_se)) enqueue_rt_entity(rt_se, 0); - if (rt_rq->highest_prio.curr < curr->prio) + // LITMUS note: Don't subject LITMUS tasks to remote + // reschedules. + if ((rt_rq->highest_prio.curr < curr->prio) && + !is_realtime(curr)) { resched_curr(rq); + } } } @@ -589,8 +595,10 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { struct rq *rq = rq_of_rt_rq(rt_rq); - if (!rt_rq->rt_nr_running) + if (!rt_rq->rt_nr_running || + is_realtime(rq_of_rt_rq(rt_rq)->curr)) { return; + } enqueue_top_rt_rq(rt_rq); resched_curr(rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c8870c5bd7df..c4f7afbe90c0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -162,6 +162,11 @@ static inline int rt_policy(int policy) return policy == SCHED_FIFO || policy == SCHED_RR; } +static inline int litmus_policy(int policy) +{ + return policy == SCHED_LITMUS; +} + static inline int dl_policy(int policy) { return policy == SCHED_DEADLINE; @@ -169,7 +174,8 @@ static inline int dl_policy(int policy) static inline bool valid_policy(int policy) { return idle_policy(policy) || fair_policy(policy) || - rt_policy(policy) || dl_policy(policy); + rt_policy(policy) || dl_policy(policy) || + litmus_policy(policy); } static inline int task_has_idle_policy(struct task_struct *p) @@ -685,6 +691,10 @@ struct dl_rq { u64 bw_ratio; }; +struct litmus_rq { + unsigned long nr_running; +}; + #ifdef CONFIG_FAIR_GROUP_SCHED /* An entity is a task if it doesn't "own" a runqueue */ #define entity_is_task(se) (!se->my_q) @@ -881,6 +891,7 @@ struct rq { struct cfs_rq cfs; struct rt_rq rt; struct dl_rq dl; + struct litmus_rq litmus; #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this CPU: */ @@ -1783,11 +1794,19 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) next->sched_class->set_next_task(rq, next); } +/* FIXME: This is conceptually wrong; this should be below the stop-machine + * class, but existing plugins (that predate the stop-machine class) depend on + * the assumption that LITMUS^RT plugins are the top scheduling class.
+ */ +#define sched_class_highest (&litmus_sched_class) + +/* #ifdef CONFIG_SMP #define sched_class_highest (&stop_sched_class) #else #define sched_class_highest (&dl_sched_class) #endif +*/ #define for_class_range(class, _from, _to) \ for (class = (_from); class != (_to); class = class->next) @@ -1795,6 +1814,7 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) #define for_each_class(class) \ for_class_range(class, sched_class_highest, NULL) +extern const struct sched_class litmus_sched_class; extern const struct sched_class stop_sched_class; extern const struct sched_class dl_sched_class; extern const struct sched_class rt_sched_class; diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index c0640739e05e..3bd42cf27d88 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -8,6 +8,7 @@ * See kernel/stop_machine.c */ #include "sched.h" +#include #ifdef CONFIG_SMP static int @@ -43,6 +44,13 @@ pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf return NULL; set_next_task_stop(rq, rq->stop); + + /* Let the LITMUS state machine know that a task was picked. This is + * needed because the LITMUS scheduling plugin will not be called if + * the stop-task class picks a task. + */ + sched_state_task_picked(); + return rq->stop; } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 65605530ee34..ce20111d3fe2 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -42,6 +42,10 @@ #include #include +#include +#include +#include + #include #include @@ -1092,6 +1096,10 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, tim = hrtimer_update_lowres(timer, tim, mode); +#ifdef CONFIG_REPORT_TIMER_LATENCY + timer->when_added = base->get_time(); +#endif + hrtimer_set_expires_range_ns(timer, tim, delta_ns); /* Switch the timer base, if necessary: */ @@ -1546,6 +1554,9 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, { struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases & active_mask; +#ifdef CONFIG_REPORT_TIMER_LATENCY + ktime_t was_exp_nxt = cpu_base->expires_next; +#endif for_each_active_base(base, cpu_base, active) { struct timerqueue_node *node; @@ -1573,6 +1584,26 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, if (basenow < hrtimer_get_softexpires_tv64(timer)) break; +#ifdef CONFIG_REPORT_TIMER_LATENCY + if (cpu_base->hres_active && (basenow.tv64 >= + hrtimer_get_expires_tv64(timer) + + ((s64) CONFIG_REPORT_TIMER_LATENCY_THRESHOLD))) { + printk_ratelimited(KERN_WARNING "WARNING: " + "P%d timer latency: %lld now: %lld " + "basenow:%lld exp:%lld " + "nxt:%lld added:%lld " + "timer:%p fn:%p\n", + smp_processor_id(), + basenow.tv64 - hrtimer_get_expires_tv64(timer), + now.tv64, basenow.tv64, + hrtimer_get_expires_tv64(timer), + hrtimer_get_softexpires(timer), + was_exp_nxt.tv64, + timer->when_added.tv64, + timer, timer->function); + } +#endif + __run_hrtimer(cpu_base, base, timer, &basenow, flags); if (active_mask == HRTIMER_ACTIVE_SOFT) hrtimer_sync_wait_running(cpu_base, flags); @@ -1679,9 +1710,14 @@ retry: */ cpu_base->nr_hangs++; cpu_base->hang_detected = 1; + + TRACE("hrtimer hang detected on P%d: #%u\n", cpu_base->cpu, + cpu_base->nr_hangs); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); delta = ktime_sub(now, entry_time); + TRACE("hrtimer hang delta.tv64:%u\n", (unsigned int) delta.tv64); if ((unsigned int)delta > cpu_base->max_hang_time) cpu_base->max_hang_time = 
(unsigned int) delta; /* @@ -1692,6 +1728,9 @@ retry: expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); else expires_next = ktime_add(now, delta); + + TRACE("hrtimer expires_next:%llu\n", expires_next.tv64); + tick_program_event(expires_next, 1); pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); } @@ -1762,8 +1801,21 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) struct task_struct *task = t->task; t->task = NULL; - if (task) + if (task) { +#ifdef CONFIG_SCHED_OVERHEAD_TRACE + if (is_realtime(task)) { + ktime_t expires = hrtimer_get_expires(timer); + /* Fix up timers that were added past their due date, + * because that's not really release latency. */ + lt_t intended_release = max(expires.tv64, + timer->when_added.tv64); + TS_RELEASE_LATENCY(intended_release); + } +#endif + TS_RELEASE_START; wake_up_process(task); + TS_RELEASE_END; + } return HRTIMER_NORESTART; } @@ -1916,9 +1968,19 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp, u64 slack; slack = current->timer_slack_ns; - if (dl_task(current) || rt_task(current)) + if (dl_task(current) || rt_task(current) || is_realtime(current)) slack = 0; + if (is_realtime(current) && (clockid == CLOCK_MONOTONIC) && + (mode == HRTIMER_MODE_ABS)) { + /* Special handling: to handle periodic activations correctly + * despite timer jitter and overheads, the plugin might need to + * know the time at which the task intends to wake up. */ + tsk_rt(current)->doing_abs_nanosleep = 1; + tsk_rt(current)->nanosleep_wakeup = ktime_to_ns( + timespec_to_ktime(*rqtp)); + } + hrtimer_init_sleeper_on_stack(&t, clockid, mode); hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); ret = do_nanosleep(&t, mode); @@ -1937,6 +1999,9 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp, restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); out: destroy_hrtimer_on_stack(&t.timer); + + tsk_rt(current)->doing_abs_nanosleep = 0; + return ret; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 50055d2e4ea8..1ad757848f69 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -42,6 +42,8 @@ #include "internal.h" +#include + /* * Sleep at most 200ms at a time in balance_dirty_pages(). */ @@ -436,7 +438,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) if (bg_thresh >= thresh) bg_thresh = thresh / 2; tsk = current; - if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { + if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk) || + is_realtime(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; } @@ -486,7 +489,7 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat) else dirty = vm_dirty_ratio * node_memory / 100; - if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) + if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk) || is_realtime(tsk)) dirty += dirty / 4; return dirty; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f391c0c4ed1d..6d90a9ed20c4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -75,6 +75,8 @@ #include "internal.h" #include "shuffle.h" +#include + /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); #define MIN_PERCPU_PAGELIST_FRACTION (8) @@ -4208,8 +4210,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * comment for __cpuset_node_allowed(). 
*/ alloc_flags &= ~ALLOC_CPUSET; - } else if (unlikely(rt_task(current)) && !in_interrupt()) + } else if (unlikely(rt_task(current) || is_realtime(current)) && + !in_interrupt()) { alloc_flags |= ALLOC_HARDER; + } if (gfp_mask & __GFP_KSWAPD_RECLAIM) alloc_flags |= ALLOC_KSWAPD; -- cgit v1.2.2
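Illustrative note (not part of the patch): the uapi change above defines SCHED_LITMUS as policy 7, and the patched __sched_setscheduler() routes that policy through litmus_admit_task() before installing litmus_sched_class. A minimal user-space sketch of requesting admission might look like the following. It assumes the task's LITMUS^RT parameters (WCET, period, and so on) have already been configured through the litmus/ interfaces, normally via liblitmus; otherwise litmus_admit_task() is expected to reject the call. The priority is left at 0 because SCHED_LITMUS is not an rt_policy() in the patched priority check.

/* sketch_sched_litmus.c: hypothetical example, not taken from this patch.
 * Ask the kernel to admit the calling task to the SCHED_LITMUS class.
 */
#include <sched.h>
#include <stdio.h>

#ifndef SCHED_LITMUS
#define SCHED_LITMUS 7	/* matches the value added to include/uapi/linux/sched.h */
#endif

int main(void)
{
	/* SCHED_LITMUS is not an rt_policy(), so sched_priority must be 0. */
	struct sched_param param = { .sched_priority = 0 };

	/* pid 0 means the calling task; for this policy __sched_setscheduler()
	 * invokes litmus_admit_task() and fails if admission is refused. */
	if (sched_setscheduler(0, SCHED_LITMUS, &param) != 0) {
		perror("sched_setscheduler(SCHED_LITMUS)");
		return 1;
	}

	/* From here on, the task is scheduled by the active LITMUS^RT plugin. */
	return 0;
}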