From 3c4abebc788e9d92d776d7bc8b778f398cdb4010 Mon Sep 17 00:00:00 2001 From: Nathan O Date: Mon, 9 Dec 2019 14:59:56 -0500 Subject: Initial attempt to "connect the wires" - This is my first attempt to re-add all of the modifications on top of this version of the Linux kernel that were present in the previous version of LITMUS. - More notes on changes will follow after testing--no guarantees the code as it is now will compile or run correctly. --- Makefile | 1 + arch/arm/Kconfig | 9 + arch/arm64/Kconfig | 9 + arch/x86/Kconfig | 9 + arch/x86/include/asm/feather_trace.h | 18 ++ arch/x86/include/asm/feather_trace_32.h | 115 ++++++++++ arch/x86/include/asm/feather_trace_64.h | 124 ++++++++++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/ft_event.c | 170 ++++++++++++++ fs/exec.c | 3 + fs/inode.c | 2 + fs/select.c | 6 +- include/linux/fs.h | 3 + include/linux/hardirq.h | 3 + include/linux/hrtimer.h | 3 + include/linux/sched.h | 10 + include/trace/events/litmus.h | 231 +++++++++++++++++++ include/uapi/linux/sched.h | 1 + kernel/exit.c | 14 ++ kernel/fork.c | 6 + kernel/locking/rwsem.c | 13 +- kernel/printk/printk.c | 14 +- kernel/sched/Makefile | 3 + kernel/sched/core.c | 153 +++++++++++-- kernel/sched/deadline.c | 21 +- kernel/sched/litmus.c | 386 ++++++++++++++++++++++++++++++++ kernel/sched/rt.c | 12 +- kernel/sched/sched.h | 22 +- kernel/sched/stop_task.c | 8 + kernel/time/hrtimer.c | 69 +++++- mm/page-writeback.c | 7 +- mm/page_alloc.c | 6 +- 32 files changed, 1413 insertions(+), 40 deletions(-) create mode 100644 arch/x86/include/asm/feather_trace.h create mode 100644 arch/x86/include/asm/feather_trace_32.h create mode 100644 arch/x86/include/asm/feather_trace_64.h create mode 100644 arch/x86/kernel/ft_event.c create mode 100644 include/trace/events/litmus.h create mode 100644 kernel/sched/litmus.c diff --git a/Makefile b/Makefile index 1d5298356ea8..405d18d59837 100644 --- a/Makefile +++ b/Makefile @@ -1011,6 +1011,7 @@ export MODORDER := $(extmod-prefix)modules.order ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += litmus/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8a50efb559f3..3aaa81a3ae70 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2094,3 +2094,12 @@ source "arch/arm/crypto/Kconfig" endif source "arch/arm/kvm/Kconfig" + +config ARCH_HAS_FEATHER_TRACE + def_bool n + +config ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI + def_bool n + +source "litmus/Kconfig" + diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3f047afb982c..a6bf629e708c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1680,3 +1680,12 @@ source "arch/arm64/kvm/Kconfig" if CRYPTO source "arch/arm64/crypto/Kconfig" endif + +config ARCH_HAS_FEATHER_TRACE + def_bool n + +config ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI + def_bool n + +source "litmus/Kconfig" + diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ef85139553f..3765164809c5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2972,3 +2972,12 @@ config X86_DEV_DMA_OPS source "drivers/firmware/Kconfig" source "arch/x86/kvm/Kconfig" + +config ARCH_HAS_FEATHER_TRACE + def_bool y + +config ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI + def_bool y + +source "litmus/Kconfig" + diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h new file mode 100644 index 000000000000..4e732d4ea508 --- /dev/null +++ b/arch/x86/include/asm/feather_trace.h @@ 
-0,0 +1,18 @@ +#ifndef _ARCH_FEATHER_TRACE_H +#define _ARCH_FEATHER_TRACE_H + +#include +#include + +static inline unsigned long long ft_timestamp(void) +{ + return get_cycles(); +} + +#ifdef CONFIG_X86_32 +#include "feather_trace_32.h" +#else +#include "feather_trace_64.h" +#endif + +#endif diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h new file mode 100644 index 000000000000..75e81a9f9382 --- /dev/null +++ b/arch/x86/include/asm/feather_trace_32.h @@ -0,0 +1,115 @@ +/* Copyright (c) 2007-2012 Björn Brandenburg, + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not directly include this file. Include feather_trace.h instead */ + +#define feather_callback __attribute__((regparm(3))) __attribute__((used)) + +/* + * Make the compiler reload any register that is not saved in a cdecl function + * call (minus the registers that we explicitly clobber as output registers). 
+ */ +#define __FT_CLOBBER_LIST0 "memory", "cc", "eax", "edx", "ecx" +#define __FT_CLOBBER_LIST1 "memory", "cc", "eax", "ecx" +#define __FT_CLOBBER_LIST2 "memory", "cc", "eax" +#define __FT_CLOBBER_LIST3 "memory", "cc", "eax" + +#define __FT_TMP1(x) "=d" (x) +#define __FT_ARG1(x) "0" ((long) (x)) +#define __FT_TMP2(x) "=c" (x) +#define __FT_ARG2(x) "1" ((long) (x)) + +#define __FT_ARG3(x) "r" ((long) (x)) + +#define ft_event(id, callback) \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " call " #callback " \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : : : __FT_CLOBBER_LIST0) + +#define ft_event0(id, callback) \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movl $" #id ", %%eax \n\t" \ + " call " #callback " \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : : : __FT_CLOBBER_LIST0) + +#define ft_event1(id, callback, param) \ + do { \ + long __ft_tmp1; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movl $" #id ", %%eax \n\t" \ + " call " #callback " \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1) \ + : __FT_ARG1(param) \ + : __FT_CLOBBER_LIST1); \ + } while (0); + +#define ft_event2(id, callback, param, param2) \ + do { \ + long __ft_tmp1, __ft_tmp2; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movl $" #id ", %%eax \n\t" \ + " call " #callback " \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \ + : __FT_ARG1(param), __FT_ARG2(param2) \ + : __FT_CLOBBER_LIST2); \ + } while (0); + + +#define ft_event3(id, callback, param, param2, param3) \ + do { \ + long __ft_tmp1, __ft_tmp2; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " subl $4, %%esp \n\t" \ + " movl $" #id ", %%eax \n\t" \ + " movl %2, (%%esp) \n\t" \ + " call " #callback " \n\t" \ + " addl $4, %%esp \n\t" \ + ".section __event_table, \"aw\" \n\t" \ + ".long " #id ", 0, 1b, 2f \n\t" \ + ".previous \n\t" \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \ + : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \ + : __FT_CLOBBER_LIST3); \ + } while (0); diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h new file mode 100644 index 000000000000..5ce49e2eebba --- /dev/null +++ b/arch/x86/include/asm/feather_trace_64.h @@ -0,0 +1,124 @@ +/* Copyright (c) 2010 Andrea Bastoni, + * Copyright (c) 2012 Björn Brandenburg, + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not directly include this file. Include feather_trace.h instead */ + +/* regparm is the default on x86_64 */ +#define feather_callback __attribute__((used)) + +#define __FT_EVENT_TABLE(id,from,to) \ + ".section __event_table, \"aw\"\n\t" \ + ".balign 8\n\t" \ + ".quad " #id ", 0, " #from ", " #to " \n\t" \ + ".previous \n\t" + +/* + * x86_64 caller only owns rbp, rbx, r12-r15; + * the callee can freely modify the others. + */ +#define __FT_CLOBBER_LIST0 "memory", "cc", "rdi", "rsi", "rdx", "rcx", \ + "r8", "r9", "r10", "r11", "rax" + +#define __FT_CLOBBER_LIST1 "memory", "cc", "rdi", "rdx", "rcx", \ + "r8", "r9", "r10", "r11", "rax" + +#define __FT_CLOBBER_LIST2 "memory", "cc", "rdi", "rcx", \ + "r8", "r9", "r10", "r11", "rax" + +#define __FT_CLOBBER_LIST3 "memory", "cc", "rdi", \ + "r8", "r9", "r10", "r11", "rax" + +/* The registers RDI, RSI, RDX, RCX, R8 and R9 are used for integer and pointer + * arguments. */ + +/* RSI */ +#define __FT_TMP1(x) "=S" (x) +#define __FT_ARG1(x) "0" ((long) (x)) + +/* RDX */ +#define __FT_TMP2(x) "=d" (x) +#define __FT_ARG2(x) "1" ((long) (x)) + +/* RCX */ +#define __FT_TMP3(x) "=c" (x) +#define __FT_ARG3(x) "2" ((long) (x)) + +#define ft_event(id, callback) \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : : : __FT_CLOBBER_LIST0) + +#define ft_event0(id, callback) \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movq $" #id ", %%rdi \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : : : __FT_CLOBBER_LIST0) + +#define ft_event1(id, callback, param) \ + do { \ + long __ft_tmp1; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movq $" #id ", %%rdi \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1) \ + : __FT_ARG1(param) \ + : __FT_CLOBBER_LIST1); \ + } while (0); + +#define ft_event2(id, callback, param, param2) \ + do { \ + long __ft_tmp1, __ft_tmp2; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movq $" #id ", %%rdi \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2) \ + : __FT_ARG1(param), __FT_ARG2(param2) \ + : __FT_CLOBBER_LIST2); \ + } while (0); + +#define ft_event3(id, callback, param, param2, param3) \ + do { \ + long __ft_tmp1, __ft_tmp2, __ft_tmp3; \ + __asm__ __volatile__( \ + "1: jmp 2f \n\t" \ + " movq $" #id ", %%rdi \n\t" \ + " call " #callback " \n\t" \ + __FT_EVENT_TABLE(id,1b,2f) \ + "2: \n\t" \ + : __FT_TMP1(__ft_tmp1), __FT_TMP2(__ft_tmp2), __FT_TMP3(__ft_tmp3) \ + : __FT_ARG1(param), __FT_ARG2(param2), __FT_ARG3(param3) \ + : __FT_CLOBBER_LIST3); \ + } while (0); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3578ad248bc9..5ee68d48e0a4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -140,6 +140,8 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o +obj-$(CONFIG_FEATHER_TRACE) += ft_event.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c new file mode 100644 index 
000000000000..7aa3d0592ff2 --- /dev/null +++ b/arch/x86/kernel/ft_event.c @@ -0,0 +1,170 @@ +#include +#include +#include +#include + +#include + +/* the feather trace management functions assume + * exclusive access to the event table + */ + +#ifndef CONFIG_RELOCATABLE + +#define BYTE_JUMP 0xeb +#define BYTE_JUMP_LEN 0x02 + +/* for each event, there is an entry in the event table */ +struct trace_event { + long id; + long count; + long start_addr; + long end_addr; +}; + +extern struct trace_event __start___event_table[]; +extern struct trace_event __stop___event_table[]; + + +/* NOTE: The following two functions have been stolen from ftrace.c */ + +static inline int +within(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr < end; +} + +static unsigned long text_ip_addr(unsigned long ip) +{ + /* + * On x86_64, kernel text mappings are mapped read-only, so we use + * the kernel identity mapping instead of the kernel text mapping + * to modify the kernel text. + * + * For 32bit kernels, these mappings are same and we can use + * kernel identity mapping to modify code. + */ + if (within(ip, (unsigned long)_text, (unsigned long)_etext)) + ip = (unsigned long)__va(__pa_symbol(ip)); + + return ip; +} + +/* Workaround: if no events are defined, then the event_table section does not + * exist and the above references cause linker errors. This could probably be + * fixed by adjusting the linker script, but it is easier to maintain for us if + * we simply create a dummy symbol in the event table section. + */ +int __event_table_dummy[0] __attribute__ ((section("__event_table"))); + +int ft_enable_event(unsigned long id) +{ + struct trace_event* te = __start___event_table; + int count = 0; + char* delta; + unsigned char* instr; + + set_kernel_text_rw(); + set_all_modules_text_rw(); + + while (te < __stop___event_table) { + if (te->id == id && ++te->count == 1) { + instr = (unsigned char*) te->start_addr; + /* make sure we don't clobber something wrong */ + if (*instr == BYTE_JUMP) { + delta = (unsigned char*) text_ip_addr( + ((unsigned long) te->start_addr) + + 1); + *delta = 0; + } + } + if (te->id == id) + count++; + te++; + } + + set_all_modules_text_ro(); + set_kernel_text_ro(); + + printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count); + return count; +} + +int ft_disable_event(unsigned long id) +{ + struct trace_event* te = __start___event_table; + int count = 0; + char* delta; + unsigned char* instr; + + set_kernel_text_rw(); + set_all_modules_text_rw(); + + while (te < __stop___event_table) { + if (te->id == id && --te->count == 0) { + instr = (unsigned char*) te->start_addr; + if (*instr == BYTE_JUMP) { + delta = (unsigned char*) text_ip_addr( + ((unsigned long) te->start_addr) + + 1); + *delta = te->end_addr - te->start_addr - + BYTE_JUMP_LEN; + } + } + if (te->id == id) + count++; + te++; + } + + set_all_modules_text_ro(); + set_kernel_text_ro(); + + printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count); + return count; +} + +int ft_disable_all_events(void) +{ + struct trace_event* te = __start___event_table; + int count = 0; + char* delta; + unsigned char* instr; + + set_kernel_text_rw(); + set_all_modules_text_rw(); + + while (te < __stop___event_table) { + if (te->count) { + instr = (unsigned char*) te->start_addr; + if (*instr == BYTE_JUMP) { + delta = (unsigned char*) text_ip_addr( + ((unsigned long) te->start_addr) + + 1); + *delta = te->end_addr - te->start_addr - + BYTE_JUMP_LEN; + te->count = 0; + count++; + } + } 
+ te++; + } + + set_all_modules_text_ro(); + set_kernel_text_ro(); + + return count; +} + +int ft_is_event_enabled(unsigned long id) +{ + struct trace_event* te = __start___event_table; + + while (te < __stop___event_table) { + if (te->id == id) + return te->count; + te++; + } + return 0; +} + +#endif diff --git a/fs/exec.c b/fs/exec.c index 555e93c7dec8..49c8613d2510 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -64,6 +64,8 @@ #include #include +#include + #include #include #include @@ -1765,6 +1767,7 @@ static int __do_execve_file(int fd, struct filename *filename, goto out_unmark; sched_exec(); + litmus_exec(); bprm->file = file; if (!filename) { diff --git a/fs/inode.c b/fs/inode.c index fef457a42882..abf61717d9db 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -394,6 +394,8 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_lru); __address_space_init_once(&inode->i_data); i_size_ordered_init(inode); + INIT_LIST_HEAD(&inode->i_obj_list); + mutex_init(&inode->i_obj_mutex); } EXPORT_SYMBOL(inode_init_once); diff --git a/fs/select.c b/fs/select.c index 53a0c149f528..7a3745f8d17f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -32,6 +32,8 @@ #include #include +#include + #include @@ -80,9 +82,9 @@ u64 select_estimate_accuracy(struct timespec64 *tv) /* * Realtime tasks get a slack of 0 for obvious reasons. */ - - if (rt_task(current)) + if (rt_task(current) || is_realtime(current)) { return 0; + } ktime_get_ts64(&now); now = timespec64_sub(*tv, now); diff --git a/include/linux/fs.h b/include/linux/fs.h index e0d909d35763..d65e17d3d302 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -734,6 +734,9 @@ struct inode { struct fsverity_info *i_verity_info; #endif + struct list_head i_obj_list; + struct mutex i_obj_mutex; + void *i_private; /* fs or device private pointer */ } __randomize_layout; diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index da0af631ded5..35271458e22b 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -8,6 +8,7 @@ #include #include +#include extern void synchronize_irq(unsigned int irq); extern bool synchronize_hardirq(unsigned int irq); @@ -38,6 +39,7 @@ extern void rcu_nmi_exit(void); account_irq_enter_time(current); \ preempt_count_add(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ + ft_irq_fired(); \ } while (0) /* @@ -75,6 +77,7 @@ extern void irq_exit(void); preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ + ft_irq_fired(); \ } while (0) #define nmi_exit() \ diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1b9a51a1bccb..a145e140d532 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -123,6 +123,9 @@ struct hrtimer { u8 is_rel; u8 is_soft; u8 is_hard; +#if defined(CONFIG_REPORT_TIMER_LATENCY) || defined(CONFIG_SCHED_OVERHEAD_TRACE) + ktime_t when_added; +#endif }; /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 67a1d86981a9..0a1b09305248 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -32,6 +32,9 @@ #include #include +#include +#include + /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; struct backing_dev_info; @@ -61,6 +64,8 @@ struct signal_struct; struct task_delay_info; struct task_group; +struct od_table_entry; + /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). 
@@ -1158,6 +1163,10 @@ struct task_struct { /* Start of a write-and-pause period: */ unsigned long dirty_paused_when; + /* LITMUS RT parameters and state */ + struct rt_param rt_param; + struct od_table_entry *od_table; + #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; @@ -1741,6 +1750,7 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) static inline void set_tsk_need_resched(struct task_struct *tsk) { set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); + sched_state_will_schedule(tsk); } static inline void clear_tsk_need_resched(struct task_struct *tsk) diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h new file mode 100644 index 000000000000..0fffcee02be0 --- /dev/null +++ b/include/trace/events/litmus.h @@ -0,0 +1,231 @@ +/* + * LITMUS^RT kernel style scheduling tracepoints + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM litmus + +#if !defined(_SCHED_TASK_TRACEPOINT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _SCHED_TASK_TRACEPOINT_H + +#include + +#include +#include + +/* + * Tracing task admission + */ +TRACE_EVENT(litmus_task_param, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, wcet ) + __field( lt_t, period ) + __field( lt_t, phase ) + __field( int, partition ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->job = t ? t->rt_param.job_params.job_no : 0; + __entry->wcet = get_exec_cost(t); + __entry->period = get_rt_period(t); + __entry->phase = get_rt_phase(t); + __entry->partition = get_partition(t); + ), + + TP_printk("period(%d, %Lu).\nwcet(%d, %Lu).\n", + __entry->pid, __entry->period, + __entry->pid, __entry->wcet) +); + +/* + * Tracing jobs release + */ +TRACE_EVENT(litmus_task_release, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, release ) + __field( lt_t, deadline ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->job = t ? t->rt_param.job_params.job_no : 0; + __entry->release = get_release(t); + __entry->deadline = get_deadline(t); + ), + + TP_printk("release(job(%u, %u)): %Lu\ndeadline(job(%u, %u)): %Lu\n", + __entry->pid, __entry->job, __entry->release, + __entry->pid, __entry->job, __entry->deadline) +); + +/* + * Tracepoint for switching to new task + */ +TRACE_EVENT(litmus_switch_to, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, when ) + __field( lt_t, exec_time ) + ), + + TP_fast_assign( + __entry->pid = is_realtime(t) ? t->pid : 0; + __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0; + __entry->when = litmus_clock(); + __entry->exec_time = get_exec_time(t); + ), + + TP_printk("switch_to(job(%u, %u)): %Lu (exec: %Lu)\n", + __entry->pid, __entry->job, + __entry->when, __entry->exec_time) +); + +/* + * Tracepoint for switching away previous task + */ +TRACE_EVENT(litmus_switch_away, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, when ) + __field( lt_t, exec_time ) + ), + + TP_fast_assign( + __entry->pid = is_realtime(t) ? t->pid : 0; + __entry->job = is_realtime(t) ? 
t->rt_param.job_params.job_no : 0; + __entry->when = litmus_clock(); + __entry->exec_time = get_exec_time(t); + ), + + TP_printk("switch_away(job(%u, %u)): %Lu (exec: %Lu)\n", + __entry->pid, __entry->job, + __entry->when, __entry->exec_time) +); + +/* + * Tracing jobs completion + */ +TRACE_EVENT(litmus_task_completion, + + TP_PROTO(struct task_struct *t, unsigned long forced), + + TP_ARGS(t, forced), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, when ) + __field( unsigned long, forced ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->job = t ? t->rt_param.job_params.job_no : 0; + __entry->when = litmus_clock(); + __entry->forced = forced; + ), + + TP_printk("completed(job(%u, %u)): %Lu (forced: %lu)\n", + __entry->pid, __entry->job, + __entry->when, __entry->forced) +); + +/* + * Trace blocking tasks. + */ +TRACE_EVENT(litmus_task_block, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( lt_t, when ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->when = litmus_clock(); + ), + + TP_printk("(%u) blocks: %Lu\n", __entry->pid, __entry->when) +); + +/* + * Tracing jobs resume + */ +TRACE_EVENT(litmus_task_resume, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned int, job ) + __field( lt_t, when ) + ), + + TP_fast_assign( + __entry->pid = t ? t->pid : 0; + __entry->job = t ? t->rt_param.job_params.job_no : 0; + __entry->when = litmus_clock(); + ), + + TP_printk("resume(job(%u, %u)): %Lu\n", + __entry->pid, __entry->job, __entry->when) +); + +/* + * Trace synchronous release + */ +TRACE_EVENT(litmus_sys_release, + + TP_PROTO(lt_t *start), + + TP_ARGS(start), + + TP_STRUCT__entry( + __field( lt_t, rel ) + __field( lt_t, when ) + ), + + TP_fast_assign( + __entry->rel = *start; + __entry->when = litmus_clock(); + ), + + TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when) +); + +#endif /* _SCHED_TASK_TRACEPOINT_H */ + +/* Must stay outside the protection */ +#include diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 25b4fa00bad1..f6e838d97ff3 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -87,6 +87,7 @@ struct clone_args { /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 #define SCHED_DEADLINE 6 +#define SCHED_LITMUS 7 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ #define SCHED_RESET_ON_FORK 0x40000000 diff --git a/kernel/exit.c b/kernel/exit.c index a46a50d67002..6832c614c663 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -69,6 +69,10 @@ #include #include +#include + +extern void exit_od_table(struct task_struct *t); + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -727,6 +731,14 @@ void __noreturn do_exit(long code) if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); + if (unlikely(is_realtime(tsk))) { + /* We would like the task to be polite and transition out of + * RT mode first. 
+ */ + litmus_do_exit(tsk); + BUG_ON(is_realtime(tsk)); + } + /* * If do_exit is called because this processes oopsed, it's possible * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before @@ -798,6 +810,8 @@ void __noreturn do_exit(long code) tty_audit_exit(); audit_free(tsk); + exit_od_table(tsk); + tsk->exit_code = code; taskstats_exit(tsk, group_dead); diff --git a/kernel/fork.c b/kernel/fork.c index 55af6931c6ec..220211ef8946 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -104,6 +104,9 @@ #include +#include +#include + #define CREATE_TRACE_POINTS #include @@ -740,6 +743,9 @@ void __put_task_struct(struct task_struct *tsk) cgroup_free(tsk); task_numa_free(tsk, true); security_task_free(tsk); + + exit_litmus(tsk); + exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index eef04551eae7..9adb95795f83 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -31,6 +31,8 @@ #include "rwsem.h" #include "lock_events.h" +#include + /* * The least significant 3 bits of the owner value has the following * meanings when set. @@ -886,11 +888,13 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock) * a writer, need_resched() check needs to be done here. */ if (owner_state != OWNER_WRITER) { - if (need_resched()) + if (need_resched()) { break; - if (rt_task(current) && - (prev_owner_state != OWNER_WRITER) + } + if ((rt_task(current) || is_realtime(current)) && + (prev_owner_state != OWNER_WRITER)) { break; + } } prev_owner_state = owner_state; @@ -1258,7 +1262,8 @@ wait: * until rwsem_try_write_lock() is called. */ if ((wstate == WRITER_FIRST) && (rt_task(current) || - time_after(jiffies, waiter.timeout))) { + is_realtime(current) || + time_after(jiffies, waiter.timeout))) { wstate = WRITER_HANDOFF; lockevent_inc(rwsem_wlock_handoff); break; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ca65327a6de8..4c3d18d2587e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -68,6 +68,13 @@ int console_printk[4] = { }; EXPORT_SYMBOL_GPL(console_printk); +/* + * Divert printk() messages when there is a LITMUS^RT debug listener. + */ +#include +int trace_override = 0; +int trace_recurse = 0; + atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); EXPORT_SYMBOL(ignore_console_lock_warning); @@ -1916,6 +1923,11 @@ int vprintk_store(int facility, int level, */ text_len = vscnprintf(text, sizeof(textbuf), fmt, args); + /* If the LITMUS^RT tracer is active then divert printk messages.
*/ + if (trace_override && !trace_recurse) { + TRACE("%s", text); + } + /* mark and strip a trailing newline */ if (text_len && text[text_len-1] == '\n') { text_len--; @@ -2967,7 +2979,7 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { void wake_up_klogd(void) { preempt_disable(); - if (waitqueue_active(&log_wait)) { + if (!trace_override && waitqueue_active(&log_wait)) { this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 21fb5a5662b5..95000e43fce7 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -30,3 +30,6 @@ obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_MEMBARRIER) += membarrier.o obj-$(CONFIG_CPU_ISOLATION) += isolation.o obj-$(CONFIG_PSI) += psi.o + +obj-y += litmus.o + diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0f2eb3629070..917a374b616f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -20,6 +20,12 @@ #include "pelt.h" +#include +#include +#include +#include +#include + #define CREATE_TRACE_POINTS #include @@ -520,6 +526,11 @@ void resched_curr(struct rq *rq) set_tsk_need_resched(curr); set_preempt_need_resched(); return; + } else if (is_realtime(curr)) { + /* Cannot call set_tsk_need_resched() on LITMUS tasks on a + * remote core. Only policy plugins may do this + * via litmus_reschedule(). */ + return; } if (set_nr_and_not_polling(curr)) @@ -2317,9 +2328,17 @@ void scheduler_ipi(void) * this IPI. */ preempt_fold_need_resched(); - - if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) + /* Let LITMUS' preemption state machine know about this IPI. */ + sched_state_ipi(); + + if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) { +#ifndef CONFIG_ARCH_CALLS_IRQ_ENTER_ON_RESCHED_IPI + /* If we don't call irq_enter() then we need to trigger the + * IRQ tracing manually. */ + ft_irq_fired(); +#endif return; + } /* * Not all reschedule IPI handlers call irq_enter/irq_exit, since @@ -2397,7 +2416,12 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) struct rq_flags rf; #if defined(CONFIG_SMP) - if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { + /* + * In LITMUS, it is up to a plugin to determine whether to send an IPI + * to a remote CPU. + */ + if (!is_realtime(p) && sched_feat(TTWU_QUEUE) && + !cpus_share_cache(smp_processor_id(), cpu)) { sched_clock_cpu(cpu); /* Sync clocks across CPUs */ ttwu_queue_remote(p, cpu, wake_flags); return; @@ -2517,6 +2541,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) { unsigned long flags; int cpu, success = 0; + if (is_realtime(p)) { + TRACE_TASK(p, "try_to_wake_up() state: %d\n", p->state); + } preempt_disable(); if (p == current) { @@ -2616,6 +2643,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ smp_cond_load_acquire(&p->on_cpu, !VAL); + /* LITMUS: Once the task can be safely referenced by this CPU, don't + * mess with further Linux load balancing stuff. 
+ */ + if (is_realtime(p)) { + goto litmus_out_activate; + } + p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; @@ -2631,6 +2665,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) set_task_cpu(p, cpu); } +litmus_out_activate: #else /* CONFIG_SMP */ if (p->in_iowait) { @@ -2641,6 +2676,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) #endif /* CONFIG_SMP */ ttwu_queue(p, cpu, wake_flags); + + if (is_realtime(p)) { + TRACE_TASK(p, "try_to_wake_up() done state: %d\n", p->state); + } unlock: raw_spin_unlock_irqrestore(&p->pi_lock, flags); out: @@ -2853,13 +2892,16 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) */ p->prio = current->normal_prio; + litmus_fork(p); + uclamp_fork(p); /* * Revert to default priority/policy on fork if requested. */ if (unlikely(p->sched_reset_on_fork)) { - if (task_has_dl_policy(p) || task_has_rt_policy(p)) { + if (task_has_dl_policy(p) || task_has_rt_policy(p) || + p->policy == SCHED_LITMUS) { p->policy = SCHED_NORMAL; p->static_prio = NICE_TO_PRIO(0); p->rt_priority = 0; @@ -2876,12 +2918,15 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->sched_reset_on_fork = 0; } - if (dl_prio(p->prio)) + if (is_realtime(p)) { + p->sched_class = &litmus_sched_class; + } else if (dl_prio(p->prio)) { return -EAGAIN; - else if (rt_prio(p->prio)) + } else if (rt_prio(p->prio)) { p->sched_class = &rt_sched_class; - else + } else { p->sched_class = &fair_sched_class; + } init_entity_runnable_average(&p->se); @@ -2945,6 +2990,10 @@ void wake_up_new_task(struct task_struct *p) struct rq_flags rf; struct rq *rq; + if (is_realtime(p)) { + litmus->task_new(p, 1, 0); + } + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); p->state = TASK_RUNNING; #ifdef CONFIG_SMP @@ -3218,6 +3267,8 @@ static struct rq *finish_task_switch(struct task_struct *prev) */ prev_state = prev->state; vtime_task_switch(prev); + litmus->finish_switch(prev); + prev->rt_param.stack_in_use = NO_CPU; perf_event_task_sched_in(prev, current); finish_task(prev); finish_lock_switch(rq); @@ -3317,6 +3368,12 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) */ rq = finish_task_switch(prev); + + sched_trace_task_switch_to(current); + if (unlikely(sched_state_validate_switch())) { + litmus_reschedule_local(); + } + balance_callback(rq); preempt_enable(); @@ -3608,7 +3665,9 @@ void scheduler_tick(void) #ifdef CONFIG_SMP rq->idle_balance = idle_cpu(cpu); - trigger_load_balance(rq); + if (!is_realtime(current)) { + trigger_load_balance(rq); + } #endif } @@ -3910,9 +3969,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) /* * Optimization: we know that if all tasks are in the fair class we can * call that function directly, but only if the @prev task wasn't of a - * higher scheduling class, because otherwise those loose the + * higher scheduling class, because otherwise those lose the * opportunity to pull in more work from other CPUs. - */ + * + * We can't do this in LITMUS! + * + * This breaks many assumptions in the plugins. Do not uncomment + * without considering how this affects global plugins such as GSN-EDF. 
if (likely((prev->sched_class == &idle_sched_class || prev->sched_class == &fair_sched_class) && rq->nr_running == rq->cfs.h_nr_running)) { @@ -3921,12 +3984,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) if (unlikely(p == RETRY_TASK)) goto restart; - /* Assumes fair_sched_class->next == idle_sched_class */ + // Assumes fair_sched_class->next == idle_sched_class if (unlikely(!p)) p = idle_sched_class.pick_next_task(rq, prev, rf); return p; } + */ restart: #ifdef CONFIG_SMP @@ -4003,10 +4067,15 @@ static void __sched notrace __schedule(bool preempt) struct rq *rq; int cpu; + TS_SCHED_START; + sched_state_entered_schedule(); + cpu = smp_processor_id(); rq = cpu_rq(cpu); prev = rq->curr; + sched_trace_task_switch_away(prev); + schedule_debug(prev, preempt); if (sched_feat(HRTICK)) @@ -4030,6 +4099,8 @@ static void __sched notrace __schedule(bool preempt) rq->clock_update_flags <<= 1; update_rq_clock(rq); + this_cpu_write(litmus_preemption_in_progress, preempt); + switch_count = &prev->nivcsw; if (!preempt && prev->state) { if (signal_pending_state(prev->state, prev)) { @@ -4049,6 +4120,8 @@ clear_tsk_need_resched(prev); clear_preempt_need_resched(); + this_cpu_write(litmus_preemption_in_progress, false); + if (likely(prev != next)) { rq->nr_switches++; /* @@ -4073,15 +4146,25 @@ ++*switch_count; trace_sched_switch(preempt, prev, next); - + TS_SCHED_END(next); + TS_CXS_START(next); /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); + TS_CXS_END(current); } else { rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); + TS_SCHED_END(prev); rq_unlock_irq(rq, &rf); } + TS_SCHED2_START(prev); + sched_trace_task_switch_to(current); + if (unlikely(sched_state_validate_switch())) { + litmus_reschedule_local(); + } + balance_callback(rq); + TS_SCHED2_END(prev); } void __noreturn do_task_dead(void) @@ -4513,7 +4596,7 @@ void set_user_nice(struct task_struct *p, long nice) * it wont have any effect on scheduling until the task is * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR: */ - if (task_has_dl_policy(p) || task_has_rt_policy(p)) { + if (task_has_dl_policy(p) || task_has_rt_policy(p) || is_realtime(p)) { p->static_prio = NICE_TO_PRIO(nice); goto out_unlock; } @@ -4723,12 +4806,15 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, if (keep_boost) p->prio = rt_effective_prio(p, p->prio); - if (dl_prio(p->prio)) + if (p->policy == SCHED_LITMUS) { + p->sched_class = &litmus_sched_class; + } else if (dl_prio(p->prio)) { p->sched_class = &dl_sched_class; - else if (rt_prio(p->prio)) + } else if (rt_prio(p->prio)) { p->sched_class = &rt_sched_class; - else + } else { p->sched_class = &fair_sched_class; + } } /* @@ -4760,6 +4846,7 @@ static int __sched_setscheduler(struct task_struct *p, int reset_on_fork; int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; struct rq *rq; + int litmus_task = 0; /* The pi code expects interrupts enabled */ BUG_ON(pi && in_interrupt()); @@ -4789,7 +4876,9 @@ recheck: if ((dl_policy(policy) && !__checkparam_dl(attr)) || (rt_policy(policy) != (attr->sched_priority != 0))) return -EINVAL; - + if ((policy == SCHED_LITMUS) && (policy == p->policy)) { + return -EINVAL; + } /* * Allow unprivileged RT tasks to decrease priority: */ @@ -4857,6 +4946,13 @@ recheck: return retval; } + if (policy == SCHED_LITMUS) { + retval = litmus_admit_task(p); + if (retval) { + return retval; + } + } + if (pi)
cpuset_read_lock(); @@ -4949,6 +5045,11 @@ change: goto unlock; } + if (is_realtime(p)) { + litmus_exit_task(p); + litmus_task = 1; + } + p->sched_reset_on_fork = reset_on_fork; oldprio = p->prio; @@ -4977,6 +5078,16 @@ change: __setscheduler(rq, p, attr, pi); __setscheduler_uclamp(p, attr); + if (litmus_policy(policy)) { +#ifdef CONFIG_SMP + p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU; +#else + p->rt_param.stack_in_use = running ? 0 : NO_CPU; +#endif + p->rt_param.present = running; + litmus->task_new(p, queued, running); + } + if (queued) { /* * We enqueue to tail when the priority of a task is @@ -5005,6 +5116,10 @@ change: balance_callback(rq); preempt_enable(); + if (litmus_task) { + litmus_dealloc(p); + } + return 0; unlock: @@ -5391,9 +5506,9 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) rcu_read_lock(); p = find_process_by_pid(pid); - if (!p) { + if (!p || is_realtime(p)) { rcu_read_unlock(); - return -ESRCH; + return p ? -EPERM : -ESRCH; } /* Prevent p going away */ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index a8a08030a8f7..1842c3e33476 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -18,6 +18,8 @@ #include "sched.h" #include "pelt.h" +#include + struct dl_bandwidth def_dl_bandwidth; static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) @@ -1049,17 +1051,21 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) #endif enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); - if (dl_task(rq->curr)) + if (dl_task(rq->curr)) { check_preempt_curr_dl(rq, p, 0); - else + } else if (!is_realtime(rq->curr)) { resched_curr(rq); + } #ifdef CONFIG_SMP /* * Queueing this task back might have overloaded rq, check if we need * to kick someone away. + * + * LITMUS note: Don't incur this overhead if we are running a LITMUS + * task. */ - if (has_pushable_dl_tasks(rq)) { + if (has_pushable_dl_tasks(rq) && (!is_realtime(rq->curr))) { /* * Nothing relies on rq->lock after this, so its safe to drop * rq->lock. @@ -2357,9 +2363,13 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p) * Since this might be the only -deadline task on the rq, * this is the right place to try to pull some other one * from an overloaded CPU, if any. + * + * LITMUS note: also don't pull a task when we're running LITMUS tasks. */ - if (!task_on_rq_queued(p) || rq->dl.dl_nr_running) + if (!task_on_rq_queued(p) || rq->dl.dl_nr_running || + is_realtime(rq->curr)) { return; + } deadline_queue_pull_task(rq); } @@ -2374,9 +2384,8 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) put_task_struct(p); /* If p is not queued we will update its parameters at next wakeup. 
*/ - if (!task_on_rq_queued(p)) { + if (!task_on_rq_queued(p) || is_realtime(rq->curr)) { add_rq_bw(&p->dl, &rq->dl); - return; } diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c new file mode 100644 index 000000000000..d9c59998155b --- /dev/null +++ b/kernel/sched/litmus.c @@ -0,0 +1,386 @@ +/* This file is included from kernel/sched.c */ + +#include "sched.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +static void update_time_litmus(struct rq *rq, struct task_struct *p) +{ + u64 delta = rq->clock - p->se.exec_start; + if (unlikely((s64)delta < 0)) + delta = 0; + /* per job counter */ + p->rt_param.job_params.exec_time += delta; + /* task counter */ + p->se.sum_exec_runtime += delta; + if (delta) { + TRACE_TASK(p, "charged %llu exec time (total:%llu, rem:%llu)\n", + delta, p->rt_param.job_params.exec_time, budget_remaining(p)); + } + /* sched_clock() */ + p->se.exec_start = rq->clock; + cpuacct_charge(p, delta); +} + +static void double_rq_lock(struct rq *rq1, struct rq *rq2); +static void double_rq_unlock(struct rq *rq1, struct rq *rq2); + +static struct task_struct * +litmus_schedule(struct rq *rq, struct task_struct *prev) +{ + struct task_struct *next; + +#ifdef CONFIG_SMP + struct rq* other_rq; + long was_running; + int from_where; + lt_t _maybe_deadlock = 0; +#endif + + /* let the plugin schedule */ + next = litmus->schedule(prev); + + sched_state_plugin_check(); + +#ifdef CONFIG_SMP + /* check if a global plugin pulled a task from a different RQ */ + if (next && task_rq(next) != rq) { + /* we need to migrate the task */ + other_rq = task_rq(next); + from_where = other_rq->cpu; + TRACE_TASK(next, "migrate from %d\n", from_where); + + /* while we drop the lock, the prev task could change its + * state + */ + BUG_ON(prev != current); + was_running = is_current_running(); + + /* Don't race with a concurrent switch. This could deadlock in + * the case of cross or circular migrations. It's the job of + * the plugin to make sure that doesn't happen. + */ + TRACE_TASK(next, "stack_in_use=%d\n", + next->rt_param.stack_in_use); + if (next->rt_param.stack_in_use != NO_CPU) { + TRACE_TASK(next, "waiting to deschedule\n"); + _maybe_deadlock = litmus_clock(); + } + + raw_spin_unlock(&rq->lock); + + while (next->rt_param.stack_in_use != NO_CPU) { + cpu_relax(); + mb(); + if (next->rt_param.stack_in_use == NO_CPU) + TRACE_TASK(next,"descheduled. Proceeding.\n"); + + if (!litmus->should_wait_for_stack(next)) { + /* plugin aborted the wait */ + TRACE_TASK(next, + "plugin gave up waiting for stack\n"); + next = NULL; + /* Make sure plugin is given a chance to + * reconsider. */ + litmus_reschedule_local(); + /* give up */ + raw_spin_lock(&rq->lock); + goto out; + } + + if (from_where != task_rq(next)->cpu) { + /* The plugin should not give us something + * that other cores are trying to pull, too */ + TRACE_TASK(next, "next invalid: task keeps " + "shifting around!? " + "(%d->%d)\n", + from_where, + task_rq(next)->cpu); + + /* bail out */ + raw_spin_lock(&rq->lock); + litmus->next_became_invalid(next); + litmus_reschedule_local(); + next = NULL; + goto out; + } + + if (lt_before(_maybe_deadlock + 1000000000L, + litmus_clock())) { + /* We've been spinning for 1s. + * Something can't be right! + * Let's abandon the task and bail out; at least + * we will have debug info instead of a hard + * deadlock. + */ +#ifdef CONFIG_BUG_ON_MIGRATION_DEADLOCK + BUG(); +#else + TRACE_TASK(next,"stack too long in use. 
" + "Deadlock?\n"); + next = NULL; + + /* bail out */ + raw_spin_lock(&rq->lock); + goto out; +#endif + } + } +#ifdef __ARCH_WANT_UNLOCKED_CTXSW + if (next->on_cpu) + TRACE_TASK(next, "waiting for !oncpu"); + while (next->on_cpu) { + cpu_relax(); + mb(); + } +#endif + double_rq_lock(rq, other_rq); + if (other_rq == task_rq(next) && + next->rt_param.stack_in_use == NO_CPU) { + /* ok, we can grab it */ + set_task_cpu(next, rq->cpu); + /* release the other CPU's runqueue, but keep ours */ + raw_spin_unlock(&other_rq->lock); + } else { + /* Either it moved or the stack was claimed; both is + * bad and forces us to abort the migration. */ + TRACE_TASK(next, "next invalid: no longer available\n"); + raw_spin_unlock(&other_rq->lock); + litmus->next_became_invalid(next); + next = NULL; + goto out; + } + + if (!litmus->post_migration_validate(next)) { + TRACE_TASK(next, "plugin deems task now invalid\n"); + litmus_reschedule_local(); + next = NULL; + } + } +#endif + + /* check if the task became invalid while we dropped the lock */ + if (next && (!is_realtime(next) || !tsk_rt(next)->present)) { + TRACE_TASK(next, + "BAD: next (no longer?) valid\n"); + litmus->next_became_invalid(next); + litmus_reschedule_local(); + next = NULL; + } + + if (next) { +#ifdef CONFIG_SMP + next->rt_param.stack_in_use = rq->cpu; +#else + next->rt_param.stack_in_use = 0; +#endif + update_rq_clock(rq); + next->se.exec_start = rq->clock; + } + +out: + update_enforcement_timer(next); + return next; +} + +static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, + int flags) +{ + tsk_rt(p)->present = 1; + if (flags & ENQUEUE_WAKEUP) { + sched_trace_task_resume(p); + /* LITMUS^RT plugins need to update the state + * _before_ making it available in global structures. + * Linux gets away with being lazy about the task state + * update. We can't do that, hence we update the task + * state already here. + * + * WARNING: this needs to be re-evaluated when porting + * to newer kernel versions. + */ + p->state = TASK_RUNNING; + litmus->task_wake_up(p); + + rq->litmus.nr_running++; + } else { + TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); + p->se.exec_start = rq->clock; + } +} + +static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, + int flags) +{ + if (flags & DEQUEUE_SLEEP) { +#ifdef CONFIG_SCHED_TASK_TRACE + tsk_rt(p)->job_params.last_suspension = litmus_clock(); +#endif + litmus->task_block(p); + tsk_rt(p)->present = 0; + sched_trace_task_block(p); + + rq->litmus.nr_running--; + } else + TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n"); +} + +static void yield_task_litmus(struct rq *rq) +{ + TS_SYSCALL_IN_START; + TS_SYSCALL_IN_END; + + BUG_ON(rq->curr != current); + /* sched_yield() is called to trigger delayed preemptions. + * Thus, mark the current task as needing to be rescheduled. + * This will cause the scheduler plugin to be invoked, which can + * then determine if a preemption is still required. + */ + clear_exit_np(current); + litmus_reschedule_local(); + + TS_SYSCALL_OUT_START; +} + +/* Plugins are responsible for this. 
+ */ +static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags) +{ +} + +static void put_prev_task_litmus(struct rq *rq, struct task_struct *p) +{ +} + +/* pick_next_task_litmus() - litmus_schedule() function + * + * return the next task to be scheduled + */ +static struct task_struct *pick_next_task_litmus(struct rq *rq, + struct task_struct *prev, struct pin_cookie cookie) +{ + struct task_struct *next; + + if (is_realtime(prev)) + update_time_litmus(rq, prev); + + lockdep_unpin_lock(&rq->lock, cookie); + TS_PLUGIN_SCHED_START; + next = litmus_schedule(rq, prev); + TS_PLUGIN_SCHED_END; + lockdep_repin_lock(&rq->lock, cookie); + + /* This is a bit backwards: the other classes call put_prev_task() + * _after_ they've determined that the class has some queued tasks. + * We can't determine this easily because each plugin manages its own + * ready queues, and because in the case of globally shared queues, + * we really don't know whether we'll have something ready even if + * we test here. So we do it in reverse: first ask the plugin to + * provide a task, and if we find one, call put_prev_task() on the + * previously scheduled task. + */ + if (next) + put_prev_task(rq, prev); + + return next; +} + +static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued) +{ + if (is_realtime(p) && !queued) { + update_time_litmus(rq, p); + /* budget check for QUANTUM_ENFORCEMENT tasks */ + if (budget_enforced(p) && budget_exhausted(p)) { + litmus_reschedule_local(); + } + } +} + +static void switched_to_litmus(struct rq *rq, struct task_struct *p) +{ +} + +static void prio_changed_litmus(struct rq *rq, struct task_struct *p, + int oldprio) +{ +} + +unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p) +{ + /* return infinity */ + return 0; +} + +/* This is called when a task became a real-time task, either due to a SCHED_* + * class transition or due to PI mutex inheritance. We don't handle Linux PI + * mutex inheritance yet (and probably never will). Use LITMUS provided + * synchronization primitives instead. + */ +static void set_curr_task_litmus(struct rq *rq) +{ + rq->curr->se.exec_start = rq->clock; +} + + +#ifdef CONFIG_SMP +/* execve tries to rebalance task in this scheduling domain. + * We don't care about the scheduling domain; can gets called from + * exec, fork, wakeup. + */ +static int +select_task_rq_litmus(struct task_struct *p, int cpu, int sd_flag, int flags) +{ + /* preemption is already disabled. + * We don't want to change cpu here + */ + return task_cpu(p); +} +#endif + +static void update_curr_litmus(struct rq *rq) +{ + struct task_struct *p = rq->curr; + + if (!is_realtime(p)) + return; + + update_time_litmus(rq, p); +} + +const struct sched_class litmus_sched_class = { + /* From 34f971f6 the stop/migrate worker threads have a class on + * their own, which is the highest prio class. We don't support + * cpu-hotplug or cpu throttling. Allows Litmus to use up to 1.0 + * CPU capacity. 
+ */ + .next = &stop_sched_class, + .enqueue_task = enqueue_task_litmus, + .dequeue_task = dequeue_task_litmus, + .yield_task = yield_task_litmus, + + .check_preempt_curr = check_preempt_curr_litmus, + + .pick_next_task = pick_next_task_litmus, + .put_prev_task = put_prev_task_litmus, + +#ifdef CONFIG_SMP + .select_task_rq = select_task_rq_litmus, +#endif + + .set_curr_task = set_curr_task_litmus, + .task_tick = task_tick_litmus, + + .get_rr_interval = get_rr_interval_litmus, + + .prio_changed = prio_changed_litmus, + .switched_to = switched_to_litmus, + + .update_curr = update_curr_litmus, +}; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 9b8adc01be3d..a48c98b950b3 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,6 +7,8 @@ #include "pelt.h" +#include + int sched_rr_timeslice = RR_TIMESLICE; int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE; @@ -499,8 +501,12 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) else if (!on_rt_rq(rt_se)) enqueue_rt_entity(rt_se, 0); - if (rt_rq->highest_prio.curr < curr->prio) + // LITMUS note: Don't subject LITMUS tasks to remote + // reschedules. + if ((rt_rq->highest_prio.curr < curr->prio) && + !is_realtime(curr)) { resched_curr(rq); + } } } @@ -589,8 +595,10 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { struct rq *rq = rq_of_rt_rq(rt_rq); - if (!rt_rq->rt_nr_running) + if (!rt_rq->rt_nr_running || + is_realtime(rq_of_rt_rq(rt_rq)->curr)) { return; + } enqueue_top_rt_rq(rt_rq); resched_curr(rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c8870c5bd7df..c4f7afbe90c0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -162,6 +162,11 @@ static inline int rt_policy(int policy) return policy == SCHED_FIFO || policy == SCHED_RR; } +static inline int litmus_policy(int policy) +{ + return policy == SCHED_LITMUS; +} + static inline int dl_policy(int policy) { return policy == SCHED_DEADLINE; @@ -169,7 +174,8 @@ static inline int dl_policy(int policy) static inline bool valid_policy(int policy) { return idle_policy(policy) || fair_policy(policy) || - rt_policy(policy) || dl_policy(policy); + rt_policy(policy) || dl_policy(policy) || + litmus_policy(policy); } static inline int task_has_idle_policy(struct task_struct *p) @@ -685,6 +691,10 @@ struct dl_rq { u64 bw_ratio; }; +struct litmus_rq { + unsigned long nr_running; +}; + #ifdef CONFIG_FAIR_GROUP_SCHED /* An entity is a task if it doesn't "own" a runqueue */ #define entity_is_task(se) (!se->my_q) @@ -881,6 +891,7 @@ struct rq { struct cfs_rq cfs; struct rt_rq rt; struct dl_rq dl; + struct litmus_rq litmus; #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this CPU: */ @@ -1783,11 +1794,19 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) next->sched_class->set_next_task(rq, next); } +/* FIXME: This is conceptually wrong; this should be below the stop-machine + * class, but existing plugins (that predate the stop-machine class) depend on + * the assumption that LITMUS^RT plugins are the top scheduling class.
+ */ +#define sched_class_highest (&litmus_sched_class) + +/* #ifdef CONFIG_SMP #define sched_class_highest (&stop_sched_class) #else #define sched_class_highest (&dl_sched_class) #endif +*/ #define for_class_range(class, _from, _to) \ for (class = (_from); class != (_to); class = class->next) @@ -1795,6 +1814,7 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) #define for_each_class(class) \ for_class_range(class, sched_class_highest, NULL) +extern const struct sched_class litmus_sched_class; extern const struct sched_class stop_sched_class; extern const struct sched_class dl_sched_class; extern const struct sched_class rt_sched_class; diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index c0640739e05e..3bd42cf27d88 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -8,6 +8,7 @@ * See kernel/stop_machine.c */ #include "sched.h" +#include #ifdef CONFIG_SMP static int @@ -43,6 +44,13 @@ pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf return NULL; set_next_task_stop(rq, rq->stop); + + /* Let the LITMUS state machine know that a task was picked. This is + * needed because the LITMUS scheduling plugin will not be called if + * the stop-task class picks a task. + */ + sched_state_task_picked(); + return rq->stop; } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 65605530ee34..ce20111d3fe2 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -42,6 +42,10 @@ #include #include +#include +#include +#include + #include #include @@ -1092,6 +1096,10 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, tim = hrtimer_update_lowres(timer, tim, mode); +#ifdef CONFIG_REPORT_TIMER_LATENCY + timer->when_added = base->get_time(); +#endif + hrtimer_set_expires_range_ns(timer, tim, delta_ns); /* Switch the timer base, if necessary: */ @@ -1546,6 +1554,9 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, { struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases & active_mask; +#ifdef CONFIG_REPORT_TIMER_LATENCY + ktime_t was_exp_nxt = cpu_base->expires_next; +#endif for_each_active_base(base, cpu_base, active) { struct timerqueue_node *node; @@ -1573,6 +1584,26 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, if (basenow < hrtimer_get_softexpires_tv64(timer)) break; +#ifdef CONFIG_REPORT_TIMER_LATENCY + if (cpu_base->hres_active && (basenow.tv64 >= + hrtimer_get_expires_tv64(timer) + + ((s64) CONFIG_REPORT_TIMER_LATENCY_THRESHOLD))) { + printk_ratelimited(KERN_WARNING "WARNING: " + "P%d timer latency: %lld now: %lld " + "basenow:%lld exp:%lld " + "nxt:%lld added:%lld " + "timer:%p fn:%p\n", + smp_processor_id(), + basenow.tv64 - hrtimer_get_expires_tv64(timer), + now.tv64, basenow.tv64, + hrtimer_get_expires_tv64(timer), + hrtimer_get_softexpires(timer), + was_exp_nxt.tv64, + timer->when_added.tv64, + timer, timer->function); + } +#endif + __run_hrtimer(cpu_base, base, timer, &basenow, flags); if (active_mask == HRTIMER_ACTIVE_SOFT) hrtimer_sync_wait_running(cpu_base, flags); @@ -1679,9 +1710,14 @@ retry: */ cpu_base->nr_hangs++; cpu_base->hang_detected = 1; + + TRACE("hrtimer hang detected on P%d: #%u\n", cpu_base->cpu, + cpu_base->nr_hangs); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); delta = ktime_sub(now, entry_time); + TRACE("hrtimer hang delta.tv64:%u\n", (unsigned int) delta.tv64); if ((unsigned int)delta > cpu_base->max_hang_time) cpu_base->max_hang_time = 
(unsigned int) delta; /* @@ -1692,6 +1728,9 @@ retry: expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); else expires_next = ktime_add(now, delta); + + TRACE("hrtimer expires_next:%llu\n", expires_next.tv64); + tick_program_event(expires_next, 1); pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); } @@ -1762,8 +1801,21 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) struct task_struct *task = t->task; t->task = NULL; - if (task) + if (task) { +#ifdef CONFIG_SCHED_OVERHEAD_TRACE + if (is_realtime(task)) { + ktime_t expires = hrtimer_get_expires(timer); + /* Fix up timers that were added past their due date, + * because that's not really release latency. */ + lt_t intended_release = max(expires.tv64, + timer->when_added.tv64); + TS_RELEASE_LATENCY(intended_release); + } +#endif + TS_RELEASE_START; wake_up_process(task); + TS_RELEASE_END; + } return HRTIMER_NORESTART; } @@ -1916,9 +1968,19 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp, u64 slack; slack = current->timer_slack_ns; - if (dl_task(current) || rt_task(current)) + if (dl_task(current) || rt_task(current) || is_realtime(current)) slack = 0; + if (is_realtime(current) && (clockid == CLOCK_MONOTONIC) && + (mode == HRTIMER_MODE_ABS)) { + /* Special handling: to handle periodic activations correctly + * despite timer jitter and overheads, the plugin might need to + * know the time at which the task intends to wake up. */ + tsk_rt(current)->doing_abs_nanosleep = 1; + tsk_rt(current)->nanosleep_wakeup = ktime_to_ns( + timespec_to_ktime(*rqtp)); + } + hrtimer_init_sleeper_on_stack(&t, clockid, mode); hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); ret = do_nanosleep(&t, mode); @@ -1937,6 +1999,9 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp, restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); out: destroy_hrtimer_on_stack(&t.timer); + + tsk_rt(current)->doing_abs_nanosleep = 0; + return ret; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 50055d2e4ea8..1ad757848f69 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -42,6 +42,8 @@ #include "internal.h" +#include + /* * Sleep at most 200ms at a time in balance_dirty_pages(). */ @@ -436,7 +438,8 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) if (bg_thresh >= thresh) bg_thresh = thresh / 2; tsk = current; - if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { + if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk) || + is_realtime(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; } @@ -486,7 +489,7 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat) else dirty = vm_dirty_ratio * node_memory / 100; - if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) + if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk) || is_realtime(tsk)) dirty += dirty / 4; return dirty; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f391c0c4ed1d..6d90a9ed20c4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -75,6 +75,8 @@ #include "internal.h" #include "shuffle.h" +#include + /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); #define MIN_PERCPU_PAGELIST_FRACTION (8) @@ -4208,8 +4210,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * comment for __cpuset_node_allowed(). 
*/ alloc_flags &= ~ALLOC_CPUSET; - } else if (unlikely(rt_task(current)) && !in_interrupt()) + } else if (unlikely(rt_task(current) || is_realtime(current)) && + !in_interrupt()) { alloc_flags |= ALLOC_HARDER; + } if (gfp_mask & __GFP_KSWAPD_RECLAIM) alloc_flags |= ALLOC_KSWAPD; -- cgit v1.2.2
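Illustrative note (not part of the patch): the uapi change above defines SCHED_LITMUS as policy 7, and the patched __sched_setscheduler() routes that policy through litmus_admit_task() before installing litmus_sched_class. A minimal user-space sketch of requesting admission might look like the following. It assumes the task's LITMUS^RT parameters (WCET, period, and so on) have already been configured through the litmus/ interfaces, normally via liblitmus; otherwise litmus_admit_task() is expected to reject the call. The priority is left at 0 because SCHED_LITMUS is not an rt_policy() in the patched priority check.

/* sketch_sched_litmus.c: hypothetical example, not taken from this patch.
 * Ask the kernel to admit the calling task to the SCHED_LITMUS class.
 */
#include <sched.h>
#include <stdio.h>

#ifndef SCHED_LITMUS
#define SCHED_LITMUS 7	/* matches the value added to include/uapi/linux/sched.h */
#endif

int main(void)
{
	/* SCHED_LITMUS is not an rt_policy(), so sched_priority must be 0. */
	struct sched_param param = { .sched_priority = 0 };

	/* pid 0 means the calling task; for this policy __sched_setscheduler()
	 * invokes litmus_admit_task() and fails if admission is refused. */
	if (sched_setscheduler(0, SCHED_LITMUS, &param) != 0) {
		perror("sched_setscheduler(SCHED_LITMUS)");
		return 1;
	}

	/* From here on, the task is scheduled by the active LITMUS^RT plugin. */
	return 0;
}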