-rw-r--r--  arch/Kconfig                         10
-rw-r--r--  arch/um/drivers/mconsole_kern.c       1
-rw-r--r--  arch/x86/Kconfig                      1
-rw-r--r--  arch/x86/include/asm/rcu.h           32
-rw-r--r--  arch/x86/include/asm/thread_info.h   10
-rw-r--r--  arch/x86/kernel/entry_64.S            9
-rw-r--r--  arch/x86/kernel/ptrace.c              5
-rw-r--r--  arch/x86/kernel/signal.c              4
-rw-r--r--  arch/x86/kernel/traps.c             109
-rw-r--r--  arch/x86/mm/fault.c                  13
-rw-r--r--  include/linux/rcupdate.h             15
-rw-r--r--  include/linux/sched.h                 8
-rw-r--r--  init/Kconfig                         18
-rw-r--r--  kernel/rcutree.c                    210
-rw-r--r--  kernel/rcutree.h                      4
-rw-r--r--  kernel/rcutree_plugin.h              20
-rw-r--r--  kernel/sched/core.c                  17
17 files changed, 407 insertions, 79 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 72f2fa189cc5..1401a7587973 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -281,4 +281,14 @@ config SECCOMP_FILTER
 
 	  See Documentation/prctl/seccomp_filter.txt for details.
 
+config HAVE_RCU_USER_QS
+	bool
+	help
+	  Provide kernel entry/exit hooks necessary for userspace
+	  RCU extended quiescent state. Syscalls need to be wrapped inside
+	  rcu_user_exit()-rcu_user_enter() through the slow path using the
+	  TIF_NOHZ flag. Exception handlers must be wrapped as well. Irqs
+	  are already protected inside rcu_irq_enter/rcu_irq_exit() but
+	  preemption or signal handling on irq exit still needs to be protected.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 664a60e8dfb4..c17de0db6736 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -705,6 +705,7 @@ static void stack_proc(void *arg)
 	struct task_struct *from = current, *to = arg;
 
 	to->thread.saved_task = from;
+	rcu_switch(from, to);
 	switch_to(from, to, from);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50a1d1f9b6d3..20c49b8450b8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,7 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_RCU_USER_QS if X86_64
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
new file mode 100644
index 000000000000..d1ac07a23979
--- /dev/null
+++ b/arch/x86/include/asm/rcu.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_X86_RCU_H
+#define _ASM_X86_RCU_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/rcupdate.h>
+#include <asm/ptrace.h>
+
+static inline void exception_enter(struct pt_regs *regs)
+{
+	rcu_user_exit();
+}
+
+static inline void exception_exit(struct pt_regs *regs)
+{
+#ifdef CONFIG_RCU_USER_QS
+	if (user_mode(regs))
+		rcu_user_enter();
+#endif
+}
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_RCU_USER_QS
+# define SCHEDULE_USER call schedule_user
+#else
+# define SCHEDULE_USER call schedule
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
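As a rough sketch of the calling convention this new header establishes (the handler name and body below are invented for illustration only; the real conversions are the traps.c and fault.c hunks further down), an exception handler is expected to bracket its work like this:

	/* Hypothetical handler, not part of the patch. */
	#include <asm/rcu.h>		/* exception_enter()/exception_exit() */

	dotraplinkage void do_example_trap(struct pt_regs *regs, long error_code)
	{
		exception_enter(regs);	/* leave the userspace extended QS, if we were in it */

		/* ... handler body; RCU read-side critical sections are safe here ... */

		exception_exit(regs);	/* re-enter the extended QS only if we trapped from user mode */
	}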
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89f794f007ec..c535d847e3b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,6 +89,7 @@ struct thread_info {
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_FORK		18	/* ret_from_fork */
+#define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_DEBUG		21	/* uses debug registers */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
@@ -114,6 +115,7 @@ struct thread_info {
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
+#define _TIF_NOHZ		(1 << TIF_NOHZ)
 #define _TIF_DEBUG		(1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
@@ -126,12 +128,13 @@ struct thread_info {
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
-	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT |	\
+	 _TIF_NOHZ)
 
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP |	\
-	 _TIF_SYSCALL_TRACEPOINT)
+	 _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK							\
@@ -141,7 +144,8 @@ struct thread_info {
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK						\
-	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT)
+	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |	\
+	 _TIF_NOHZ)
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
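Adding _TIF_NOHZ to these work masks is what forces affected tasks through the syscall slow path, where the RCU hooks run. In rough C terms (a simplified restatement of the effect, not the actual entry_64.S assembly; the helper name is made up):

	/* Simplified model of the fast-path test performed on syscall entry. */
	static bool syscall_wants_slow_path(struct thread_info *ti)
	{
		return ti->flags & _TIF_WORK_SYSCALL_ENTRY;	/* now includes _TIF_NOHZ */
	}

Once rcu_user_hooks_switch() sets TIF_NOHZ on a task at context-switch time, every syscall of that task goes through syscall_trace_enter()/syscall_trace_leave(), which the ptrace.c hunks below teach to call rcu_user_exit()/rcu_user_enter().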
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834f..1a8f3cbb6ee3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,6 +56,7 @@
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
+#include <asm/rcu.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -565,7 +566,7 @@ sysret_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call schedule
+	SCHEDULE_USER
 	popq_cfi %rdi
 	jmp sysret_check
 
@@ -678,7 +679,7 @@ int_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call schedule
+	SCHEDULE_USER
 	popq_cfi %rdi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -974,7 +975,7 @@ retint_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call schedule
+	SCHEDULE_USER
 	popq_cfi %rdi
 	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1449,7 +1450,7 @@ paranoid_userspace:
 paranoid_schedule:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
+	SCHEDULE_USER
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	jmp paranoid_userspace
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4c6a5c2bf0f..9f94f8ec26e4 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/rcupdate.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -1463,6 +1464,8 @@ long syscall_trace_enter(struct pt_regs *regs)
 {
 	long ret = 0;
 
+	rcu_user_exit();
+
 	/*
 	 * If we stepped into a sysenter/syscall insn, it trapped in
 	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1529,6 @@ void syscall_trace_leave(struct pt_regs *regs)
 			!test_thread_flag(TIF_SYSCALL_EMU);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall_exit(regs, step);
+
+	rcu_user_enter();
 }
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376e..bca0ab903e57 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -779,6 +779,8 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
+	rcu_user_exit();
+
 #ifdef CONFIG_X86_MCE
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -804,6 +806,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 #ifdef CONFIG_X86_32
 	clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
+
+	rcu_user_enter();
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c9369..378967578f22 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,6 +55,7 @@
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/mce.h>
+#include <asm/rcu.h>
 
 #include <asm/mach_traps.h>
 
@@ -180,11 +181,15 @@ vm86_trap:
 #define DO_ERROR(trapnr, signr, str, name)				\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-							== NOTIFY_STOP)	\
+	exception_enter(regs);						\
+	if (notify_die(DIE_TRAP, str, regs, error_code,		\
+			trapnr, signr) == NOTIFY_STOP) {		\
+		exception_exit(regs);					\
 		return;							\
+	}								\
 	conditional_sti(regs);						\
 	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
+	exception_exit(regs);						\
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)	\
@@ -195,11 +200,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 	info.si_errno = 0;						\
 	info.si_code = sicode;						\
 	info.si_addr = (void __user *)siaddr;				\
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-							== NOTIFY_STOP)	\
+	exception_enter(regs);						\
+	if (notify_die(DIE_TRAP, str, regs, error_code,		\
+			trapnr, signr) == NOTIFY_STOP) {		\
+		exception_exit(regs);					\
 		return;							\
+	}								\
 	conditional_sti(regs);						\
 	do_trap(trapnr, signr, str, regs, error_code, &info);		\
+	exception_exit(regs);						\
 }
 
 DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
@@ -222,12 +231,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
 /* Runs on IST stack */
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 {
+	exception_enter(regs);
 	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
-		       X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
-		return;
-	preempt_conditional_sti(regs);
-	do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+		       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
+		preempt_conditional_sti(regs);
+		do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
+		preempt_conditional_cli(regs);
+	}
+	exception_exit(regs);
 }
 
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -235,6 +246,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
 
+	exception_enter(regs);
 	/* Return not checked because double check cannot be ignored */
 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
@@ -255,16 +267,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk;
 
+	exception_enter(regs);
 	conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK)
-		goto gp_in_vm86;
+	if (regs->flags & X86_VM_MASK) {
+		local_irq_enable();
+		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+		goto exit;
+	}
#endif
 
 	tsk = current;
-	if (!user_mode(regs))
-		goto gp_in_kernel;
+	if (!user_mode(regs)) {
+		if (fixup_exception(regs))
+			goto exit;
+
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_nr = X86_TRAP_GP;
+		if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
+			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
+			die("general protection fault", regs, error_code);
+		goto exit;
+	}
 
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_GP;
@@ -279,25 +304,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	}
 
 	force_sig(SIGSEGV, tsk);
-	return;
-
-#ifdef CONFIG_X86_32
-gp_in_vm86:
-	local_irq_enable();
-	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-	return;
-#endif
-
-gp_in_kernel:
-	if (fixup_exception(regs))
-		return;
-
-	tsk->thread.error_code = error_code;
-	tsk->thread.trap_nr = X86_TRAP_GP;
-	if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
-			X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
-		return;
-	die("general protection fault", regs, error_code);
+exit:
+	exception_exit(regs);
 }
 
 /* May run on IST stack. */
@@ -312,15 +320,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
 			ftrace_int3_handler(regs))
 		return;
 #endif
+	exception_enter(regs);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 				SIGTRAP) == NOTIFY_STOP)
-		return;
+		goto exit;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 
 	if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 			SIGTRAP) == NOTIFY_STOP)
-		return;
+		goto exit;
 
 	/*
 	 * Let others (NMI) know that the debug stack is in use
@@ -331,6 +340,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
 	debug_stack_usage_dec();
+exit:
+	exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_64
@@ -391,6 +402,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	unsigned long dr6;
 	int si_code;
 
+	exception_enter(regs);
+
 	get_debugreg(dr6, 6);
 
 	/* Filter out all the reserved bits which are preset to 1 */
@@ -406,7 +419,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	/* Catch kmemcheck conditions first of all! */
 	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
-		return;
+		goto exit;
 
 	/* DR6 may or may not be cleared by the CPU */
 	set_debugreg(0, 6);
@@ -421,7 +434,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
 							SIGTRAP) == NOTIFY_STOP)
-		return;
+		goto exit;
 
 	/*
 	 * Let others (NMI) know that the debug stack is in use
@@ -437,7 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 							X86_TRAP_DB);
 		preempt_conditional_cli(regs);
 		debug_stack_usage_dec();
-		return;
+		goto exit;
 	}
 
 	/*
@@ -458,7 +471,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	preempt_conditional_cli(regs);
 	debug_stack_usage_dec();
 
-	return;
+exit:
+	exception_exit(regs);
 }
 
 /*
@@ -555,14 +569,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
 	ignore_fpu_irq = 1;
 #endif
-
+	exception_enter(regs);
 	math_error(regs, error_code, X86_TRAP_MF);
+	exception_exit(regs);
 }
 
 dotraplinkage void
 do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 {
+	exception_enter(regs);
 	math_error(regs, error_code, X86_TRAP_XF);
+	exception_exit(regs);
 }
 
 dotraplinkage void
@@ -629,6 +646,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+	exception_enter(regs);
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
@@ -637,6 +655,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 
 		info.regs = regs;
 		math_emulate(&info);
+		exception_exit(regs);
 		return;
 	}
 #endif
@@ -644,12 +663,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
 	conditional_sti(regs);
 #endif
+	exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
 	siginfo_t info;
+
+	exception_enter(regs);
 	local_irq_enable();
 
 	info.si_signo = SIGILL;
@@ -657,10 +679,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 	info.si_code = ILL_BADSTK;
 	info.si_addr = NULL;
 	if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
-			X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
-		return;
-	do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
-		&info);
+			X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
+		do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
+			&info);
+	}
+	exception_exit(regs);
 }
 #endif
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 76dcd9d8e0bc..7dde46d68a25 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,7 @@
 #include <asm/pgalloc.h>		/* pgd_*(), ... */
 #include <asm/kmemcheck.h>		/* kmemcheck_*(), ... */
 #include <asm/fixmap.h>			/* VSYSCALL_START */
+#include <asm/rcu.h>			/* exception_enter(), ... */
 
 /*
  * Page fault error code bits:
@@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address)
  * and the problem, and then passes it off to one of the appropriate
  * routines.
  */
-dotraplinkage void __kprobes
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void __kprobes
+__do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
@@ -1209,3 +1210,11 @@ good_area:
 
 	up_read(&mm->mmap_sem);
 }
+
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+	exception_enter(regs);
+	__do_page_fault(regs, error_code);
+	exception_exit(regs);
+}
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 0fbbd52e01f9..7c968e4f929e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -191,6 +191,21 @@ extern void rcu_idle_enter(void);
 extern void rcu_idle_exit(void);
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
+
+#ifdef CONFIG_RCU_USER_QS
+extern void rcu_user_enter(void);
+extern void rcu_user_exit(void);
+extern void rcu_user_enter_after_irq(void);
+extern void rcu_user_exit_after_irq(void);
+extern void rcu_user_hooks_switch(struct task_struct *prev,
+				  struct task_struct *next);
+#else
+static inline void rcu_user_enter(void) { }
+static inline void rcu_user_exit(void) { }
+static inline void rcu_user_enter_after_irq(void) { }
+static inline void rcu_user_exit_after_irq(void) { }
+#endif /* CONFIG_RCU_USER_QS */
+
 extern void exit_rcu(void);
 
 /**
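Because the !CONFIG_RCU_USER_QS branch provides empty inline stubs, callers can use the API unconditionally. A hedged sketch of a hypothetical architecture hook pair (the two helper names below are invented; the actual x86 wiring in this series goes through ptrace.c, signal.c and asm/rcu.h instead):

	#include <linux/rcupdate.h>

	/* Hypothetical helpers; both compile to nothing when CONFIG_RCU_USER_QS=n. */
	static inline void arch_enter_from_user(void)
	{
		rcu_user_exit();	/* kernel code may use RCU read-side sections again */
	}

	static inline void arch_return_to_user(void)
	{
		rcu_user_enter();	/* userspace execution counts as an extended QS */
	}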
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 23bddac4bad8..335720a1fc33 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1885,6 +1885,14 @@ static inline void rcu_copy_process(struct task_struct *p)
 
 #endif
 
+static inline void rcu_switch(struct task_struct *prev,
+			      struct task_struct *next)
+{
+#ifdef CONFIG_RCU_USER_QS
+	rcu_user_hooks_switch(prev, next);
+#endif
+}
+
 static inline void tsk_restore_flags(struct task_struct *task,
 				unsigned long orig_flags, unsigned long flags)
 {
diff --git a/init/Kconfig b/init/Kconfig
index af6c7f8ba019..c26b8a1d2b57 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -441,6 +441,24 @@ config PREEMPT_RCU
 	  This option enables preemptible-RCU code that is common between
 	  the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
 
+config RCU_USER_QS
+	bool "Consider userspace as in RCU extended quiescent state"
+	depends on HAVE_RCU_USER_QS && SMP
+	help
+	  This option sets hooks on kernel / userspace boundaries and
+	  puts RCU in extended quiescent state when the CPU runs in
+	  userspace. It means that when a CPU runs in userspace, it is
+	  excluded from the global RCU state machine and thus doesn't
+	  need to keep the timer tick on for RCU.
+
+config RCU_USER_QS_FORCE
+	bool "Force userspace extended QS by default"
+	depends on RCU_USER_QS
+	help
+	  Set the hooks in user/kernel boundaries by default in order to
+	  test this feature that treats userspace as an extended quiescent
+	  state until we have a real user like a full adaptive nohz option.
+
 config RCU_FANOUT
 	int "Tree-based hierarchical RCU fanout value"
 	range 2 64 if 64BIT
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7387e46009d9..4fb2376ddf06 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -206,6 +206,9 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
 	.dynticks = ATOMIC_INIT(1),
+#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
+	.ignore_user_qs = true,
+#endif
 };
 
 static int blimit = 10;		/* Maximum callbacks per rcu_do_batch. */
@@ -322,16 +325,17 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 }
 
 /*
- * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
+ * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
  *
 * If the new value of the ->dynticks_nesting counter now is zero,
 * we really have entered idle, and must do the appropriate accounting.
 * The caller must have disabled interrupts.
 */
-static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
+static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
+				 bool user)
 {
 	trace_rcu_dyntick("Start", oldval, 0);
-	if (!is_idle_task(current)) {
+	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle = idle_task(smp_processor_id());
 
 		trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
@@ -348,7 +352,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 
 	/*
-	 * The idle task is not permitted to enter the idle loop while
+	 * It is illegal to enter an extended quiescent state while
 	 * in an RCU read-side critical section.
 	 */
 	rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
@@ -359,6 +363,25 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
 			   "Illegal idle entry in RCU-sched read-side critical section.");
 }
 
+/*
+ * Enter an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_enter(bool user)
+{
+	long long oldval;
+	struct rcu_dynticks *rdtp;
+
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	oldval = rdtp->dynticks_nesting;
+	WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
+	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+		rdtp->dynticks_nesting = 0;
+	else
+		rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
+	rcu_eqs_enter_common(rdtp, oldval, user);
+}
+
 /**
  * rcu_idle_enter - inform RCU that current CPU is entering idle
  *
@@ -374,21 +397,70 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
 void rcu_idle_enter(void)
 {
 	unsigned long flags;
-	long long oldval;
+
+	local_irq_save(flags);
+	rcu_eqs_enter(false);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
+
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_enter - inform RCU that we are resuming userspace.
+ *
+ * Enter RCU idle mode right before resuming userspace. No use of RCU
+ * is permitted between this call and rcu_user_exit(). This way the
+ * CPU doesn't need to maintain the tick for RCU maintenance purposes
+ * when the CPU runs in userspace.
+ */
+void rcu_user_enter(void)
+{
+	unsigned long flags;
 	struct rcu_dynticks *rdtp;
 
+	/*
+	 * Some contexts may involve an exception occuring in an irq,
+	 * leading to that nesting:
+	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+	 * helpers are enough to protect RCU uses inside the exception. So
+	 * just return immediately if we detect we are in an IRQ.
+	 */
+	if (in_interrupt())
+		return;
+
+	WARN_ON_ONCE(!current->mm);
+
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	oldval = rdtp->dynticks_nesting;
-	WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
-	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
-		rdtp->dynticks_nesting = 0;
-	else
-		rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
-	rcu_idle_enter_common(rdtp, oldval);
+	if (!rdtp->ignore_user_qs && !rdtp->in_user) {
+		rdtp->in_user = true;
+		rcu_eqs_enter(true);
+	}
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
+
+/**
+ * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace
+ * after the current irq returns.
+ *
+ * This is similar to rcu_user_enter() but in the context of a non-nesting
+ * irq. After this call, RCU enters into idle mode when the interrupt
+ * returns.
+ */
+void rcu_user_enter_after_irq(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	/* Ensure this irq is interrupting a non-idle RCU state.  */
+	WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK));
+	rdtp->dynticks_nesting = 1;
+	local_irq_restore(flags);
+}
+#endif /* CONFIG_RCU_USER_QS */
 
 /**
  * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
@@ -420,18 +492,19 @@ void rcu_irq_exit(void)
 	if (rdtp->dynticks_nesting)
 		trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
 	else
-		rcu_idle_enter_common(rdtp, oldval);
+		rcu_eqs_enter_common(rdtp, oldval, true);
 	local_irq_restore(flags);
 }
 
 /*
- * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
+ * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
  *
 * If the new value of the ->dynticks_nesting counter was previously zero,
 * we really have exited idle, and must do the appropriate accounting.
 * The caller must have disabled interrupts.
 */
-static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
+static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
+				int user)
 {
 	smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
 	atomic_inc(&rdtp->dynticks);
@@ -440,7 +513,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
 	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 	rcu_cleanup_after_idle(smp_processor_id());
 	trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
-	if (!is_idle_task(current)) {
+	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle = idle_task(smp_processor_id());
 
 		trace_rcu_dyntick("Error on exit: not idle task",
@@ -452,6 +525,25 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
 	}
 }
 
+/*
+ * Exit an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_exit(bool user)
+{
+	struct rcu_dynticks *rdtp;
+	long long oldval;
+
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	oldval = rdtp->dynticks_nesting;
+	WARN_ON_ONCE(oldval < 0);
+	if (oldval & DYNTICK_TASK_NEST_MASK)
+		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+	else
+		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+	rcu_eqs_exit_common(rdtp, oldval, user);
+}
+
 /**
  * rcu_idle_exit - inform RCU that current CPU is leaving idle
  *
@@ -466,21 +558,67 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
 void rcu_idle_exit(void)
 {
 	unsigned long flags;
+
+	local_irq_save(flags);
+	rcu_eqs_exit(false);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_exit - inform RCU that we are exiting userspace.
+ *
+ * Exit RCU idle mode while entering the kernel because it can
+ * run a RCU read side critical section anytime.
+ */
+void rcu_user_exit(void)
+{
+	unsigned long flags;
 	struct rcu_dynticks *rdtp;
-	long long oldval;
+
+	/*
+	 * Some contexts may involve an exception occuring in an irq,
+	 * leading to that nesting:
+	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+	 * helpers are enough to protect RCU uses inside the exception. So
+	 * just return immediately if we detect we are in an IRQ.
+	 */
+	if (in_interrupt())
+		return;
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	oldval = rdtp->dynticks_nesting;
-	WARN_ON_ONCE(oldval < 0);
-	if (oldval & DYNTICK_TASK_NEST_MASK)
-		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-	else
-		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-	rcu_idle_exit_common(rdtp, oldval);
+	if (rdtp->in_user) {
+		rdtp->in_user = false;
+		rcu_eqs_exit(true);
+	}
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
+/**
+ * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace
+ * idle mode after the current non-nesting irq returns.
+ *
+ * This is similar to rcu_user_exit() but in the context of an irq.
+ * This is called when the irq has interrupted a userspace RCU idle mode
+ * context. When the current non-nesting interrupt returns after this call,
+ * the CPU won't restore the RCU idle mode.
+ */
+void rcu_user_exit_after_irq(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	/* Ensure we are interrupting an RCU idle mode. */
+	WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK);
+	rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE;
+	local_irq_restore(flags);
+}
+#endif /* CONFIG_RCU_USER_QS */
 
 /**
  * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -515,7 +653,7 @@ void rcu_irq_enter(void)
 	if (oldval)
 		trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
 	else
-		rcu_idle_exit_common(rdtp, oldval);
+		rcu_eqs_exit_common(rdtp, oldval, true);
 	local_irq_restore(flags);
 }
 
@@ -579,6 +717,21 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
+#ifdef CONFIG_RCU_USER_QS
+void rcu_user_hooks_switch(struct task_struct *prev,
+			   struct task_struct *next)
+{
+	struct rcu_dynticks *rdtp;
+
+	/* Interrupts are disabled in context switch */
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	if (!rdtp->ignore_user_qs) {
+		clear_tsk_thread_flag(prev, TIF_NOHZ);
+		set_tsk_thread_flag(next, TIF_NOHZ);
+	}
+}
+#endif /* #ifdef CONFIG_RCU_USER_QS */
+
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
@@ -2473,6 +2626,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
+#ifdef CONFIG_RCU_USER_QS
+	WARN_ON_ONCE(rdp->dynticks->in_user);
+#endif
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
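A standalone way to see why rcu_user_enter()/rcu_user_exit() guard themselves with the in_user flag: kernel entry paths can nest (for example an exception inside a syscall slow path calls rcu_user_exit() a second time), so only the outermost transition may touch the nesting count. The toy below is a plain userspace model with deliberately simplified accounting (the real code adds and subtracts DYNTICK_TASK_NEST_VALUE and also honors ignore_user_qs); it is not kernel code:

	#include <stdbool.h>
	#include <stdio.h>

	/* Toy stand-ins for the rcu_dynticks fields, simplified. */
	struct toy_dynticks {
		long long nesting;	/* models ->dynticks_nesting, 0 means extended QS */
		bool in_user;		/* models ->in_user */
	};

	static void toy_user_enter(struct toy_dynticks *d)
	{
		if (!d->in_user) {	/* only the outermost transition counts */
			d->in_user = true;
			d->nesting = 0;	/* enter the extended QS */
		}
	}

	static void toy_user_exit(struct toy_dynticks *d)
	{
		if (d->in_user) {	/* redundant exits are no-ops */
			d->in_user = false;
			d->nesting = 1;	/* back to a non-QS kernel context */
		}
	}

	int main(void)
	{
		struct toy_dynticks d = { .nesting = 1, .in_user = false };

		toy_user_enter(&d);	/* resume userspace */
		toy_user_exit(&d);	/* syscall entry hook */
		toy_user_exit(&d);	/* exception hook fires too: no double accounting */
		printf("nesting=%lld in_user=%d\n", d.nesting, d.in_user);	/* prints 1 0 */
		return 0;
	}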
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7576fd4d8ce6..5faf05d68326 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -102,6 +102,10 @@ struct rcu_dynticks {
 				    /* idle-period nonlazy_posted snapshot. */
 	int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+#ifdef CONFIG_RCU_USER_QS
+	bool ignore_user_qs;	    /* Treat userspace as extended QS or not */
+	bool in_user;		    /* Is the CPU in userland from RCU POV? */
+#endif
 };
 
 /* RCU's kthread states for tracing. */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 9c71c1b18e03..f92115488187 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1757,6 +1757,26 @@ static void rcu_prepare_for_idle(int cpu)
 	if (!tne)
 		return;
 
+	/* Adaptive-tick mode, where usermode execution is idle to RCU. */
+	if (!is_idle_task(current)) {
+		rdtp->dyntick_holdoff = jiffies - 1;
+		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+			trace_rcu_prep_idle("User dyntick with callbacks");
+			rdtp->idle_gp_timer_expires =
+				round_up(jiffies + RCU_IDLE_GP_DELAY,
+					 RCU_IDLE_GP_DELAY);
+		} else if (rcu_cpu_has_callbacks(cpu)) {
+			rdtp->idle_gp_timer_expires =
+				round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
+			trace_rcu_prep_idle("User dyntick with lazy callbacks");
+		} else {
+			return;
+		}
+		tp = &rdtp->idle_gp_timer;
+		mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+		return;
+	}
+
 	/*
 	 * If this is an idle re-entry, for example, due to use of
 	 * RCU_NONIDLE() or the new idle-loop tracing API within the idle
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a48cdbc8631..3c4dec0594d6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2081,6 +2081,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
 	/* Here we just switch the register state and the stack. */
+	rcu_switch(prev, next);
 	switch_to(prev, next, prev);
 
 	barrier();
@@ -3468,6 +3469,21 @@ asmlinkage void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
+#ifdef CONFIG_RCU_USER_QS
+asmlinkage void __sched schedule_user(void)
+{
+	/*
+	 * If we come here after a random call to set_need_resched(),
+	 * or we have been woken up remotely but the IPI has not yet arrived,
+	 * we haven't yet exited the RCU idle mode. Do it here manually until
+	 * we find a better solution.
+	 */
+	rcu_user_exit();
+	schedule();
+	rcu_user_enter();
+}
+#endif
+
 /**
  * schedule_preempt_disabled - called with preemption disabled
  *
@@ -3569,6 +3585,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
+	rcu_user_exit();
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		local_irq_enable();