 arch/Kconfig                        |  10
 arch/um/drivers/mconsole_kern.c     |   1
 arch/x86/Kconfig                    |   1
 arch/x86/include/asm/rcu.h          |  32
 arch/x86/include/asm/thread_info.h  |  10
 arch/x86/kernel/entry_64.S          |   9
 arch/x86/kernel/ptrace.c            |   5
 arch/x86/kernel/signal.c            |   4
 arch/x86/kernel/traps.c             | 109
 arch/x86/mm/fault.c                 |  13
 include/linux/rcupdate.h            |  15
 include/linux/sched.h               |   8
 init/Kconfig                        |  18
 kernel/rcutree.c                    | 210
 kernel/rcutree.h                    |   4
 kernel/rcutree_plugin.h             |  20
 kernel/sched/core.c                 |  17
 17 files changed, 407 insertions(+), 79 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 72f2fa189cc5..1401a7587973 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -281,4 +281,14 @@ config SECCOMP_FILTER
 
           See Documentation/prctl/seccomp_filter.txt for details.
 
+config HAVE_RCU_USER_QS
+        bool
+        help
+          Provide kernel entry/exit hooks necessary for userspace
+          RCU extended quiescent state. Syscalls need to be wrapped inside
+          rcu_user_exit()-rcu_user_enter() through the slow path using
+          TIF_NOHZ flag. Exception handlers must be wrapped as well. Irqs
+          are already protected inside rcu_irq_enter/rcu_irq_exit() but
+          preemption or signal handling on irq exit still need to be protected.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 664a60e8dfb4..c17de0db6736 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -705,6 +705,7 @@ static void stack_proc(void *arg)
         struct task_struct *from = current, *to = arg;
 
         to->thread.saved_task = from;
+        rcu_switch(from, to);
         switch_to(from, to, from);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50a1d1f9b6d3..20c49b8450b8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,7 @@ config X86
         select KTIME_SCALAR if X86_32
         select GENERIC_STRNCPY_FROM_USER
         select GENERIC_STRNLEN_USER
+        select HAVE_RCU_USER_QS if X86_64
 
 config INSTRUCTION_DECODER
         def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
new file mode 100644
index 000000000000..d1ac07a23979
--- /dev/null
+++ b/arch/x86/include/asm/rcu.h
@@ -0,0 +1,32 @@
+#ifndef _ASM_X86_RCU_H
+#define _ASM_X86_RCU_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/rcupdate.h>
+#include <asm/ptrace.h>
+
+static inline void exception_enter(struct pt_regs *regs)
+{
+        rcu_user_exit();
+}
+
+static inline void exception_exit(struct pt_regs *regs)
+{
+#ifdef CONFIG_RCU_USER_QS
+        if (user_mode(regs))
+                rcu_user_enter();
+#endif
+}
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_RCU_USER_QS
+# define SCHEDULE_USER call schedule_user
+#else
+# define SCHEDULE_USER call schedule
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
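
For orientation, here is a minimal sketch (not part of the patch) of the usage pattern these helpers are meant for; the handler name is hypothetical, and the real wrappings are the traps.c and fault.c changes further down:

#include <asm/rcu.h>            /* exception_enter(), exception_exit() */

/* Hypothetical exception handler following the wrapping pattern used below. */
dotraplinkage void do_example_trap(struct pt_regs *regs, long error_code)
{
        exception_enter(regs);  /* leave the userspace extended QS: RCU is usable again */
        /* ... handle the trap; rcu_read_lock()/rcu_read_unlock() are safe here ... */
        exception_exit(regs);   /* re-enter the extended QS only if regs came from userspace */
}
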
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89f794f007ec..c535d847e3b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,6 +89,7 @@ struct thread_info {
 #define TIF_NOTSC               16      /* TSC is not accessible in userland */
 #define TIF_IA32                17      /* IA32 compatibility process */
 #define TIF_FORK                18      /* ret_from_fork */
+#define TIF_NOHZ                19      /* in adaptive nohz mode */
 #define TIF_MEMDIE              20      /* is terminating due to OOM killer */
 #define TIF_DEBUG               21      /* uses debug registers */
 #define TIF_IO_BITMAP           22      /* uses I/O bitmap */
@@ -114,6 +115,7 @@ struct thread_info {
 #define _TIF_NOTSC              (1 << TIF_NOTSC)
 #define _TIF_IA32               (1 << TIF_IA32)
 #define _TIF_FORK               (1 << TIF_FORK)
+#define _TIF_NOHZ               (1 << TIF_NOHZ)
 #define _TIF_DEBUG              (1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP          (1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF          (1 << TIF_FORCED_TF)
@@ -126,12 +128,13 @@ struct thread_info {
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY \
         (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |  \
-         _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+         _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT |    \
+         _TIF_NOHZ)
 
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT  \
         (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP |   \
-         _TIF_SYSCALL_TRACEPOINT)
+         _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK                                                  \
@@ -141,7 +144,8 @@ struct thread_info {
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK                                               \
-        ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT)
+        ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |      \
+         _TIF_NOHZ)
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK                                             \
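
The point of adding _TIF_NOHZ to these masks is to force tasks in adaptive nohz mode through the syscall slow path; a rough sketch of the effect (illustrative C, not the actual entry_64.S assembly, and the helper name is hypothetical):

/* Hypothetical helper; the real check is done in assembly on syscall entry. */
static void example_syscall_entry_check(struct pt_regs *regs)
{
        if (current_thread_info()->flags & _TIF_WORK_SYSCALL_ENTRY)
                syscall_trace_enter(regs);      /* calls rcu_user_exit() first (see ptrace.c below) */
}
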
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834f..1a8f3cbb6ee3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,6 +56,7 @@
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
+#include <asm/rcu.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -565,7 +566,7 @@ sysret_careful:
         TRACE_IRQS_ON
         ENABLE_INTERRUPTS(CLBR_NONE)
         pushq_cfi %rdi
-        call schedule
+        SCHEDULE_USER
         popq_cfi %rdi
         jmp sysret_check
 
@@ -678,7 +679,7 @@ int_careful:
         TRACE_IRQS_ON
         ENABLE_INTERRUPTS(CLBR_NONE)
         pushq_cfi %rdi
-        call schedule
+        SCHEDULE_USER
         popq_cfi %rdi
         DISABLE_INTERRUPTS(CLBR_NONE)
         TRACE_IRQS_OFF
@@ -974,7 +975,7 @@ retint_careful:
         TRACE_IRQS_ON
         ENABLE_INTERRUPTS(CLBR_NONE)
         pushq_cfi %rdi
-        call schedule
+        SCHEDULE_USER
         popq_cfi %rdi
         GET_THREAD_INFO(%rcx)
         DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1449,7 +1450,7 @@ paranoid_userspace:
 paranoid_schedule:
         TRACE_IRQS_ON
         ENABLE_INTERRUPTS(CLBR_ANY)
-        call schedule
+        SCHEDULE_USER
         DISABLE_INTERRUPTS(CLBR_ANY)
         TRACE_IRQS_OFF
         jmp paranoid_userspace
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4c6a5c2bf0f..9f94f8ec26e4 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/rcupdate.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -1463,6 +1464,8 @@ long syscall_trace_enter(struct pt_regs *regs)
 {
         long ret = 0;
 
+        rcu_user_exit();
+
         /*
          * If we stepped into a sysenter/syscall insn, it trapped in
          * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1529,6 @@ void syscall_trace_leave(struct pt_regs *regs)
                         !test_thread_flag(TIF_SYSCALL_EMU);
         if (step || test_thread_flag(TIF_SYSCALL_TRACE))
                 tracehook_report_syscall_exit(regs, step);
+
+        rcu_user_enter();
 }
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376e..bca0ab903e57 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -779,6 +779,8 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
+        rcu_user_exit();
+
 #ifdef CONFIG_X86_MCE
         /* notify userspace of pending MCEs */
         if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -804,6 +806,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 #ifdef CONFIG_X86_32
         clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
+
+        rcu_user_enter();
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c9369..378967578f22 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,6 +55,7 @@
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/mce.h>
+#include <asm/rcu.h>
 
 #include <asm/mach_traps.h>
 
@@ -180,11 +181,15 @@ vm86_trap:
 #define DO_ERROR(trapnr, signr, str, name)                              \
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)     \
 {                                                                       \
-        if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
-                                                        == NOTIFY_STOP) \
+        exception_enter(regs);                                          \
+        if (notify_die(DIE_TRAP, str, regs, error_code,                 \
+                        trapnr, signr) == NOTIFY_STOP) {                \
+                exception_exit(regs);                                   \
                 return;                                                 \
+        }                                                               \
         conditional_sti(regs);                                          \
         do_trap(trapnr, signr, str, regs, error_code, NULL);            \
+        exception_exit(regs);                                           \
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)         \
@@ -195,11 +200,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
         info.si_errno = 0;                                              \
         info.si_code = sicode;                                          \
         info.si_addr = (void __user *)siaddr;                           \
-        if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
-                                                        == NOTIFY_STOP) \
+        exception_enter(regs);                                          \
+        if (notify_die(DIE_TRAP, str, regs, error_code,                 \
+                        trapnr, signr) == NOTIFY_STOP) {                \
+                exception_exit(regs);                                   \
                 return;                                                 \
+        }                                                               \
         conditional_sti(regs);                                          \
         do_trap(trapnr, signr, str, regs, error_code, &info);           \
+        exception_exit(regs);                                           \
 }
 
 DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
@@ -222,12 +231,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
 /* Runs on IST stack */
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 {
+        exception_enter(regs);
         if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
-                       X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
-                return;
-        preempt_conditional_sti(regs);
-        do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-        preempt_conditional_cli(regs);
+                       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
+                preempt_conditional_sti(regs);
+                do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
+                preempt_conditional_cli(regs);
+        }
+        exception_exit(regs);
 }
 
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -235,6 +246,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
         static const char str[] = "double fault";
         struct task_struct *tsk = current;
 
+        exception_enter(regs);
         /* Return not checked because double check cannot be ignored */
         notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
@@ -255,16 +267,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
 {
         struct task_struct *tsk;
 
+        exception_enter(regs);
         conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
-        if (regs->flags & X86_VM_MASK)
-                goto gp_in_vm86;
+        if (regs->flags & X86_VM_MASK) {
+                local_irq_enable();
+                handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+                goto exit;
+        }
 #endif
 
         tsk = current;
-        if (!user_mode(regs))
-                goto gp_in_kernel;
+        if (!user_mode(regs)) {
+                if (fixup_exception(regs))
+                        goto exit;
+
+                tsk->thread.error_code = error_code;
+                tsk->thread.trap_nr = X86_TRAP_GP;
+                if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
+                               X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
+                        die("general protection fault", regs, error_code);
+                goto exit;
+        }
 
         tsk->thread.error_code = error_code;
         tsk->thread.trap_nr = X86_TRAP_GP;
@@ -279,25 +304,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
         }
 
         force_sig(SIGSEGV, tsk);
-        return;
-
-#ifdef CONFIG_X86_32
-gp_in_vm86:
-        local_irq_enable();
-        handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-        return;
-#endif
-
-gp_in_kernel:
-        if (fixup_exception(regs))
-                return;
-
-        tsk->thread.error_code = error_code;
-        tsk->thread.trap_nr = X86_TRAP_GP;
-        if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
-                        X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
-                return;
-        die("general protection fault", regs, error_code);
+exit:
+        exception_exit(regs);
 }
 
 /* May run on IST stack. */
@@ -312,15 +320,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
             ftrace_int3_handler(regs))
                 return;
 #endif
+        exception_enter(regs);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
         if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                                 SIGTRAP) == NOTIFY_STOP)
-                return;
+                goto exit;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 
         if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                         SIGTRAP) == NOTIFY_STOP)
-                return;
+                goto exit;
 
         /*
          * Let others (NMI) know that the debug stack is in use
@@ -331,6 +340,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
         do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
         preempt_conditional_cli(regs);
         debug_stack_usage_dec();
+exit:
+        exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_64
@@ -391,6 +402,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
         unsigned long dr6;
         int si_code;
 
+        exception_enter(regs);
+
         get_debugreg(dr6, 6);
 
         /* Filter out all the reserved bits which are preset to 1 */
@@ -406,7 +419,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
         /* Catch kmemcheck conditions first of all! */
         if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
-                return;
+                goto exit;
 
         /* DR6 may or may not be cleared by the CPU */
         set_debugreg(0, 6);
@@ -421,7 +434,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
         if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
                                                         SIGTRAP) == NOTIFY_STOP)
-                return;
+                goto exit;
 
         /*
          * Let others (NMI) know that the debug stack is in use
@@ -437,7 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
                                                         X86_TRAP_DB);
                 preempt_conditional_cli(regs);
                 debug_stack_usage_dec();
-                return;
+                goto exit;
         }
 
         /*
@@ -458,7 +471,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
         preempt_conditional_cli(regs);
         debug_stack_usage_dec();
 
-        return;
+exit:
+        exception_exit(regs);
 }
 
 /*
@@ -555,14 +569,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
         ignore_fpu_irq = 1;
 #endif
-
+        exception_enter(regs);
         math_error(regs, error_code, X86_TRAP_MF);
+        exception_exit(regs);
 }
 
 dotraplinkage void
 do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 {
+        exception_enter(regs);
         math_error(regs, error_code, X86_TRAP_XF);
+        exception_exit(regs);
 }
 
 dotraplinkage void
@@ -629,6 +646,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+        exception_enter(regs);
 #ifdef CONFIG_MATH_EMULATION
         if (read_cr0() & X86_CR0_EM) {
                 struct math_emu_info info = { };
@@ -637,6 +655,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 
                 info.regs = regs;
                 math_emulate(&info);
+                exception_exit(regs);
                 return;
         }
 #endif
@@ -644,12 +663,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
         conditional_sti(regs);
 #endif
+        exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
         siginfo_t info;
+
+        exception_enter(regs);
         local_irq_enable();
 
         info.si_signo = SIGILL;
@@ -657,10 +679,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
         info.si_code = ILL_BADSTK;
         info.si_addr = NULL;
         if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
-                        X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
-                return;
-        do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
-                &info);
+                        X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
+                do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
+                        &info);
+        }
+        exception_exit(regs);
 }
 #endif
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 76dcd9d8e0bc..7dde46d68a25 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,7 @@
 #include <asm/pgalloc.h>                /* pgd_*(), ...                 */
 #include <asm/kmemcheck.h>              /* kmemcheck_*(), ...           */
 #include <asm/fixmap.h>                 /* VSYSCALL_START               */
+#include <asm/rcu.h>                    /* exception_enter(), ...       */
 
 /*
  * Page fault error code bits:
@@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address)
  * and the problem, and then passes it off to one of the appropriate
  * routines.
  */
-dotraplinkage void __kprobes
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void __kprobes
+__do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
         struct vm_area_struct *vma;
         struct task_struct *tsk;
@@ -1209,3 +1210,11 @@ good_area:
 
         up_read(&mm->mmap_sem);
 }
+
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+        exception_enter(regs);
+        __do_page_fault(regs, error_code);
+        exception_exit(regs);
+}
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 0fbbd52e01f9..7c968e4f929e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -191,6 +191,21 @@ extern void rcu_idle_enter(void);
 extern void rcu_idle_exit(void);
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
+
+#ifdef CONFIG_RCU_USER_QS
+extern void rcu_user_enter(void);
+extern void rcu_user_exit(void);
+extern void rcu_user_enter_after_irq(void);
+extern void rcu_user_exit_after_irq(void);
+extern void rcu_user_hooks_switch(struct task_struct *prev,
+                                  struct task_struct *next);
+#else
+static inline void rcu_user_enter(void) { }
+static inline void rcu_user_exit(void) { }
+static inline void rcu_user_enter_after_irq(void) { }
+static inline void rcu_user_exit_after_irq(void) { }
+#endif /* CONFIG_RCU_USER_QS */
+
 extern void exit_rcu(void);
 
 /**
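
A minimal sketch of the intended pairing on a return-to-user path (the caller name is hypothetical; with CONFIG_RCU_USER_QS=n both calls compile down to the empty stubs above):

static void example_return_to_user(void)
{
        rcu_user_enter();       /* from here the CPU is in an RCU extended QS...        */
        /* ... CPU executes userspace; no RCU read-side critical sections allowed ...   */
        rcu_user_exit();        /* ...until the next kernel entry (syscall, irq, trap)   */
}
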
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 23bddac4bad8..335720a1fc33 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1885,6 +1885,14 @@ static inline void rcu_copy_process(struct task_struct *p)
 
 #endif
 
+static inline void rcu_switch(struct task_struct *prev,
+                              struct task_struct *next)
+{
+#ifdef CONFIG_RCU_USER_QS
+        rcu_user_hooks_switch(prev, next);
+#endif
+}
+
 static inline void tsk_restore_flags(struct task_struct *task,
                                      unsigned long orig_flags, unsigned long flags)
 {
diff --git a/init/Kconfig b/init/Kconfig
index af6c7f8ba019..c26b8a1d2b57 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -441,6 +441,24 @@ config PREEMPT_RCU
           This option enables preemptible-RCU code that is common between
           the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
 
+config RCU_USER_QS
+        bool "Consider userspace as in RCU extended quiescent state"
+        depends on HAVE_RCU_USER_QS && SMP
+        help
+          This option sets hooks on kernel / userspace boundaries and
+          puts RCU in extended quiescent state when the CPU runs in
+          userspace. It means that when a CPU runs in userspace, it is
+          excluded from the global RCU state machine and thus doesn't
+          need to keep the timer tick on for RCU.
+
+config RCU_USER_QS_FORCE
+        bool "Force userspace extended QS by default"
+        depends on RCU_USER_QS
+        help
+          Set the hooks in user/kernel boundaries by default in order to
+          test this feature that treats userspace as an extended quiescent
+          state until we have a real user like a full adaptive nohz option.
+
 config RCU_FANOUT
         int "Tree-based hierarchical RCU fanout value"
         range 2 64 if 64BIT
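
An illustrative .config fragment (not part of the patch) showing how the new options combine on x86-64 for testing, given that RCU_USER_QS depends on HAVE_RCU_USER_QS && SMP:

CONFIG_SMP=y
CONFIG_HAVE_RCU_USER_QS=y
CONFIG_RCU_USER_QS=y
CONFIG_RCU_USER_QS_FORCE=y
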
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7387e46009d9..4fb2376ddf06 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -206,6 +206,9 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
         .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
         .dynticks = ATOMIC_INIT(1),
+#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
+        .ignore_user_qs = true,
+#endif
 };
 
 static int blimit = 10;         /* Maximum callbacks per rcu_do_batch. */
@@ -322,16 +325,17 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 }
 
 /*
- * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
+ * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
  *
  * If the new value of the ->dynticks_nesting counter now is zero,
  * we really have entered idle, and must do the appropriate accounting.
  * The caller must have disabled interrupts.
  */
-static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
+static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
+                                 bool user)
 {
         trace_rcu_dyntick("Start", oldval, 0);
-        if (!is_idle_task(current)) {
+        if (!user && !is_idle_task(current)) {
                 struct task_struct *idle = idle_task(smp_processor_id());
 
                 trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
@@ -348,7 +352,7 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
         WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 
         /*
-         * The idle task is not permitted to enter the idle loop while
+         * It is illegal to enter an extended quiescent state while
          * in an RCU read-side critical section.
          */
         rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
@@ -359,6 +363,25 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
                            "Illegal idle entry in RCU-sched read-side critical section.");
 }
 
+/*
+ * Enter an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_enter(bool user)
+{
+        long long oldval;
+        struct rcu_dynticks *rdtp;
+
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        oldval = rdtp->dynticks_nesting;
+        WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
+        if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+                rdtp->dynticks_nesting = 0;
+        else
+                rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
+        rcu_eqs_enter_common(rdtp, oldval, user);
+}
+
 /**
  * rcu_idle_enter - inform RCU that current CPU is entering idle
  *
@@ -374,21 +397,70 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
 void rcu_idle_enter(void)
 {
         unsigned long flags;
-        long long oldval;
+
+        local_irq_save(flags);
+        rcu_eqs_enter(false);
+        local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
+
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_enter - inform RCU that we are resuming userspace.
+ *
+ * Enter RCU idle mode right before resuming userspace. No use of RCU
+ * is permitted between this call and rcu_user_exit(). This way the
+ * CPU doesn't need to maintain the tick for RCU maintenance purposes
+ * when the CPU runs in userspace.
+ */
+void rcu_user_enter(void)
+{
+        unsigned long flags;
         struct rcu_dynticks *rdtp;
 
+        /*
+         * Some contexts may involve an exception occurring in an irq,
+         * leading to that nesting:
+         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+         * helpers are enough to protect RCU uses inside the exception. So
+         * just return immediately if we detect we are in an IRQ.
+         */
+        if (in_interrupt())
+                return;
+
+        WARN_ON_ONCE(!current->mm);
+
         local_irq_save(flags);
         rdtp = &__get_cpu_var(rcu_dynticks);
-        oldval = rdtp->dynticks_nesting;
-        WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
-        if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
-                rdtp->dynticks_nesting = 0;
-        else
-                rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
-        rcu_idle_enter_common(rdtp, oldval);
+        if (!rdtp->ignore_user_qs && !rdtp->in_user) {
+                rdtp->in_user = true;
+                rcu_eqs_enter(true);
+        }
         local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
+
+/**
+ * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace
+ * after the current irq returns.
+ *
+ * This is similar to rcu_user_enter() but in the context of a non-nesting
+ * irq. After this call, RCU enters into idle mode when the interrupt
+ * returns.
+ */
+void rcu_user_enter_after_irq(void)
+{
+        unsigned long flags;
+        struct rcu_dynticks *rdtp;
+
+        local_irq_save(flags);
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        /* Ensure this irq is interrupting a non-idle RCU state.  */
+        WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK));
+        rdtp->dynticks_nesting = 1;
+        local_irq_restore(flags);
+}
+#endif /* CONFIG_RCU_USER_QS */
 
 /**
  * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
@@ -420,18 +492,19 @@ void rcu_irq_exit(void)
         if (rdtp->dynticks_nesting)
                 trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
         else
-                rcu_idle_enter_common(rdtp, oldval);
+                rcu_eqs_enter_common(rdtp, oldval, true);
         local_irq_restore(flags);
 }
 
 /*
- * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
+ * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
  *
  * If the new value of the ->dynticks_nesting counter was previously zero,
  * we really have exited idle, and must do the appropriate accounting.
  * The caller must have disabled interrupts.
  */
-static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
+static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
+                                int user)
 {
         smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
         atomic_inc(&rdtp->dynticks);
@@ -440,7 +513,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
         WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
         rcu_cleanup_after_idle(smp_processor_id());
         trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
-        if (!is_idle_task(current)) {
+        if (!user && !is_idle_task(current)) {
                 struct task_struct *idle = idle_task(smp_processor_id());
 
                 trace_rcu_dyntick("Error on exit: not idle task",
@@ -452,6 +525,25 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
         }
 }
 
+/*
+ * Exit an RCU extended quiescent state, which can be either the
+ * idle loop or adaptive-tickless usermode execution.
+ */
+static void rcu_eqs_exit(bool user)
+{
+        struct rcu_dynticks *rdtp;
+        long long oldval;
+
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        oldval = rdtp->dynticks_nesting;
+        WARN_ON_ONCE(oldval < 0);
+        if (oldval & DYNTICK_TASK_NEST_MASK)
+                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+        else
+                rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
+        rcu_eqs_exit_common(rdtp, oldval, user);
+}
+
 /**
  * rcu_idle_exit - inform RCU that current CPU is leaving idle
  *
@@ -466,21 +558,67 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
 void rcu_idle_exit(void)
 {
         unsigned long flags;
+
+        local_irq_save(flags);
+        rcu_eqs_exit(false);
+        local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
+#ifdef CONFIG_RCU_USER_QS
+/**
+ * rcu_user_exit - inform RCU that we are exiting userspace.
+ *
+ * Exit RCU idle mode while entering the kernel because it can
+ * run a RCU read side critical section anytime.
+ */
+void rcu_user_exit(void)
+{
+        unsigned long flags;
         struct rcu_dynticks *rdtp;
-        long long oldval;
+
+        /*
+         * Some contexts may involve an exception occurring in an irq,
+         * leading to that nesting:
+         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
+         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
+         * helpers are enough to protect RCU uses inside the exception. So
+         * just return immediately if we detect we are in an IRQ.
+         */
+        if (in_interrupt())
+                return;
 
         local_irq_save(flags);
         rdtp = &__get_cpu_var(rcu_dynticks);
-        oldval = rdtp->dynticks_nesting;
-        WARN_ON_ONCE(oldval < 0);
-        if (oldval & DYNTICK_TASK_NEST_MASK)
-                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-        else
-                rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-        rcu_idle_exit_common(rdtp, oldval);
+        if (rdtp->in_user) {
+                rdtp->in_user = false;
+                rcu_eqs_exit(true);
+        }
         local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
+
+/**
+ * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace
+ * idle mode after the current non-nesting irq returns.
+ *
+ * This is similar to rcu_user_exit() but in the context of an irq.
+ * This is called when the irq has interrupted a userspace RCU idle mode
+ * context. When the current non-nesting interrupt returns after this call,
+ * the CPU won't restore the RCU idle mode.
+ */
+void rcu_user_exit_after_irq(void)
+{
+        unsigned long flags;
+        struct rcu_dynticks *rdtp;
+
+        local_irq_save(flags);
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        /* Ensure we are interrupting an RCU idle mode. */
+        WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK);
+        rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE;
+        local_irq_restore(flags);
+}
+#endif /* CONFIG_RCU_USER_QS */
 
 /**
  * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -515,7 +653,7 @@ void rcu_irq_enter(void)
         if (oldval)
                 trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
         else
-                rcu_idle_exit_common(rdtp, oldval);
+                rcu_eqs_exit_common(rdtp, oldval, true);
         local_irq_restore(flags);
 }
 
@@ -579,6 +717,21 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
+#ifdef CONFIG_RCU_USER_QS
+void rcu_user_hooks_switch(struct task_struct *prev,
+                           struct task_struct *next)
+{
+        struct rcu_dynticks *rdtp;
+
+        /* Interrupts are disabled in context switch */
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        if (!rdtp->ignore_user_qs) {
+                clear_tsk_thread_flag(prev, TIF_NOHZ);
+                set_tsk_thread_flag(next, TIF_NOHZ);
+        }
+}
+#endif /* #ifdef CONFIG_RCU_USER_QS */
+
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
@@ -2473,6 +2626,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
         rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
         WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
         WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
+#ifdef CONFIG_RCU_USER_QS
+        WARN_ON_ONCE(rdp->dynticks->in_user);
+#endif
         rdp->cpu = cpu;
         rdp->rsp = rsp;
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7576fd4d8ce6..5faf05d68326 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -102,6 +102,10 @@ struct rcu_dynticks {
                                             /*  idle-period nonlazy_posted snapshot. */
         int tick_nohz_enabled_snap;         /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+#ifdef CONFIG_RCU_USER_QS
+        bool ignore_user_qs;                /* Treat userspace as extended QS or not */
+        bool in_user;                       /* Is the CPU in userland from RCU POV? */
+#endif
 };
 
 /* RCU's kthread states for tracing. */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 9c71c1b18e03..f92115488187 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1757,6 +1757,26 @@ static void rcu_prepare_for_idle(int cpu)
         if (!tne)
                 return;
 
+        /* Adaptive-tick mode, where usermode execution is idle to RCU. */
+        if (!is_idle_task(current)) {
+                rdtp->dyntick_holdoff = jiffies - 1;
+                if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+                        trace_rcu_prep_idle("User dyntick with callbacks");
+                        rdtp->idle_gp_timer_expires =
+                                round_up(jiffies + RCU_IDLE_GP_DELAY,
+                                         RCU_IDLE_GP_DELAY);
+                } else if (rcu_cpu_has_callbacks(cpu)) {
+                        rdtp->idle_gp_timer_expires =
+                                round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
+                        trace_rcu_prep_idle("User dyntick with lazy callbacks");
+                } else {
+                        return;
+                }
+                tp = &rdtp->idle_gp_timer;
+                mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+                return;
+        }
+
         /*
          * If this is an idle re-entry, for example, due to use of
          * RCU_NONIDLE() or the new idle-loop tracing API within the idle
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a48cdbc8631..3c4dec0594d6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2081,6 +2081,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 #endif
 
         /* Here we just switch the register state and the stack. */
+        rcu_switch(prev, next);
         switch_to(prev, next, prev);
 
         barrier();
@@ -3468,6 +3469,21 @@ asmlinkage void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
+#ifdef CONFIG_RCU_USER_QS
+asmlinkage void __sched schedule_user(void)
+{
+        /*
+         * If we come here after a random call to set_need_resched(),
+         * or we have been woken up remotely but the IPI has not yet arrived,
+         * we haven't yet exited the RCU idle mode. Do it here manually until
+         * we find a better solution.
+         */
+        rcu_user_exit();
+        schedule();
+        rcu_user_enter();
+}
+#endif
+
 /**
  * schedule_preempt_disabled - called with preemption disabled
  *
@@ -3569,6 +3585,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
         /* Catch callers which need to be fixed */
         BUG_ON(ti->preempt_count || !irqs_disabled());
 
+        rcu_user_exit();
         do {
                 add_preempt_count(PREEMPT_ACTIVE);
                 local_irq_enable();