Diffstat (limited to 'kernel/context_tracking.c')
-rw-r--r--   kernel/context_tracking.c   114
1 file changed, 88 insertions, 26 deletions
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e0e07fd55508..65349f07b878 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,29 +1,41 @@
+/*
+ * Context tracking: Probe on high level context boundaries such as kernel
+ * and userspace. This includes syscalls and exceptions entry/exit.
+ *
+ * This is used by RCU to remove its dependency on the timer tick while a CPU
+ * runs in userspace.
+ *
+ * Started by Frederic Weisbecker:
+ *
+ * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
+ * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
+ *
+ */
+
 #include <linux/context_tracking.h>
+#include <linux/kvm_host.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/export.h>
 
-struct context_tracking {
-	/*
-	 * When active is false, hooks are not set to
-	 * minimize overhead: TIF flags are cleared
-	 * and calls to user_enter/exit are ignored. This
-	 * may be further optimized using static keys.
-	 */
-	bool active;
-	enum {
-		IN_KERNEL = 0,
-		IN_USER,
-	} state;
-};
-
-static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
+DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
 	.active = true,
 #endif
 };
 
+/**
+ * user_enter - Inform the context tracking that the CPU is going to
+ *              enter userspace mode.
+ *
+ * This function must be called right before we switch from the kernel
+ * to userspace, when it's guaranteed the remaining kernel instructions
+ * to execute won't use any RCU read side critical section because this
+ * function sets RCU in extended quiescent state.
+ */
 void user_enter(void)
 {
 	unsigned long flags;
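The kernel-doc added in the hunk above pins down the calling convention for user_enter(): it must be the very last thing before the low level return-to-user code runs, once no further RCU read-side critical sections are possible. As a rough illustration of that convention (not code from this patch), an architecture's slow return path would use it roughly as below; prepare_return_to_user() and do_exit_to_user_work() are hypothetical names used only for this sketch.

/*
 * Illustrative sketch only: how an arch slow path is expected to call
 * user_enter(), per the kernel-doc above. The helper names are
 * hypothetical and do not come from this patch.
 */
static void prepare_return_to_user(void)
{
	/*
	 * Anything that may still use RCU read-side critical sections
	 * (signal delivery, rescheduling, ...) must run first.
	 */
	do_exit_to_user_work();

	/*
	 * From here on only low level register-restore code runs, so RCU
	 * can enter its extended quiescent state and user time accounting
	 * can start.
	 */
	user_enter();
}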
@@ -39,40 +51,90 @@ void user_enter(void)
 	if (in_interrupt())
 		return;
 
+	/* Kernel threads aren't supposed to go to userspace */
 	WARN_ON_ONCE(!current->mm);
 
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.active) &&
 	    __this_cpu_read(context_tracking.state) != IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_USER);
+		/*
+		 * At this stage, only low level arch entry code remains and
+		 * then we'll run in userspace. We can assume there won't be
+		 * any RCU read-side critical section until the next call to
+		 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
+		 * on the tick.
+		 */
+		vtime_user_enter(current);
 		rcu_user_enter();
+		__this_cpu_write(context_tracking.state, IN_USER);
 	}
 	local_irq_restore(flags);
 }
 
+
+/**
+ * user_exit - Inform the context tracking that the CPU is
+ *             exiting userspace mode and entering the kernel.
+ *
+ * This function must be called after we entered the kernel from userspace
+ * before any use of RCU read side critical section. This potentially include
+ * any high level kernel code like syscalls, exceptions, signal handling, etc...
+ *
+ * This call supports re-entrancy. This way it can be called from any exception
+ * handler without needing to know if we came from userspace or not.
+ */
 void user_exit(void)
 {
 	unsigned long flags;
 
-	/*
-	 * Some contexts may involve an exception occuring in an irq,
-	 * leading to that nesting:
-	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
-	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
-	 * helpers are enough to protect RCU uses inside the exception. So
-	 * just return immediately if we detect we are in an IRQ.
-	 */
 	if (in_interrupt())
 		return;
 
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.state) == IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_KERNEL);
+		/*
+		 * We are going to run code that may use RCU. Inform
+		 * RCU core about that (ie: we may need the tick again).
+		 */
 		rcu_user_exit();
+		vtime_user_exit(current);
+		__this_cpu_write(context_tracking.state, IN_KERNEL);
 	}
 	local_irq_restore(flags);
 }
 
+void guest_enter(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_enter(current);
+	else
+		__guest_enter();
+}
+EXPORT_SYMBOL_GPL(guest_enter);
+
+void guest_exit(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_exit(current);
+	else
+		__guest_exit();
+}
+EXPORT_SYMBOL_GPL(guest_exit);
+
+
+/**
+ * context_tracking_task_switch - context switch the syscall callbacks
+ * @prev: the task that is being switched out
+ * @next: the task that is being switched in
+ *
+ * The context tracking uses the syscall slow path to implement its user-kernel
+ * boundaries probes on syscalls. This way it doesn't impact the syscall fast
+ * path on CPUs that don't do context tracking.
+ *
+ * But we need to clear the flag on the previous task because it may later
+ * migrate to some CPU that doesn't do the context tracking. As such the TIF
+ * flag may not be desired there.
+ */
 void context_tracking_task_switch(struct task_struct *prev,
 				  struct task_struct *next)
 {
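The new guest_enter()/guest_exit() wrappers added above pick between full vtime accounting and the lighter __guest_enter()/__guest_exit() path depending on vtime_accounting_enabled(). A hypervisor is expected to bracket actual guest execution with this pair; the vcpu loop below is a schematic sketch of that usage, where run_vcpu_hw() is a hypothetical stand-in for the hardware virtualization entry, not KVM's actual code.

/*
 * Schematic sketch of guest_enter()/guest_exit() usage by a hypervisor.
 * run_vcpu_hw() is a hypothetical placeholder for the hardware entry.
 */
static int vcpu_run_once(struct kvm_vcpu *vcpu)
{
	int ret;

	/* Start accounting the time spent running guest code. */
	guest_enter();

	ret = run_vcpu_hw(vcpu);

	/* Back in host kernel context: stop guest time accounting. */
	guest_exit();

	return ret;
}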