aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-03-16 10:49:30 -0400
committerIngo Molnar <mingo@kernel.org>2015-03-16 10:49:30 -0400
commit1524b745406a85ba201cb25df72110c1ccac0f72 (patch)
tree66e4ac8b03dab66849ef815d836a25f38da61a91
parentfba9e07208c0f9d92d9f73761c99c8612039da44 (diff)
parent126a6a542446f1a49b9f3c69237c87df3eb4e6e1 (diff)
Merge branch 'nohz/guest' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz
Pull full dynticks support for virt guests from Frederic Weisbecker: "Some measurements showed that disabling the tick on the host while the guest is running can be interesting on some workloads. Indeed the host tick is irrelevant while a vcpu runs, it consumes CPU time and cache footprint for no good reason. Full dynticks already works in every context, but RCU prevents it from being effective outside userspace, because the CPU needs to take part in RCU grace period completion as long as RCU may be used on it, which is the case in kernel context. However guest is similar to userspace and idle in that we know RCU is unused in such contexts. Therefore a CPU in guest/userspace/idle context can let other CPUs report its own RCU quiescent state on its behalf and shut down the tick safely, provided it isn't needed for other reasons than RCU. This is called RCU extended quiescent state. This was already implemented for idle and userspace. This patchset now brings it for guest contexts through the following steps: - Generalize the context tracking APIs to also track guest state - Rename/sanitize a few CPP symbols accordingly - Report guest entry/exit to RCU and define this context area as an RCU extended quiescent state." Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h4
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--include/linux/context_tracking.h15
-rw-r--r--include/linux/context_tracking_state.h9
-rw-r--r--include/linux/kvm_host.h3
-rw-r--r--kernel/context_tracking.c59
-rw-r--r--kernel/sched/core.c2
7 files changed, 61 insertions, 33 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 942c7b1678e3..993090422690 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -106,10 +106,6 @@ struct kvmppc_vcpu_book3s {
106 spinlock_t mmu_lock; 106 spinlock_t mmu_lock;
107}; 107};
108 108
109#define CONTEXT_HOST 0
110#define CONTEXT_GUEST 1
111#define CONTEXT_GUEST_END 2
112
113#define VSID_REAL 0x07ffffffffc00000ULL 109#define VSID_REAL 0x07ffffffffc00000ULL
114#define VSID_BAT 0x07ffffffffb00000ULL 110#define VSID_BAT 0x07ffffffffb00000ULL
115#define VSID_64K 0x0800000000000000ULL 111#define VSID_64K 0x0800000000000000ULL
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9d2073e2ecc9..756f74eed35d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -123,7 +123,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
123 * but we need to notify RCU. 123 * but we need to notify RCU.
124 */ 124 */
125 rcu_nmi_enter(); 125 rcu_nmi_enter();
126 prev_state = IN_KERNEL; /* the value is irrelevant. */ 126 prev_state = CONTEXT_KERNEL; /* the value is irrelevant. */
127 } 127 }
128 128
129 /* 129 /*
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 37b81bd51ec0..2821838256b4 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -10,6 +10,8 @@
10#ifdef CONFIG_CONTEXT_TRACKING 10#ifdef CONFIG_CONTEXT_TRACKING
11extern void context_tracking_cpu_set(int cpu); 11extern void context_tracking_cpu_set(int cpu);
12 12
13extern void context_tracking_enter(enum ctx_state state);
14extern void context_tracking_exit(enum ctx_state state);
13extern void context_tracking_user_enter(void); 15extern void context_tracking_user_enter(void);
14extern void context_tracking_user_exit(void); 16extern void context_tracking_user_exit(void);
15extern void __context_tracking_task_switch(struct task_struct *prev, 17extern void __context_tracking_task_switch(struct task_struct *prev,
@@ -35,7 +37,8 @@ static inline enum ctx_state exception_enter(void)
35 return 0; 37 return 0;
36 38
37 prev_ctx = this_cpu_read(context_tracking.state); 39 prev_ctx = this_cpu_read(context_tracking.state);
38 context_tracking_user_exit(); 40 if (prev_ctx != CONTEXT_KERNEL)
41 context_tracking_exit(prev_ctx);
39 42
40 return prev_ctx; 43 return prev_ctx;
41} 44}
@@ -43,8 +46,8 @@ static inline enum ctx_state exception_enter(void)
43static inline void exception_exit(enum ctx_state prev_ctx) 46static inline void exception_exit(enum ctx_state prev_ctx)
44{ 47{
45 if (context_tracking_is_enabled()) { 48 if (context_tracking_is_enabled()) {
46 if (prev_ctx == IN_USER) 49 if (prev_ctx != CONTEXT_KERNEL)
47 context_tracking_user_enter(); 50 context_tracking_enter(prev_ctx);
48 } 51 }
49} 52}
50 53
@@ -78,10 +81,16 @@ static inline void guest_enter(void)
78 vtime_guest_enter(current); 81 vtime_guest_enter(current);
79 else 82 else
80 current->flags |= PF_VCPU; 83 current->flags |= PF_VCPU;
84
85 if (context_tracking_is_enabled())
86 context_tracking_enter(CONTEXT_GUEST);
81} 87}
82 88
83static inline void guest_exit(void) 89static inline void guest_exit(void)
84{ 90{
91 if (context_tracking_is_enabled())
92 context_tracking_exit(CONTEXT_GUEST);
93
85 if (vtime_accounting_enabled()) 94 if (vtime_accounting_enabled())
86 vtime_guest_exit(current); 95 vtime_guest_exit(current);
87 else 96 else
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 97a81225d037..6b7b96a32b75 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -13,8 +13,9 @@ struct context_tracking {
13 */ 13 */
14 bool active; 14 bool active;
15 enum ctx_state { 15 enum ctx_state {
16 IN_KERNEL = 0, 16 CONTEXT_KERNEL = 0,
17 IN_USER, 17 CONTEXT_USER,
18 CONTEXT_GUEST,
18 } state; 19 } state;
19}; 20};
20 21
@@ -34,11 +35,13 @@ static inline bool context_tracking_cpu_is_enabled(void)
34 35
35static inline bool context_tracking_in_user(void) 36static inline bool context_tracking_in_user(void)
36{ 37{
37 return __this_cpu_read(context_tracking.state) == IN_USER; 38 return __this_cpu_read(context_tracking.state) == CONTEXT_USER;
38} 39}
39#else 40#else
40static inline bool context_tracking_in_user(void) { return false; } 41static inline bool context_tracking_in_user(void) { return false; }
41static inline bool context_tracking_active(void) { return false; } 42static inline bool context_tracking_active(void) { return false; }
43static inline bool context_tracking_is_enabled(void) { return false; }
44static inline bool context_tracking_cpu_is_enabled(void) { return false; }
42#endif /* CONFIG_CONTEXT_TRACKING */ 45#endif /* CONFIG_CONTEXT_TRACKING */
43 46
44#endif 47#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d12b2104d19b..cc8c61c5459c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -766,7 +766,8 @@ static inline void kvm_guest_enter(void)
766 * one time slice). Lets treat guest mode as quiescent state, just like 766 * one time slice). Lets treat guest mode as quiescent state, just like
767 * we do with user-mode execution. 767 * we do with user-mode execution.
768 */ 768 */
769 rcu_virt_note_context_switch(smp_processor_id()); 769 if (!context_tracking_cpu_is_enabled())
770 rcu_virt_note_context_switch(smp_processor_id());
770} 771}
771 772
772static inline void kvm_guest_exit(void) 773static inline void kvm_guest_exit(void)
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 937ecdfdf258..72d59a1a6eb6 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -39,15 +39,15 @@ void context_tracking_cpu_set(int cpu)
39} 39}
40 40
41/** 41/**
42 * context_tracking_user_enter - Inform the context tracking that the CPU is going to 42 * context_tracking_enter - Inform the context tracking that the CPU is going
43 * enter userspace mode. 43 * enter user or guest space mode.
44 * 44 *
45 * This function must be called right before we switch from the kernel 45 * This function must be called right before we switch from the kernel
46 * to userspace, when it's guaranteed the remaining kernel instructions 46 * to user or guest space, when it's guaranteed the remaining kernel
47 * to execute won't use any RCU read side critical section because this 47 * instructions to execute won't use any RCU read side critical section
48 * function sets RCU in extended quiescent state. 48 * because this function sets RCU in extended quiescent state.
49 */ 49 */
50void context_tracking_user_enter(void) 50void context_tracking_enter(enum ctx_state state)
51{ 51{
52 unsigned long flags; 52 unsigned long flags;
53 53
@@ -75,9 +75,8 @@ void context_tracking_user_enter(void)
75 WARN_ON_ONCE(!current->mm); 75 WARN_ON_ONCE(!current->mm);
76 76
77 local_irq_save(flags); 77 local_irq_save(flags);
78 if ( __this_cpu_read(context_tracking.state) != IN_USER) { 78 if ( __this_cpu_read(context_tracking.state) != state) {
79 if (__this_cpu_read(context_tracking.active)) { 79 if (__this_cpu_read(context_tracking.active)) {
80 trace_user_enter(0);
81 /* 80 /*
82 * At this stage, only low level arch entry code remains and 81 * At this stage, only low level arch entry code remains and
83 * then we'll run in userspace. We can assume there won't be 82 * then we'll run in userspace. We can assume there won't be
@@ -85,7 +84,10 @@ void context_tracking_user_enter(void)
85 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency 84 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
86 * on the tick. 85 * on the tick.
87 */ 86 */
88 vtime_user_enter(current); 87 if (state == CONTEXT_USER) {
88 trace_user_enter(0);
89 vtime_user_enter(current);
90 }
89 rcu_user_enter(); 91 rcu_user_enter();
90 } 92 }
91 /* 93 /*
@@ -101,24 +103,32 @@ void context_tracking_user_enter(void)
101 * OTOH we can spare the calls to vtime and RCU when context_tracking.active 103 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
102 * is false because we know that CPU is not tickless. 104 * is false because we know that CPU is not tickless.
103 */ 105 */
104 __this_cpu_write(context_tracking.state, IN_USER); 106 __this_cpu_write(context_tracking.state, state);
105 } 107 }
106 local_irq_restore(flags); 108 local_irq_restore(flags);
107} 109}
110NOKPROBE_SYMBOL(context_tracking_enter);
111EXPORT_SYMBOL_GPL(context_tracking_enter);
112
113void context_tracking_user_enter(void)
114{
115 context_tracking_enter(CONTEXT_USER);
116}
108NOKPROBE_SYMBOL(context_tracking_user_enter); 117NOKPROBE_SYMBOL(context_tracking_user_enter);
109 118
110/** 119/**
111 * context_tracking_user_exit - Inform the context tracking that the CPU is 120 * context_tracking_exit - Inform the context tracking that the CPU is
112 * exiting userspace mode and entering the kernel. 121 * exiting user or guest mode and entering the kernel.
113 * 122 *
114 * This function must be called after we entered the kernel from userspace 123 * This function must be called after we entered the kernel from user or
115 * before any use of RCU read side critical section. This potentially include 124 * guest space before any use of RCU read side critical section. This
116 * any high level kernel code like syscalls, exceptions, signal handling, etc... 125 * potentially include any high level kernel code like syscalls, exceptions,
126 * signal handling, etc...
117 * 127 *
118 * This call supports re-entrancy. This way it can be called from any exception 128 * This call supports re-entrancy. This way it can be called from any exception
119 * handler without needing to know if we came from userspace or not. 129 * handler without needing to know if we came from userspace or not.
120 */ 130 */
121void context_tracking_user_exit(void) 131void context_tracking_exit(enum ctx_state state)
122{ 132{
123 unsigned long flags; 133 unsigned long flags;
124 134
@@ -129,20 +139,29 @@ void context_tracking_user_exit(void)
129 return; 139 return;
130 140
131 local_irq_save(flags); 141 local_irq_save(flags);
132 if (__this_cpu_read(context_tracking.state) == IN_USER) { 142 if (__this_cpu_read(context_tracking.state) == state) {
133 if (__this_cpu_read(context_tracking.active)) { 143 if (__this_cpu_read(context_tracking.active)) {
134 /* 144 /*
135 * We are going to run code that may use RCU. Inform 145 * We are going to run code that may use RCU. Inform
136 * RCU core about that (ie: we may need the tick again). 146 * RCU core about that (ie: we may need the tick again).
137 */ 147 */
138 rcu_user_exit(); 148 rcu_user_exit();
139 vtime_user_exit(current); 149 if (state == CONTEXT_USER) {
140 trace_user_exit(0); 150 vtime_user_exit(current);
151 trace_user_exit(0);
152 }
141 } 153 }
142 __this_cpu_write(context_tracking.state, IN_KERNEL); 154 __this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
143 } 155 }
144 local_irq_restore(flags); 156 local_irq_restore(flags);
145} 157}
158NOKPROBE_SYMBOL(context_tracking_exit);
159EXPORT_SYMBOL_GPL(context_tracking_exit);
160
161void context_tracking_user_exit(void)
162{
163 context_tracking_exit(CONTEXT_USER);
164}
146NOKPROBE_SYMBOL(context_tracking_user_exit); 165NOKPROBE_SYMBOL(context_tracking_user_exit);
147 166
148/** 167/**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f0f831e8a345..06b9a00871e0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2818,7 +2818,7 @@ asmlinkage __visible void __sched schedule_user(void)
2818 * we find a better solution. 2818 * we find a better solution.
2819 * 2819 *
2820 * NB: There are buggy callers of this function. Ideally we 2820 * NB: There are buggy callers of this function. Ideally we
2821 * should warn if prev_state != IN_USER, but that will trigger 2821 * should warn if prev_state != CONTEXT_USER, but that will trigger
2822 * too frequently to make sense yet. 2822 * too frequently to make sense yet.
2823 */ 2823 */
2824 enum ctx_state prev_state = exception_enter(); 2824 enum ctx_state prev_state = exception_enter();