author		Linus Torvalds <torvalds@linux-foundation.org>	2014-08-04 20:15:45 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-04 20:15:45 -0400
commit		ce4747963252a30613ebf1c1df3d83b9526a342e
tree		6c61d1b1045a72965006324ae3805280be296e53	/arch/x86/mm
parent		76f09aa464a1913efd596dd0edbf88f932fde08c
parent		a5102476a24bce364b74f1110005542a2c964103
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm changes from Ingo Molnar:
 "The main change in this cycle is the rework of the TLB range flushing
  code, to simplify, fix and consolidate the code. By Dave Hansen"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Set TLB flush tunable to sane value (33)
  x86/mm: New tunable for single vs full TLB flush
  x86/mm: Add tracepoints for TLB flushes
  x86/mm: Unify remote INVLPG code
  x86/mm: Fix missed global TLB flush stat
  x86/mm: Rip out complicated, out-of-date, buggy TLB flushing
  x86/mm: Clean up the TLB flushing code
  x86/smep: Be more informative when signalling an SMEP fault
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--	arch/x86/mm/fault.c	  6
-rw-r--r--	arch/x86/mm/init.c	  7
-rw-r--r--	arch/x86/mm/tlb.c	103
3 files changed, 62 insertions(+), 54 deletions(-)
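The heart of the series replaces the old per-CPU-TLB-size heuristic (tlb_flushall_shift) with a single page-count ceiling: a range covering more pages than the ceiling gets one full TLB flush, anything smaller is flushed one page at a time with INVLPG. A minimal standalone sketch of that decision follows; it is not kernel code, and invlpg_one() and flush_all() are hypothetical stand-ins for __flush_tlb_single() and local_flush_tlb().

/*
 * Sketch only: mirrors the flush policy introduced in flush_tlb_mm_range().
 */
#define SKETCH_PAGE_SHIFT	12
#define SKETCH_PAGE_SIZE	(1UL << SKETCH_PAGE_SHIFT)

/* Same default as the new tunable: up to 33 pages flushed individually. */
static unsigned long single_page_flush_ceiling = 33;

static void invlpg_one(unsigned long addr) { (void)addr; }	/* stand-in for __flush_tlb_single() */
static void flush_all(void) { }					/* stand-in for local_flush_tlb() */

static void flush_user_range(unsigned long start, unsigned long end)
{
	unsigned long nr_pages = (end - start) >> SKETCH_PAGE_SHIFT;
	unsigned long addr;

	if (nr_pages > single_page_flush_ceiling) {
		/* Too many INVLPGs would cost more than refilling the TLB. */
		flush_all();
		return;
	}

	for (addr = start; addr < end; addr += SKETCH_PAGE_SIZE)
		invlpg_one(addr);
}

At the default ceiling of 33 pages and roughly 100 ns per single-page flush, the INVLPG path costs at most a few microseconds per call, which is the trade-off the new comment in tlb.c below spells out.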
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 36642793e315..1dbade870f90 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -577,6 +577,8 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 
 static const char nx_warning[] = KERN_CRIT
 "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
+static const char smep_warning[] = KERN_CRIT
+"unable to execute userspace code (SMEP?) (uid: %d)\n";
 
 static void
 show_fault_oops(struct pt_regs *regs, unsigned long error_code,
@@ -597,6 +599,10 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 
 		if (pte && pte_present(*pte) && !pte_exec(*pte))
 			printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
+		if (pte && pte_present(*pte) && pte_exec(*pte) &&
+				(pgd_flags(*pgd) & _PAGE_USER) &&
+				(read_cr4() & X86_CR4_SMEP))
+			printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
 	}
 
 	printk(KERN_ALERT "BUG: unable to handle kernel ");
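The new SMEP message only fires when the NX warning cannot apply: the faulting PTE is present and executable, but the top-level entry is user-accessible and CR4.SMEP is set. A condensed sketch of that decision, using hypothetical boolean inputs in place of the real pte/pgd/CR4 accessors:

/*
 * Sketch only: the warning choice made in show_fault_oops() above.
 */
static const char *instr_fault_warning(int pte_present, int pte_exec,
				       int pgd_user, int cr4_smep)
{
	if (pte_present && !pte_exec)
		return "kernel tried to execute NX-protected page";
	if (pte_present && pte_exec && pgd_user && cr4_smep)
		return "unable to execute userspace code (SMEP?)";
	return NULL;	/* some other kind of instruction-fetch fault */
}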
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f97130618113..66dba36f2343 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,6 +18,13 @@
 #include <asm/dma.h>		/* for MAX_DMA_PFN */
 #include <asm/microcode.h>
 
+/*
+ * We need to define the tracepoints somewhere, and tlb.c
+ * is only compiled when SMP=y.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/tlb.h>
+
 #include "mm_internal.h"
 
 static unsigned long __initdata pgt_buf_start;
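This hunk follows the standard kernel tracepoint convention: exactly one translation unit defines CREATE_TRACE_POINTS before including the trace header, so that file emits the tracepoint bodies; every other file includes the same header normally and only gets the declarations. init.c is chosen here because, per the comment, tlb.c is not built on non-SMP configurations. A generic sketch of the pattern, with a hypothetical trace/events/foo.h rather than the real TLB header:

/*
 * Sketch of the CREATE_TRACE_POINTS convention (foo.h and trace_foo_flush()
 * are hypothetical).  In exactly one .c file, define the macro first so the
 * TRACE_EVENT() entries in the header expand into tracepoint definitions:
 */
#define CREATE_TRACE_POINTS
#include <trace/events/foo.h>

/*
 * Every other user includes the header without the define and simply calls
 * the generated hooks:
 *
 *	#include <trace/events/foo.h>
 *	...
 *	trace_foo_flush(FOO_FLUSH_ALL, nr_pages);
 */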
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index dd8dda167a24..1fe33987de02 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -49,6 +49,7 @@ void leave_mm(int cpu)
 	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
 		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
 		load_cr3(swapper_pg_dir);
+		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 	}
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -102,20 +103,24 @@ static void flush_tlb_func(void *info)
 
 	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
 		return;
+	if (!f->flush_end)
+		f->flush_end = f->flush_start + PAGE_SIZE;
 
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-		if (f->flush_end == TLB_FLUSH_ALL)
+		if (f->flush_end == TLB_FLUSH_ALL) {
 			local_flush_tlb();
-		else if (!f->flush_end)
-			__flush_tlb_single(f->flush_start);
-		else {
+			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
+		} else {
 			unsigned long addr;
+			unsigned long nr_pages =
+				f->flush_end - f->flush_start / PAGE_SIZE;
 			addr = f->flush_start;
 			while (addr < f->flush_end) {
 				__flush_tlb_single(addr);
 				addr += PAGE_SIZE;
 			}
+			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
 		}
 	} else
 		leave_mm(smp_processor_id());
@@ -153,46 +158,45 @@ void flush_tlb_current_task(void)
 
 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 	local_flush_tlb();
+	trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
+/*
+ * See Documentation/x86/tlb.txt for details. We choose 33
+ * because it is large enough to cover the vast majority (at
+ * least 95%) of allocations, and is small enough that we are
+ * confident it will not cause too much overhead. Each single
+ * flush is about 100 ns, so this caps the maximum overhead at
+ * _about_ 3,000 ns.
+ *
+ * This is in units of pages.
+ */
+unsigned long tlb_single_page_flush_ceiling = 33;
+
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				unsigned long end, unsigned long vmflag)
 {
 	unsigned long addr;
-	unsigned act_entries, tlb_entries = 0;
-	unsigned long nr_base_pages;
+	/* do a global flush by default */
+	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
 
 	preempt_disable();
 	if (current->active_mm != mm)
-		goto flush_all;
+		goto out;
 
 	if (!current->mm) {
 		leave_mm(smp_processor_id());
-		goto flush_all;
+		goto out;
 	}
 
-	if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1
-					|| vmflag & VM_HUGETLB) {
-		local_flush_tlb();
-		goto flush_all;
-	}
-
-	/* In modern CPU, last level tlb used for both data/ins */
-	if (vmflag & VM_EXEC)
-		tlb_entries = tlb_lli_4k[ENTRIES];
-	else
-		tlb_entries = tlb_lld_4k[ENTRIES];
+	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
 
-	/* Assume all of TLB entries was occupied by this task */
-	act_entries = tlb_entries >> tlb_flushall_shift;
-	act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
-	nr_base_pages = (end - start) >> PAGE_SHIFT;
-
-	/* tlb_flushall_shift is on balance point, details in commit log */
-	if (nr_base_pages > act_entries) {
+	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
+		base_pages_to_flush = TLB_FLUSH_ALL;
 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 		local_flush_tlb();
 	} else {
@@ -201,17 +205,15 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
 			__flush_tlb_single(addr);
 		}
-
-		if (cpumask_any_but(mm_cpumask(mm),
-				smp_processor_id()) < nr_cpu_ids)
-			flush_tlb_others(mm_cpumask(mm), mm, start, end);
-		preempt_enable();
-		return;
 	}
-
-flush_all:
+	trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
+out:
+	if (base_pages_to_flush == TLB_FLUSH_ALL) {
+		start = 0UL;
+		end = TLB_FLUSH_ALL;
+	}
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+		flush_tlb_others(mm_cpumask(mm), mm, start, end);
 	preempt_enable();
 }
 
@@ -260,32 +262,26 @@ static void do_kernel_range_flush(void *info)
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	unsigned act_entries;
-	struct flush_tlb_info info;
-
-	/* In modern CPU, last level tlb used for both data/ins */
-	act_entries = tlb_lld_4k[ENTRIES];
 
 	/* Balance as user space task's flush, a bit conservative */
-	if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 ||
-		(end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
-
+	if (end == TLB_FLUSH_ALL ||
+	    (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
 		on_each_cpu(do_flush_tlb_all, NULL, 1);
-	else {
+	} else {
+		struct flush_tlb_info info;
 		info.flush_start = start;
 		info.flush_end = end;
 		on_each_cpu(do_kernel_range_flush, &info, 1);
 	}
 }
 
-#ifdef CONFIG_DEBUG_TLBFLUSH
 static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
 				size_t count, loff_t *ppos)
 {
 	char buf[32];
 	unsigned int len;
 
-	len = sprintf(buf, "%hd\n", tlb_flushall_shift);
+	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
 	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 }
 
@@ -294,20 +290,20 @@ static ssize_t tlbflush_write_file(struct file *file,
 {
 	char buf[32];
 	ssize_t len;
-	s8 shift;
+	int ceiling;
 
 	len = min(count, sizeof(buf) - 1);
 	if (copy_from_user(buf, user_buf, len))
 		return -EFAULT;
 
 	buf[len] = '\0';
-	if (kstrtos8(buf, 0, &shift))
+	if (kstrtoint(buf, 0, &ceiling))
 		return -EINVAL;
 
-	if (shift < -1 || shift >= BITS_PER_LONG)
+	if (ceiling < 0)
 		return -EINVAL;
 
-	tlb_flushall_shift = shift;
+	tlb_single_page_flush_ceiling = ceiling;
 	return count;
 }
 
@@ -317,11 +313,10 @@ static const struct file_operations fops_tlbflush = {
 	.llseek = default_llseek,
 };
 
-static int __init create_tlb_flushall_shift(void)
+static int __init create_tlb_single_page_flush_ceiling(void)
 {
-	debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
+	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
 			arch_debugfs_dir, NULL, &fops_tlbflush);
 	return 0;
 }
-late_initcall(create_tlb_flushall_shift);
-#endif
+late_initcall(create_tlb_single_page_flush_ceiling);
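The renamed debugfs file exposes the ceiling to userspace; assuming debugfs is mounted at /sys/kernel/debug and arch_debugfs_dir is the usual x86 subdirectory, the path would be /sys/kernel/debug/x86/tlb_single_page_flush_ceiling. A small userspace sketch of reading and adjusting it; the path and the example policy (0 forces a full flush on every range flush) are assumptions drawn from the code above, not guarantees of this diff.

/*
 * Userspace sketch: inspect and tune the TLB flush ceiling via debugfs.
 * Requires root and a kernel with the file present.
 */
#include <stdio.h>

#define CEILING "/sys/kernel/debug/x86/tlb_single_page_flush_ceiling"

int main(void)
{
	char buf[32];
	FILE *f;

	f = fopen(CEILING, "r");
	if (!f || !fgets(buf, sizeof(buf), f)) {
		perror(CEILING);
		return 1;
	}
	fclose(f);
	printf("current ceiling: %s", buf);	/* e.g. "33\n" */

	/* Example: 0 means any nonzero range exceeds the ceiling -> full flush. */
	f = fopen(CEILING, "w");
	if (!f || fprintf(f, "0\n") < 0) {
		perror(CEILING);
		return 1;
	}
	fclose(f);
	return 0;
}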