author		Linus Torvalds <torvalds@linux-foundation.org>	2014-08-04 20:15:45 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-04 20:15:45 -0400
commit		ce4747963252a30613ebf1c1df3d83b9526a342e (patch)
tree		6c61d1b1045a72965006324ae3805280be296e53 /arch/x86/mm
parent		76f09aa464a1913efd596dd0edbf88f932fde08c (diff)
parent		a5102476a24bce364b74f1110005542a2c964103 (diff)
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm changes from Ingo Molnar:
"The main change in this cycle is the rework of the TLB range flushing
code, to simplify, fix and consolidate the code. By Dave Hansen"
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mm: Set TLB flush tunable to sane value (33)
x86/mm: New tunable for single vs full TLB flush
x86/mm: Add tracepoints for TLB flushes
x86/mm: Unify remote INVLPG code
x86/mm: Fix missed global TLB flush stat
x86/mm: Rip out complicated, out-of-date, buggy TLB flushing
x86/mm: Clean up the TLB flushing code
x86/smep: Be more informative when signalling an SMEP fault
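
The heart of this cycle's rework is visible in the tlb.c diff below: the old per-CPU-model tlb_flushall_shift heuristic is replaced by one fixed ceiling on how many individual-page flushes are worth issuing before a single full TLB flush wins. A minimal user-space sketch of that policy (illustrative only: the types are simplified and the helper name pages_to_flush() is not from the patch):

#include <stdbool.h>

#define TLB_FLUSH_ALL	(-1UL)		/* sentinel: flush everything */
#define PAGE_SHIFT	12		/* 4 KB base pages */

static unsigned long tlb_single_page_flush_ceiling = 33;	/* in pages */

/* Decide between N single-page INVLPGs and one full flush. */
static unsigned long pages_to_flush(unsigned long start, unsigned long end,
				    bool huge)
{
	unsigned long pages = TLB_FLUSH_ALL;	/* global flush by default */

	if (end != TLB_FLUSH_ALL && !huge)
		pages = (end - start) >> PAGE_SHIFT;
	if (pages > tlb_single_page_flush_ceiling)
		pages = TLB_FLUSH_ALL;	/* too many: full flush is cheaper */
	return pages;
}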
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--	arch/x86/mm/fault.c	  6
-rw-r--r--	arch/x86/mm/init.c	  7
-rw-r--r--	arch/x86/mm/tlb.c	103
3 files changed, 62 insertions, 54 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 36642793e315..1dbade870f90 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -577,6 +577,8 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 
 static const char nx_warning[] = KERN_CRIT
 "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
+static const char smep_warning[] = KERN_CRIT
+"unable to execute userspace code (SMEP?) (uid: %d)\n";
 
 static void
 show_fault_oops(struct pt_regs *regs, unsigned long error_code,
@@ -597,6 +599,10 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 
 		if (pte && pte_present(*pte) && !pte_exec(*pte))
 			printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
+		if (pte && pte_present(*pte) && pte_exec(*pte) &&
+				(pgd_flags(*pgd) & _PAGE_USER) &&
+				(read_cr4() & X86_CR4_SMEP))
+			printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
 	}
 
 	printk(KERN_ALERT "BUG: unable to handle kernel ");
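
In the fault.c hunks above, the new smep_warning fires when an instruction fetch faults on a mapping that is present and executable yet user-accessible while CR4.SMEP is set -- the signature of a SMEP-blocked kernel-to-user jump, as opposed to the NX case just before it. A compact restatement of the predicate (stand-alone sketch; the boolean parameters are simplified stand-ins for the kernel's pte/pgd accessors):

#define CR4_SMEP_BIT	(1UL << 20)	/* X86_CR4_SMEP */

static int is_likely_smep_fault(int present, int executable,
				int user_reachable, unsigned long cr4)
{
	/* executable and reachable from user mode, with SMEP enforcing */
	return present && executable && user_reachable &&
	       (cr4 & CR4_SMEP_BIT);
}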
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f97130618113..66dba36f2343 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,6 +18,13 @@
 #include <asm/dma.h>		/* for MAX_DMA_PFN */
 #include <asm/microcode.h>
 
+/*
+ * We need to define the tracepoints somewhere, and tlb.c
+ * is only compiled when SMP=y.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/tlb.h>
+
 #include "mm_internal.h"
 
 static unsigned long __initdata pgt_buf_start;
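
The CREATE_TRACE_POINTS dance is needed because trace/events headers double as both declaration and definition: exactly one translation unit may set the macro before the include, and init.c is picked here because it is always built while tlb.c is compiled only when SMP=y. The general shape of the idiom (kernel-style sketch):

/* Defining side -- exactly one object file (here, init.c): */
#define CREATE_TRACE_POINTS
#include <trace/events/tlb.h>	/* header expands into the tracepoint bodies */

/*
 * Consuming side -- every other file includes the same header without
 * the macro, getting only the inline stubs, and then calls e.g.:
 *
 *	trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 */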
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index dd8dda167a24..1fe33987de02 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -49,6 +49,7 @@ void leave_mm(int cpu)
 	if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
 		cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
 		load_cr3(swapper_pg_dir);
+		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 	}
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -102,20 +103,24 @@ static void flush_tlb_func(void *info)
 
 	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
 		return;
+	if (!f->flush_end)
+		f->flush_end = f->flush_start + PAGE_SIZE;
 
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-		if (f->flush_end == TLB_FLUSH_ALL)
+		if (f->flush_end == TLB_FLUSH_ALL) {
 			local_flush_tlb();
-		else if (!f->flush_end)
-			__flush_tlb_single(f->flush_start);
-		else {
+			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
+		} else {
 			unsigned long addr;
+			unsigned long nr_pages =
+				f->flush_end - f->flush_start / PAGE_SIZE;
 			addr = f->flush_start;
 			while (addr < f->flush_end) {
 				__flush_tlb_single(addr);
 				addr += PAGE_SIZE;
 			}
+			trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
 		}
 	} else
 		leave_mm(smp_processor_id());
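
One wrinkle worth flagging in the hunk above: C precedence binds '/' tighter than '-', so the new nr_pages line computes f->flush_end - (f->flush_start / PAGE_SIZE). The flush loop itself is unaffected, since it walks flush_start to flush_end directly; only the count handed to the tracepoint is skewed. The presumably intended expression would read:

			unsigned long nr_pages =
				(f->flush_end - f->flush_start) / PAGE_SIZE;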
@@ -153,46 +158,45 @@ void flush_tlb_current_task(void)
 
 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 	local_flush_tlb();
+	trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
+/*
+ * See Documentation/x86/tlb.txt for details.  We choose 33
+ * because it is large enough to cover the vast majority (at
+ * least 95%) of allocations, and is small enough that we are
+ * confident it will not cause too much overhead.  Each single
+ * flush is about 100 ns, so this caps the maximum overhead at
+ * _about_ 3,000 ns.
+ *
+ * This is in units of pages.
+ */
+unsigned long tlb_single_page_flush_ceiling = 33;
+
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				unsigned long end, unsigned long vmflag)
 {
 	unsigned long addr;
-	unsigned act_entries, tlb_entries = 0;
-	unsigned long nr_base_pages;
+	/* do a global flush by default */
+	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
 
 	preempt_disable();
 	if (current->active_mm != mm)
-		goto flush_all;
+		goto out;
 
 	if (!current->mm) {
 		leave_mm(smp_processor_id());
-		goto flush_all;
+		goto out;
 	}
 
-	if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1
-					|| vmflag & VM_HUGETLB) {
-		local_flush_tlb();
-		goto flush_all;
-	}
-
-	/* In modern CPU, last level tlb used for both data/ins */
-	if (vmflag & VM_EXEC)
-		tlb_entries = tlb_lli_4k[ENTRIES];
-	else
-		tlb_entries = tlb_lld_4k[ENTRIES];
+	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
 
-	/* Assume all of TLB entries was occupied by this task */
-	act_entries = tlb_entries >> tlb_flushall_shift;
-	act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
-	nr_base_pages = (end - start) >> PAGE_SHIFT;
-
-	/* tlb_flushall_shift is on balance point, details in commit log */
-	if (nr_base_pages > act_entries) {
+	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
+		base_pages_to_flush = TLB_FLUSH_ALL;
 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 		local_flush_tlb();
 	} else {
@@ -201,17 +205,15 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
 			__flush_tlb_single(addr);
 		}
-
-		if (cpumask_any_but(mm_cpumask(mm),
-				smp_processor_id()) < nr_cpu_ids)
-			flush_tlb_others(mm_cpumask(mm), mm, start, end);
-		preempt_enable();
-		return;
 	}
-
-flush_all:
+	trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
+out:
+	if (base_pages_to_flush == TLB_FLUSH_ALL) {
+		start = 0UL;
+		end = TLB_FLUSH_ALL;
+	}
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+		flush_tlb_others(mm_cpumask(mm), mm, start, end);
 	preempt_enable();
 }
 
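From a caller's point of view, the rework means a range of at most 33 base pages (132 KB with 4 KB pages) is flushed page-by-page, larger or hugetlb ranges collapse into one global flush, and remote CPUs now receive exactly the decision the local CPU made instead of always being demoted to a full flush by the old flush_all path. The call shape from generic mm code looks roughly like this (sketch; x86's flush_tlb_range() wrapper forwards a sub-range of a VMA in this form):

	/* flush the TLB entries covering [start, end) of this mm */
	flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags);
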
@@ -260,32 +262,26 @@ static void do_kernel_range_flush(void *info)
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	unsigned act_entries;
-	struct flush_tlb_info info;
-
-	/* In modern CPU, last level tlb used for both data/ins */
-	act_entries = tlb_lld_4k[ENTRIES];
 
 	/* Balance as user space task's flush, a bit conservative */
-	if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 ||
-		(end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
-
+	if (end == TLB_FLUSH_ALL ||
+	    (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
 		on_each_cpu(do_flush_tlb_all, NULL, 1);
-	else {
+	} else {
+		struct flush_tlb_info info;
 		info.flush_start = start;
 		info.flush_end = end;
 		on_each_cpu(do_kernel_range_flush, &info, 1);
 	}
 }
 
-#ifdef CONFIG_DEBUG_TLBFLUSH
 static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
 			     size_t count, loff_t *ppos)
 {
 	char buf[32];
 	unsigned int len;
 
-	len = sprintf(buf, "%hd\n", tlb_flushall_shift);
+	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
 	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 }
 
@@ -294,20 +290,20 @@ static ssize_t tlbflush_write_file(struct file *file,
 {
 	char buf[32];
 	ssize_t len;
-	s8 shift;
+	int ceiling;
 
 	len = min(count, sizeof(buf) - 1);
 	if (copy_from_user(buf, user_buf, len))
 		return -EFAULT;
 
 	buf[len] = '\0';
-	if (kstrtos8(buf, 0, &shift))
+	if (kstrtoint(buf, 0, &ceiling))
 		return -EINVAL;
 
-	if (shift < -1 || shift >= BITS_PER_LONG)
+	if (ceiling < 0)
 		return -EINVAL;
 
-	tlb_flushall_shift = shift;
+	tlb_single_page_flush_ceiling = ceiling;
 	return count;
 }
 
@@ -317,11 +313,10 @@ static const struct file_operations fops_tlbflush = {
 	.llseek = default_llseek,
 };
 
-static int __init create_tlb_flushall_shift(void)
+static int __init create_tlb_single_page_flush_ceiling(void)
 {
-	debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
+	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
 			    arch_debugfs_dir, NULL, &fops_tlbflush);
 	return 0;
 }
-late_initcall(create_tlb_flushall_shift);
-#endif
+late_initcall(create_tlb_single_page_flush_ceiling);
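
With the CONFIG_DEBUG_TLBFLUSH guard gone, the knob is created unconditionally under the arch debugfs directory. A minimal user-space sketch of tuning it (assumes debugfs is mounted at /sys/kernel/debug, where arch_debugfs_dir corresponds to the x86/ subdirectory, and requires root):

#include <stdio.h>

int main(void)
{
	const char *knob =
		"/sys/kernel/debug/x86/tlb_single_page_flush_ceiling";
	FILE *f = fopen(knob, "w");

	if (!f) {
		perror("fopen");	/* not root, or debugfs not mounted */
		return 1;
	}
	fprintf(f, "64\n");	/* hypothetical value: allow longer ranged flushes */
	return fclose(f) ? 1 : 0;
}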