Diffstat (limited to 'arch/x86/mm')
-rw-r--r--   arch/x86/mm/fault.c                |  47
-rw-r--r--   arch/x86/mm/init_32.c              |   4
-rw-r--r--   arch/x86/mm/init_64.c              |  49
-rw-r--r--   arch/x86/mm/k8topology_64.c        |   8
-rw-r--r--   arch/x86/mm/kmemcheck/kmemcheck.c  |   2
-rw-r--r--   arch/x86/mm/kmemcheck/opcode.c     |   2
-rw-r--r--   arch/x86/mm/numa_64.c              |   2
-rw-r--r--   arch/x86/mm/pgtable.c              |  24
-rw-r--r--   arch/x86/mm/srat_64.c              |   8
-rw-r--r--   arch/x86/mm/tlb.c                  |  48
10 files changed, 148 insertions(+), 46 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4c4508e8a204..79b0b372d2d0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
 
 	spin_lock_irqsave(&pgd_lock, flags);
 	list_for_each_entry(page, &pgd_list, lru) {
-		if (!vmalloc_sync_one(page_address(page), address))
+		spinlock_t *pgt_lock;
+		pmd_t *ret;
+
+		pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+
+		spin_lock(pgt_lock);
+		ret = vmalloc_sync_one(page_address(page), address);
+		spin_unlock(pgt_lock);
+
+		if (!ret)
 			break;
 	}
 	spin_unlock_irqrestore(&pgd_lock, flags);
@@ -251,6 +260,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 	if (!(address >= VMALLOC_START && address < VMALLOC_END))
 		return -1;
 
+	WARN_ON_ONCE(in_nmi());
+
 	/*
 	 * Synchronize this task's top level page-table
 	 * with the 'reference' page table.
@@ -326,29 +337,7 @@ out:
 
 void vmalloc_sync_all(void)
 {
-	unsigned long address;
-
-	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
-	     address += PGDIR_SIZE) {
-
-		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
-		struct page *page;
-
-		if (pgd_none(*pgd_ref))
-			continue;
-
-		spin_lock_irqsave(&pgd_lock, flags);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
-				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
 }
 
 /*
@@ -369,6 +358,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 	if (!(address >= VMALLOC_START && address < VMALLOC_END))
 		return -1;
 
+	WARN_ON_ONCE(in_nmi());
+
 	/*
 	 * Copy kernel mappings over when needed. This can also
 	 * happen within a race in page table update. In the later
@@ -894,8 +885,14 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	if (pmd_large(*pmd))
 		return spurious_fault_check(error_code, (pte_t *) pmd);
 
+	/*
+	 * Note: don't use pte_present() here, since it returns true
+	 * if the _PAGE_PROTNONE bit is set. However, this aliases the
+	 * _PAGE_GLOBAL bit, which for kernel pages give false positives
+	 * when CONFIG_DEBUG_PAGEALLOC is used.
+	 */
 	pte = pte_offset_kernel(pmd, address);
-	if (!pte_present(*pte))
+	if (!(pte_flags(*pte) & _PAGE_PRESENT))
 		return 0;
 
 	ret = spurious_fault_check(error_code, pte);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index bca79091b9d6..558f2d332076 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -67,7 +67,7 @@ static __init void *alloc_low_page(void)
 		panic("alloc_low_page: ran out of memory");
 
 	adr = __va(pfn * PAGE_SIZE);
-	memset(adr, 0, PAGE_SIZE);
+	clear_page(adr);
 	return adr;
 }
 
@@ -558,7 +558,7 @@ char swsusp_pg_dir[PAGE_SIZE]
 
 static inline void save_pg_dir(void)
 {
-	memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
+	copy_page(swsusp_pg_dir, swapper_pg_dir);
 }
 #else /* !CONFIG_ACPI_SLEEP */
 static inline void save_pg_dir(void)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9a6674689a20..c55f900fbf89 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -98,6 +98,43 @@ static int __init nonx32_setup(char *str)
 __setup("noexec32=", nonx32_setup);
 
 /*
+ * When memory was added/removed make sure all the processes MM have
+ * suitable PGD entries in the local PGD level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+	unsigned long address;
+
+	for (address = start; address <= end; address += PGDIR_SIZE) {
+		const pgd_t *pgd_ref = pgd_offset_k(address);
+		unsigned long flags;
+		struct page *page;
+
+		if (pgd_none(*pgd_ref))
+			continue;
+
+		spin_lock_irqsave(&pgd_lock, flags);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
+			spinlock_t *pgt_lock;
+
+			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+			spin_lock(pgt_lock);
+
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
+			else
+				BUG_ON(pgd_page_vaddr(*pgd)
+				       != pgd_page_vaddr(*pgd_ref));
+
+			spin_unlock(pgt_lock);
+		}
+		spin_unlock_irqrestore(&pgd_lock, flags);
+	}
+}
+
+/*
  * NOTE: This function is marked __ref because it calls __init function
  * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
  */
@@ -293,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
 		panic("alloc_low_page: ran out of memory");
 
 	adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
-	memset(adr, 0, PAGE_SIZE);
+	clear_page(adr);
 	*phys = pfn * PAGE_SIZE;
 	return adr;
 }
@@ -534,11 +571,13 @@ kernel_physical_mapping_init(unsigned long start,
 			     unsigned long end,
 			     unsigned long page_size_mask)
 {
-
+	bool pgd_changed = false;
 	unsigned long next, last_map_addr = end;
+	unsigned long addr;
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
+	addr = start;
 
 	for (; start < end; start = next) {
 		pgd_t *pgd = pgd_offset_k(start);
@@ -563,7 +602,12 @@ kernel_physical_mapping_init(unsigned long start,
 		spin_lock(&init_mm.page_table_lock);
 		pgd_populate(&init_mm, pgd, __va(pud_phys));
 		spin_unlock(&init_mm.page_table_lock);
+		pgd_changed = true;
 	}
+
+	if (pgd_changed)
+		sync_global_pgds(addr, end);
+
 	__flush_tlb_all();
 
 	return last_map_addr;
@@ -1003,6 +1047,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 		}
 
 	}
+	sync_global_pgds((unsigned long)start_page, end);
 	return 0;
 }
 
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 970ed579d4e4..52d54bfc1ebb 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -22,7 +22,7 @@
 #include <asm/numa.h>
 #include <asm/mpspec.h>
 #include <asm/apic.h>
-#include <asm/k8.h>
+#include <asm/amd_nb.h>
 
 static struct bootnode __initdata nodes[8];
 static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
@@ -54,8 +54,8 @@ static __init int find_northbridge(void)
 static __init void early_get_boot_cpu_id(void)
 {
 	/*
-	 * need to get boot_cpu_id so can use that to create apicid_to_node
-	 * in k8_scan_nodes()
+	 * need to get the APIC ID of the BSP so can use that to
+	 * create apicid_to_node in k8_scan_nodes()
 	 */
 #ifdef CONFIG_X86_MPPARSE
 	/*
@@ -212,7 +212,7 @@ int __init k8_scan_nodes(void)
 	bits = boot_cpu_data.x86_coreid_bits;
 	cores = (1<<bits);
 	apicid_base = 0;
-	/* need to get boot_cpu_id early for system with apicid lifting */
+	/* get the APIC ID of the BSP early for systems with apicid lifting */
 	early_get_boot_cpu_id();
 	if (boot_cpu_physical_apicid > 0) {
 		pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index b3b531a4f8e5..d87dd6d042d6 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
 	if (!pte)
 		return false;
 
+	WARN_ON_ONCE(in_nmi());
+
 	if (error_code & 2)
 		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
 	else
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
index 63c19e27aa6f..324aa3f07237 100644
--- a/arch/x86/mm/kmemcheck/opcode.c
+++ b/arch/x86/mm/kmemcheck/opcode.c
@@ -9,7 +9,7 @@ static bool opcode_is_prefix(uint8_t b)
 		b == 0xf0 || b == 0xf2 || b == 0xf3
 		/* Group 2 */
 		|| b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
-		|| b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e
+		|| b == 0x64 || b == 0x65
 		/* Group 3 */
 		|| b == 0x66
 		/* Group 4 */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a7bcc23ef96c..4962f1aeda6f 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -18,7 +18,7 @@
 #include <asm/dma.h>
 #include <asm/numa.h>
 #include <asm/acpi.h>
-#include <asm/k8.h>
+#include <asm/amd_nb.h>
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee422590e..8be8c7d7bc89 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
-static void pgd_ctor(pgd_t *pgd)
+
+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+	BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
+	virt_to_page(pgd)->index = (pgoff_t)mm;
+}
+
+struct mm_struct *pgd_page_get_mm(struct page *page)
+{
+	return (struct mm_struct *)page->index;
+}
+
+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 {
 	/* If the pgd points to a shared pagetable level (either the
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -98,15 +110,13 @@ static void pgd_ctor(pgd_t *pgd)
 		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
 				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 				KERNEL_PGD_PTRS);
-		paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
-					 __pa(swapper_pg_dir) >> PAGE_SHIFT,
-					 KERNEL_PGD_BOUNDARY,
-					 KERNEL_PGD_PTRS);
 	}
 
 	/* list required to sync kernel mapping updates */
-	if (!SHARED_KERNEL_PMD)
+	if (!SHARED_KERNEL_PMD) {
+		pgd_set_mm(pgd, mm);
 		pgd_list_add(pgd);
+	}
 }
 
 static void pgd_dtor(pgd_t *pgd)
@@ -272,7 +282,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	 */
 	spin_lock_irqsave(&pgd_lock, flags);
 
-	pgd_ctor(pgd);
+	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
 
 	spin_unlock_irqrestore(&pgd_lock, flags);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index f9897f7a9ef1..9c0d0d399c30 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -420,9 +420,11 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		return -1;
 	}
 
-	for_each_node_mask(i, nodes_parsed)
-		e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
-						nodes[i].end >> PAGE_SHIFT);
+	for (i = 0; i < num_node_memblks; i++)
+		e820_register_active_regions(memblk_nodeid[i],
+				node_memblk_range[i].start >> PAGE_SHIFT,
+				node_memblk_range[i].end >> PAGE_SHIFT);
+
 	/* for out of order entries in SRAT */
 	sort_node_map();
 	if (!nodes_cover_memory(nodes)) {
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c03f14ab6667..49358481c733 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/cpu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
    want false sharing in the per cpu data segment. */
 static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
 
+static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
+
 /*
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
@@ -173,7 +176,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 	union smp_flush_state *f;
 
 	/* Caller has disabled preemption */
-	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+	sender = this_cpu_read(tlb_vector_offset);
 	f = &flush_state[sender];
 
 	/*
@@ -218,6 +221,47 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 	flush_tlb_others_ipi(cpumask, mm, va);
 }
 
+static void __cpuinit calculate_tlb_offset(void)
+{
+	int cpu, node, nr_node_vecs;
+	/*
+	 * we are changing tlb_vector_offset for each CPU in runtime, but this
+	 * will not cause inconsistency, as the write is atomic under X86. we
+	 * might see more lock contentions in a short time, but after all CPU's
+	 * tlb_vector_offset are changed, everything should go normal
+	 *
+	 * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
+	 * waste some vectors.
+	 **/
+	if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
+		nr_node_vecs = 1;
+	else
+		nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
+
+	for_each_online_node(node) {
+		int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
+			nr_node_vecs;
+		int cpu_offset = 0;
+		for_each_cpu(cpu, cpumask_of_node(node)) {
+			per_cpu(tlb_vector_offset, cpu) = node_offset +
+				cpu_offset;
+			cpu_offset++;
+			cpu_offset = cpu_offset % nr_node_vecs;
+		}
+	}
+}
+
+static int tlb_cpuhp_notify(struct notifier_block *n,
+		unsigned long action, void *hcpu)
+{
+	switch (action & 0xf) {
+	case CPU_ONLINE:
+	case CPU_DEAD:
+		calculate_tlb_offset();
+	}
+	return NOTIFY_OK;
+}
+
 static int __cpuinit init_smp_flush(void)
 {
 	int i;
@@ -225,6 +269,8 @@ static int __cpuinit init_smp_flush(void)
 	for (i = 0; i < ARRAY_SIZE(flush_state); i++)
 		raw_spin_lock_init(&flush_state[i].tlbstate_lock);
 
+	calculate_tlb_offset();
+	hotcpu_notifier(tlb_cpuhp_notify, 0);
 	return 0;
 }
 core_initcall(init_smp_flush);