author		Linus Torvalds <torvalds@linux-foundation.org>	2016-03-15 13:45:39 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-15 13:45:39 -0400
commit		13c76ad87216513db2487aac84155aa57dfd46ce
tree		265661a60dd960bc01e74a65367edd3161b1e018
parent		9cf8d6360c1589a97a98313729ed9e5db187f80b
parent		8b8addf891de8a00e4d39fc32f93f7c5eb8feceb
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:
"The main changes in this cycle were:
- Enable full ASLR randomization for 32-bit programs (Hector
Marco-Gisbert)
 - Add initial minimal INVPCID support, to flush global mappings (Andy
Lutomirski)
- Add KASAN enhancements (Andrey Ryabinin)
- Fix mmiotrace for huge pages (Karol Herbst)
- ... misc cleanups and small enhancements"
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mm/32: Enable full randomization on i386 and X86_32
x86/mm/kmmio: Fix mmiotrace for hugepages
x86/mm: Avoid premature success when changing page attributes
x86/mm/ptdump: Remove paravirt_enabled()
x86/mm: Fix INVPCID asm constraint
x86/dmi: Switch dmi_remap() from ioremap() [uncached] to ioremap_cache()
x86/mm: If INVPCID is available, use it to flush global mappings
x86/mm: Add a 'noinvpcid' boot option to turn off INVPCID
x86/mm: Add INVPCID helpers
x86/kasan: Write protect kasan zero shadow
x86/kasan: Clear kasan_zero_page after TLB flush
x86/mm/numa: Check for failures in numa_clear_kernel_node_hotplug()
x86/mm/numa: Clean up numa_clear_kernel_node_hotplug()
x86/mm: Make kmap_prot into a #define
x86/mm/32: Set NX in __supported_pte_mask before enabling paging
x86/mm: Streamline and restore probe_memory_block_size()
 Documentation/kernel-parameters.txt |  2
 arch/x86/include/asm/dmi.h          |  2
 arch/x86/include/asm/fixmap.h       |  2
 arch/x86/include/asm/tlbflush.h     | 57
 arch/x86/kernel/cpu/common.c        | 16
 arch/x86/kernel/head_32.S           |  6
 arch/x86/mm/dump_pagetables.c       | 11
 arch/x86/mm/init_32.c               |  3
 arch/x86/mm/init_64.c               | 24
 arch/x86/mm/kasan_init_64.c         | 17
 arch/x86/mm/kmmio.c                 | 88
 arch/x86/mm/mmap.c                  | 14
 arch/x86/mm/numa.c                  | 67
 arch/x86/mm/pageattr.c              |  4
 arch/x86/mm/setup_nx.c              |  5
 15 files changed, 217 insertions(+), 101 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 084775f7b052..4324f2437e6a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2566,6 +2566,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	nointroute	[IA-64]
 
+	noinvpcid	[X86] Disable the INVPCID cpu feature.
+
 	nojitter	[IA-64] Disables jitter checking for ITC timers.
 
 	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver
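
As a usage note (illustrative only, not part of the patch): like the other options documented here, the new parameter is simply appended to the kernel command line by the bootloader, e.g. a hypothetical entry such as

	linux /boot/vmlinuz root=/dev/sda1 ro quiet noinvpcid

With that set, the early-param handler added in arch/x86/kernel/cpu/common.c below clears X86_FEATURE_INVPCID, so the TLB-flush code keeps using the CR4-based global flush.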
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index 535192f6bfad..3c69fed215c5 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -15,7 +15,7 @@ static __always_inline __init void *dmi_alloc(unsigned len)
 /* Use early IO mappings for DMI because it's initialized early */
 #define dmi_early_remap		early_ioremap
 #define dmi_early_unmap		early_iounmap
-#define dmi_remap		ioremap
+#define dmi_remap		ioremap_cache
 #define dmi_unmap		iounmap
 
 #endif /* _ASM_X86_DMI_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 6d7d0e52ed5a..8554f960e21b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -138,7 +138,7 @@ extern void reserve_top_address(unsigned long reserve);
 extern int fixmaps_set;
 
 extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
+#define kmap_prot PAGE_KERNEL
 extern pte_t *pkmap_page_table;
 
 void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0bb31cb8c73b..c24b4224d439 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -8,6 +8,54 @@
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+			     unsigned long type)
+{
+	struct { u64 d[2]; } desc = { { pcid, addr } };
+
+	/*
+	 * The memory clobber is because the whole point is to invalidate
+	 * stale TLB entries and, especially if we're flushing global
+	 * mappings, we don't want the compiler to reorder any subsequent
+	 * memory accesses before the TLB flush.
+	 *
+	 * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+	 * invpcid (%rcx), %rax in long mode.
+	 */
+	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR		0
+#define INVPCID_TYPE_SINGLE_CTXT	1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL	2
+#define INVPCID_TYPE_ALL_NON_GLOBAL	3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+				     unsigned long addr)
+{
+	__invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+	__invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+	__invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -105,6 +153,15 @@ static inline void __native_flush_tlb_global(void)
 {
 	unsigned long flags;
 
+	if (static_cpu_has(X86_FEATURE_INVPCID)) {
+		/*
+		 * Using INVPCID is considerably faster than a pair of writes
+		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 */
+		invpcid_flush_all();
+		return;
+	}
+
 	/*
 	 * Read-modify-write to CR4 - protect it from preemption and
 	 * from interrupts. (Use the raw variant because this code can
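
For context, the four helpers map one-to-one onto the INVPCID descriptor types above. A hypothetical caller (not part of this patch; the function name and the use of PCID 0 are illustrative, relying on the kernel not yet using non-zero PCIDs at this point) could follow the same feature-check pattern as __native_flush_tlb_global():

	static inline void flush_one_user_page(unsigned long addr)
	{
		/* Prefer INVPCID when the CPU advertises it... */
		if (static_cpu_has(X86_FEATURE_INVPCID)) {
			invpcid_flush_one(0, addr);	/* PCID 0, single address */
			return;
		}

		/* ...otherwise fall back to the classic single-page flush. */
		__flush_tlb_one(addr);
	}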
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4e8d25d395ee..249461f95851 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -162,6 +162,22 @@ static int __init x86_mpx_setup(char *s)
 }
 __setup("nompx", x86_mpx_setup);
 
+static int __init x86_noinvpcid_setup(char *s)
+{
+	/* noinvpcid doesn't accept parameters */
+	if (s)
+		return -EINVAL;
+
+	/* do not emit a message if the feature is not present */
+	if (!boot_cpu_has(X86_FEATURE_INVPCID))
+		return 0;
+
+	setup_clear_cpu_cap(X86_FEATURE_INVPCID);
+	pr_info("noinvpcid: INVPCID feature disabled\n");
+	return 0;
+}
+early_param("noinvpcid", x86_noinvpcid_setup);
+
 #ifdef CONFIG_X86_32
 static int cachesize_override = -1;
 static int disable_x86_serial_nr = 1;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index af1112980dd4..54cdbd2003fe 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -389,6 +389,12 @@ default_entry:
 	/* Make changes effective */
 	wrmsr
 
+	/*
+	 * And make sure that all the mappings we set up have NX set from
+	 * the beginning.
+	 */
+	orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4)
+
 enable_paging:
 
 	/*
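
A short worked example of the constant used above, since 32-bit code can only poke at one half of the 64-bit mask at a time: _PAGE_BIT_NX is bit 63, i.e. bit 31 of the mask's high 32-bit word, which on little-endian x86 lives at __supported_pte_mask + 4. In rough C terms (a sketch, not the real code path):

	/* Bit 63 of the 64-bit mask == bit 31 of its high 32-bit word. */
	u32 *high = (u32 *)&__supported_pte_mask + 1;	/* pa(__supported_pte_mask + 4) */
	*high |= 1U << (_PAGE_BIT_NX - 32);		/* same as __supported_pte_mask |= _PAGE_NX */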
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 4a6f1d9b5106..99bfb192803f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -358,20 +358,19 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 #define pgd_none(a)  pud_none(__pud(pgd_val(a)))
 #endif
 
-#ifdef CONFIG_X86_64
 static inline bool is_hypervisor_range(int idx)
 {
+#ifdef CONFIG_X86_64
 	/*
 	 * ffff800000000000 - ffff87ffffffffff is reserved for
 	 * the hypervisor.
 	 */
-	return paravirt_enabled() &&
-		(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
-		(idx < pgd_index(__PAGE_OFFSET));
-}
+	return	(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
+		(idx < pgd_index(__PAGE_OFFSET));
 #else
-static inline bool is_hypervisor_range(int idx) { return false; }
+	return false;
 #endif
+}
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 				       bool checkwx)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2ebfbaf61142..bd7a9b9e2e14 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -388,7 +388,6 @@ repeat:
 }
 
 pte_t *kmap_pte;
-pgprot_t kmap_prot;
 
 static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
 {
@@ -405,8 +404,6 @@ static void __init kmap_init(void)
 	 */
 	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
 	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
-	kmap_prot = PAGE_KERNEL;
 }
 
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a40b755c67e3..214afda97911 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
 #include <asm/numa.h>
 #include <asm/cacheflush.h>
 #include <asm/init.h>
+#include <asm/uv/uv.h>
 #include <asm/setup.h>
 
 #include "mm_internal.h"
@@ -1203,26 +1204,13 @@ int kern_addr_valid(unsigned long addr)
 
 static unsigned long probe_memory_block_size(void)
 {
-	/* start from 2g */
-	unsigned long bz = 1UL<<31;
+	unsigned long bz = MIN_MEMORY_BLOCK_SIZE;
 
-	if (totalram_pages >= (64ULL << (30 - PAGE_SHIFT))) {
-		pr_info("Using 2GB memory block size for large-memory system\n");
-		return 2UL * 1024 * 1024 * 1024;
-	}
-
-	/* less than 64g installed */
-	if ((max_pfn << PAGE_SHIFT) < (16UL << 32))
-		return MIN_MEMORY_BLOCK_SIZE;
-
-	/* get the tail size */
-	while (bz > MIN_MEMORY_BLOCK_SIZE) {
-		if (!((max_pfn << PAGE_SHIFT) & (bz - 1)))
-			break;
-		bz >>= 1;
-	}
+	/* if system is UV or has 64GB of RAM or more, use large blocks */
+	if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30)))
+		bz = 2UL << 30; /* 2GB */
 
-	printk(KERN_DEBUG "memory block size : %ldMB\n", bz >> 20);
+	pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
 
 	return bz;
 }
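
A quick worked example of the new logic (assuming MIN_MEMORY_BLOCK_SIZE is the 128MB section size used on x86_64): a 32GB non-UV machine has max_pfn << PAGE_SHIFT = 0x8_0000_0000, which is below 64UL << 30, so it keeps 128MB memory blocks; a machine with 64GB or more, or any UV system, takes the branch and switches to 2UL << 30 = 2GB blocks.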
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d470cf219a2d..1b1110fa0057 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -120,11 +120,22 @@ void __init kasan_init(void)
 	kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
 			(void *)KASAN_SHADOW_END);
 
-	memset(kasan_zero_page, 0, PAGE_SIZE);
-
 	load_cr3(init_level4_pgt);
 	__flush_tlb_all();
-	init_task.kasan_depth = 0;
 
+	/*
+	 * kasan_zero_page has been used as early shadow memory, thus it may
+	 * contain some garbage. Now we can clear and write protect it, since
+	 * after the TLB flush no one should write to it.
+	 */
+	memset(kasan_zero_page, 0, PAGE_SIZE);
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO);
+		set_pte(&kasan_zero_pte[i], pte);
+	}
+	/* Flush TLBs again to be sure that write protection applied. */
+	__flush_tlb_all();
+
+	init_task.kasan_depth = 0;
 	pr_info("KernelAddressSanitizer initialized\n");
 }
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 637ab34ed632..ddb2244b06a1 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -33,7 +33,7 @@
 struct kmmio_fault_page {
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
-	unsigned long page; /* location of the fault page */
+	unsigned long addr; /* the requested address */
 	pteval_t old_presence; /* page presence prior to arming */
 	bool armed;
 
@@ -70,9 +70,16 @@ unsigned int kmmio_count;
 static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
 static LIST_HEAD(kmmio_probes);
 
-static struct list_head *kmmio_page_list(unsigned long page)
+static struct list_head *kmmio_page_list(unsigned long addr)
 {
-	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+
+	return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
 }
 
 /* Accessed per-cpu */
@@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 }
 
 /* You must be holding RCU read lock. */
-static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
 {
 	struct list_head *head;
 	struct kmmio_fault_page *f;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
 
-	page &= PAGE_MASK;
-	head = kmmio_page_list(page);
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+	head = kmmio_page_list(addr);
 	list_for_each_entry_rcu(f, head, list) {
-		if (f->page == page)
+		if (f->addr == addr)
 			return f;
 	}
 	return NULL;
@@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
 	unsigned int level;
-	pte_t *pte = lookup_address(f->page, &level);
+	pte_t *pte = lookup_address(f->addr, &level);
 
 	if (!pte) {
-		pr_err("no pte for page 0x%08lx\n", f->page);
+		pr_err("no pte for addr 0x%08lx\n", f->addr);
 		return -1;
 	}
 
@@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 		return -1;
 	}
 
-	__flush_tlb_one(f->page);
+	__flush_tlb_one(f->addr);
 	return 0;
 }
 
@@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 	int ret;
 	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
 	if (f->armed) {
-		pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
-			   f->page, f->count, !!f->old_presence);
+		pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
+			   f->addr, f->count, !!f->old_presence);
 	}
 	ret = clear_page_presence(f, true);
-	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
-		  f->page);
+	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
+		  f->addr);
 	f->armed = true;
 	return ret;
 }
@@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
 	int ret = clear_page_presence(f, false);
 	WARN_ONCE(ret < 0,
-			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+			KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
 	f->armed = false;
 }
 
@@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	struct kmmio_context *ctx;
 	struct kmmio_fault_page *faultpage;
 	int ret = 0; /* default to fault not handled */
+	unsigned long page_base = addr;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+	if (!pte)
+		return -EINVAL;
+	page_base &= page_level_mask(l);
 
 	/*
 	 * Preemption is now disabled to prevent process switch during
@@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	preempt_disable();
 	rcu_read_lock();
 
-	faultpage = get_kmmio_fault_page(addr);
+	faultpage = get_kmmio_fault_page(page_base);
 	if (!faultpage) {
 		/*
 		 * Either this page fault is not caused by kmmio, or
@@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 
 	ctx = &get_cpu_var(kmmio_ctx);
 	if (ctx->active) {
-		if (addr == ctx->addr) {
+		if (page_base == ctx->addr) {
 			/*
 			 * A second fault on the same page means some other
 			 * condition needs handling by do_page_fault(), the
@@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	ctx->active++;
 
 	ctx->fpage = faultpage;
-	ctx->probe = get_kmmio_probe(addr);
+	ctx->probe = get_kmmio_probe(page_base);
 	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
-	ctx->addr = addr;
+	ctx->addr = page_base;
 
 	if (ctx->probe && ctx->probe->pre_handler)
 		ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -354,12 +371,11 @@ out:
 }
 
 /* You must be holding kmmio_lock. */
-static int add_kmmio_fault_page(unsigned long page)
+static int add_kmmio_fault_page(unsigned long addr)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (f) {
 		if (!f->count)
 			arm_kmmio_fault_page(f);
@@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page)
 		return -1;
 
 	f->count = 1;
-	f->page = page;
+	f->addr = addr;
 
 	if (arm_kmmio_fault_page(f)) {
 		kfree(f);
 		return -1;
 	}
 
-	list_add_rcu(&f->list, kmmio_page_list(f->page));
+	list_add_rcu(&f->list, kmmio_page_list(f->addr));
 
 	return 0;
 }
 
 /* You must be holding kmmio_lock. */
-static void release_kmmio_fault_page(unsigned long page,
+static void release_kmmio_fault_page(unsigned long addr,
 				struct kmmio_fault_page **release_list)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (!f)
 		return;
 
@@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	int ret = 0;
 	unsigned long size = 0;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+	unsigned int l;
+	pte_t *pte;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	if (get_kmmio_probe(p->addr)) {
 		ret = -EEXIST;
 		goto out;
 	}
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	kmmio_count++;
 	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < size_lim) {
 		if (add_kmmio_fault_page(p->addr + size))
 			pr_err("Unable to set page fault.\n");
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 out:
 	spin_unlock_irqrestore(&kmmio_lock, flags);
@@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	struct kmmio_fault_page *release_list = NULL;
 	struct kmmio_delayed_release *drelease;
+	unsigned int l;
+	pte_t *pte;
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte)
+		return;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	while (size < size_lim) {
 		release_kmmio_fault_page(p->addr + size, &release_list);
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 	list_del_rcu(&p->list);
 	kmmio_count--;
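
The recurring pattern above is worth spelling out: rather than hard-coding 4KB pages, the code now derives both the base address and the step size from the mapping that actually backs the faulting address. A standalone sketch (variable names are illustrative) of what the helpers used here yield for a 2MB mapping versus a 4KB one:

	unsigned int level;
	pte_t *pte = lookup_address(addr, &level);	/* NULL if nothing is mapped there */

	if (pte) {
		unsigned long base = addr & page_level_mask(level);
		unsigned long step = page_level_size(level);

		/*
		 * PG_LEVEL_4K: base is 4KB-aligned,  step == 0x1000
		 * PG_LEVEL_2M: base is 2MB-aligned,  step == 0x200000
		 * so a single fault-page entry now covers the whole huge mapping.
		 */
	}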
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 72bb52f93c3d..d2dc0438d654 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd)
 }
 
 /*
- * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
- * does, but not when emulating X86_32
- */
-static unsigned long mmap_legacy_base(unsigned long rnd)
-{
-	if (mmap_is_ia32())
-		return TASK_UNMAPPED_BASE;
-	else
-		return TASK_UNMAPPED_BASE + rnd;
-}
-
-/*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();
 
-	mm->mmap_legacy_base = mmap_legacy_base(random_factor);
+	mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;
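
The practical effect of dropping the mmap_is_ia32() special case is that 32-bit processes get a randomized legacy mmap base too. A tiny userspace test (illustrative only, not part of the patch) makes this visible when built with -m32 and run repeatedly under the legacy bottom-up layout (e.g. with the vm.legacy_va_layout sysctl set): the printed address now varies between runs on i386 as well.

	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		/* First anonymous mapping lands near mmap_legacy_base. */
		void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		printf("%p\n", p);
		return 0;
	}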
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index d04f8094bc23..f70c1ff46125 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -465,46 +465,67 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
 	return true;
 }
 
+/*
+ * Mark all currently memblock-reserved physical memory (which covers the
+ * kernel's own memory ranges) as hot-unswappable.
+ */
 static void __init numa_clear_kernel_node_hotplug(void)
 {
-	int i, nid;
-	nodemask_t numa_kernel_nodes = NODE_MASK_NONE;
-	phys_addr_t start, end;
-	struct memblock_region *r;
+	nodemask_t reserved_nodemask = NODE_MASK_NONE;
+	struct memblock_region *mb_region;
+	int i;
 
 	/*
+	 * We have to do some preprocessing of memblock regions, to
+	 * make them suitable for reservation.
+	 *
 	 * At this time, all memory regions reserved by memblock are
-	 * used by the kernel. Set the nid in memblock.reserved will
-	 * mark out all the nodes the kernel resides in.
+	 * used by the kernel, but those regions are not split up
+	 * along node boundaries yet, and don't necessarily have their
+	 * node ID set yet either.
+	 *
+	 * So iterate over all memory known to the x86 architecture,
+	 * and use those ranges to set the nid in memblock.reserved.
+	 * This will split up the memblock regions along node
+	 * boundaries and will set the node IDs as well.
 	 */
 	for (i = 0; i < numa_meminfo.nr_blks; i++) {
-		struct numa_memblk *mb = &numa_meminfo.blk[i];
+		struct numa_memblk *mb = numa_meminfo.blk + i;
+		int ret;
 
-		memblock_set_node(mb->start, mb->end - mb->start,
-				  &memblock.reserved, mb->nid);
+		ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
+		WARN_ON_ONCE(ret);
 	}
 
 	/*
-	 * Mark all kernel nodes.
+	 * Now go over all reserved memblock regions, to construct a
+	 * node mask of all kernel reserved memory areas.
 	 *
-	 * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo
-	 * may not include all the memblock.reserved memory ranges because
-	 * trim_snb_memory() reserves specific pages for Sandy Bridge graphics.
+	 * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
+	 *   numa_meminfo might not include all memblock.reserved
+	 *   memory ranges, because quirks such as trim_snb_memory()
+	 *   reserve specific pages for Sandy Bridge graphics. ]
 	 */
-	for_each_memblock(reserved, r)
-		if (r->nid != MAX_NUMNODES)
-			node_set(r->nid, numa_kernel_nodes);
+	for_each_memblock(reserved, mb_region) {
+		if (mb_region->nid != MAX_NUMNODES)
+			node_set(mb_region->nid, reserved_nodemask);
+	}
 
-	/* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
+	/*
+	 * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
+	 * belonging to the reserved node mask.
+	 *
+	 * Note that this will include memory regions that reside
+	 * on nodes that contain kernel memory - entire nodes
+	 * become hot-unpluggable:
+	 */
 	for (i = 0; i < numa_meminfo.nr_blks; i++) {
-		nid = numa_meminfo.blk[i].nid;
-		if (!node_isset(nid, numa_kernel_nodes))
-			continue;
+		struct numa_memblk *mb = numa_meminfo.blk + i;
 
-		start = numa_meminfo.blk[i].start;
-		end = numa_meminfo.blk[i].end;
+		if (!node_isset(mb->nid, reserved_nodemask))
+			continue;
 
-		memblock_clear_hotplug(start, end - start);
+		memblock_clear_hotplug(mb->start, mb->end - mb->start);
 	}
 }
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1c37e650acac..007ebe2d8157 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1128,8 +1128,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
 	/*
 	 * Ignore all non primary paths.
 	 */
-	if (!primary)
+	if (!primary) {
+		cpa->numpages = 1;
 		return 0;
+	}
 
 	/*
 	 * Ignore the NULL PTE for kernel identity mapping, as it is expected
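
The reason the ignored path must still report one page of progress is the caller's bookkeeping. Roughly, as a simplified sketch of the processing loop (not the verbatim kernel code; array/range handling is omitted):

	while (numpages) {
		ret = __change_page_attr(cpa, checkalias);	/* may end up in __cpa_process_fault() */
		if (ret)
			return ret;

		/*
		 * If the fault path returned 0 without setting cpa->numpages,
		 * a stale value here could consume the whole remaining count
		 * and the loop would "succeed" without touching those pages.
		 */
		numpages -= cpa->numpages;
		cpa->curpage++;
	}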
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index f65a33f505b6..8bea84724a7d 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -32,9 +32,8 @@ early_param("noexec", noexec_setup);
 
 void x86_configure_nx(void)
 {
-	if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
-		__supported_pte_mask |= _PAGE_NX;
-	else
+	/* If disable_nx is set, clear NX on all new mappings going forward. */
+	if (disable_nx)
 		__supported_pte_mask &= ~_PAGE_NX;
 }
 