author		Ingo Molnar <mingo@elte.hu>	2011-02-14 05:55:18 -0500
committer	Ingo Molnar <mingo@elte.hu>	2011-02-14 05:55:18 -0500
commit		d2137d5af4259f50c19addb8246a186c9ffac325 (patch)
tree		2f7e309f9cf8ef2f2698532c226edda38021fe69 /arch/x86/mm
parent		f005fe12b90c5b9fe180a09209a893e09affa8aa (diff)
parent		795abaf1e4e188c4171e3cd3dbb11a9fcacaf505 (diff)
Merge branch 'linus' into x86/bootmem
Conflicts:
arch/x86/mm/numa_64.c
Merge reason: fix the conflict, update to latest -rc and pick up this
dependent fix from Yinghai:
e6d2e2b2b1e1: memblock: don't adjust size in memblock_find_base()
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/amdtopology_64.c   |  87
-rw-r--r--  arch/x86/mm/gup.c              |  28
-rw-r--r--  arch/x86/mm/init.c             |   3
-rw-r--r--  arch/x86/mm/init_32.c          |  22
-rw-r--r--  arch/x86/mm/kmemcheck/error.c  |   2
-rw-r--r--  arch/x86/mm/numa.c             |  22
-rw-r--r--  arch/x86/mm/numa_64.c          | 181
-rw-r--r--  arch/x86/mm/pageattr.c         |  25
-rw-r--r--  arch/x86/mm/pgtable.c          |  66
-rw-r--r--  arch/x86/mm/setup_nx.c         |   2
-rw-r--r--  arch/x86/mm/srat_32.c          |   2
-rw-r--r--  arch/x86/mm/srat_64.c          |  36
-rw-r--r--  arch/x86/mm/tlb.c              |   5
13 files changed, 387 insertions(+), 94 deletions(-)
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index ae6ad691a14a..49b334cdd64c 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -27,6 +27,7 @@
 #include <asm/amd_nb.h>
 
 static struct bootnode __initdata nodes[8];
+static unsigned char __initdata nodeids[8];
 static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
 
 static __init int find_northbridge(void)
@@ -66,20 +67,6 @@ static __init void early_get_boot_cpu_id(void)
 	if (smp_found_config)
 		early_get_smp_config();
 #endif
-	early_init_lapic_mapping();
-}
-
-int __init amd_get_nodes(struct bootnode *physnodes)
-{
-	int i;
-	int ret = 0;
-
-	for_each_node_mask(i, nodes_parsed) {
-		physnodes[ret].start = nodes[i].start;
-		physnodes[ret].end = nodes[i].end;
-		ret++;
-	}
-	return ret;
 }
 
 int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
@@ -114,7 +101,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
 		base = read_pci_config(0, nb, 1, 0x40 + i*8);
 		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
 
-		nodeid = limit & 7;
+		nodeids[i] = nodeid = limit & 7;
 		if ((base & 3) == 0) {
 			if (i < numnodes)
 				pr_info("Skipping disabled node %d\n", i);
@@ -194,6 +181,76 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
 	return 0;
 }
 
+#ifdef CONFIG_NUMA_EMU
+static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
+void __init amd_get_nodes(struct bootnode *physnodes)
+{
+	int i;
+
+	for_each_node_mask(i, nodes_parsed) {
+		physnodes[i].start = nodes[i].start;
+		physnodes[i].end = nodes[i].end;
+	}
+}
+
+static int __init find_node_by_addr(unsigned long addr)
+{
+	int ret = NUMA_NO_NODE;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		if (addr >= nodes[i].start && addr < nodes[i].end) {
+			ret = i;
+			break;
+		}
+	return ret;
+}
+
+/*
+ * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
+ * setup to represent the physical topology but reflect the emulated
+ * environment. For each emulated node, the real node which it appears on is
+ * found and a fake pxm to nid mapping is created which mirrors the actual
+ * locality. node_distance() then represents the correct distances between
+ * emulated nodes by using the fake acpi mappings to pxms.
+ */
+void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
+{
+	unsigned int bits;
+	unsigned int cores;
+	unsigned int apicid_base = 0;
+	int i;
+
+	bits = boot_cpu_data.x86_coreid_bits;
+	cores = 1 << bits;
+	early_get_boot_cpu_id();
+	if (boot_cpu_physical_apicid > 0)
+		apicid_base = boot_cpu_physical_apicid;
+
+	for (i = 0; i < nr_nodes; i++) {
+		int index;
+		int nid;
+		int j;
+
+		nid = find_node_by_addr(nodes[i].start);
+		if (nid == NUMA_NO_NODE)
+			continue;
+
+		index = nodeids[nid] << bits;
+		if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
+			for (j = apicid_base; j < cores + apicid_base; j++)
+				fake_apicid_to_node[index + j] = i;
+#ifdef CONFIG_ACPI_NUMA
+		__acpi_map_pxm_to_node(nid, i);
+#endif
+	}
+	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+}
+#endif /* CONFIG_NUMA_EMU */
+
 int __init amd_scan_nodes(void)
 {
 	unsigned int bits;
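
The emulation hook added above leans on how these AMD CPUs derive APIC IDs from the topology: a core's APIC ID is (hardware node id << x86_coreid_bits) + core number, so "index = nodeids[nid] << bits" is the first APIC ID on that physical node and the following "cores" IDs all belong to it. A stand-alone sketch of that arithmetic (the sample topology below is assumed for illustration; it is not taken from the patch):

#include <stdio.h>

int main(void)
{
	unsigned int bits = 2;               /* like boot_cpu_data.x86_coreid_bits */
	unsigned int cores = 1u << bits;     /* cores per physical node */
	unsigned char nodeids[] = { 0, 1 };  /* hypothetical northbridge node ids */

	for (int nid = 0; nid < 2; nid++) {
		unsigned int index = nodeids[nid] << bits;

		/* every core on this node maps to the same physical node */
		for (unsigned int j = 0; j < cores; j++)
			printf("apicid %u -> physical node %d\n", index + j, nid);
	}
	return 0;
}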
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 738e6593799d..dbe34b931374 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -8,6 +8,7 @@
 #include <linux/mm.h>
 #include <linux/vmstat.h>
 #include <linux/highmem.h>
+#include <linux/swap.h>
 
 #include <asm/pgtable.h>
 
@@ -89,6 +90,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 		get_page(page);
+		SetPageReferenced(page);
 		pages[*nr] = page;
 		(*nr)++;
 
@@ -103,6 +105,17 @@ static inline void get_head_page_multiple(struct page *page, int nr)
 	VM_BUG_ON(page != compound_head(page));
 	VM_BUG_ON(page_count(page) == 0);
 	atomic_add(nr, &page->_count);
+	SetPageReferenced(page);
+}
+
+static inline void get_huge_page_tail(struct page *page)
+{
+	/*
+	 * __split_huge_page_refcount() cannot run
+	 * from under us.
+	 */
+	VM_BUG_ON(atomic_read(&page->_count) < 0);
+	atomic_inc(&page->_count);
 }
 
 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
@@ -128,6 +141,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 	do {
 		VM_BUG_ON(compound_head(page) != head);
 		pages[*nr] = page;
+		if (PageTail(page))
+			get_huge_page_tail(page);
 		(*nr)++;
 		page++;
 		refs++;
@@ -148,7 +163,18 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		pmd_t pmd = *pmdp;
 
 		next = pmd_addr_end(addr, end);
-		if (pmd_none(pmd))
+		/*
+		 * The pmd_trans_splitting() check below explains why
+		 * pmdp_splitting_flush has to flush the tlb, to stop
+		 * this gup-fast code from running while we set the
+		 * splitting bit in the pmd. Returning zero will take
+		 * the slow path that will call wait_split_huge_page()
+		 * if the pmd is still in splitting state. gup-fast
+		 * can't because it has irq disabled and
+		 * wait_split_huge_page() would never return as the
+		 * tlb flush IPI wouldn't run.
+		 */
+		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_large(pmd))) {
 			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
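
The comment added to gup_pmd_range() describes a lock-free handshake: the splitting side sets a bit in the pmd and then issues a TLB-flush IPI, which cannot complete while any gup-fast walker runs with IRQs disabled; the walker, for its part, must never wait, so it simply bails out to the slow path when it sees the bit. A minimal user-space sketch of that back-off pattern using C11 atomics (the names and the flag bit are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdbool.h>

#define PMD_SPLITTING (1u << 9)		/* illustrative "splitting" flag bit */

static atomic_uint pmd_val;

/* Splitting side: set the flag, then (in the kernel) issue the TLB-flush
 * IPI that can only finish once no fast walker is still active. */
static void start_split(void)
{
	atomic_fetch_or(&pmd_val, PMD_SPLITTING);
	/* kernel analogue: flush_tlb_range() serializes against gup-fast */
}

/* Walker side, analogous to gup_pmd_range(): runs with IRQs disabled, so
 * it must never block -- returning false means "take the slow path". */
static bool fast_walk(void)
{
	unsigned int pmd = atomic_load(&pmd_val);

	if (pmd == 0 || (pmd & PMD_SPLITTING))
		return false;	/* like returning 0 from gup_pmd_range() */
	/* ...safe to walk the entry here... */
	return true;
}

int main(void)
{
	atomic_store(&pmd_val, 0x1);	/* a populated entry */
	start_split();
	return fast_walk() ? 1 : 0;	/* walker backs off: exit code 0 */
}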
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 35ee75d9061a..b8054e087ead 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -336,8 +336,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	/*
 	 * We just marked the kernel text read only above, now that
 	 * we are going to free part of that, we need to make that
-	 * writeable first.
+	 * writeable and non-executable first.
 	 */
+	set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
 	set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
 
 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0e969f9f401b..c821074b7f0b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -45,6 +45,7 @@
 #include <asm/bugs.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/olpc_ofw.h>
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/paravirt.h>
@@ -226,7 +227,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
 
 static inline int is_kernel_text(unsigned long addr)
 {
-	if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
+	if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
 		return 1;
 	return 0;
 }
@@ -715,6 +716,7 @@ void __init paging_init(void)
 	/*
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
+	olpc_dt_build_devicetree();
 	sparse_init();
 	zone_sizes_init();
 }
@@ -912,6 +914,23 @@ void set_kernel_text_ro(void)
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 }
 
+static void mark_nxdata_nx(void)
+{
+	/*
+	 * When this called, init has already been executed and released,
+	 * so everything past _etext sould be NX.
+	 */
+	unsigned long start = PFN_ALIGN(_etext);
+	/*
+	 * This comes from is_kernel_text upper limit. Also HPAGE where used:
+	 */
+	unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
+
+	if (__supported_pte_mask & _PAGE_NX)
+		printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
+	set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
+}
+
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
@@ -946,6 +965,7 @@ void mark_rodata_ro(void)
 	printk(KERN_INFO "Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
+	mark_nxdata_nx();
 }
 #endif
 
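
mark_nxdata_nx() has to cover everything from _etext up to the end of the region that is_kernel_text() treats as text, rounded to a huge-page boundary because the kernel mapping may use 2MiB pages. The expression "((__init_end + HPAGE_SIZE) & HPAGE_MASK)" advances past __init_end to the next 2MiB boundary; the round-up can be checked in isolation (the addresses below are made-up stand-ins for the linker symbols):

#include <stdio.h>

#define HPAGE_SHIFT 21			/* 2 MiB huge pages on x86 */
#define HPAGE_SIZE  (1UL << HPAGE_SHIFT)
#define HPAGE_MASK  (~(HPAGE_SIZE - 1))

int main(void)
{
	/* hypothetical stand-ins for _etext and __init_end */
	unsigned long etext    = 0xc15f3000UL;
	unsigned long init_end = 0xc1a0b000UL;

	unsigned long start = etext;	/* already page-aligned in this sketch */
	/* same arithmetic as mark_nxdata_nx(): end at the next huge-page
	 * boundary past __init_end */
	unsigned long size = ((init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

	printf("NX-protecting the kernel data: %luk from %#lx\n",
	       size >> 10, start);
	return 0;
}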
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index af3b6c8a436f..704a37cedddb 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
 	e->trace.entries = e->trace_entries;
 	e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
 	e->trace.skip = 0;
-	save_stack_trace_bp(&e->trace, regs->bp);
+	save_stack_trace_regs(&e->trace, regs);
 
 	/* Round address down to nearest 16 bytes */
 	shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 787c52ca49c3..ebf6d7887a38 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -2,6 +2,28 @@
 #include <linux/topology.h>
 #include <linux/module.h>
 #include <linux/bootmem.h>
+#include <asm/numa.h>
+#include <asm/acpi.h>
+
+int __initdata numa_off;
+
+static __init int numa_setup(char *opt)
+{
+	if (!opt)
+		return -EINVAL;
+	if (!strncmp(opt, "off", 3))
+		numa_off = 1;
+#ifdef CONFIG_NUMA_EMU
+	if (!strncmp(opt, "fake=", 5))
+		numa_emu_cmdline(opt + 5);
+#endif
+#ifdef CONFIG_ACPI_NUMA
+	if (!strncmp(opt, "noacpi", 6))
+		acpi_numa = -1;
+#endif
+	return 0;
+}
+early_param("numa", numa_setup);
 
 /*
  * Which logical CPUs are on which nodes
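
With the option parser consolidated here, one "numa=" early parameter now dispatches to all three back ends by plain prefix matching. A user-space sketch of the same dispatch logic (stub variables stand in for the kernel globals; the early_param() plumbing itself is not reproduced):

#include <stdio.h>
#include <string.h>

/* stand-ins for the kernel globals touched by numa_setup() */
static int numa_off;
static int acpi_numa;
static const char *fake_cmdline;

/* same prefix dispatch as the numa_setup() handler above */
static int numa_setup(char *opt)
{
	if (!opt)
		return -1;
	if (!strncmp(opt, "off", 3))
		numa_off = 1;
	if (!strncmp(opt, "fake=", 5))
		fake_cmdline = opt + 5;	/* like numa_emu_cmdline(opt + 5) */
	if (!strncmp(opt, "noacpi", 6))
		acpi_numa = -1;
	return 0;
}

int main(void)
{
	char arg[] = "fake=4";

	numa_setup(arg);
	printf("numa_off=%d acpi_numa=%d fake=%s\n",
	       numa_off, acpi_numa, fake_cmdline ? fake_cmdline : "(unset)");
	return 0;
}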
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7cc26ae0a15d..62cb634b5cf8 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -30,7 +30,6 @@ s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
-int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
 
@@ -260,30 +259,35 @@ void __init numa_init_array(void)
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode physnodes[MAX_NUMNODES] __initdata;
+static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
 static char *cmdline __initdata;
 
+void __init numa_emu_cmdline(char *str)
+{
+	cmdline = str;
+}
+
 static int __init setup_physnodes(unsigned long start, unsigned long end,
 				int acpi, int amd)
 {
-	int nr_nodes = 0;
 	int ret = 0;
 	int i;
 
+	memset(physnodes, 0, sizeof(physnodes));
 #ifdef CONFIG_ACPI_NUMA
 	if (acpi)
-		nr_nodes = acpi_get_nodes(physnodes);
+		acpi_get_nodes(physnodes, start, end);
 #endif
 #ifdef CONFIG_AMD_NUMA
 	if (amd)
-		nr_nodes = amd_get_nodes(physnodes);
+		amd_get_nodes(physnodes);
 #endif
 	/*
 	 * Basic sanity checking on the physical node map: there may be errors
 	 * if the SRAT or AMD code incorrectly reported the topology or the mem=
 	 * kernel parameter is used.
 	 */
-	for (i = 0; i < nr_nodes; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++) {
 		if (physnodes[i].start == physnodes[i].end)
 			continue;
 		if (physnodes[i].start > end) {
@@ -298,17 +302,6 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
 			physnodes[i].start = start;
 		if (physnodes[i].end > end)
 			physnodes[i].end = end;
-	}
-
-	/*
-	 * Remove all nodes that have no memory or were truncated because of the
-	 * limited address range.
-	 */
-	for (i = 0; i < nr_nodes; i++) {
-		if (physnodes[i].start == physnodes[i].end)
-			continue;
-		physnodes[ret].start = physnodes[i].start;
-		physnodes[ret].end = physnodes[i].end;
 		ret++;
 	}
 
@@ -324,6 +317,24 @@
 	return ret;
 }
 
+static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
+{
+	int i;
+
+	BUG_ON(acpi && amd);
+#ifdef CONFIG_ACPI_NUMA
+	if (acpi)
+		acpi_fake_nodes(nodes, nr_nodes);
+#endif
+#ifdef CONFIG_AMD_NUMA
+	if (amd)
+		amd_fake_nodes(nodes, nr_nodes);
+#endif
+	if (!acpi && !amd)
+		for (i = 0; i < nr_cpu_ids; i++)
+			numa_set_node(i, 0);
+}
+
 /*
  * Setups up nid to range from addr to addr + size. If the end
  * boundary is greater than max_addr, then max_addr is used instead.
@@ -352,8 +363,7 @@ static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
  * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
  * to max_addr. The return value is the number of nodes allocated.
  */
-static int __init split_nodes_interleave(u64 addr, u64 max_addr,
-					int nr_phys_nodes, int nr_nodes)
+static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
 {
 	nodemask_t physnode_mask = NODE_MASK_NONE;
 	u64 size;
@@ -384,7 +394,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
 		return -1;
 	}
 
-	for (i = 0; i < nr_phys_nodes; i++)
+	for (i = 0; i < MAX_NUMNODES; i++)
 		if (physnodes[i].start != physnodes[i].end)
 			node_set(i, physnode_mask);
 
@@ -553,11 +563,9 @@ static int __init numa_emulation(unsigned long start_pfn,
 {
 	u64 addr = start_pfn << PAGE_SHIFT;
 	u64 max_addr = last_pfn << PAGE_SHIFT;
-	int num_phys_nodes;
 	int num_nodes;
 	int i;
 
-	num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
 	/*
 	 * If the numa=fake command-line contains a 'M' or 'G', it represents
 	 * the fixed node size. Otherwise, if it is just a single number N,
@@ -572,7 +580,7 @@ static int __init numa_emulation(unsigned long start_pfn,
 		unsigned long n;
 
 		n = simple_strtoul(cmdline, NULL, 0);
-		num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
+		num_nodes = split_nodes_interleave(addr, max_addr, n);
 	}
 
 	if (num_nodes < 0)
@@ -596,7 +604,8 @@ static int __init numa_emulation(unsigned long start_pfn,
 	init_memory_mapping_high();
 	for_each_node_mask(i, node_possible_map)
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-	acpi_fake_nodes(nodes, num_nodes);
+	setup_physnodes(addr, max_addr, acpi, amd);
+	fake_physnodes(acpi, amd, num_nodes);
 	numa_init_array();
 	return 0;
 }
@@ -611,8 +620,12 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
 	nodes_clear(node_online_map);
 
 #ifdef CONFIG_NUMA_EMU
+	setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
+			acpi, amd);
 	if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
 		return;
+	setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
+			acpi, amd);
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
 #endif
@@ -663,24 +676,6 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
-static __init int numa_setup(char *opt)
-{
-	if (!opt)
-		return -EINVAL;
-	if (!strncmp(opt, "off", 3))
-		numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
-	if (!strncmp(opt, "fake=", 5))
-		cmdline = opt + 5;
-#endif
-#ifdef CONFIG_ACPI_NUMA
-	if (!strncmp(opt, "noacpi", 6))
-		acpi_numa = -1;
-#endif
-	return 0;
-}
-early_param("numa", numa_setup);
-
 #ifdef CONFIG_NUMA
 
 static __init int find_near_online_node(int node)
@@ -769,6 +764,7 @@ void __cpuinit numa_clear_node(int cpu)
 
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
+#ifndef CONFIG_NUMA_EMU
 void __cpuinit numa_add_cpu(int cpu)
 {
 	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
@@ -778,34 +774,115 @@ void __cpuinit numa_remove_cpu(int cpu)
 {
 	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
+#else
+void __cpuinit numa_add_cpu(int cpu)
+{
+	unsigned long addr;
+	u16 apicid;
+	int physnid;
+	int nid = NUMA_NO_NODE;
+
+	apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+	if (apicid != BAD_APICID)
+		nid = apicid_to_node[apicid];
+	if (nid == NUMA_NO_NODE)
+		nid = early_cpu_to_node(cpu);
+	BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
+
+	/*
+	 * Use the starting address of the emulated node to find which physical
+	 * node it is allocated on.
+	 */
+	addr = node_start_pfn(nid) << PAGE_SHIFT;
+	for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
+		if (addr >= physnodes[physnid].start &&
+		    addr < physnodes[physnid].end)
+			break;
+
+	/*
+	 * Map the cpu to each emulated node that is allocated on the physical
+	 * node of the cpu's apic id.
+	 */
+	for_each_online_node(nid) {
+		addr = node_start_pfn(nid) << PAGE_SHIFT;
+		if (addr >= physnodes[physnid].start &&
+		    addr < physnodes[physnid].end)
+			cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+	}
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	int i;
+
+	for_each_online_node(i)
+		cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
+}
+#endif /* !CONFIG_NUMA_EMU */
 
 #else /* CONFIG_DEBUG_PER_CPU_MAPS */
+static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+{
+	int node = early_cpu_to_node(cpu);
+	struct cpumask *mask;
+	char buf[64];
+
+	mask = node_to_cpumask_map[node];
+	if (!mask) {
+		pr_err("node_to_cpumask_map[%i] NULL\n", node);
+		dump_stack();
+		return NULL;
+	}
+
+	cpulist_scnprintf(buf, sizeof(buf), mask);
+	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+		enable ? "numa_add_cpu" : "numa_remove_cpu",
+		cpu, node, buf);
+	return mask;
+}
 
 /*
  * --------- debug versions of the numa functions ---------
 */
+#ifndef CONFIG_NUMA_EMU
 static void __cpuinit numa_set_cpumask(int cpu, int enable)
 {
-	int node = early_cpu_to_node(cpu);
 	struct cpumask *mask;
-	char buf[64];
 
-	mask = node_to_cpumask_map[node];
-	if (mask == NULL) {
-		printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
-		dump_stack();
+	mask = debug_cpumask_set_cpu(cpu, enable);
+	if (!mask)
 		return;
-	}
 
 	if (enable)
 		cpumask_set_cpu(cpu, mask);
 	else
 		cpumask_clear_cpu(cpu, mask);
+}
+#else
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+	int node = early_cpu_to_node(cpu);
+	struct cpumask *mask;
+	int i;
 
-	cpulist_scnprintf(buf, sizeof(buf), mask);
-	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
-		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+	for_each_online_node(i) {
+		unsigned long addr;
+
+		addr = node_start_pfn(i) << PAGE_SHIFT;
+		if (addr < physnodes[node].start ||
+		    addr >= physnodes[node].end)
+			continue;
+		mask = debug_cpumask_set_cpu(cpu, enable);
+		if (!mask)
+			return;
+
+		if (enable)
+			cpumask_set_cpu(cpu, mask);
+		else
+			cpumask_clear_cpu(cpu, mask);
+	}
 }
+#endif /* CONFIG_NUMA_EMU */
 
 void __cpuinit numa_add_cpu(int cpu)
 {
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 532e7933d606..d343b3c81f3c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -13,6 +13,7 @@
 #include <linux/pfn.h>
 #include <linux/percpu.h>
 #include <linux/gfp.h>
+#include <linux/pci.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -260,8 +261,10 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 	 * The BIOS area between 640k and 1Mb needs to be executable for
 	 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
 	 */
-	if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
+#ifdef CONFIG_PCI_BIOS
+	if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_NX;
+#endif
 
 	/*
 	 * The kernel text needs to be executable for obvious reasons
@@ -393,7 +396,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 {
 	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
 	pte_t new_pte, old_pte, *tmp;
-	pgprot_t old_prot, new_prot;
+	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
 	unsigned int level;
 
@@ -438,10 +441,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * We are safe now. Check whether the new pgprot is the same:
 	 */
 	old_pte = *kpte;
-	old_prot = new_prot = pte_pgprot(old_pte);
+	old_prot = new_prot = req_prot = pte_pgprot(old_pte);
 
-	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
-	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
 
 	/*
 	 * old_pte points to the large page base address. So we need
@@ -450,17 +453,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
 	cpa->pfn = pfn;
 
-	new_prot = static_protections(new_prot, address, pfn);
+	new_prot = static_protections(req_prot, address, pfn);
 
 	/*
 	 * We need to check the full range, whether
 	 * static_protection() requires a different pgprot for one of
 	 * the pages in the range we try to preserve:
 	 */
-	addr = address + PAGE_SIZE;
-	pfn++;
-	for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
-		pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
+	addr = address & pmask;
+	pfn = pte_pfn(old_pte);
+	for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
+		pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
 
 		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
 			goto out_unlock;
@@ -483,7 +486,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * that we limited the number of possible pages already to
 	 * the number of pages in the large page.
 	 */
-	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+	if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
 		/*
 		 * The address is aligned and the number of pages
 		 * covers the full page.
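
The try_preserve_large_page() change is subtle: the old loop checked only the pages named in the request and compared the request's end against the large page's end, while the new code always walks every small page backing the large mapping and tests alignment with "address & pmask". The final alignment test is plain mask arithmetic; a small sketch (the sample values are made up):

#include <stdio.h>
#include <stdbool.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Does a change-page-attr request cover one whole large page? Mirrors the
 * rewritten condition in try_preserve_large_page(). */
static bool covers_whole_large_page(unsigned long address,
				    unsigned long numpages,
				    unsigned long psize)
{
	unsigned long pmask = ~(psize - 1);

	return address == (address & pmask) &&
	       numpages == (psize >> PAGE_SHIFT);
}

int main(void)
{
	unsigned long psize = 2UL << 20;	/* 2 MiB large page */

	/* aligned request spanning all 512 small pages: may be preserved */
	printf("%d\n", covers_whole_large_page(0x200000, 512, psize));
	/* unaligned request: the large page would have to be split */
	printf("%d\n", covers_whole_large_page(0x201000, 512, psize));
	return 0;
}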
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8be8c7d7bc89..500242d3c96d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -320,6 +320,25 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	return changed;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp,
+			  pmd_t entry, int dirty)
+{
+	int changed = !pmd_same(*pmdp, entry);
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	if (changed && dirty) {
+		*pmdp = entry;
+		pmd_update_defer(vma->vm_mm, address, pmdp);
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	}
+
+	return changed;
+}
+#endif
+
 int ptep_test_and_clear_young(struct vm_area_struct *vma,
 			      unsigned long addr, pte_t *ptep)
 {
@@ -335,6 +354,23 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return ret;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pmd_t *pmdp)
+{
+	int ret = 0;
+
+	if (pmd_young(*pmdp))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 (unsigned long *)pmdp);
+
+	if (ret)
+		pmd_update(vma->vm_mm, addr, pmdp);
+
+	return ret;
+}
+#endif
+
 int ptep_clear_flush_young(struct vm_area_struct *vma,
 			   unsigned long address, pte_t *ptep)
 {
@@ -347,6 +383,36 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
 	return young;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pmd_t *pmdp)
+{
+	int young;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	young = pmdp_test_and_clear_young(vma, address, pmdp);
+	if (young)
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+	return young;
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp)
+{
+	int set;
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	set = !test_and_set_bit(_PAGE_BIT_SPLITTING,
+				(unsigned long *)pmdp);
+	if (set) {
+		pmd_update(vma->vm_mm, address, pmdp);
+		/* need tlb flush only to serialize against gup-fast */
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	}
+}
+#endif
+
 /**
  * reserve_top_address - reserves a hole in the top of kernel address space
  * @reserve - size of hole to reserve
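
pmdp_test_and_clear_young() relies on test_and_clear_bit() being an atomic read-modify-write, so an accessed bit set concurrently by the hardware page walker is never lost between the test and the clear. The same primitive can be expressed with C11 atomics; a sketch (the bit number matches x86's _PAGE_BIT_ACCESSED, everything else is illustrative):

#include <stdatomic.h>
#include <stdio.h>

#define PAGE_BIT_ACCESSED 5	/* _PAGE_BIT_ACCESSED on x86 */

static atomic_ulong pmd_entry;

/* atomic test-and-clear of one bit, like test_and_clear_bit() above;
 * returns the old value of the bit */
static int test_and_clear_accessed(atomic_ulong *p)
{
	unsigned long mask = 1UL << PAGE_BIT_ACCESSED;
	unsigned long old = atomic_fetch_and(p, ~mask);

	return (old & mask) != 0;
}

int main(void)
{
	atomic_store(&pmd_entry, 1UL << PAGE_BIT_ACCESSED);
	printf("young=%d\n", test_and_clear_accessed(&pmd_entry));	/* 1 */
	printf("young=%d\n", test_and_clear_accessed(&pmd_entry));	/* 0 */
	return 0;
}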
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index a3250aa34086..410531d3c292 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -41,7 +41,7 @@ void __init x86_report_nx(void)
 {
 	if (!cpu_has_nx) {
 		printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
-		       "missing in CPU or disabled in BIOS!\n");
+		       "missing in CPU!\n");
 	} else {
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 		if (disable_nx) {
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index a17dffd136c1..ae96e7b8051d 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -59,7 +59,6 @@ static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
 static int __initdata num_memory_chunks; /* total number of memory chunks */
 static u8 __initdata apicid_to_pxm[MAX_APICID];
 
-int numa_off __initdata;
 int acpi_numa __initdata;
 
 static __init void bad_srat(void)
@@ -92,6 +91,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
 	/* mark this node as "seen" in node bitmap */
 	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
 
+	/* don't need to check apic_id here, because it is always 8 bits */
 	apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
 
 	printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 0b961c8bffb4..4c03e13da138 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 
 	apic_id = pa->apic_id;
+	if (apic_id >= MAX_LOCAL_APIC) {
+		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+		return;
+	}
 	apicid_to_node[apic_id] = node;
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
 	else
 		apic_id = pa->apic_id;
+
+	if (apic_id >= MAX_LOCAL_APIC) {
+		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+		return;
+	}
+
 	apicid_to_node[apic_id] = node;
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
@@ -339,18 +349,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 
 void __init acpi_numa_arch_fixup(void) {}
 
-int __init acpi_get_nodes(struct bootnode *physnodes)
+#ifdef CONFIG_NUMA_EMU
+void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
+			   unsigned long end)
 {
 	int i;
-	int ret = 0;
 
 	for_each_node_mask(i, nodes_parsed) {
-		physnodes[ret].start = nodes[i].start;
-		physnodes[ret].end = nodes[i].end;
-		ret++;
+		cutoff_node(i, start, end);
+		physnodes[i].start = nodes[i].start;
+		physnodes[i].end = nodes[i].end;
 	}
 }
+#endif /* CONFIG_NUMA_EMU */
 
 /* Use the information discovered above to actually set up the nodes. */
 int __init acpi_scan_nodes(unsigned long start, unsigned long end)
@@ -497,8 +508,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 {
 	int i, j;
 
-	printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
-			"topology.\n");
 	for (i = 0; i < num_nodes; i++) {
 		int nid, pxm;
 
@@ -518,6 +527,17 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		    fake_apicid_to_node[j] == NUMA_NO_NODE)
 			fake_apicid_to_node[j] = i;
 	}
+
+	/*
+	 * If there are apicid-to-node mappings for physical nodes that do not
+	 * have a corresponding emulated node, it should default to a guaranteed
+	 * value.
+	 */
+	for (i = 0; i < MAX_LOCAL_APIC; i++)
+		if (apicid_to_node[i] != NUMA_NO_NODE &&
+		    fake_apicid_to_node[i] == NUMA_NO_NODE)
+			fake_apicid_to_node[i] = 0;
+
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
 	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 12cdbb17ad18..6acc724d5d8f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 
 static void __cpuinit calculate_tlb_offset(void)
 {
-	int cpu, node, nr_node_vecs;
+	int cpu, node, nr_node_vecs, idx = 0;
 	/*
 	 * we are changing tlb_vector_offset for each CPU in runtime, but this
 	 * will not cause inconsistency, as the write is atomic under X86. we
@@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void)
 	nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
 
 	for_each_online_node(node) {
-		int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
+		int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
 			nr_node_vecs;
 		int cpu_offset = 0;
 		for_each_cpu(cpu, cpumask_of_node(node)) {
@@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void)
 			cpu_offset++;
 			cpu_offset = cpu_offset % nr_node_vecs;
 		}
+		idx++;
 	}
 }
 
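
The calculate_tlb_offset() fix matters when online node IDs are sparse: the old code indexed the invalidate-vector blocks by the raw node number, which can run past NUM_INVALIDATE_TLB_VECTORS, while the dense idx counter always yields one in-range block per online node. A quick user-space demonstration (the sparse node layout below is hypothetical):

#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS 8

int main(void)
{
	int online_nodes[] = { 0, 2, 5 };	/* assumed sparse node ids */
	int nr_online_nodes = 3;
	int nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS / nr_online_nodes;

	for (int idx = 0; idx < nr_online_nodes; idx++) {
		int node = online_nodes[idx];
		int old_off = (node % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs;
		int new_off = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs;

		/* node 5 used to get offset 10, past the 8 available vectors */
		printf("node %d: old offset %d, new offset %d\n",
		       node, old_off, new_off);
	}
	return 0;
}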