author     Ingo Molnar <mingo@elte.hu>  2011-02-14 05:55:18 -0500
committer  Ingo Molnar <mingo@elte.hu>  2011-02-14 05:55:18 -0500
commit     d2137d5af4259f50c19addb8246a186c9ffac325 (patch)
tree       2f7e309f9cf8ef2f2698532c226edda38021fe69 /arch/x86/mm
parent     f005fe12b90c5b9fe180a09209a893e09affa8aa (diff)
parent     795abaf1e4e188c4171e3cd3dbb11a9fcacaf505 (diff)
Merge branch 'linus' into x86/bootmem
Conflicts:
	arch/x86/mm/numa_64.c

Merge reason: fix the conflict, update to latest -rc and pick up this
dependent fix from Yinghai:

  e6d2e2b2b1e1: memblock: don't adjust size in memblock_find_base()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--   arch/x86/mm/amdtopology_64.c     87
-rw-r--r--   arch/x86/mm/gup.c                28
-rw-r--r--   arch/x86/mm/init.c                3
-rw-r--r--   arch/x86/mm/init_32.c            22
-rw-r--r--   arch/x86/mm/kmemcheck/error.c     2
-rw-r--r--   arch/x86/mm/numa.c               22
-rw-r--r--   arch/x86/mm/numa_64.c           181
-rw-r--r--   arch/x86/mm/pageattr.c           25
-rw-r--r--   arch/x86/mm/pgtable.c            66
-rw-r--r--   arch/x86/mm/setup_nx.c            2
-rw-r--r--   arch/x86/mm/srat_32.c             2
-rw-r--r--   arch/x86/mm/srat_64.c            36
-rw-r--r--   arch/x86/mm/tlb.c                 5
13 files changed, 387 insertions, 94 deletions
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index ae6ad691a14a..49b334cdd64c 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -27,6 +27,7 @@
 #include <asm/amd_nb.h>
 
 static struct bootnode __initdata nodes[8];
+static unsigned char __initdata nodeids[8];
 static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
 
 static __init int find_northbridge(void)
@@ -66,20 +67,6 @@ static __init void early_get_boot_cpu_id(void)
 	if (smp_found_config)
 		early_get_smp_config();
 #endif
-	early_init_lapic_mapping();
-}
-
-int __init amd_get_nodes(struct bootnode *physnodes)
-{
-	int i;
-	int ret = 0;
-
-	for_each_node_mask(i, nodes_parsed) {
-		physnodes[ret].start = nodes[i].start;
-		physnodes[ret].end = nodes[i].end;
-		ret++;
-	}
-	return ret;
 }
 
 int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
@@ -114,7 +101,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
 		base = read_pci_config(0, nb, 1, 0x40 + i*8);
 		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
 
-		nodeid = limit & 7;
+		nodeids[i] = nodeid = limit & 7;
 		if ((base & 3) == 0) {
 			if (i < numnodes)
 				pr_info("Skipping disabled node %d\n", i);
@@ -194,6 +181,76 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
 	return 0;
 }
 
+#ifdef CONFIG_NUMA_EMU
+static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
+	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
+void __init amd_get_nodes(struct bootnode *physnodes)
+{
+	int i;
+
+	for_each_node_mask(i, nodes_parsed) {
+		physnodes[i].start = nodes[i].start;
+		physnodes[i].end = nodes[i].end;
+	}
+}
+
+static int __init find_node_by_addr(unsigned long addr)
+{
+	int ret = NUMA_NO_NODE;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		if (addr >= nodes[i].start && addr < nodes[i].end) {
+			ret = i;
+			break;
+		}
+	return ret;
+}
+
+/*
+ * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
+ * setup to represent the physical topology but reflect the emulated
+ * environment. For each emulated node, the real node which it appears on is
+ * found and a fake pxm to nid mapping is created which mirrors the actual
+ * locality. node_distance() then represents the correct distances between
+ * emulated nodes by using the fake acpi mappings to pxms.
+ */
+void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
+{
+	unsigned int bits;
+	unsigned int cores;
+	unsigned int apicid_base = 0;
+	int i;
+
+	bits = boot_cpu_data.x86_coreid_bits;
+	cores = 1 << bits;
+	early_get_boot_cpu_id();
+	if (boot_cpu_physical_apicid > 0)
+		apicid_base = boot_cpu_physical_apicid;
+
+	for (i = 0; i < nr_nodes; i++) {
+		int index;
+		int nid;
+		int j;
+
+		nid = find_node_by_addr(nodes[i].start);
+		if (nid == NUMA_NO_NODE)
+			continue;
+
+		index = nodeids[nid] << bits;
+		if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
+			for (j = apicid_base; j < cores + apicid_base; j++)
+				fake_apicid_to_node[index + j] = i;
+#ifdef CONFIG_ACPI_NUMA
+		__acpi_map_pxm_to_node(nid, i);
+#endif
+	}
+	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+}
+#endif /* CONFIG_NUMA_EMU */
+
 int __init amd_scan_nodes(void)
 {
 	unsigned int bits;
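
The amd_fake_nodes() hunk above maps each emulated node back onto the APIC-ID block of the physical node it was carved from: index = nodeids[nid] << bits picks the block of cores = 1 << bits APIC IDs owned by that physical node, and only the first emulated node found on a given physical node claims the block. The stand-alone sketch below illustrates just that index arithmetic; the node layout, core count and array sizes are invented for the example and are not taken from the patch.

    /*
     * User-space sketch (not kernel code) of the apicid-block arithmetic
     * used by amd_fake_nodes().  All values below are made up.
     */
    #include <stdio.h>

    #define MAX_LOCAL_APIC 32
    #define NUMA_NO_NODE   (-1)

    int main(void)
    {
        short fake_apicid_to_node[MAX_LOCAL_APIC];
        unsigned char nodeids[8] = { 0, 1 }; /* hardware node IDs from the northbridge */
        unsigned int bits = 2;               /* x86_coreid_bits: 1 << 2 = 4 cores per node */
        unsigned int cores = 1u << bits;
        unsigned int apicid_base = 0;
        int emu_nid, j;

        for (j = 0; j < MAX_LOCAL_APIC; j++)
            fake_apicid_to_node[j] = NUMA_NO_NODE;

        /* pretend four emulated nodes alternate between physical nodes 0 and 1 */
        for (emu_nid = 0; emu_nid < 4; emu_nid++) {
            int phys = emu_nid & 1;
            int index = nodeids[phys] << bits;

            if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
                for (j = apicid_base; j < (int)(cores + apicid_base); j++)
                    fake_apicid_to_node[index + j] = emu_nid;
        }

        for (j = 0; j < 8; j++)
            printf("apicid %d -> emulated node %d\n", j, fake_apicid_to_node[j]);
        return 0;
    }

With these made-up values, APIC IDs 0-3 end up on the first emulated node carved from physical node 0 and APIC IDs 4-7 on the first one carved from physical node 1.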
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 738e6593799d..dbe34b931374 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -8,6 +8,7 @@
 #include <linux/mm.h>
 #include <linux/vmstat.h>
 #include <linux/highmem.h>
+#include <linux/swap.h>
 
 #include <asm/pgtable.h>
 
@@ -89,6 +90,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 		get_page(page);
+		SetPageReferenced(page);
 		pages[*nr] = page;
 		(*nr)++;
 
@@ -103,6 +105,17 @@ static inline void get_head_page_multiple(struct page *page, int nr)
 	VM_BUG_ON(page != compound_head(page));
 	VM_BUG_ON(page_count(page) == 0);
 	atomic_add(nr, &page->_count);
+	SetPageReferenced(page);
+}
+
+static inline void get_huge_page_tail(struct page *page)
+{
+	/*
+	 * __split_huge_page_refcount() cannot run
+	 * from under us.
+	 */
+	VM_BUG_ON(atomic_read(&page->_count) < 0);
+	atomic_inc(&page->_count);
 }
 
 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
@@ -128,6 +141,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 	do {
 		VM_BUG_ON(compound_head(page) != head);
 		pages[*nr] = page;
+		if (PageTail(page))
+			get_huge_page_tail(page);
 		(*nr)++;
 		page++;
 		refs++;
@@ -148,7 +163,18 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		pmd_t pmd = *pmdp;
 
 		next = pmd_addr_end(addr, end);
-		if (pmd_none(pmd))
+		/*
+		 * The pmd_trans_splitting() check below explains why
+		 * pmdp_splitting_flush has to flush the tlb, to stop
+		 * this gup-fast code from running while we set the
+		 * splitting bit in the pmd. Returning zero will take
+		 * the slow path that will call wait_split_huge_page()
+		 * if the pmd is still in splitting state. gup-fast
+		 * can't because it has irq disabled and
+		 * wait_split_huge_page() would never return as the
+		 * tlb flush IPI wouldn't run.
+		 */
+		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_large(pmd))) {
 			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 35ee75d9061a..b8054e087ead 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -336,8 +336,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	/*
 	 * We just marked the kernel text read only above, now that
 	 * we are going to free part of that, we need to make that
-	 * writeable first.
+	 * writeable and non-executable first.
 	 */
+	set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
 	set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
 
 	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0e969f9f401b..c821074b7f0b 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -45,6 +45,7 @@
 #include <asm/bugs.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/olpc_ofw.h>
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/paravirt.h>
@@ -226,7 +227,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
 
 static inline int is_kernel_text(unsigned long addr)
 {
-	if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
+	if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
 		return 1;
 	return 0;
 }
@@ -715,6 +716,7 @@ void __init paging_init(void)
 	/*
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
+	olpc_dt_build_devicetree();
 	sparse_init();
 	zone_sizes_init();
 }
@@ -912,6 +914,23 @@ void set_kernel_text_ro(void)
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 }
 
+static void mark_nxdata_nx(void)
+{
+	/*
+	 * When this called, init has already been executed and released,
+	 * so everything past _etext sould be NX.
+	 */
+	unsigned long start = PFN_ALIGN(_etext);
+	/*
+	 * This comes from is_kernel_text upper limit. Also HPAGE where used:
+	 */
+	unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
+
+	if (__supported_pte_mask & _PAGE_NX)
+		printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
+	set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
+}
+
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
@@ -946,6 +965,7 @@ void mark_rodata_ro(void)
 	printk(KERN_INFO "Testing CPA: write protecting again\n");
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
+	mark_nxdata_nx();
 }
 #endif
 
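
mark_nxdata_nx() above computes its range from the page-aligned _etext up to __init_end rounded up to the next large-page boundary, since the kernel mapping in that area may use large pages. A quick user-space sketch of that rounding, with made-up addresses and a 4MB large-page size standing in for HPAGE_SIZE:

    /* Arithmetic sketch only; the addresses and page sizes are invented. */
    #include <stdio.h>

    #define PAGE_SIZE    4096UL
    #define HPAGE_SIZE   (4UL << 20)            /* 4MB large pages assumed here */
    #define HPAGE_MASK   (~(HPAGE_SIZE - 1))
    #define PFN_ALIGN(x) (((unsigned long)(x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long etext    = 0xc05f1234UL;  /* hypothetical _etext */
        unsigned long init_end = 0xc08a2000UL;  /* hypothetical __init_end */

        unsigned long start = PFN_ALIGN(etext);
        unsigned long size  = ((init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

        printf("NX range: 0x%lx - 0x%lx (%luk)\n", start, start + size, size >> 10);
        return 0;
    }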
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index af3b6c8a436f..704a37cedddb 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
 	e->trace.entries = e->trace_entries;
 	e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
 	e->trace.skip = 0;
-	save_stack_trace_bp(&e->trace, regs->bp);
+	save_stack_trace_regs(&e->trace, regs);
 
 	/* Round address down to nearest 16 bytes */
 	shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 787c52ca49c3..ebf6d7887a38 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -2,6 +2,28 @@
 #include <linux/topology.h>
 #include <linux/module.h>
 #include <linux/bootmem.h>
+#include <asm/numa.h>
+#include <asm/acpi.h>
+
+int __initdata numa_off;
+
+static __init int numa_setup(char *opt)
+{
+	if (!opt)
+		return -EINVAL;
+	if (!strncmp(opt, "off", 3))
+		numa_off = 1;
+#ifdef CONFIG_NUMA_EMU
+	if (!strncmp(opt, "fake=", 5))
+		numa_emu_cmdline(opt + 5);
+#endif
+#ifdef CONFIG_ACPI_NUMA
+	if (!strncmp(opt, "noacpi", 6))
+		acpi_numa = -1;
+#endif
+	return 0;
+}
+early_param("numa", numa_setup);
 
 /*
  * Which logical CPUs are on which nodes
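
The numa_setup() handler moved into numa.c above is a plain prefix parser for the boot option: everything after "numa=" is matched with strncmp against "off", "fake=" and "noacpi". A user-space sketch of the same dispatch, with the kernel hooks replaced by ordinary variables (numa_emu_cmdline() is stood in for by a string copy):

    /* Sketch of the option parsing; the surrounding globals are placeholders. */
    #include <stdio.h>
    #include <string.h>
    #include <errno.h>

    static int numa_off;
    static int acpi_numa;
    static char fake_cmdline[64];

    static int numa_setup(const char *opt)
    {
        if (!opt)
            return -EINVAL;
        if (!strncmp(opt, "off", 3))
            numa_off = 1;
        if (!strncmp(opt, "fake=", 5))
            snprintf(fake_cmdline, sizeof(fake_cmdline), "%s", opt + 5);
        if (!strncmp(opt, "noacpi", 6))
            acpi_numa = -1;
        return 0;
    }

    int main(void)
    {
        numa_setup("fake=8");   /* as if booted with numa=fake=8   */
        numa_setup("noacpi");   /* as if booted with numa=noacpi   */
        printf("numa_off=%d acpi_numa=%d fake=\"%s\"\n",
               numa_off, acpi_numa, fake_cmdline);
        return 0;
    }

Booting with numa=fake=8, for example, hands the string "8" on to the emulation code via numa_emu_cmdline().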
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7cc26ae0a15d..62cb634b5cf8 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -30,7 +30,6 @@ s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
-int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
 
@@ -260,30 +259,35 @@ void __init numa_init_array(void)
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode physnodes[MAX_NUMNODES] __initdata;
+static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
 static char *cmdline __initdata;
 
+void __init numa_emu_cmdline(char *str)
+{
+	cmdline = str;
+}
+
 static int __init setup_physnodes(unsigned long start, unsigned long end,
 				int acpi, int amd)
 {
-	int nr_nodes = 0;
 	int ret = 0;
 	int i;
 
+	memset(physnodes, 0, sizeof(physnodes));
 #ifdef CONFIG_ACPI_NUMA
 	if (acpi)
-		nr_nodes = acpi_get_nodes(physnodes);
+		acpi_get_nodes(physnodes, start, end);
 #endif
 #ifdef CONFIG_AMD_NUMA
 	if (amd)
-		nr_nodes = amd_get_nodes(physnodes);
+		amd_get_nodes(physnodes);
 #endif
 	/*
 	 * Basic sanity checking on the physical node map: there may be errors
 	 * if the SRAT or AMD code incorrectly reported the topology or the mem=
 	 * kernel parameter is used.
 	 */
-	for (i = 0; i < nr_nodes; i++) {
+	for (i = 0; i < MAX_NUMNODES; i++) {
 		if (physnodes[i].start == physnodes[i].end)
 			continue;
 		if (physnodes[i].start > end) {
@@ -298,17 +302,6 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
 			physnodes[i].start = start;
 		if (physnodes[i].end > end)
 			physnodes[i].end = end;
-	}
-
-	/*
-	 * Remove all nodes that have no memory or were truncated because of the
-	 * limited address range.
-	 */
-	for (i = 0; i < nr_nodes; i++) {
-		if (physnodes[i].start == physnodes[i].end)
-			continue;
-		physnodes[ret].start = physnodes[i].start;
-		physnodes[ret].end = physnodes[i].end;
 		ret++;
 	}
 
@@ -324,6 +317,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
 	return ret;
 }
 
+static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
+{
+	int i;
+
+	BUG_ON(acpi && amd);
+#ifdef CONFIG_ACPI_NUMA
+	if (acpi)
+		acpi_fake_nodes(nodes, nr_nodes);
+#endif
+#ifdef CONFIG_AMD_NUMA
+	if (amd)
+		amd_fake_nodes(nodes, nr_nodes);
+#endif
+	if (!acpi && !amd)
+		for (i = 0; i < nr_cpu_ids; i++)
+			numa_set_node(i, 0);
+}
+
 /*
  * Setups up nid to range from addr to addr + size. If the end
  * boundary is greater than max_addr, then max_addr is used instead.
@@ -352,8 +363,7 @@ static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
  * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
  * to max_addr. The return value is the number of nodes allocated.
 */
-static int __init split_nodes_interleave(u64 addr, u64 max_addr,
-					int nr_phys_nodes, int nr_nodes)
+static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
 {
 	nodemask_t physnode_mask = NODE_MASK_NONE;
 	u64 size;
@@ -384,7 +394,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
 		return -1;
 	}
 
-	for (i = 0; i < nr_phys_nodes; i++)
+	for (i = 0; i < MAX_NUMNODES; i++)
 		if (physnodes[i].start != physnodes[i].end)
 			node_set(i, physnode_mask);
 
@@ -553,11 +563,9 @@ static int __init numa_emulation(unsigned long start_pfn,
 {
 	u64 addr = start_pfn << PAGE_SHIFT;
 	u64 max_addr = last_pfn << PAGE_SHIFT;
-	int num_phys_nodes;
 	int num_nodes;
 	int i;
 
-	num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
 	/*
 	 * If the numa=fake command-line contains a 'M' or 'G', it represents
 	 * the fixed node size. Otherwise, if it is just a single number N,
@@ -572,7 +580,7 @@ static int __init numa_emulation(unsigned long start_pfn,
 		unsigned long n;
 
 		n = simple_strtoul(cmdline, NULL, 0);
-		num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
+		num_nodes = split_nodes_interleave(addr, max_addr, n);
 	}
 
 	if (num_nodes < 0)
@@ -596,7 +604,8 @@ static int __init numa_emulation(unsigned long start_pfn,
 	init_memory_mapping_high();
 	for_each_node_mask(i, node_possible_map)
 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-	acpi_fake_nodes(nodes, num_nodes);
+	setup_physnodes(addr, max_addr, acpi, amd);
+	fake_physnodes(acpi, amd, num_nodes);
 	numa_init_array();
 	return 0;
 }
@@ -611,8 +620,12 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
 	nodes_clear(node_online_map);
 
 #ifdef CONFIG_NUMA_EMU
+	setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
+			acpi, amd);
 	if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
 		return;
+	setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
+			acpi, amd);
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
 #endif
@@ -663,24 +676,6 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
-static __init int numa_setup(char *opt)
-{
-	if (!opt)
-		return -EINVAL;
-	if (!strncmp(opt, "off", 3))
-		numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
-	if (!strncmp(opt, "fake=", 5))
-		cmdline = opt + 5;
-#endif
-#ifdef CONFIG_ACPI_NUMA
-	if (!strncmp(opt, "noacpi", 6))
-		acpi_numa = -1;
-#endif
-	return 0;
-}
-early_param("numa", numa_setup);
-
 #ifdef CONFIG_NUMA
 
 static __init int find_near_online_node(int node)
@@ -769,6 +764,7 @@ void __cpuinit numa_clear_node(int cpu)
 
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
+#ifndef CONFIG_NUMA_EMU
 void __cpuinit numa_add_cpu(int cpu)
 {
 	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
@@ -778,34 +774,115 @@ void __cpuinit numa_remove_cpu(int cpu)
 {
 	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
+#else
+void __cpuinit numa_add_cpu(int cpu)
+{
+	unsigned long addr;
+	u16 apicid;
+	int physnid;
+	int nid = NUMA_NO_NODE;
+
+	apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+	if (apicid != BAD_APICID)
+		nid = apicid_to_node[apicid];
+	if (nid == NUMA_NO_NODE)
+		nid = early_cpu_to_node(cpu);
+	BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
+
+	/*
+	 * Use the starting address of the emulated node to find which physical
+	 * node it is allocated on.
+	 */
+	addr = node_start_pfn(nid) << PAGE_SHIFT;
+	for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
+		if (addr >= physnodes[physnid].start &&
+		    addr < physnodes[physnid].end)
+			break;
+
+	/*
+	 * Map the cpu to each emulated node that is allocated on the physical
+	 * node of the cpu's apic id.
+	 */
+	for_each_online_node(nid) {
+		addr = node_start_pfn(nid) << PAGE_SHIFT;
+		if (addr >= physnodes[physnid].start &&
+		    addr < physnodes[physnid].end)
+			cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+	}
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	int i;
+
+	for_each_online_node(i)
+		cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
+}
+#endif /* !CONFIG_NUMA_EMU */
 
 #else /* CONFIG_DEBUG_PER_CPU_MAPS */
+static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+{
+	int node = early_cpu_to_node(cpu);
+	struct cpumask *mask;
+	char buf[64];
+
+	mask = node_to_cpumask_map[node];
+	if (!mask) {
+		pr_err("node_to_cpumask_map[%i] NULL\n", node);
+		dump_stack();
+		return NULL;
+	}
+
+	cpulist_scnprintf(buf, sizeof(buf), mask);
+	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+		enable ? "numa_add_cpu" : "numa_remove_cpu",
+		cpu, node, buf);
+	return mask;
+}
 
 /*
  * --------- debug versions of the numa functions ---------
 */
+#ifndef CONFIG_NUMA_EMU
 static void __cpuinit numa_set_cpumask(int cpu, int enable)
 {
-	int node = early_cpu_to_node(cpu);
 	struct cpumask *mask;
-	char buf[64];
 
-	mask = node_to_cpumask_map[node];
-	if (mask == NULL) {
-		printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
-		dump_stack();
+	mask = debug_cpumask_set_cpu(cpu, enable);
+	if (!mask)
 		return;
-	}
 
 	if (enable)
 		cpumask_set_cpu(cpu, mask);
 	else
 		cpumask_clear_cpu(cpu, mask);
+}
+#else
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+	int node = early_cpu_to_node(cpu);
+	struct cpumask *mask;
+	int i;
 
-	cpulist_scnprintf(buf, sizeof(buf), mask);
-	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
-		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+	for_each_online_node(i) {
+		unsigned long addr;
+
+		addr = node_start_pfn(i) << PAGE_SHIFT;
+		if (addr < physnodes[node].start ||
+		    addr >= physnodes[node].end)
+			continue;
+		mask = debug_cpumask_set_cpu(cpu, enable);
+		if (!mask)
+			return;
+
+		if (enable)
+			cpumask_set_cpu(cpu, mask);
+		else
+			cpumask_clear_cpu(cpu, mask);
+	}
 }
+#endif /* CONFIG_NUMA_EMU */
 
 void __cpuinit numa_add_cpu(int cpu)
 {
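
With NUMA emulation enabled, the numa_add_cpu() variant added above has to discover which physical node an emulated node lives on; it does so by comparing the emulated node's start address against the physnodes[] ranges filled in by setup_physnodes(). A stand-alone sketch of that range lookup, with invented ranges:

    /* Sketch only; the node ranges and start addresses are made up. */
    #include <stdio.h>

    struct bootnode { unsigned long start, end; };

    #define MAX_NODES 4

    static struct bootnode physnodes[MAX_NODES] = {
        { 0x00000000UL, 0x40000000UL },  /* physical node 0: 0 - 1GB   */
        { 0x40000000UL, 0x80000000UL },  /* physical node 1: 1GB - 2GB */
    };

    static int find_physnode(unsigned long addr)
    {
        int physnid;

        for (physnid = 0; physnid < MAX_NODES; physnid++)
            if (addr >= physnodes[physnid].start && addr < physnodes[physnid].end)
                return physnid;
        return -1;
    }

    int main(void)
    {
        /* start addresses of two hypothetical emulated nodes */
        printf("emulated node at 0x20000000 -> physical node %d\n",
               find_physnode(0x20000000UL));
        printf("emulated node at 0x60000000 -> physical node %d\n",
               find_physnode(0x60000000UL));
        return 0;
    }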
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 532e7933d606..d343b3c81f3c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -13,6 +13,7 @@
 #include <linux/pfn.h>
 #include <linux/percpu.h>
 #include <linux/gfp.h>
+#include <linux/pci.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -260,8 +261,10 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 	 * The BIOS area between 640k and 1Mb needs to be executable for
 	 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
 	 */
-	if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
+#ifdef CONFIG_PCI_BIOS
+	if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_NX;
+#endif
 
 	/*
 	 * The kernel text needs to be executable for obvious reasons
@@ -393,7 +396,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 {
 	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
 	pte_t new_pte, old_pte, *tmp;
-	pgprot_t old_prot, new_prot;
+	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
 	unsigned int level;
 
@@ -438,10 +441,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * We are safe now. Check whether the new pgprot is the same:
 	 */
 	old_pte = *kpte;
-	old_prot = new_prot = pte_pgprot(old_pte);
+	old_prot = new_prot = req_prot = pte_pgprot(old_pte);
 
-	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
-	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
 
 	/*
 	 * old_pte points to the large page base address. So we need
@@ -450,17 +453,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
 	cpa->pfn = pfn;
 
-	new_prot = static_protections(new_prot, address, pfn);
+	new_prot = static_protections(req_prot, address, pfn);
 
 	/*
 	 * We need to check the full range, whether
 	 * static_protection() requires a different pgprot for one of
 	 * the pages in the range we try to preserve:
 	 */
-	addr = address + PAGE_SIZE;
-	pfn++;
-	for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
-		pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
+	addr = address & pmask;
+	pfn = pte_pfn(old_pte);
+	for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
+		pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
 
 		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
 			goto out_unlock;
@@ -483,7 +486,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * that we limited the number of possible pages already to
 	 * the number of pages in the large page.
 	 */
-	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+	if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
 		/*
 		 * The address is aligned and the number of pages
 		 * covers the full page.
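
The last pageattr.c hunk tightens the "can we keep the large page?" test: the request must start on the large-page boundary (address == (address & pmask)) and cover exactly psize >> PAGE_SHIFT small pages. A tiny arithmetic sketch of that check for a 2MB page, with made-up numbers:

    /* Arithmetic sketch; the address and request size are invented. */
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned long psize = 2UL << 20;       /* 2MB PMD-mapped large page      */
        unsigned long pmask = ~(psize - 1);
        unsigned long address = 0x01000000UL;  /* 16MB: 2MB aligned              */
        unsigned long numpages = 512;          /* request spans 512 4k pages = 2MB */

        if (address == (address & pmask) && numpages == (psize >> PAGE_SHIFT))
            printf("request covers the whole large page: no split needed\n");
        else
            printf("partial range: the large page has to be split\n");
        return 0;
    }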
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8be8c7d7bc89..500242d3c96d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -320,6 +320,25 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	return changed;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp,
+			  pmd_t entry, int dirty)
+{
+	int changed = !pmd_same(*pmdp, entry);
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	if (changed && dirty) {
+		*pmdp = entry;
+		pmd_update_defer(vma->vm_mm, address, pmdp);
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	}
+
+	return changed;
+}
+#endif
+
 int ptep_test_and_clear_young(struct vm_area_struct *vma,
 			      unsigned long addr, pte_t *ptep)
 {
@@ -335,6 +354,23 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return ret;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pmd_t *pmdp)
+{
+	int ret = 0;
+
+	if (pmd_young(*pmdp))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 (unsigned long *)pmdp);
+
+	if (ret)
+		pmd_update(vma->vm_mm, addr, pmdp);
+
+	return ret;
+}
+#endif
+
 int ptep_clear_flush_young(struct vm_area_struct *vma,
 			   unsigned long address, pte_t *ptep)
 {
@@ -347,6 +383,36 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
 	return young;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pmd_t *pmdp)
+{
+	int young;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	young = pmdp_test_and_clear_young(vma, address, pmdp);
+	if (young)
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+	return young;
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp)
+{
+	int set;
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	set = !test_and_set_bit(_PAGE_BIT_SPLITTING,
+				(unsigned long *)pmdp);
+	if (set) {
+		pmd_update(vma->vm_mm, address, pmdp);
+		/* need tlb flush only to serialize against gup-fast */
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	}
+}
+#endif
+
 /**
  * reserve_top_address - reserves a hole in the top of kernel address space
  * @reserve - size of hole to reserve
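
pmdp_splitting_flush() above relies on test_and_set_bit() so that only the caller which actually transitions the splitting bit pays for the TLB flush that serializes against gup-fast. A user-space sketch of that "only the first setter flushes" pattern; the GCC __atomic_fetch_or() builtin stands in for the kernel's test_and_set_bit(), and the bit number is made up:

    /* Sketch of the pattern only; _PAGE_BIT_SPLITTING here is a placeholder. */
    #include <stdio.h>

    #define _PAGE_BIT_SPLITTING 9

    static unsigned long pmd_val;

    static int test_and_set_bit(int nr, unsigned long *addr)
    {
        unsigned long mask = 1UL << nr;
        unsigned long old = __atomic_fetch_or(addr, mask, __ATOMIC_SEQ_CST);

        return (old & mask) != 0;
    }

    static void splitting_flush(void)
    {
        int set = !test_and_set_bit(_PAGE_BIT_SPLITTING, &pmd_val);

        if (set)
            printf("bit newly set: flush the TLB to kick gup-fast off the pmd\n");
        else
            printf("bit already set: someone else already did the flush\n");
    }

    int main(void)
    {
        splitting_flush();  /* first caller flushes   */
        splitting_flush();  /* second caller does not */
        return 0;
    }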
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index a3250aa34086..410531d3c292 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -41,7 +41,7 @@ void __init x86_report_nx(void)
 {
 	if (!cpu_has_nx) {
 		printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
-		       "missing in CPU or disabled in BIOS!\n");
+		       "missing in CPU!\n");
 	} else {
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 		if (disable_nx) {
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index a17dffd136c1..ae96e7b8051d 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -59,7 +59,6 @@ static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
 static int __initdata num_memory_chunks; /* total number of memory chunks */
 static u8 __initdata apicid_to_pxm[MAX_APICID];
 
-int numa_off __initdata;
 int acpi_numa __initdata;
 
 static __init void bad_srat(void)
@@ -92,6 +91,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
 	/* mark this node as "seen" in node bitmap */
 	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
 
+	/* don't need to check apic_id here, because it is always 8 bits */
 	apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
 
 	printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 0b961c8bffb4..4c03e13da138 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 	}
 
 	apic_id = pa->apic_id;
+	if (apic_id >= MAX_LOCAL_APIC) {
+		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+		return;
+	}
 	apicid_to_node[apic_id] = node;
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
 	else
 		apic_id = pa->apic_id;
+
+	if (apic_id >= MAX_LOCAL_APIC) {
+		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+		return;
+	}
+
 	apicid_to_node[apic_id] = node;
 	node_set(node, cpu_nodes_parsed);
 	acpi_numa = 1;
@@ -339,18 +349,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 
 void __init acpi_numa_arch_fixup(void) {}
 
-int __init acpi_get_nodes(struct bootnode *physnodes)
+#ifdef CONFIG_NUMA_EMU
+void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
+			   unsigned long end)
 {
 	int i;
-	int ret = 0;
 
 	for_each_node_mask(i, nodes_parsed) {
-		physnodes[ret].start = nodes[i].start;
-		physnodes[ret].end = nodes[i].end;
-		ret++;
+		cutoff_node(i, start, end);
+		physnodes[i].start = nodes[i].start;
+		physnodes[i].end = nodes[i].end;
 	}
-	return ret;
 }
+#endif /* CONFIG_NUMA_EMU */
 
 /* Use the information discovered above to actually set up the nodes. */
 int __init acpi_scan_nodes(unsigned long start, unsigned long end)
@@ -497,8 +508,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 {
 	int i, j;
 
-	printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
-			"topology.\n");
 	for (i = 0; i < num_nodes; i++) {
 		int nid, pxm;
 
@@ -518,6 +527,17 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 		    fake_apicid_to_node[j] == NUMA_NO_NODE)
 			fake_apicid_to_node[j] = i;
 	}
+
+	/*
+	 * If there are apicid-to-node mappings for physical nodes that do not
+	 * have a corresponding emulated node, it should default to a guaranteed
+	 * value.
+	 */
+	for (i = 0; i < MAX_LOCAL_APIC; i++)
+		if (apicid_to_node[i] != NUMA_NO_NODE &&
+		    fake_apicid_to_node[i] == NUMA_NO_NODE)
+			fake_apicid_to_node[i] = 0;
+
 	for (i = 0; i < num_nodes; i++)
 		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
 	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 12cdbb17ad18..6acc724d5d8f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 
 static void __cpuinit calculate_tlb_offset(void)
 {
-	int cpu, node, nr_node_vecs;
+	int cpu, node, nr_node_vecs, idx = 0;
 	/*
 	 * we are changing tlb_vector_offset for each CPU in runtime, but this
 	 * will not cause inconsistency, as the write is atomic under X86. we
@@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void)
 	nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
 
 	for_each_online_node(node) {
-		int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
+		int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
 			nr_node_vecs;
 		int cpu_offset = 0;
 		for_each_cpu(cpu, cpumask_of_node(node)) {
@@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void)
 			cpu_offset++;
 			cpu_offset = cpu_offset % nr_node_vecs;
 		}
+		idx++;
 	}
 }
 
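
The tlb.c change switches node_offset from the node number to a sequential index because online node IDs can be sparse; with the old formula a high node number could push the offset past the invalidate-vector range. A small sketch comparing the two formulas for a sparse node set (the vector count matches the kernel constant, the node layout is invented):

    /* Sketch only; the sparse node numbering below is made up. */
    #include <stdio.h>

    #define NUM_INVALIDATE_TLB_VECTORS 8

    int main(void)
    {
        int online_nodes[] = { 0, 4 };      /* sparse node numbering */
        int nr_online_nodes = 2;
        int nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS / nr_online_nodes;
        int idx;

        for (idx = 0; idx < nr_online_nodes; idx++) {
            int node = online_nodes[idx];
            int old_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs;
            int new_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs;

            printf("node %d: old offset %2d, new offset %2d (valid vectors 0-%d)\n",
                   node, old_offset, new_offset, NUM_INVALIDATE_TLB_VECTORS - 1);
        }
        return 0;
    }

For node 4 the old formula yields offset 16, which is outside the 8 available vectors, while the sequential index keeps every node's offset in range.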