diff options
Diffstat (limited to 'arch/x86_64/mm')
-rw-r--r-- | arch/x86_64/mm/fault.c | 5 | ||||
-rw-r--r-- | arch/x86_64/mm/init.c | 172 | ||||
-rw-r--r-- | arch/x86_64/mm/k8topology.c | 9 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 306 | ||||
-rw-r--r-- | arch/x86_64/mm/pageattr.c | 32 | ||||
-rw-r--r-- | arch/x86_64/mm/srat.c | 8 |
6 files changed, 306 insertions, 226 deletions
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 6ada7231f3ab..de99dba2c515 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c | |||
@@ -585,7 +585,7 @@ do_sigbus: | |||
585 | } | 585 | } |
586 | 586 | ||
587 | DEFINE_SPINLOCK(pgd_lock); | 587 | DEFINE_SPINLOCK(pgd_lock); |
588 | struct page *pgd_list; | 588 | LIST_HEAD(pgd_list); |
589 | 589 | ||
590 | void vmalloc_sync_all(void) | 590 | void vmalloc_sync_all(void) |
591 | { | 591 | { |
@@ -605,8 +605,7 @@ void vmalloc_sync_all(void) | |||
605 | if (pgd_none(*pgd_ref)) | 605 | if (pgd_none(*pgd_ref)) |
606 | continue; | 606 | continue; |
607 | spin_lock(&pgd_lock); | 607 | spin_lock(&pgd_lock); |
608 | for (page = pgd_list; page; | 608 | list_for_each_entry(page, &pgd_list, lru) { |
609 | page = (struct page *)page->index) { | ||
610 | pgd_t *pgd; | 609 | pgd_t *pgd; |
611 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | 610 | pgd = (pgd_t *)page_address(page) + pgd_index(address); |
612 | if (pgd_none(*pgd)) | 611 | if (pgd_none(*pgd)) |
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index ec31534eb104..282b0a8f00ad 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -22,10 +22,12 @@ | |||
22 | #include <linux/bootmem.h> | 22 | #include <linux/bootmem.h> |
23 | #include <linux/proc_fs.h> | 23 | #include <linux/proc_fs.h> |
24 | #include <linux/pci.h> | 24 | #include <linux/pci.h> |
25 | #include <linux/pfn.h> | ||
25 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
26 | #include <linux/dma-mapping.h> | 27 | #include <linux/dma-mapping.h> |
27 | #include <linux/module.h> | 28 | #include <linux/module.h> |
28 | #include <linux/memory_hotplug.h> | 29 | #include <linux/memory_hotplug.h> |
30 | #include <linux/nmi.h> | ||
29 | 31 | ||
30 | #include <asm/processor.h> | 32 | #include <asm/processor.h> |
31 | #include <asm/system.h> | 33 | #include <asm/system.h> |
@@ -46,7 +48,7 @@ | |||
46 | #define Dprintk(x...) | 48 | #define Dprintk(x...) |
47 | #endif | 49 | #endif |
48 | 50 | ||
49 | struct dma_mapping_ops* dma_ops; | 51 | const struct dma_mapping_ops* dma_ops; |
50 | EXPORT_SYMBOL(dma_ops); | 52 | EXPORT_SYMBOL(dma_ops); |
51 | 53 | ||
52 | static unsigned long dma_reserve __initdata; | 54 | static unsigned long dma_reserve __initdata; |
@@ -72,6 +74,11 @@ void show_mem(void) | |||
72 | 74 | ||
73 | for_each_online_pgdat(pgdat) { | 75 | for_each_online_pgdat(pgdat) { |
74 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | 76 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { |
77 | /* this loop can take a while with 256 GB and 4k pages | ||
78 | so update the NMI watchdog */ | ||
79 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { | ||
80 | touch_nmi_watchdog(); | ||
81 | } | ||
75 | page = pfn_to_page(pgdat->node_start_pfn + i); | 82 | page = pfn_to_page(pgdat->node_start_pfn + i); |
76 | total++; | 83 | total++; |
77 | if (PageReserved(page)) | 84 | if (PageReserved(page)) |
@@ -167,23 +174,9 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) | |||
167 | 174 | ||
168 | unsigned long __initdata table_start, table_end; | 175 | unsigned long __initdata table_start, table_end; |
169 | 176 | ||
170 | extern pmd_t temp_boot_pmds[]; | 177 | static __meminit void *alloc_low_page(unsigned long *phys) |
171 | |||
172 | static struct temp_map { | ||
173 | pmd_t *pmd; | ||
174 | void *address; | ||
175 | int allocated; | ||
176 | } temp_mappings[] __initdata = { | ||
177 | { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) }, | ||
178 | { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) }, | ||
179 | {} | ||
180 | }; | ||
181 | |||
182 | static __meminit void *alloc_low_page(int *index, unsigned long *phys) | ||
183 | { | 178 | { |
184 | struct temp_map *ti; | 179 | unsigned long pfn = table_end++; |
185 | int i; | ||
186 | unsigned long pfn = table_end++, paddr; | ||
187 | void *adr; | 180 | void *adr; |
188 | 181 | ||
189 | if (after_bootmem) { | 182 | if (after_bootmem) { |
@@ -194,57 +187,63 @@ static __meminit void *alloc_low_page(int *index, unsigned long *phys) | |||
194 | 187 | ||
195 | if (pfn >= end_pfn) | 188 | if (pfn >= end_pfn) |
196 | panic("alloc_low_page: ran out of memory"); | 189 | panic("alloc_low_page: ran out of memory"); |
197 | for (i = 0; temp_mappings[i].allocated; i++) { | 190 | |
198 | if (!temp_mappings[i].pmd) | 191 | adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE); |
199 | panic("alloc_low_page: ran out of temp mappings"); | ||
200 | } | ||
201 | ti = &temp_mappings[i]; | ||
202 | paddr = (pfn << PAGE_SHIFT) & PMD_MASK; | ||
203 | set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE)); | ||
204 | ti->allocated = 1; | ||
205 | __flush_tlb(); | ||
206 | adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); | ||
207 | memset(adr, 0, PAGE_SIZE); | 192 | memset(adr, 0, PAGE_SIZE); |
208 | *index = i; | 193 | *phys = pfn * PAGE_SIZE; |
209 | *phys = pfn * PAGE_SIZE; | 194 | return adr; |
210 | return adr; | 195 | } |
211 | } | ||
212 | 196 | ||
213 | static __meminit void unmap_low_page(int i) | 197 | static __meminit void unmap_low_page(void *adr) |
214 | { | 198 | { |
215 | struct temp_map *ti; | ||
216 | 199 | ||
217 | if (after_bootmem) | 200 | if (after_bootmem) |
218 | return; | 201 | return; |
219 | 202 | ||
220 | ti = &temp_mappings[i]; | 203 | early_iounmap(adr, PAGE_SIZE); |
221 | set_pmd(ti->pmd, __pmd(0)); | ||
222 | ti->allocated = 0; | ||
223 | } | 204 | } |
224 | 205 | ||
225 | /* Must run before zap_low_mappings */ | 206 | /* Must run before zap_low_mappings */ |
226 | __init void *early_ioremap(unsigned long addr, unsigned long size) | 207 | __init void *early_ioremap(unsigned long addr, unsigned long size) |
227 | { | 208 | { |
228 | unsigned long map = round_down(addr, LARGE_PAGE_SIZE); | 209 | unsigned long vaddr; |
229 | 210 | pmd_t *pmd, *last_pmd; | |
230 | /* actually usually some more */ | 211 | int i, pmds; |
231 | if (size >= LARGE_PAGE_SIZE) { | 212 | |
232 | return NULL; | 213 | pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; |
214 | vaddr = __START_KERNEL_map; | ||
215 | pmd = level2_kernel_pgt; | ||
216 | last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; | ||
217 | for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { | ||
218 | for (i = 0; i < pmds; i++) { | ||
219 | if (pmd_present(pmd[i])) | ||
220 | goto next; | ||
221 | } | ||
222 | vaddr += addr & ~PMD_MASK; | ||
223 | addr &= PMD_MASK; | ||
224 | for (i = 0; i < pmds; i++, addr += PMD_SIZE) | ||
225 | set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE)); | ||
226 | __flush_tlb(); | ||
227 | return (void *)vaddr; | ||
228 | next: | ||
229 | ; | ||
233 | } | 230 | } |
234 | set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); | 231 | printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); |
235 | map += LARGE_PAGE_SIZE; | 232 | return NULL; |
236 | set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); | ||
237 | __flush_tlb(); | ||
238 | return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); | ||
239 | } | 233 | } |
240 | 234 | ||
241 | /* To avoid virtual aliases later */ | 235 | /* To avoid virtual aliases later */ |
242 | __init void early_iounmap(void *addr, unsigned long size) | 236 | __init void early_iounmap(void *addr, unsigned long size) |
243 | { | 237 | { |
244 | if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) | 238 | unsigned long vaddr; |
245 | printk("early_iounmap: bad address %p\n", addr); | 239 | pmd_t *pmd; |
246 | set_pmd(temp_mappings[0].pmd, __pmd(0)); | 240 | int i, pmds; |
247 | set_pmd(temp_mappings[1].pmd, __pmd(0)); | 241 | |
242 | vaddr = (unsigned long)addr; | ||
243 | pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; | ||
244 | pmd = level2_kernel_pgt + pmd_index(vaddr); | ||
245 | for (i = 0; i < pmds; i++) | ||
246 | pmd_clear(pmd + i); | ||
248 | __flush_tlb(); | 247 | __flush_tlb(); |
249 | } | 248 | } |
250 | 249 | ||
@@ -289,7 +288,6 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne | |||
289 | 288 | ||
290 | 289 | ||
291 | for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { | 290 | for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { |
292 | int map; | ||
293 | unsigned long pmd_phys; | 291 | unsigned long pmd_phys; |
294 | pud_t *pud = pud_page + pud_index(addr); | 292 | pud_t *pud = pud_page + pud_index(addr); |
295 | pmd_t *pmd; | 293 | pmd_t *pmd; |
@@ -307,12 +305,12 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne | |||
307 | continue; | 305 | continue; |
308 | } | 306 | } |
309 | 307 | ||
310 | pmd = alloc_low_page(&map, &pmd_phys); | 308 | pmd = alloc_low_page(&pmd_phys); |
311 | spin_lock(&init_mm.page_table_lock); | 309 | spin_lock(&init_mm.page_table_lock); |
312 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); | 310 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); |
313 | phys_pmd_init(pmd, addr, end); | 311 | phys_pmd_init(pmd, addr, end); |
314 | spin_unlock(&init_mm.page_table_lock); | 312 | spin_unlock(&init_mm.page_table_lock); |
315 | unmap_low_page(map); | 313 | unmap_low_page(pmd); |
316 | } | 314 | } |
317 | __flush_tlb(); | 315 | __flush_tlb(); |
318 | } | 316 | } |
@@ -364,7 +362,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
364 | end = (unsigned long)__va(end); | 362 | end = (unsigned long)__va(end); |
365 | 363 | ||
366 | for (; start < end; start = next) { | 364 | for (; start < end; start = next) { |
367 | int map; | ||
368 | unsigned long pud_phys; | 365 | unsigned long pud_phys; |
369 | pgd_t *pgd = pgd_offset_k(start); | 366 | pgd_t *pgd = pgd_offset_k(start); |
370 | pud_t *pud; | 367 | pud_t *pud; |
@@ -372,7 +369,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
372 | if (after_bootmem) | 369 | if (after_bootmem) |
373 | pud = pud_offset(pgd, start & PGDIR_MASK); | 370 | pud = pud_offset(pgd, start & PGDIR_MASK); |
374 | else | 371 | else |
375 | pud = alloc_low_page(&map, &pud_phys); | 372 | pud = alloc_low_page(&pud_phys); |
376 | 373 | ||
377 | next = start + PGDIR_SIZE; | 374 | next = start + PGDIR_SIZE; |
378 | if (next > end) | 375 | if (next > end) |
@@ -380,7 +377,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
380 | phys_pud_init(pud, __pa(start), __pa(next)); | 377 | phys_pud_init(pud, __pa(start), __pa(next)); |
381 | if (!after_bootmem) | 378 | if (!after_bootmem) |
382 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | 379 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); |
383 | unmap_low_page(map); | 380 | unmap_low_page(pud); |
384 | } | 381 | } |
385 | 382 | ||
386 | if (!after_bootmem) | 383 | if (!after_bootmem) |
@@ -388,21 +385,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
388 | __flush_tlb_all(); | 385 | __flush_tlb_all(); |
389 | } | 386 | } |
390 | 387 | ||
391 | void __cpuinit zap_low_mappings(int cpu) | ||
392 | { | ||
393 | if (cpu == 0) { | ||
394 | pgd_t *pgd = pgd_offset_k(0UL); | ||
395 | pgd_clear(pgd); | ||
396 | } else { | ||
397 | /* | ||
398 | * For AP's, zap the low identity mappings by changing the cr3 | ||
399 | * to init_level4_pgt and doing local flush tlb all | ||
400 | */ | ||
401 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
402 | } | ||
403 | __flush_tlb_all(); | ||
404 | } | ||
405 | |||
406 | #ifndef CONFIG_NUMA | 388 | #ifndef CONFIG_NUMA |
407 | void __init paging_init(void) | 389 | void __init paging_init(void) |
408 | { | 390 | { |
@@ -579,15 +561,6 @@ void __init mem_init(void) | |||
579 | reservedpages << (PAGE_SHIFT-10), | 561 | reservedpages << (PAGE_SHIFT-10), |
580 | datasize >> 10, | 562 | datasize >> 10, |
581 | initsize >> 10); | 563 | initsize >> 10); |
582 | |||
583 | #ifdef CONFIG_SMP | ||
584 | /* | ||
585 | * Sync boot_level4_pgt mappings with the init_level4_pgt | ||
586 | * except for the low identity mappings which are already zapped | ||
587 | * in init_level4_pgt. This sync-up is essential for AP's bringup | ||
588 | */ | ||
589 | memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); | ||
590 | #endif | ||
591 | } | 564 | } |
592 | 565 | ||
593 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 566 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
@@ -597,37 +570,44 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
597 | if (begin >= end) | 570 | if (begin >= end) |
598 | return; | 571 | return; |
599 | 572 | ||
600 | printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); | 573 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
601 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | 574 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
602 | ClearPageReserved(virt_to_page(addr)); | 575 | struct page *page = pfn_to_page(addr >> PAGE_SHIFT); |
603 | init_page_count(virt_to_page(addr)); | 576 | ClearPageReserved(page); |
604 | memset((void *)(addr & ~(PAGE_SIZE-1)), | 577 | init_page_count(page); |
605 | POISON_FREE_INITMEM, PAGE_SIZE); | 578 | memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE); |
606 | free_page(addr); | 579 | if (addr >= __START_KERNEL_map) |
580 | change_page_attr_addr(addr, 1, __pgprot(0)); | ||
581 | __free_page(page); | ||
607 | totalram_pages++; | 582 | totalram_pages++; |
608 | } | 583 | } |
584 | if (addr > __START_KERNEL_map) | ||
585 | global_flush_tlb(); | ||
609 | } | 586 | } |
610 | 587 | ||
611 | void free_initmem(void) | 588 | void free_initmem(void) |
612 | { | 589 | { |
613 | memset(__initdata_begin, POISON_FREE_INITDATA, | ||
614 | __initdata_end - __initdata_begin); | ||
615 | free_init_pages("unused kernel memory", | 590 | free_init_pages("unused kernel memory", |
616 | (unsigned long)(&__init_begin), | 591 | __pa_symbol(&__init_begin), |
617 | (unsigned long)(&__init_end)); | 592 | __pa_symbol(&__init_end)); |
618 | } | 593 | } |
619 | 594 | ||
620 | #ifdef CONFIG_DEBUG_RODATA | 595 | #ifdef CONFIG_DEBUG_RODATA |
621 | 596 | ||
622 | void mark_rodata_ro(void) | 597 | void mark_rodata_ro(void) |
623 | { | 598 | { |
624 | unsigned long addr = (unsigned long)__start_rodata; | 599 | unsigned long start = PFN_ALIGN(__va(__pa_symbol(&_stext))), size; |
625 | 600 | ||
626 | for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) | 601 | #ifdef CONFIG_HOTPLUG_CPU |
627 | change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); | 602 | /* It must still be possible to apply SMP alternatives. */ |
603 | if (num_possible_cpus() > 1) | ||
604 | start = PFN_ALIGN(__va(__pa_symbol(&_etext))); | ||
605 | #endif | ||
606 | size = (unsigned long)__va(__pa_symbol(&__end_rodata)) - start; | ||
607 | change_page_attr_addr(start, size >> PAGE_SHIFT, PAGE_KERNEL_RO); | ||
628 | 608 | ||
629 | printk ("Write protecting the kernel read-only data: %luk\n", | 609 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
630 | (__end_rodata - __start_rodata) >> 10); | 610 | size >> 10); |
631 | 611 | ||
632 | /* | 612 | /* |
633 | * change_page_attr_addr() requires a global_flush_tlb() call after it. | 613 | * change_page_attr_addr() requires a global_flush_tlb() call after it. |
@@ -642,7 +622,7 @@ void mark_rodata_ro(void) | |||
642 | #ifdef CONFIG_BLK_DEV_INITRD | 622 | #ifdef CONFIG_BLK_DEV_INITRD |
643 | void free_initrd_mem(unsigned long start, unsigned long end) | 623 | void free_initrd_mem(unsigned long start, unsigned long end) |
644 | { | 624 | { |
645 | free_init_pages("initrd memory", start, end); | 625 | free_init_pages("initrd memory", __pa(start), __pa(end)); |
646 | } | 626 | } |
647 | #endif | 627 | #endif |
648 | 628 | ||
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index b5b8dba28b4e..f983c75825d0 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c | |||
@@ -49,11 +49,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
49 | int found = 0; | 49 | int found = 0; |
50 | u32 reg; | 50 | u32 reg; |
51 | unsigned numnodes; | 51 | unsigned numnodes; |
52 | nodemask_t nodes_parsed; | ||
53 | unsigned dualcore = 0; | 52 | unsigned dualcore = 0; |
54 | 53 | ||
55 | nodes_clear(nodes_parsed); | ||
56 | |||
57 | if (!early_pci_allowed()) | 54 | if (!early_pci_allowed()) |
58 | return -1; | 55 | return -1; |
59 | 56 | ||
@@ -65,6 +62,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
65 | 62 | ||
66 | reg = read_pci_config(0, nb, 0, 0x60); | 63 | reg = read_pci_config(0, nb, 0, 0x60); |
67 | numnodes = ((reg >> 4) & 0xF) + 1; | 64 | numnodes = ((reg >> 4) & 0xF) + 1; |
65 | if (numnodes <= 1) | ||
66 | return -1; | ||
68 | 67 | ||
69 | printk(KERN_INFO "Number of nodes %d\n", numnodes); | 68 | printk(KERN_INFO "Number of nodes %d\n", numnodes); |
70 | 69 | ||
@@ -102,7 +101,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
102 | nodeid, (base>>8)&3, (limit>>8) & 3); | 101 | nodeid, (base>>8)&3, (limit>>8) & 3); |
103 | return -1; | 102 | return -1; |
104 | } | 103 | } |
105 | if (node_isset(nodeid, nodes_parsed)) { | 104 | if (node_isset(nodeid, node_possible_map)) { |
106 | printk(KERN_INFO "Node %d already present. Skipping\n", | 105 | printk(KERN_INFO "Node %d already present. Skipping\n", |
107 | nodeid); | 106 | nodeid); |
108 | continue; | 107 | continue; |
@@ -155,7 +154,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
155 | 154 | ||
156 | prevbase = base; | 155 | prevbase = base; |
157 | 156 | ||
158 | node_set(nodeid, nodes_parsed); | 157 | node_set(nodeid, node_possible_map); |
159 | } | 158 | } |
160 | 159 | ||
161 | if (!found) | 160 | if (!found) |
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 41b8fb069924..51548947ad3b 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -273,125 +273,213 @@ void __init numa_init_array(void) | |||
273 | 273 | ||
274 | #ifdef CONFIG_NUMA_EMU | 274 | #ifdef CONFIG_NUMA_EMU |
275 | /* Numa emulation */ | 275 | /* Numa emulation */ |
276 | int numa_fake __initdata = 0; | 276 | #define E820_ADDR_HOLE_SIZE(start, end) \ |
277 | (e820_hole_size((start) >> PAGE_SHIFT, (end) >> PAGE_SHIFT) << \ | ||
278 | PAGE_SHIFT) | ||
279 | char *cmdline __initdata; | ||
277 | 280 | ||
278 | /* | 281 | /* |
279 | * This function is used to find out if the start and end correspond to | 282 | * Setups up nid to range from addr to addr + size. If the end boundary is |
280 | * different zones. | 283 | * greater than max_addr, then max_addr is used instead. The return value is 0 |
284 | * if there is additional memory left for allocation past addr and -1 otherwise. | ||
285 | * addr is adjusted to be at the end of the node. | ||
281 | */ | 286 | */ |
282 | int zone_cross_over(unsigned long start, unsigned long end) | 287 | static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, |
288 | u64 size, u64 max_addr) | ||
283 | { | 289 | { |
284 | if ((start < (MAX_DMA32_PFN << PAGE_SHIFT)) && | 290 | int ret = 0; |
285 | (end >= (MAX_DMA32_PFN << PAGE_SHIFT))) | 291 | nodes[nid].start = *addr; |
286 | return 1; | 292 | *addr += size; |
287 | return 0; | 293 | if (*addr >= max_addr) { |
294 | *addr = max_addr; | ||
295 | ret = -1; | ||
296 | } | ||
297 | nodes[nid].end = *addr; | ||
298 | node_set(nid, node_possible_map); | ||
299 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, | ||
300 | nodes[nid].start, nodes[nid].end, | ||
301 | (nodes[nid].end - nodes[nid].start) >> 20); | ||
302 | return ret; | ||
288 | } | 303 | } |
289 | 304 | ||
290 | static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | 305 | /* |
306 | * Splits num_nodes nodes up equally starting at node_start. The return value | ||
307 | * is the number of nodes split up and addr is adjusted to be at the end of the | ||
308 | * last node allocated. | ||
309 | */ | ||
310 | static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | ||
311 | u64 max_addr, int node_start, | ||
312 | int num_nodes) | ||
291 | { | 313 | { |
292 | int i, big; | 314 | unsigned int big; |
293 | struct bootnode nodes[MAX_NUMNODES]; | 315 | u64 size; |
294 | unsigned long sz, old_sz; | 316 | int i; |
295 | unsigned long hole_size; | ||
296 | unsigned long start, end; | ||
297 | unsigned long max_addr = (end_pfn << PAGE_SHIFT); | ||
298 | |||
299 | start = (start_pfn << PAGE_SHIFT); | ||
300 | hole_size = e820_hole_size(start, max_addr); | ||
301 | sz = (max_addr - start - hole_size) / numa_fake; | ||
302 | |||
303 | /* Kludge needed for the hash function */ | ||
304 | |||
305 | old_sz = sz; | ||
306 | /* | ||
307 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | ||
308 | */ | ||
309 | sz &= FAKE_NODE_MIN_HASH_MASK; | ||
310 | 317 | ||
318 | if (num_nodes <= 0) | ||
319 | return -1; | ||
320 | if (num_nodes > MAX_NUMNODES) | ||
321 | num_nodes = MAX_NUMNODES; | ||
322 | size = (max_addr - *addr - E820_ADDR_HOLE_SIZE(*addr, max_addr)) / | ||
323 | num_nodes; | ||
311 | /* | 324 | /* |
312 | * We ensure that each node is at least 64MB big. Smaller than this | 325 | * Calculate the number of big nodes that can be allocated as a result |
313 | * size can cause VM hiccups. | 326 | * of consolidating the leftovers. |
314 | */ | 327 | */ |
315 | if (sz == 0) { | 328 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) / |
316 | printk(KERN_INFO "Not enough memory for %d nodes. Reducing " | 329 | FAKE_NODE_MIN_SIZE; |
317 | "the number of nodes\n", numa_fake); | 330 | |
318 | numa_fake = (max_addr - start - hole_size) / FAKE_NODE_MIN_SIZE; | 331 | /* Round down to nearest FAKE_NODE_MIN_SIZE. */ |
319 | printk(KERN_INFO "Number of fake nodes will be = %d\n", | 332 | size &= FAKE_NODE_MIN_HASH_MASK; |
320 | numa_fake); | 333 | if (!size) { |
321 | sz = FAKE_NODE_MIN_SIZE; | 334 | printk(KERN_ERR "Not enough memory for each node. " |
335 | "NUMA emulation disabled.\n"); | ||
336 | return -1; | ||
322 | } | 337 | } |
323 | /* | 338 | |
324 | * Find out how many nodes can get an extra NODE_MIN_SIZE granule. | 339 | for (i = node_start; i < num_nodes + node_start; i++) { |
325 | * This logic ensures the extra memory gets distributed among as many | 340 | u64 end = *addr + size; |
326 | * nodes as possible (as compared to one single node getting all that | ||
327 | * extra memory. | ||
328 | */ | ||
329 | big = ((old_sz - sz) * numa_fake) / FAKE_NODE_MIN_SIZE; | ||
330 | printk(KERN_INFO "Fake node Size: %luMB hole_size: %luMB big nodes: " | ||
331 | "%d\n", | ||
332 | (sz >> 20), (hole_size >> 20), big); | ||
333 | memset(&nodes,0,sizeof(nodes)); | ||
334 | end = start; | ||
335 | for (i = 0; i < numa_fake; i++) { | ||
336 | /* | ||
337 | * In case we are not able to allocate enough memory for all | ||
338 | * the nodes, we reduce the number of fake nodes. | ||
339 | */ | ||
340 | if (end >= max_addr) { | ||
341 | numa_fake = i - 1; | ||
342 | break; | ||
343 | } | ||
344 | start = nodes[i].start = end; | ||
345 | /* | ||
346 | * Final node can have all the remaining memory. | ||
347 | */ | ||
348 | if (i == numa_fake-1) | ||
349 | sz = max_addr - start; | ||
350 | end = nodes[i].start + sz; | ||
351 | /* | ||
352 | * Fir "big" number of nodes get extra granule. | ||
353 | */ | ||
354 | if (i < big) | 341 | if (i < big) |
355 | end += FAKE_NODE_MIN_SIZE; | 342 | end += FAKE_NODE_MIN_SIZE; |
356 | /* | 343 | /* |
357 | * Iterate over the range to ensure that this node gets at | 344 | * The final node can have the remaining system RAM. Other |
358 | * least sz amount of RAM (excluding holes) | 345 | * nodes receive roughly the same amount of available pages. |
359 | */ | 346 | */ |
360 | while ((end - start - e820_hole_size(start, end)) < sz) { | 347 | if (i == num_nodes + node_start - 1) |
361 | end += FAKE_NODE_MIN_SIZE; | 348 | end = max_addr; |
362 | if (end >= max_addr) | 349 | else |
363 | break; | 350 | while (end - *addr - E820_ADDR_HOLE_SIZE(*addr, end) < |
351 | size) { | ||
352 | end += FAKE_NODE_MIN_SIZE; | ||
353 | if (end > max_addr) { | ||
354 | end = max_addr; | ||
355 | break; | ||
356 | } | ||
357 | } | ||
358 | if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0) | ||
359 | break; | ||
360 | } | ||
361 | return i - node_start + 1; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Splits the remaining system RAM into chunks of size. The remaining memory is | ||
366 | * always assigned to a final node and can be asymmetric. Returns the number of | ||
367 | * nodes split. | ||
368 | */ | ||
369 | static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | ||
370 | u64 max_addr, int node_start, u64 size) | ||
371 | { | ||
372 | int i = node_start; | ||
373 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
374 | while (!setup_node_range(i++, nodes, addr, size, max_addr)) | ||
375 | ; | ||
376 | return i - node_start; | ||
377 | } | ||
378 | |||
379 | /* | ||
380 | * Sets up the system RAM area from start_pfn to end_pfn according to the | ||
381 | * numa=fake command-line option. | ||
382 | */ | ||
383 | static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | ||
384 | { | ||
385 | struct bootnode nodes[MAX_NUMNODES]; | ||
386 | u64 addr = start_pfn << PAGE_SHIFT; | ||
387 | u64 max_addr = end_pfn << PAGE_SHIFT; | ||
388 | int num_nodes = 0; | ||
389 | int coeff_flag; | ||
390 | int coeff = -1; | ||
391 | int num = 0; | ||
392 | u64 size; | ||
393 | int i; | ||
394 | |||
395 | memset(&nodes, 0, sizeof(nodes)); | ||
396 | /* | ||
397 | * If the numa=fake command-line is just a single number N, split the | ||
398 | * system RAM into N fake nodes. | ||
399 | */ | ||
400 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | ||
401 | num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, | ||
402 | simple_strtol(cmdline, NULL, 0)); | ||
403 | if (num_nodes < 0) | ||
404 | return num_nodes; | ||
405 | goto out; | ||
406 | } | ||
407 | |||
408 | /* Parse the command line. */ | ||
409 | for (coeff_flag = 0; ; cmdline++) { | ||
410 | if (*cmdline && isdigit(*cmdline)) { | ||
411 | num = num * 10 + *cmdline - '0'; | ||
412 | continue; | ||
364 | } | 413 | } |
365 | /* | 414 | if (*cmdline == '*') { |
366 | * Look at the next node to make sure there is some real memory | 415 | if (num > 0) |
367 | * to map. Bad things happen when the only memory present | 416 | coeff = num; |
368 | * in a zone on a fake node is IO hole. | 417 | coeff_flag = 1; |
369 | */ | 418 | } |
370 | while (e820_hole_size(end, end + FAKE_NODE_MIN_SIZE) > 0) { | 419 | if (!*cmdline || *cmdline == ',') { |
371 | if (zone_cross_over(start, end + sz)) { | 420 | if (!coeff_flag) |
372 | end = (MAX_DMA32_PFN << PAGE_SHIFT); | 421 | coeff = 1; |
422 | /* | ||
423 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | ||
424 | * Command-line coefficients are in megabytes. | ||
425 | */ | ||
426 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
427 | if (size) | ||
428 | for (i = 0; i < coeff; i++, num_nodes++) | ||
429 | if (setup_node_range(num_nodes, nodes, | ||
430 | &addr, size, max_addr) < 0) | ||
431 | goto done; | ||
432 | if (!*cmdline) | ||
373 | break; | 433 | break; |
374 | } | 434 | coeff_flag = 0; |
375 | if (end >= max_addr) | 435 | coeff = -1; |
436 | } | ||
437 | num = 0; | ||
438 | } | ||
439 | done: | ||
440 | if (!num_nodes) | ||
441 | return -1; | ||
442 | /* Fill remainder of system RAM, if appropriate. */ | ||
443 | if (addr < max_addr) { | ||
444 | if (coeff_flag && coeff < 0) { | ||
445 | /* Split remaining nodes into num-sized chunks */ | ||
446 | num_nodes += split_nodes_by_size(nodes, &addr, max_addr, | ||
447 | num_nodes, num); | ||
448 | goto out; | ||
449 | } | ||
450 | switch (*(cmdline - 1)) { | ||
451 | case '*': | ||
452 | /* Split remaining nodes into coeff chunks */ | ||
453 | if (coeff <= 0) | ||
376 | break; | 454 | break; |
377 | end += FAKE_NODE_MIN_SIZE; | 455 | num_nodes += split_nodes_equally(nodes, &addr, max_addr, |
456 | num_nodes, coeff); | ||
457 | break; | ||
458 | case ',': | ||
459 | /* Do not allocate remaining system RAM */ | ||
460 | break; | ||
461 | default: | ||
462 | /* Give one final node */ | ||
463 | setup_node_range(num_nodes, nodes, &addr, | ||
464 | max_addr - addr, max_addr); | ||
465 | num_nodes++; | ||
378 | } | 466 | } |
379 | if (end > max_addr) | 467 | } |
380 | end = max_addr; | 468 | out: |
381 | nodes[i].end = end; | 469 | memnode_shift = compute_hash_shift(nodes, num_nodes); |
382 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", | 470 | if (memnode_shift < 0) { |
383 | i, | 471 | memnode_shift = 0; |
384 | nodes[i].start, nodes[i].end, | 472 | printk(KERN_ERR "No NUMA hash function found. NUMA emulation " |
385 | (nodes[i].end - nodes[i].start) >> 20); | 473 | "disabled.\n"); |
386 | node_set_online(i); | 474 | return -1; |
387 | } | 475 | } |
388 | memnode_shift = compute_hash_shift(nodes, numa_fake); | 476 | |
389 | if (memnode_shift < 0) { | 477 | /* |
390 | memnode_shift = 0; | 478 | * We need to vacate all active ranges that may have been registered by |
391 | printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); | 479 | * SRAT. |
392 | return -1; | 480 | */ |
393 | } | 481 | remove_all_active_ranges(); |
394 | for_each_online_node(i) { | 482 | for_each_node_mask(i, node_possible_map) { |
395 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | 483 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, |
396 | nodes[i].end >> PAGE_SHIFT); | 484 | nodes[i].end >> PAGE_SHIFT); |
397 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 485 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
@@ -399,26 +487,32 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | |||
399 | numa_init_array(); | 487 | numa_init_array(); |
400 | return 0; | 488 | return 0; |
401 | } | 489 | } |
402 | #endif | 490 | #undef E820_ADDR_HOLE_SIZE |
491 | #endif /* CONFIG_NUMA_EMU */ | ||
403 | 492 | ||
404 | void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 493 | void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) |
405 | { | 494 | { |
406 | int i; | 495 | int i; |
407 | 496 | ||
497 | nodes_clear(node_possible_map); | ||
498 | |||
408 | #ifdef CONFIG_NUMA_EMU | 499 | #ifdef CONFIG_NUMA_EMU |
409 | if (numa_fake && !numa_emulation(start_pfn, end_pfn)) | 500 | if (cmdline && !numa_emulation(start_pfn, end_pfn)) |
410 | return; | 501 | return; |
502 | nodes_clear(node_possible_map); | ||
411 | #endif | 503 | #endif |
412 | 504 | ||
413 | #ifdef CONFIG_ACPI_NUMA | 505 | #ifdef CONFIG_ACPI_NUMA |
414 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 506 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, |
415 | end_pfn << PAGE_SHIFT)) | 507 | end_pfn << PAGE_SHIFT)) |
416 | return; | 508 | return; |
509 | nodes_clear(node_possible_map); | ||
417 | #endif | 510 | #endif |
418 | 511 | ||
419 | #ifdef CONFIG_K8_NUMA | 512 | #ifdef CONFIG_K8_NUMA |
420 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) | 513 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) |
421 | return; | 514 | return; |
515 | nodes_clear(node_possible_map); | ||
422 | #endif | 516 | #endif |
423 | printk(KERN_INFO "%s\n", | 517 | printk(KERN_INFO "%s\n", |
424 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | 518 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); |
@@ -432,6 +526,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
432 | memnodemap[0] = 0; | 526 | memnodemap[0] = 0; |
433 | nodes_clear(node_online_map); | 527 | nodes_clear(node_online_map); |
434 | node_set_online(0); | 528 | node_set_online(0); |
529 | node_set(0, node_possible_map); | ||
435 | for (i = 0; i < NR_CPUS; i++) | 530 | for (i = 0; i < NR_CPUS; i++) |
436 | numa_set_node(i, 0); | 531 | numa_set_node(i, 0); |
437 | node_to_cpumask[0] = cpumask_of_cpu(0); | 532 | node_to_cpumask[0] = cpumask_of_cpu(0); |
@@ -486,11 +581,8 @@ static __init int numa_setup(char *opt) | |||
486 | if (!strncmp(opt,"off",3)) | 581 | if (!strncmp(opt,"off",3)) |
487 | numa_off = 1; | 582 | numa_off = 1; |
488 | #ifdef CONFIG_NUMA_EMU | 583 | #ifdef CONFIG_NUMA_EMU |
489 | if(!strncmp(opt, "fake=", 5)) { | 584 | if (!strncmp(opt, "fake=", 5)) |
490 | numa_fake = simple_strtoul(opt+5,NULL,0); ; | 585 | cmdline = opt + 5; |
491 | if (numa_fake >= MAX_NUMNODES) | ||
492 | numa_fake = MAX_NUMNODES; | ||
493 | } | ||
494 | #endif | 586 | #endif |
495 | #ifdef CONFIG_ACPI_NUMA | 587 | #ifdef CONFIG_ACPI_NUMA |
496 | if (!strncmp(opt,"noacpi",6)) | 588 | if (!strncmp(opt,"noacpi",6)) |
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 081409aa3452..bf4aa8dd4254 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c | |||
@@ -51,7 +51,6 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
51 | SetPagePrivate(base); | 51 | SetPagePrivate(base); |
52 | page_private(base) = 0; | 52 | page_private(base) = 0; |
53 | 53 | ||
54 | address = __pa(address); | ||
55 | addr = address & LARGE_PAGE_MASK; | 54 | addr = address & LARGE_PAGE_MASK; |
56 | pbase = (pte_t *)page_address(base); | 55 | pbase = (pte_t *)page_address(base); |
57 | for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { | 56 | for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { |
@@ -101,13 +100,12 @@ static inline void save_page(struct page *fpage) | |||
101 | * No more special protections in this 2/4MB area - revert to a | 100 | * No more special protections in this 2/4MB area - revert to a |
102 | * large page again. | 101 | * large page again. |
103 | */ | 102 | */ |
104 | static void revert_page(unsigned long address, pgprot_t ref_prot) | 103 | static void revert_page(unsigned long address, unsigned long pfn, pgprot_t ref_prot) |
105 | { | 104 | { |
106 | pgd_t *pgd; | 105 | pgd_t *pgd; |
107 | pud_t *pud; | 106 | pud_t *pud; |
108 | pmd_t *pmd; | 107 | pmd_t *pmd; |
109 | pte_t large_pte; | 108 | pte_t large_pte; |
110 | unsigned long pfn; | ||
111 | 109 | ||
112 | pgd = pgd_offset_k(address); | 110 | pgd = pgd_offset_k(address); |
113 | BUG_ON(pgd_none(*pgd)); | 111 | BUG_ON(pgd_none(*pgd)); |
@@ -115,7 +113,6 @@ static void revert_page(unsigned long address, pgprot_t ref_prot) | |||
115 | BUG_ON(pud_none(*pud)); | 113 | BUG_ON(pud_none(*pud)); |
116 | pmd = pmd_offset(pud, address); | 114 | pmd = pmd_offset(pud, address); |
117 | BUG_ON(pmd_val(*pmd) & _PAGE_PSE); | 115 | BUG_ON(pmd_val(*pmd) & _PAGE_PSE); |
118 | pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; | ||
119 | large_pte = pfn_pte(pfn, ref_prot); | 116 | large_pte = pfn_pte(pfn, ref_prot); |
120 | large_pte = pte_mkhuge(large_pte); | 117 | large_pte = pte_mkhuge(large_pte); |
121 | set_pte((pte_t *)pmd, large_pte); | 118 | set_pte((pte_t *)pmd, large_pte); |
@@ -141,7 +138,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
141 | */ | 138 | */ |
142 | struct page *split; | 139 | struct page *split; |
143 | ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); | 140 | ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); |
144 | split = split_large_page(address, prot, ref_prot2); | 141 | split = split_large_page(pfn << PAGE_SHIFT, prot, |
142 | ref_prot2); | ||
145 | if (!split) | 143 | if (!split) |
146 | return -ENOMEM; | 144 | return -ENOMEM; |
147 | set_pte(kpte, mk_pte(split, ref_prot2)); | 145 | set_pte(kpte, mk_pte(split, ref_prot2)); |
@@ -160,7 +158,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
160 | 158 | ||
161 | if (page_private(kpte_page) == 0) { | 159 | if (page_private(kpte_page) == 0) { |
162 | save_page(kpte_page); | 160 | save_page(kpte_page); |
163 | revert_page(address, ref_prot); | 161 | revert_page(address, pfn, ref_prot); |
164 | } | 162 | } |
165 | return 0; | 163 | return 0; |
166 | } | 164 | } |
@@ -180,22 +178,32 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
180 | */ | 178 | */ |
181 | int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) | 179 | int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) |
182 | { | 180 | { |
183 | int err = 0; | 181 | unsigned long phys_base_pfn = __pa_symbol(__START_KERNEL_map) >> PAGE_SHIFT; |
182 | int err = 0, kernel_map = 0; | ||
184 | int i; | 183 | int i; |
185 | 184 | ||
185 | if (address >= __START_KERNEL_map | ||
186 | && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) { | ||
187 | address = (unsigned long)__va(__pa(address)); | ||
188 | kernel_map = 1; | ||
189 | } | ||
190 | |||
186 | down_write(&init_mm.mmap_sem); | 191 | down_write(&init_mm.mmap_sem); |
187 | for (i = 0; i < numpages; i++, address += PAGE_SIZE) { | 192 | for (i = 0; i < numpages; i++, address += PAGE_SIZE) { |
188 | unsigned long pfn = __pa(address) >> PAGE_SHIFT; | 193 | unsigned long pfn = __pa(address) >> PAGE_SHIFT; |
189 | 194 | ||
190 | err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); | 195 | if (!kernel_map || pte_present(pfn_pte(0, prot))) { |
191 | if (err) | 196 | err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); |
192 | break; | 197 | if (err) |
198 | break; | ||
199 | } | ||
193 | /* Handle kernel mapping too which aliases part of the | 200 | /* Handle kernel mapping too which aliases part of the |
194 | * lowmem */ | 201 | * lowmem */ |
195 | if (__pa(address) < KERNEL_TEXT_SIZE) { | 202 | if ((pfn >= phys_base_pfn) && |
203 | ((pfn - phys_base_pfn) < (KERNEL_TEXT_SIZE >> PAGE_SHIFT))) { | ||
196 | unsigned long addr2; | 204 | unsigned long addr2; |
197 | pgprot_t prot2; | 205 | pgprot_t prot2; |
198 | addr2 = __START_KERNEL_map + __pa(address); | 206 | addr2 = __START_KERNEL_map + ((pfn - phys_base_pfn) << PAGE_SHIFT); |
199 | /* Make sure the kernel mappings stay executable */ | 207 | /* Make sure the kernel mappings stay executable */ |
200 | prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); | 208 | prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); |
201 | err = __change_page_attr(addr2, pfn, prot2, | 209 | err = __change_page_attr(addr2, pfn, prot2, |
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 2efe215fc76a..1e76bb0a7277 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c | |||
@@ -419,19 +419,21 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
419 | return -1; | 419 | return -1; |
420 | } | 420 | } |
421 | 421 | ||
422 | node_possible_map = nodes_parsed; | ||
423 | |||
422 | /* Finally register nodes */ | 424 | /* Finally register nodes */ |
423 | for_each_node_mask(i, nodes_parsed) | 425 | for_each_node_mask(i, node_possible_map) |
424 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 426 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
425 | /* Try again in case setup_node_bootmem missed one due | 427 | /* Try again in case setup_node_bootmem missed one due |
426 | to missing bootmem */ | 428 | to missing bootmem */ |
427 | for_each_node_mask(i, nodes_parsed) | 429 | for_each_node_mask(i, node_possible_map) |
428 | if (!node_online(i)) | 430 | if (!node_online(i)) |
429 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 431 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
430 | 432 | ||
431 | for (i = 0; i < NR_CPUS; i++) { | 433 | for (i = 0; i < NR_CPUS; i++) { |
432 | if (cpu_to_node[i] == NUMA_NO_NODE) | 434 | if (cpu_to_node[i] == NUMA_NO_NODE) |
433 | continue; | 435 | continue; |
434 | if (!node_isset(cpu_to_node[i], nodes_parsed)) | 436 | if (!node_isset(cpu_to_node[i], node_possible_map)) |
435 | numa_set_node(i, NUMA_NO_NODE); | 437 | numa_set_node(i, NUMA_NO_NODE); |
436 | } | 438 | } |
437 | numa_init_array(); | 439 | numa_init_array(); |