Diffstat (limited to 'arch/x86_64/mm')
 arch/x86_64/mm/fault.c      | 81
 arch/x86_64/mm/init.c       | 44
 arch/x86_64/mm/k8topology.c |  2
 arch/x86_64/mm/numa.c       | 39
 arch/x86_64/mm/pageattr.c   | 63
 arch/x86_64/mm/srat.c       |  8
 6 files changed, 149 insertions(+), 88 deletions(-)
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 2e7c3c8ffe03..55250593d8c9 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -264,6 +264,8 @@ static int vmalloc_fault(unsigned long address)
 		return -1;
 	if (pgd_none(*pgd))
 		set_pgd(pgd, *pgd_ref);
+	else
+		BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
 
 	/* Below here mismatches are bugs because these lower tables
 	   are shared */
@@ -312,21 +314,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	unsigned long flags;
 	siginfo_t info;
 
+	tsk = current;
+	mm = tsk->mm;
+	prefetchw(&mm->mmap_sem);
+
 	/* get the address */
 	__asm__("movq %%cr2,%0":"=r" (address));
-	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-					SIGSEGV) == NOTIFY_STOP)
-		return;
-
-	if (likely(regs->eflags & X86_EFLAGS_IF))
-		local_irq_enable();
 
-	if (unlikely(page_fault_trace))
-		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
-		       regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
-
-	tsk = current;
-	mm = tsk->mm;
 	info.si_code = SEGV_MAPERR;
 
 
@@ -351,10 +345,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		 */
 		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 		      ((address >= VMALLOC_START && address < VMALLOC_END))) {
-			if (vmalloc_fault(address) < 0)
-				goto bad_area_nosemaphore;
-			return;
+			if (vmalloc_fault(address) >= 0)
+				return;
 		}
+		if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+						SIGSEGV) == NOTIFY_STOP)
+			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
@@ -362,6 +358,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		goto bad_area_nosemaphore;
 	}
 
+	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+					SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	if (likely(regs->eflags & X86_EFLAGS_IF))
+		local_irq_enable();
+
+	if (unlikely(page_fault_trace))
+		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+		       regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
+
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(address, regs, error_code);
 
@@ -571,9 +578,51 @@ do_sigbus:
 	return;
 }
 
+DEFINE_SPINLOCK(pgd_lock);
+struct page *pgd_list;
+
+void vmalloc_sync_all(void)
+{
+	/* Note that races in the updates of insync and start aren't
+	   problematic:
+	   insync can only get set bits added, and updates to start are only
+	   improving performance (without affecting correctness if undone). */
+	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+	static unsigned long start = VMALLOC_START & PGDIR_MASK;
+	unsigned long address;
+
+	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+		if (!test_bit(pgd_index(address), insync)) {
+			const pgd_t *pgd_ref = pgd_offset_k(address);
+			struct page *page;
+
+			if (pgd_none(*pgd_ref))
+				continue;
+			spin_lock(&pgd_lock);
+			for (page = pgd_list; page;
+			     page = (struct page *)page->index) {
+				pgd_t *pgd;
+				pgd = (pgd_t *)page_address(page) + pgd_index(address);
+				if (pgd_none(*pgd))
+					set_pgd(pgd, *pgd_ref);
+				else
+					BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
+			}
+			spin_unlock(&pgd_lock);
+			set_bit(pgd_index(address), insync);
+		}
+		if (address == start)
+			start = address + PGDIR_SIZE;
+	}
+	/* Check that there is no need to do the same for the modules area. */
+	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+				(__START_KERNEL & PGDIR_MASK)));
+}
+
 static int __init enable_pagefaulttrace(char *str)
 {
 	page_fault_trace = 1;
-	return 0;
+	return 1;
 }
 __setup("pagefaulttrace", enable_pagefaulttrace);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 7af1742aa958..e5f7f1c34462 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -72,7 +72,7 @@ void show_mem(void)
 	show_free_areas();
 	printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pfn_to_page(pgdat->node_start_pfn + i);
 			total++;
@@ -94,7 +94,7 @@ void show_mem(void)
 
 int after_bootmem;
 
-static void *spp_getpage(void)
+static __init void *spp_getpage(void)
 {
 	void *ptr;
 	if (after_bootmem)
@@ -108,7 +108,7 @@ static void *spp_getpage(void)
 	return ptr;
 }
 
-static void set_pte_phys(unsigned long vaddr,
+static __init void set_pte_phys(unsigned long vaddr,
 			 unsigned long phys, pgprot_t prot)
 {
 	pgd_t *pgd;
@@ -157,7 +157,8 @@ static void set_pte_phys(unsigned long vaddr,
 }
 
 /* NOTE: this is meant to be run only at boot */
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __init
+__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 	unsigned long address = __fix_to_virt(idx);
 
@@ -225,6 +226,33 @@ static __meminit void unmap_low_page(int i)
 	ti->allocated = 0;
 }
 
+/* Must run before zap_low_mappings */
+__init void *early_ioremap(unsigned long addr, unsigned long size)
+{
+	unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
+
+	/* actually usually some more */
+	if (size >= LARGE_PAGE_SIZE) {
+		printk("SMBIOS area too long %lu\n", size);
+		return NULL;
+	}
+	set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+	map += LARGE_PAGE_SIZE;
+	set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+	__flush_tlb();
+	return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
+}
+
+/* To avoid virtual aliases later */
+__init void early_iounmap(void *addr, unsigned long size)
+{
+	if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
+		printk("early_iounmap: bad address %p\n", addr);
+	set_pmd(temp_mappings[0].pmd, __pmd(0));
+	set_pmd(temp_mappings[1].pmd, __pmd(0));
+	__flush_tlb();
+}
+
 static void __meminit
 phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
 {
@@ -344,7 +372,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
 		pud_t *pud;
 
 		if (after_bootmem)
-			pud = pud_offset_k(pgd, __PAGE_OFFSET);
+			pud = pud_offset_k(pgd, start & PGDIR_MASK);
 		else
 			pud = alloc_low_page(&map, &pud_phys);
 
@@ -486,7 +514,7 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
 void online_page(struct page *page)
 {
 	ClearPageReserved(page);
-	set_page_count(page, 1);
+	init_page_count(page);
 	__free_page(page);
 	totalram_pages++;
 	num_physpages++;
@@ -592,7 +620,7 @@ void free_initmem(void)
 	addr = (unsigned long)(&__init_begin);
 	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
-		set_page_count(virt_to_page(addr), 1);
+		init_page_count(virt_to_page(addr));
 		memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
 		free_page(addr);
 		totalram_pages++;
@@ -632,7 +660,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
 	for (; start < end; start += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(start));
-		set_page_count(virt_to_page(start), 1);
+		init_page_count(virt_to_page(start));
 		free_page(start);
 		totalram_pages++;
 	}
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index dd60e71fdba6..7c45c2d2b8b2 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -43,7 +43,7 @@ static __init int find_northbridge(void)
 int __init k8_scan_nodes(unsigned long start, unsigned long end)
 {
 	unsigned long prevbase;
-	struct node nodes[8];
+	struct bootnode nodes[8];
 	int nodeid, i, nb;
 	unsigned char nodeids[8];
 	int found = 0;
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 22e51beee8d3..4be82d6e2b48 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -25,8 +25,7 @@
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
-int memnode_shift;
-u8 memnodemap[NODEMAPSIZE];
+struct memnode memnode;
 
 unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
@@ -47,7 +46,7 @@ int numa_off __initdata;
  * -1 if node overlap or lost ram (shift too big)
  */
 static int __init
-populate_memnodemap(const struct node *nodes, int numnodes, int shift)
+populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
 {
 	int i;
 	int res = -1;
@@ -74,7 +73,7 @@ populate_memnodemap(const struct node *nodes, int numnodes, int shift)
 	return res;
 }
 
-int __init compute_hash_shift(struct node *nodes, int numnodes)
+int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
 {
 	int shift = 20;
 
@@ -149,7 +148,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
 /* Initialize final allocator for a zone */
 void __init setup_node_zones(int nodeid)
 {
-	unsigned long start_pfn, end_pfn;
+	unsigned long start_pfn, end_pfn, memmapsize, limit;
 	unsigned long zones[MAX_NR_ZONES];
 	unsigned long holes[MAX_NR_ZONES];
 
@@ -159,6 +158,16 @@ void __init setup_node_zones(int nodeid)
 	Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n",
 		nodeid, start_pfn, end_pfn);
 
+	/* Try to allocate mem_map at end to not fill up precious <4GB
+	   memory. */
+	memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
+	limit = end_pfn << PAGE_SHIFT;
+	NODE_DATA(nodeid)->node_mem_map =
+		__alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
+				memmapsize, SMP_CACHE_BYTES,
+				round_down(limit - memmapsize, PAGE_SIZE),
+				limit);
+
 	size_zones(zones, holes, start_pfn, end_pfn);
 	free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
 			    start_pfn, holes);
@@ -191,7 +200,7 @@ int numa_fake __initdata = 0;
 static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
 {
 	int i;
-	struct node nodes[MAX_NUMNODES];
+	struct bootnode nodes[MAX_NUMNODES];
 	unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
 
 	/* Kludge needed for the hash function */
@@ -357,8 +366,7 @@ void __init init_cpu_to_node(void)
 
 EXPORT_SYMBOL(cpu_to_node);
 EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode_shift);
-EXPORT_SYMBOL(memnodemap);
+EXPORT_SYMBOL(memnode);
 EXPORT_SYMBOL(node_data);
 
 #ifdef CONFIG_DISCONTIGMEM
@@ -369,21 +377,6 @@ EXPORT_SYMBOL(node_data);
  * Should do that.
  */
 
-/* Requires pfn_valid(pfn) to be true */
-struct page *pfn_to_page(unsigned long pfn)
-{
-	int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT);
-	return (pfn - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map;
-}
-EXPORT_SYMBOL(pfn_to_page);
-
-unsigned long page_to_pfn(struct page *page)
-{
-	return (long)(((page) - page_zone(page)->zone_mem_map) +
-	       page_zone(page)->zone_start_pfn);
-}
-EXPORT_SYMBOL(page_to_pfn);
-
 int pfn_valid(unsigned long pfn)
 {
 	unsigned nid;
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 35f1f1aab063..531ad21447b1 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -45,6 +45,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
 	pte_t *pbase;
 	if (!base)
 		return NULL;
+	/*
+	 * page_private is used to track the number of entries in
+	 * the page table page have non standard attributes.
+	 */
+	SetPagePrivate(base);
+	page_private(base) = 0;
+
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
@@ -77,26 +84,12 @@ static inline void flush_map(unsigned long address)
 	on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
 }
 
-struct deferred_page {
-	struct deferred_page *next;
-	struct page *fpage;
-	unsigned long address;
-};
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
+static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
 
-static inline void save_page(unsigned long address, struct page *fpage)
+static inline void save_page(struct page *fpage)
 {
-	struct deferred_page *df;
-	df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
-	if (!df) {
-		flush_map(address);
-		__free_page(fpage);
-	} else {
-		df->next = df_list;
-		df->fpage = fpage;
-		df->address = address;
-		df_list = df;
-	}
+	fpage->lru.next = (struct list_head *)deferred_pages;
+	deferred_pages = fpage;
 }
 
 /*
@@ -138,8 +131,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 			set_pte(kpte, pfn_pte(pfn, prot));
 		} else {
 			/*
-			 * split_large_page will take the reference for this change_page_attr
-			 * on the split page.
+			 * split_large_page will take the reference for this
+			 * change_page_attr on the split page.
 			 */
 
 			struct page *split;
@@ -151,23 +144,20 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 			set_pte(kpte,mk_pte(split, ref_prot2));
 			kpte_page = split;
 		}
-		get_page(kpte_page);
+		page_private(kpte_page)++;
 	} else if ((kpte_flags & _PAGE_PSE) == 0) {
 		set_pte(kpte, pfn_pte(pfn, ref_prot));
-		__put_page(kpte_page);
+		BUG_ON(page_private(kpte_page) == 0);
+		page_private(kpte_page)--;
 	} else
 		BUG();
 
 	/* on x86-64 the direct mapping set at boot is not using 4k pages */
 	BUG_ON(PageReserved(kpte_page));
 
-	switch (page_count(kpte_page)) {
-	case 1:
-		save_page(address, kpte_page);
+	if (page_private(kpte_page) == 0) {
+		save_page(kpte_page);
 		revert_page(address, ref_prot);
-		break;
-	case 0:
-		BUG(); /* memleak and failed 2M page regeneration */
 	}
 	return 0;
 }
@@ -220,17 +210,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 
 void global_flush_tlb(void)
 {
-	struct deferred_page *df, *next_df;
+	struct page *dpage;
 
 	down_read(&init_mm.mmap_sem);
-	df = xchg(&df_list, NULL);
+	dpage = xchg(&deferred_pages, NULL);
 	up_read(&init_mm.mmap_sem);
-	flush_map((df && !df->next) ? df->address : 0);
-	for (; df; df = next_df) {
-		next_df = df->next;
-		if (df->fpage)
-			__free_page(df->fpage);
-		kfree(df);
+
+	flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
+	while (dpage) {
+		struct page *tmp = dpage;
+		dpage = (struct page *)dpage->lru.next;
+		ClearPagePrivate(tmp);
+		__free_page(tmp);
 	}
 }
 
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 482c25767369..2eb879590dc4 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -23,7 +23,7 @@ static struct acpi_table_slit *acpi_slit;
 
 static nodemask_t nodes_parsed __initdata;
 static nodemask_t nodes_found __initdata;
-static struct node nodes[MAX_NUMNODES] __initdata;
+static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static u8 pxm2node[256] = { [0 ... 255] = 0xff };
 
 /* Too small nodes confuse the VM badly. Usually they result
@@ -57,7 +57,7 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end)
 {
 	int i;
 	for_each_node_mask(i, nodes_parsed) {
-		struct node *nd = &nodes[i];
+		struct bootnode *nd = &nodes[i];
 		if (nd->start == nd->end)
 			continue;
 		if (nd->end > start && nd->start < end)
@@ -70,7 +70,7 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end)
 
 static __init void cutoff_node(int i, unsigned long start, unsigned long end)
 {
-	struct node *nd = &nodes[i];
+	struct bootnode *nd = &nodes[i];
 	if (nd->start < start) {
 		nd->start = start;
 		if (nd->end < nd->start)
@@ -159,7 +159,7 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
 void __init
 acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 {
-	struct node *nd;
+	struct bootnode *nd;
 	unsigned long start, end;
 	int node, pxm;
 	int i;