author     Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
commit     c71c03bda1e86c9d5198c5d83f712e695c4f2a1e
tree       ecb166cb3e2b7e2adb3b5e292245fefd23381ac8  /arch/x86/mm/init_64.c
parent     ea53c912f8a86a8567697115b6a0d8152beee5c8
parent     6a00f206debf8a5c8899055726ad127dbeeed098

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/mm/init_64.c')
-rw-r--r--  arch/x86/mm/init_64.c  239
1 file changed, 109 insertions, 130 deletions
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9a6674689a20..bbaaa005bf0e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -21,12 +21,14 @@
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/proc_fs.h>
 #include <linux/pci.h>
 #include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
+#include <linux/memory.h>
 #include <linux/memory_hotplug.h>
 #include <linux/nmi.h>
 #include <linux/gfp.h>
@@ -50,9 +52,8 @@
 #include <asm/numa.h>
 #include <asm/cacheflush.h>
 #include <asm/init.h>
-#include <linux/bootmem.h>
-
-static unsigned long dma_reserve __initdata;
+#include <asm/uv/uv.h>
+#include <asm/setup.h>
 
 static int __init parse_direct_gbpages_off(char *arg)
 {
@@ -98,6 +99,43 @@ static int __init nonx32_setup(char *str)
 __setup("noexec32=", nonx32_setup);
 
 /*
+ * When memory was added/removed make sure all the processes MM have
+ * suitable PGD entries in the local PGD level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+	unsigned long address;
+
+	for (address = start; address <= end; address += PGDIR_SIZE) {
+		const pgd_t *pgd_ref = pgd_offset_k(address);
+		struct page *page;
+
+		if (pgd_none(*pgd_ref))
+			continue;
+
+		spin_lock(&pgd_lock);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
+			spinlock_t *pgt_lock;
+
+			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			/* the pgt_lock only for Xen */
+			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+			spin_lock(pgt_lock);
+
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
+			else
+				BUG_ON(pgd_page_vaddr(*pgd)
+				       != pgd_page_vaddr(*pgd_ref));
+
+			spin_unlock(pgt_lock);
+		}
+		spin_unlock(&pgd_lock);
+	}
+}
+
+/*
  * NOTE: This function is marked __ref because it calls __init function
  * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
  */
@@ -258,18 +296,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
  * to the compile time generated pmds. This results in invalid pmds up
  * to the point where we hit the physaddr 0 mapping.
  *
- * We limit the mappings to the region from _text to _end. _end is
- * rounded up to the 2MB boundary. This catches the invalid pmds as
+ * We limit the mappings to the region from _text to _brk_end. _brk_end
+ * is rounded up to the 2MB boundary. This catches the invalid pmds as
  * well, as they are located before _text:
  */
 void __init cleanup_highmap(void)
 {
 	unsigned long vaddr = __START_KERNEL_map;
-	unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
+	unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+	unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
 	pmd_t *pmd = level2_kernel_pgt;
-	pmd_t *last_pmd = pmd + PTRS_PER_PMD;
 
-	for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
+	for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
 		if (pmd_none(*pmd))
 			continue;
 		if (vaddr < (unsigned long) _text || vaddr > end)
@@ -279,7 +317,7 @@ void __init cleanup_highmap(void)
 
 static __ref void *alloc_low_page(unsigned long *phys)
 {
-	unsigned long pfn = e820_table_end++;
+	unsigned long pfn = pgt_buf_end++;
 	void *adr;
 
 	if (after_bootmem) {
@@ -289,21 +327,37 @@ static __ref void *alloc_low_page(unsigned long *phys)
 		return adr;
 	}
 
-	if (pfn >= e820_table_top)
+	if (pfn >= pgt_buf_top)
 		panic("alloc_low_page: ran out of memory");
 
 	adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
-	memset(adr, 0, PAGE_SIZE);
+	clear_page(adr);
 	*phys = pfn * PAGE_SIZE;
 	return adr;
 }
 
+static __ref void *map_low_page(void *virt)
+{
+	void *adr;
+	unsigned long phys, left;
+
+	if (after_bootmem)
+		return virt;
+
+	phys = __pa(virt);
+	left = phys & (PAGE_SIZE - 1);
+	adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
+	adr = (void *)(((unsigned long)adr) | left);
+
+	return adr;
+}
+
 static __ref void unmap_low_page(void *adr)
 {
 	if (after_bootmem)
 		return;
 
-	early_iounmap(adr, PAGE_SIZE);
+	early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
 }
 
 static unsigned long __meminit
@@ -351,15 +405,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
 }
 
 static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
-	      pgprot_t prot)
-{
-	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
-
-	return phys_pte_init(pte, address, end, prot);
-}
-
-static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 	      unsigned long page_size_mask, pgprot_t prot)
 {
@@ -385,8 +430,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 		if (pmd_val(*pmd)) {
 			if (!pmd_large(*pmd)) {
 				spin_lock(&init_mm.page_table_lock);
-				last_map_addr = phys_pte_update(pmd, address,
+				pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+				last_map_addr = phys_pte_init(pte, address,
 							end, prot);
+				unmap_low_page(pte);
 				spin_unlock(&init_mm.page_table_lock);
 				continue;
 			}
@@ -433,18 +480,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 }
 
 static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
-		unsigned long page_size_mask, pgprot_t prot)
-{
-	pmd_t *pmd = pmd_offset(pud, 0);
-	unsigned long last_map_addr;
-
-	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
-	__flush_tlb_all();
-	return last_map_addr;
-}
-
-static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 			 unsigned long page_size_mask)
 {
@@ -469,8 +504,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 
 		if (pud_val(*pud)) {
 			if (!pud_large(*pud)) {
-				last_map_addr = phys_pmd_update(pud, addr, end,
+				pmd = map_low_page(pmd_offset(pud, 0));
+				last_map_addr = phys_pmd_init(pmd, addr, end,
 						 page_size_mask, prot);
+				unmap_low_page(pmd);
+				__flush_tlb_all();
 				continue;
 			}
 			/*
@@ -518,27 +556,18 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 	return last_map_addr;
 }
 
-static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
-		 unsigned long page_size_mask)
-{
-	pud_t *pud;
-
-	pud = (pud_t *)pgd_page_vaddr(*pgd);
-
-	return phys_pud_init(pud, addr, end, page_size_mask);
-}
-
 unsigned long __meminit
 kernel_physical_mapping_init(unsigned long start,
 			     unsigned long end,
 			     unsigned long page_size_mask)
 {
-
+	bool pgd_changed = false;
 	unsigned long next, last_map_addr = end;
+	unsigned long addr;
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
+	addr = start;
 
 	for (; start < end; start = next) {
 		pgd_t *pgd = pgd_offset_k(start);
@@ -550,8 +579,10 @@ kernel_physical_mapping_init(unsigned long start,
 			next = end;
 
 		if (pgd_val(*pgd)) {
-			last_map_addr = phys_pud_update(pgd, __pa(start),
+			pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+			last_map_addr = phys_pud_init(pud, __pa(start),
 						 __pa(end), page_size_mask);
+			unmap_low_page(pud);
 			continue;
 		}
 
@@ -563,33 +594,21 @@ kernel_physical_mapping_init(unsigned long start,
 		spin_lock(&init_mm.page_table_lock);
 		pgd_populate(&init_mm, pgd, __va(pud_phys));
 		spin_unlock(&init_mm.page_table_lock);
+		pgd_changed = true;
 	}
+
+	if (pgd_changed)
+		sync_global_pgds(addr, end);
+
 	__flush_tlb_all();
 
 	return last_map_addr;
 }
 
 #ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
-				int acpi, int k8)
-{
-#ifndef CONFIG_NO_BOOTMEM
-	unsigned long bootmap_size, bootmap;
-
-	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
-	bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
-				 PAGE_SIZE);
-	if (bootmap == -1L)
-		panic("Cannot find bootmem map of size %ld\n", bootmap_size);
-	reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
-	/* don't touch min_low_pfn */
-	bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
-					 0, end_pfn);
-	e820_register_active_regions(0, start_pfn, end_pfn);
-	free_bootmem_with_active_regions(0, end_pfn);
-#else
-	e820_register_active_regions(0, start_pfn, end_pfn);
-#endif
+void __init initmem_init(void)
+{
+	memblock_x86_register_active_regions(0, 0, max_pfn);
 }
 #endif
 
@@ -598,7 +617,9 @@ void __init paging_init(void)
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+#endif
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
@@ -661,14 +682,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
-#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
-int memory_add_physaddr_to_nid(u64 start)
-{
-	return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
@@ -799,52 +812,6 @@ void mark_rodata_ro(void)
 
 #endif
 
-int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
-				   int flags)
-{
-#ifdef CONFIG_NUMA
-	int nid, next_nid;
-	int ret;
-#endif
-	unsigned long pfn = phys >> PAGE_SHIFT;
-
-	if (pfn >= max_pfn) {
-		/*
-		 * This can happen with kdump kernels when accessing
-		 * firmware tables:
-		 */
-		if (pfn < max_pfn_mapped)
-			return -EFAULT;
-
-		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
-				phys, len);
-		return -EFAULT;
-	}
-
-	/* Should check here against the e820 map to avoid double free */
-#ifdef CONFIG_NUMA
-	nid = phys_to_nid(phys);
-	next_nid = phys_to_nid(phys + len - 1);
-	if (nid == next_nid)
-		ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
-	else
-		ret = reserve_bootmem(phys, len, flags);
-
-	if (ret != 0)
-		return ret;
-
-#else
-	reserve_bootmem(phys, len, flags);
-#endif
-
-	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
-		dma_reserve += len / PAGE_SIZE;
-		set_dma_reserve(dma_reserve);
-	}
-
-	return 0;
-}
-
 int kern_addr_valid(unsigned long addr)
 {
 	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
@@ -890,18 +857,18 @@ static struct vm_area_struct gate_vma = {
 	.vm_flags	= VM_READ | VM_EXEC
 };
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 #ifdef CONFIG_IA32_EMULATION
-	if (test_tsk_thread_flag(tsk, TIF_IA32))
+	if (!mm || mm->context.ia32_compat)
 		return NULL;
 #endif
 	return &gate_vma;
 }
 
-int in_gate_area(struct task_struct *task, unsigned long addr)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
-	struct vm_area_struct *vma = get_gate_vma(task);
+	struct vm_area_struct *vma = get_gate_vma(mm);
 
 	if (!vma)
 		return 0;
@@ -910,11 +877,11 @@ int in_gate_area(struct task_struct *task, unsigned long addr)
 }
 
 /*
- * Use this when you have no reliable task/vma, typically from interrupt
- * context. It is less reliable than using the task's vma and may give
- * false positives:
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
  */
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
 }
@@ -928,6 +895,17 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 	return NULL;
 }
 
+#ifdef CONFIG_X86_UV
+unsigned long memory_block_size_bytes(void)
+{
+	if (is_uv_system()) {
+		printk(KERN_INFO "UV: memory block size 2GB\n");
+		return 2UL * 1024 * 1024 * 1024;
+	}
+	return MIN_MEMORY_BLOCK_SIZE;
+}
+#endif
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
@@ -1003,6 +981,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 		}
 
 	}
+	sync_global_pgds((unsigned long)start_page, end);
 	return 0;
 }
 