diff options
Diffstat (limited to 'arch/x86_64/mm/init.c')
-rw-r--r-- | arch/x86_64/mm/init.c | 172 |
1 files changed, 76 insertions, 96 deletions
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index ec31534eb104..282b0a8f00ad 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -22,10 +22,12 @@ | |||
22 | #include <linux/bootmem.h> | 22 | #include <linux/bootmem.h> |
23 | #include <linux/proc_fs.h> | 23 | #include <linux/proc_fs.h> |
24 | #include <linux/pci.h> | 24 | #include <linux/pci.h> |
25 | #include <linux/pfn.h> | ||
25 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
26 | #include <linux/dma-mapping.h> | 27 | #include <linux/dma-mapping.h> |
27 | #include <linux/module.h> | 28 | #include <linux/module.h> |
28 | #include <linux/memory_hotplug.h> | 29 | #include <linux/memory_hotplug.h> |
30 | #include <linux/nmi.h> | ||
29 | 31 | ||
30 | #include <asm/processor.h> | 32 | #include <asm/processor.h> |
31 | #include <asm/system.h> | 33 | #include <asm/system.h> |
@@ -46,7 +48,7 @@ | |||
46 | #define Dprintk(x...) | 48 | #define Dprintk(x...) |
47 | #endif | 49 | #endif |
48 | 50 | ||
49 | struct dma_mapping_ops* dma_ops; | 51 | const struct dma_mapping_ops* dma_ops; |
50 | EXPORT_SYMBOL(dma_ops); | 52 | EXPORT_SYMBOL(dma_ops); |
51 | 53 | ||
52 | static unsigned long dma_reserve __initdata; | 54 | static unsigned long dma_reserve __initdata; |
@@ -72,6 +74,11 @@ void show_mem(void) | |||
72 | 74 | ||
73 | for_each_online_pgdat(pgdat) { | 75 | for_each_online_pgdat(pgdat) { |
74 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | 76 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { |
77 | /* this loop can take a while with 256 GB and 4k pages | ||
78 | so update the NMI watchdog */ | ||
79 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { | ||
80 | touch_nmi_watchdog(); | ||
81 | } | ||
75 | page = pfn_to_page(pgdat->node_start_pfn + i); | 82 | page = pfn_to_page(pgdat->node_start_pfn + i); |
76 | total++; | 83 | total++; |
77 | if (PageReserved(page)) | 84 | if (PageReserved(page)) |
@@ -167,23 +174,9 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) | |||
167 | 174 | ||
168 | unsigned long __initdata table_start, table_end; | 175 | unsigned long __initdata table_start, table_end; |
169 | 176 | ||
170 | extern pmd_t temp_boot_pmds[]; | 177 | static __meminit void *alloc_low_page(unsigned long *phys) |
171 | |||
172 | static struct temp_map { | ||
173 | pmd_t *pmd; | ||
174 | void *address; | ||
175 | int allocated; | ||
176 | } temp_mappings[] __initdata = { | ||
177 | { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) }, | ||
178 | { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) }, | ||
179 | {} | ||
180 | }; | ||
181 | |||
182 | static __meminit void *alloc_low_page(int *index, unsigned long *phys) | ||
183 | { | 178 | { |
184 | struct temp_map *ti; | 179 | unsigned long pfn = table_end++; |
185 | int i; | ||
186 | unsigned long pfn = table_end++, paddr; | ||
187 | void *adr; | 180 | void *adr; |
188 | 181 | ||
189 | if (after_bootmem) { | 182 | if (after_bootmem) { |
@@ -194,57 +187,63 @@ static __meminit void *alloc_low_page(int *index, unsigned long *phys) | |||
194 | 187 | ||
195 | if (pfn >= end_pfn) | 188 | if (pfn >= end_pfn) |
196 | panic("alloc_low_page: ran out of memory"); | 189 | panic("alloc_low_page: ran out of memory"); |
197 | for (i = 0; temp_mappings[i].allocated; i++) { | 190 | |
198 | if (!temp_mappings[i].pmd) | 191 | adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE); |
199 | panic("alloc_low_page: ran out of temp mappings"); | ||
200 | } | ||
201 | ti = &temp_mappings[i]; | ||
202 | paddr = (pfn << PAGE_SHIFT) & PMD_MASK; | ||
203 | set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE)); | ||
204 | ti->allocated = 1; | ||
205 | __flush_tlb(); | ||
206 | adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); | ||
207 | memset(adr, 0, PAGE_SIZE); | 192 | memset(adr, 0, PAGE_SIZE); |
208 | *index = i; | 193 | *phys = pfn * PAGE_SIZE; |
209 | *phys = pfn * PAGE_SIZE; | 194 | return adr; |
210 | return adr; | 195 | } |
211 | } | ||
212 | 196 | ||
213 | static __meminit void unmap_low_page(int i) | 197 | static __meminit void unmap_low_page(void *adr) |
214 | { | 198 | { |
215 | struct temp_map *ti; | ||
216 | 199 | ||
217 | if (after_bootmem) | 200 | if (after_bootmem) |
218 | return; | 201 | return; |
219 | 202 | ||
220 | ti = &temp_mappings[i]; | 203 | early_iounmap(adr, PAGE_SIZE); |
221 | set_pmd(ti->pmd, __pmd(0)); | ||
222 | ti->allocated = 0; | ||
223 | } | 204 | } |
224 | 205 | ||
225 | /* Must run before zap_low_mappings */ | 206 | /* Must run before zap_low_mappings */ |
226 | __init void *early_ioremap(unsigned long addr, unsigned long size) | 207 | __init void *early_ioremap(unsigned long addr, unsigned long size) |
227 | { | 208 | { |
228 | unsigned long map = round_down(addr, LARGE_PAGE_SIZE); | 209 | unsigned long vaddr; |
229 | 210 | pmd_t *pmd, *last_pmd; | |
230 | /* actually usually some more */ | 211 | int i, pmds; |
231 | if (size >= LARGE_PAGE_SIZE) { | 212 | |
232 | return NULL; | 213 | pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; |
214 | vaddr = __START_KERNEL_map; | ||
215 | pmd = level2_kernel_pgt; | ||
216 | last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; | ||
217 | for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { | ||
218 | for (i = 0; i < pmds; i++) { | ||
219 | if (pmd_present(pmd[i])) | ||
220 | goto next; | ||
221 | } | ||
222 | vaddr += addr & ~PMD_MASK; | ||
223 | addr &= PMD_MASK; | ||
224 | for (i = 0; i < pmds; i++, addr += PMD_SIZE) | ||
225 | set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE)); | ||
226 | __flush_tlb(); | ||
227 | return (void *)vaddr; | ||
228 | next: | ||
229 | ; | ||
233 | } | 230 | } |
234 | set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); | 231 | printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); |
235 | map += LARGE_PAGE_SIZE; | 232 | return NULL; |
236 | set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); | ||
237 | __flush_tlb(); | ||
238 | return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); | ||
239 | } | 233 | } |
240 | 234 | ||
241 | /* To avoid virtual aliases later */ | 235 | /* To avoid virtual aliases later */ |
242 | __init void early_iounmap(void *addr, unsigned long size) | 236 | __init void early_iounmap(void *addr, unsigned long size) |
243 | { | 237 | { |
244 | if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) | 238 | unsigned long vaddr; |
245 | printk("early_iounmap: bad address %p\n", addr); | 239 | pmd_t *pmd; |
246 | set_pmd(temp_mappings[0].pmd, __pmd(0)); | 240 | int i, pmds; |
247 | set_pmd(temp_mappings[1].pmd, __pmd(0)); | 241 | |
242 | vaddr = (unsigned long)addr; | ||
243 | pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; | ||
244 | pmd = level2_kernel_pgt + pmd_index(vaddr); | ||
245 | for (i = 0; i < pmds; i++) | ||
246 | pmd_clear(pmd + i); | ||
248 | __flush_tlb(); | 247 | __flush_tlb(); |
249 | } | 248 | } |
250 | 249 | ||
@@ -289,7 +288,6 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne | |||
289 | 288 | ||
290 | 289 | ||
291 | for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { | 290 | for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { |
292 | int map; | ||
293 | unsigned long pmd_phys; | 291 | unsigned long pmd_phys; |
294 | pud_t *pud = pud_page + pud_index(addr); | 292 | pud_t *pud = pud_page + pud_index(addr); |
295 | pmd_t *pmd; | 293 | pmd_t *pmd; |
@@ -307,12 +305,12 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne | |||
307 | continue; | 305 | continue; |
308 | } | 306 | } |
309 | 307 | ||
310 | pmd = alloc_low_page(&map, &pmd_phys); | 308 | pmd = alloc_low_page(&pmd_phys); |
311 | spin_lock(&init_mm.page_table_lock); | 309 | spin_lock(&init_mm.page_table_lock); |
312 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); | 310 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); |
313 | phys_pmd_init(pmd, addr, end); | 311 | phys_pmd_init(pmd, addr, end); |
314 | spin_unlock(&init_mm.page_table_lock); | 312 | spin_unlock(&init_mm.page_table_lock); |
315 | unmap_low_page(map); | 313 | unmap_low_page(pmd); |
316 | } | 314 | } |
317 | __flush_tlb(); | 315 | __flush_tlb(); |
318 | } | 316 | } |
@@ -364,7 +362,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
364 | end = (unsigned long)__va(end); | 362 | end = (unsigned long)__va(end); |
365 | 363 | ||
366 | for (; start < end; start = next) { | 364 | for (; start < end; start = next) { |
367 | int map; | ||
368 | unsigned long pud_phys; | 365 | unsigned long pud_phys; |
369 | pgd_t *pgd = pgd_offset_k(start); | 366 | pgd_t *pgd = pgd_offset_k(start); |
370 | pud_t *pud; | 367 | pud_t *pud; |
@@ -372,7 +369,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
372 | if (after_bootmem) | 369 | if (after_bootmem) |
373 | pud = pud_offset(pgd, start & PGDIR_MASK); | 370 | pud = pud_offset(pgd, start & PGDIR_MASK); |
374 | else | 371 | else |
375 | pud = alloc_low_page(&map, &pud_phys); | 372 | pud = alloc_low_page(&pud_phys); |
376 | 373 | ||
377 | next = start + PGDIR_SIZE; | 374 | next = start + PGDIR_SIZE; |
378 | if (next > end) | 375 | if (next > end) |
@@ -380,7 +377,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
380 | phys_pud_init(pud, __pa(start), __pa(next)); | 377 | phys_pud_init(pud, __pa(start), __pa(next)); |
381 | if (!after_bootmem) | 378 | if (!after_bootmem) |
382 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | 379 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); |
383 | unmap_low_page(map); | 380 | unmap_low_page(pud); |
384 | } | 381 | } |
385 | 382 | ||
386 | if (!after_bootmem) | 383 | if (!after_bootmem) |
@@ -388,21 +385,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
388 | __flush_tlb_all(); | 385 | __flush_tlb_all(); |
389 | } | 386 | } |
390 | 387 | ||
391 | void __cpuinit zap_low_mappings(int cpu) | ||
392 | { | ||
393 | if (cpu == 0) { | ||
394 | pgd_t *pgd = pgd_offset_k(0UL); | ||
395 | pgd_clear(pgd); | ||
396 | } else { | ||
397 | /* | ||
398 | * For AP's, zap the low identity mappings by changing the cr3 | ||
399 | * to init_level4_pgt and doing local flush tlb all | ||
400 | */ | ||
401 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
402 | } | ||
403 | __flush_tlb_all(); | ||
404 | } | ||
405 | |||
406 | #ifndef CONFIG_NUMA | 388 | #ifndef CONFIG_NUMA |
407 | void __init paging_init(void) | 389 | void __init paging_init(void) |
408 | { | 390 | { |
@@ -579,15 +561,6 @@ void __init mem_init(void) | |||
579 | reservedpages << (PAGE_SHIFT-10), | 561 | reservedpages << (PAGE_SHIFT-10), |
580 | datasize >> 10, | 562 | datasize >> 10, |
581 | initsize >> 10); | 563 | initsize >> 10); |
582 | |||
583 | #ifdef CONFIG_SMP | ||
584 | /* | ||
585 | * Sync boot_level4_pgt mappings with the init_level4_pgt | ||
586 | * except for the low identity mappings which are already zapped | ||
587 | * in init_level4_pgt. This sync-up is essential for AP's bringup | ||
588 | */ | ||
589 | memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); | ||
590 | #endif | ||
591 | } | 564 | } |
592 | 565 | ||
593 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 566 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
@@ -597,37 +570,44 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
597 | if (begin >= end) | 570 | if (begin >= end) |
598 | return; | 571 | return; |
599 | 572 | ||
600 | printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); | 573 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
601 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | 574 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
602 | ClearPageReserved(virt_to_page(addr)); | 575 | struct page *page = pfn_to_page(addr >> PAGE_SHIFT); |
603 | init_page_count(virt_to_page(addr)); | 576 | ClearPageReserved(page); |
604 | memset((void *)(addr & ~(PAGE_SIZE-1)), | 577 | init_page_count(page); |
605 | POISON_FREE_INITMEM, PAGE_SIZE); | 578 | memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE); |
606 | free_page(addr); | 579 | if (addr >= __START_KERNEL_map) |
580 | change_page_attr_addr(addr, 1, __pgprot(0)); | ||
581 | __free_page(page); | ||
607 | totalram_pages++; | 582 | totalram_pages++; |
608 | } | 583 | } |
584 | if (addr > __START_KERNEL_map) | ||
585 | global_flush_tlb(); | ||
609 | } | 586 | } |
610 | 587 | ||
611 | void free_initmem(void) | 588 | void free_initmem(void) |
612 | { | 589 | { |
613 | memset(__initdata_begin, POISON_FREE_INITDATA, | ||
614 | __initdata_end - __initdata_begin); | ||
615 | free_init_pages("unused kernel memory", | 590 | free_init_pages("unused kernel memory", |
616 | (unsigned long)(&__init_begin), | 591 | __pa_symbol(&__init_begin), |
617 | (unsigned long)(&__init_end)); | 592 | __pa_symbol(&__init_end)); |
618 | } | 593 | } |
619 | 594 | ||
620 | #ifdef CONFIG_DEBUG_RODATA | 595 | #ifdef CONFIG_DEBUG_RODATA |
621 | 596 | ||
622 | void mark_rodata_ro(void) | 597 | void mark_rodata_ro(void) |
623 | { | 598 | { |
624 | unsigned long addr = (unsigned long)__start_rodata; | 599 | unsigned long start = PFN_ALIGN(__va(__pa_symbol(&_stext))), size; |
625 | 600 | ||
626 | for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) | 601 | #ifdef CONFIG_HOTPLUG_CPU |
627 | change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); | 602 | /* It must still be possible to apply SMP alternatives. */ |
603 | if (num_possible_cpus() > 1) | ||
604 | start = PFN_ALIGN(__va(__pa_symbol(&_etext))); | ||
605 | #endif | ||
606 | size = (unsigned long)__va(__pa_symbol(&__end_rodata)) - start; | ||
607 | change_page_attr_addr(start, size >> PAGE_SHIFT, PAGE_KERNEL_RO); | ||
628 | 608 | ||
629 | printk ("Write protecting the kernel read-only data: %luk\n", | 609 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
630 | (__end_rodata - __start_rodata) >> 10); | 610 | size >> 10); |
631 | 611 | ||
632 | /* | 612 | /* |
633 | * change_page_attr_addr() requires a global_flush_tlb() call after it. | 613 | * change_page_attr_addr() requires a global_flush_tlb() call after it. |
@@ -642,7 +622,7 @@ void mark_rodata_ro(void) | |||
642 | #ifdef CONFIG_BLK_DEV_INITRD | 622 | #ifdef CONFIG_BLK_DEV_INITRD |
643 | void free_initrd_mem(unsigned long start, unsigned long end) | 623 | void free_initrd_mem(unsigned long start, unsigned long end) |
644 | { | 624 | { |
645 | free_init_pages("initrd memory", start, end); | 625 | free_init_pages("initrd memory", __pa(start), __pa(end)); |
646 | } | 626 | } |
647 | #endif | 627 | #endif |
648 | 628 | ||