Diffstat (limited to 'arch/x86_64/mm/init.c')
 arch/x86_64/mm/init.c | 172 ++++++++++++++++----------------
 1 file changed, 76 insertions(+), 96 deletions(-)
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index ec31534eb104..282b0a8f00ad 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -22,10 +22,12 @@
 #include <linux/bootmem.h>
 #include <linux/proc_fs.h>
 #include <linux/pci.h>
+#include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/memory_hotplug.h>
+#include <linux/nmi.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -46,7 +48,7 @@
 #define Dprintk(x...)
 #endif
 
-struct dma_mapping_ops* dma_ops;
+const struct dma_mapping_ops* dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
 static unsigned long dma_reserve __initdata;
@@ -72,6 +74,11 @@ void show_mem(void)
 
 	for_each_online_pgdat(pgdat) {
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+			/* this loop can take a while with 256 GB and 4k pages
+			   so update the NMI watchdog */
+			if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
+				touch_nmi_watchdog();
+			}
 			page = pfn_to_page(pgdat->node_start_pfn + i);
 			total++;
 			if (PageReserved(page))
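
The hunk above guards a very long pfn walk: with 256 GB of memory and 4 KB pages the loop runs some 64 million iterations, long enough for the NMI watchdog to declare a lockup, so the watchdog is touched once per MAX_ORDER_NR_PAGES block. A minimal user-space sketch of the same throttling pattern follows; CHUNK and touch_watchdog() are invented stand-ins for MAX_ORDER_NR_PAGES and the kernel's touch_nmi_watchdog(), not kernel API.

/* Sketch only: touch a watchdog every N iterations of a long loop. */
#include <stdio.h>

#define CHUNK 2048UL			/* stand-in for MAX_ORDER_NR_PAGES */

static unsigned long touches;

static void touch_watchdog(void)	/* stand-in for touch_nmi_watchdog() */
{
	touches++;	/* the real helper resets the per-CPU NMI watchdog */
}

int main(void)
{
	unsigned long i, pages = 64UL * 1024 * 1024;	/* 256 GB / 4 KB */

	for (i = 0; i < pages; i++) {
		if (i % CHUNK == 0)
			touch_watchdog();
		/* ... per-page accounting as in show_mem() ... */
	}
	printf("%lu pages, watchdog touched %lu times\n", pages, touches);
	return 0;
}
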
@@ -167,23 +174,9 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 
 unsigned long __initdata table_start, table_end;
 
-extern pmd_t temp_boot_pmds[];
-
-static struct temp_map {
-	pmd_t *pmd;
-	void *address;
-	int allocated;
-} temp_mappings[] __initdata = {
-	{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
-	{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
-	{}
-};
-
-static __meminit void *alloc_low_page(int *index, unsigned long *phys)
+static __meminit void *alloc_low_page(unsigned long *phys)
 {
-	struct temp_map *ti;
-	int i;
-	unsigned long pfn = table_end++, paddr;
+	unsigned long pfn = table_end++;
 	void *adr;
 
 	if (after_bootmem) {
@@ -194,57 +187,63 @@ static __meminit void *alloc_low_page(int *index, unsigned long *phys)
 
 	if (pfn >= end_pfn)
 		panic("alloc_low_page: ran out of memory");
-	for (i = 0; temp_mappings[i].allocated; i++) {
-		if (!temp_mappings[i].pmd)
-			panic("alloc_low_page: ran out of temp mappings");
-	}
-	ti = &temp_mappings[i];
-	paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
-	set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
-	ti->allocated = 1;
-	__flush_tlb();
-	adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
+
+	adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
 	memset(adr, 0, PAGE_SIZE);
-	*index = i;
-	*phys = pfn * PAGE_SIZE;
-	return adr;
-}
+	*phys = pfn * PAGE_SIZE;
+	return adr;
+}
 
-static __meminit void unmap_low_page(int i)
+static __meminit void unmap_low_page(void *adr)
 {
-	struct temp_map *ti;
 
 	if (after_bootmem)
 		return;
 
-	ti = &temp_mappings[i];
-	set_pmd(ti->pmd, __pmd(0));
-	ti->allocated = 0;
+	early_iounmap(adr, PAGE_SIZE);
 }
 
 /* Must run before zap_low_mappings */
 __init void *early_ioremap(unsigned long addr, unsigned long size)
 {
-	unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
-
-	/* actually usually some more */
-	if (size >= LARGE_PAGE_SIZE) {
-		return NULL;
+	unsigned long vaddr;
+	pmd_t *pmd, *last_pmd;
+	int i, pmds;
+
+	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+	vaddr = __START_KERNEL_map;
+	pmd = level2_kernel_pgt;
+	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
+	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
+		for (i = 0; i < pmds; i++) {
+			if (pmd_present(pmd[i]))
+				goto next;
+		}
+		vaddr += addr & ~PMD_MASK;
+		addr &= PMD_MASK;
+		for (i = 0; i < pmds; i++, addr += PMD_SIZE)
+			set_pmd(pmd + i, __pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
+		__flush_tlb();
+		return (void *)vaddr;
+	next:
+		;
 	}
-	set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
-	map += LARGE_PAGE_SIZE;
-	set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
-	__flush_tlb();
-	return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
+	printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
+	return NULL;
 }
 
 /* To avoid virtual aliases later */
 __init void early_iounmap(void *addr, unsigned long size)
 {
-	if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
-		printk("early_iounmap: bad address %p\n", addr);
-	set_pmd(temp_mappings[0].pmd, __pmd(0));
-	set_pmd(temp_mappings[1].pmd, __pmd(0));
+	unsigned long vaddr;
+	pmd_t *pmd;
+	int i, pmds;
+
+	vaddr = (unsigned long)addr;
+	pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
+	pmd = level2_kernel_pgt + pmd_index(vaddr);
+	for (i = 0; i < pmds; i++)
+		pmd_clear(pmd + i);
 	__flush_tlb();
 }
 
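
Both new helpers size their mapping with the same expression, pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE: the offset of addr within its first 2 MB slot is added to size, and the sum is rounded up to whole PMD slots. A stand-alone sketch of that arithmetic, with constants chosen to match x86-64's 2 MB large pages; this is illustration, not kernel code.

#include <stdio.h>

#define PMD_SHIFT 21			/* x86-64: one PMD maps 2 MB */
#define PMD_SIZE  (1UL << PMD_SHIFT)
#define PMD_MASK  (~(PMD_SIZE - 1))

/* Number of PMD slots needed to map [addr, addr + size). */
static unsigned long pmds_needed(unsigned long addr, unsigned long size)
{
	/* offset into the first slot + length, rounded up to whole slots */
	return ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
}

int main(void)
{
	/* one page at the very start of a slot -> 1 PMD */
	printf("%lu\n", pmds_needed(0x200000UL, 4096));
	/* 4 KB straddling a 2 MB boundary -> 2 PMDs */
	printf("%lu\n", pmds_needed(0x3ff800UL, 4096));
	return 0;
}
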
@@ -289,7 +288,6 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
 
 
 	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
-		int map;
 		unsigned long pmd_phys;
 		pud_t *pud = pud_page + pud_index(addr);
 		pmd_t *pmd;
@@ -307,12 +305,12 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne
 			continue;
 		}
 
-		pmd = alloc_low_page(&map, &pmd_phys);
+		pmd = alloc_low_page(&pmd_phys);
 		spin_lock(&init_mm.page_table_lock);
 		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
 		phys_pmd_init(pmd, addr, end);
 		spin_unlock(&init_mm.page_table_lock);
-		unmap_low_page(map);
+		unmap_low_page(pmd);
 	}
 	__flush_tlb();
 }
@@ -364,7 +362,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
 	end = (unsigned long)__va(end);
 
 	for (; start < end; start = next) {
-		int map;
 		unsigned long pud_phys;
 		pgd_t *pgd = pgd_offset_k(start);
 		pud_t *pud;
@@ -372,7 +369,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
 		if (after_bootmem)
 			pud = pud_offset(pgd, start & PGDIR_MASK);
 		else
-			pud = alloc_low_page(&map, &pud_phys);
+			pud = alloc_low_page(&pud_phys);
 
 		next = start + PGDIR_SIZE;
 		if (next > end)
@@ -380,7 +377,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
 		phys_pud_init(pud, __pa(start), __pa(next));
 		if (!after_bootmem)
 			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
-		unmap_low_page(map);
+		unmap_low_page(pud);
 	}
 
 	if (!after_bootmem)
@@ -388,21 +385,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
 	__flush_tlb_all();
 }
 
-void __cpuinit zap_low_mappings(int cpu)
-{
-	if (cpu == 0) {
-		pgd_t *pgd = pgd_offset_k(0UL);
-		pgd_clear(pgd);
-	} else {
-		/*
-		 * For AP's, zap the low identity mappings by changing the cr3
-		 * to init_level4_pgt and doing local flush tlb all
-		 */
-		asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
-	}
-	__flush_tlb_all();
-}
-
 #ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
@@ -579,15 +561,6 @@ void __init mem_init(void)
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
-
-#ifdef CONFIG_SMP
-	/*
-	 * Sync boot_level4_pgt mappings with the init_level4_pgt
-	 * except for the low identity mappings which are already zapped
-	 * in init_level4_pgt. This sync-up is essential for AP's bringup
-	 */
-	memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
-#endif
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
@@ -597,37 +570,44 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 	if (begin >= end)
 		return;
 
-	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
 	for (addr = begin; addr < end; addr += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		memset((void *)(addr & ~(PAGE_SIZE-1)),
-			POISON_FREE_INITMEM, PAGE_SIZE);
-		free_page(addr);
+		struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
+		ClearPageReserved(page);
+		init_page_count(page);
+		memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
+		if (addr >= __START_KERNEL_map)
+			change_page_attr_addr(addr, 1, __pgprot(0));
+		__free_page(page);
 		totalram_pages++;
 	}
+	if (addr > __START_KERNEL_map)
+		global_flush_tlb();
 }
 
 void free_initmem(void)
 {
-	memset(__initdata_begin, POISON_FREE_INITDATA,
-		__initdata_end - __initdata_begin);
 	free_init_pages("unused kernel memory",
-			(unsigned long)(&__init_begin),
-			(unsigned long)(&__init_end));
+			__pa_symbol(&__init_begin),
+			__pa_symbol(&__init_end));
 }
 
 #ifdef CONFIG_DEBUG_RODATA
 
 void mark_rodata_ro(void)
 {
-	unsigned long addr = (unsigned long)__start_rodata;
+	unsigned long start = PFN_ALIGN(__va(__pa_symbol(&_stext))), size;
 
-	for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
-		change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
+#ifdef CONFIG_HOTPLUG_CPU
+	/* It must still be possible to apply SMP alternatives. */
+	if (num_possible_cpus() > 1)
+		start = PFN_ALIGN(__va(__pa_symbol(&_etext)));
+#endif
+	size = (unsigned long)__va(__pa_symbol(&__end_rodata)) - start;
+	change_page_attr_addr(start, size >> PAGE_SHIFT, PAGE_KERNEL_RO);
 
-	printk ("Write protecting the kernel read-only data: %luk\n",
-		(__end_rodata - __start_rodata) >> 10);
+	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
+		size >> 10);
 
 	/*
 	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
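
The rewritten mark_rodata_ro() computes one contiguous range and write-protects it with a single change_page_attr_addr() call instead of a page-at-a-time loop, starting at _stext so kernel text is covered as well; when CPU hotplug may still need to patch SMP alternatives, the range starts at _etext instead. A user-space sketch of the range arithmetic; the section addresses are made up and the variables stand in for the linker symbols.

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_ALIGN(x) (((unsigned long)(x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	/* invented stand-ins for _stext, _etext and __end_rodata */
	unsigned long stext = 0xffffffff80200000UL;
	unsigned long etext = 0xffffffff80434567UL;
	unsigned long end_rodata = 0xffffffff80600000UL;
	int hotplug = 1;	/* CONFIG_HOTPLUG_CPU, >1 possible CPU */
	unsigned long start = PFN_ALIGN(stext), size;

	if (hotplug)
		start = PFN_ALIGN(etext);	/* keep text patchable */
	size = end_rodata - start;

	printf("write protecting %lu pages (%luk)\n",
	       size >> PAGE_SHIFT, size >> 10);
	return 0;
}
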
@@ -642,7 +622,7 @@ void mark_rodata_ro(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_init_pages("initrd memory", start, end);
+	free_init_pages("initrd memory", __pa(start), __pa(end));
 }
 #endif
 
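
free_init_pages() now expects physical addresses: free_initmem() passes __pa_symbol() values and free_initrd_mem() converts its virtual range with __pa(), after which the function resolves each page via pfn_to_page(). A toy illustration of that caller-side convention; the direct-map offset and helpers are invented for the sketch and do not reflect the kernel's real layout.

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* invented direct-map offset standing in for the kernel's __PAGE_OFFSET */
#define TOY_PAGE_OFFSET 0xffff810000000000UL

static unsigned long toy_pa(unsigned long vaddr)	/* stand-in for __pa() */
{
	return vaddr - TOY_PAGE_OFFSET;
}

/* stand-in for free_init_pages(): walks *physical* addresses page by page */
static void toy_free_init_pages(const char *what,
				unsigned long begin, unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE)
		printf("%s: freeing pfn %#lx\n", what, addr >> PAGE_SHIFT);
}

int main(void)
{
	/* a caller holding virtual addresses converts first, mirroring
	 * free_initrd_mem()'s new __pa(start)/__pa(end) arguments */
	unsigned long start = TOY_PAGE_OFFSET + 0x100000;
	unsigned long end   = TOY_PAGE_OFFSET + 0x103000;

	toy_free_init_pages("initrd memory", toy_pa(start), toy_pa(end));
	return 0;
}
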