diff options
author | Ian Campbell <ijc@hellion.org.uk> | 2008-02-09 17:24:09 -0500 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2008-02-09 17:24:09 -0500 |
commit | 551889a6e2a24a9c06fd453ea03b57b7746ffdc0 (patch) | |
tree | d906bbc4e4a96e243a14416bf02feb7a4ffd4d7a /arch/x86/mm | |
parent | 185c045c245f46485ad8bbd8cc1100e986ff3f13 (diff) |
x86: construct 32-bit boot time page tables in native format.
Specifically the boot time page tables in a CONFIG_X86_PAE=y enabled
kernel are in PAE format.
early_ioremap is updated to use the standard page table accessors.
Clear any mappings beyond max_low_pfn from the boot page tables in
native_pagetable_setup_start because the initial mappings can extend
beyond the range of physical memory and into the vmalloc area.
Derived from patches by Eric Biederman and H. Peter Anvin.
[ jeremy@goop.org: PAE swapper_pg_dir needs to be page-sized fix ]
Signed-off-by: Ian Campbell <ijc@hellion.org.uk>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Mika Penttilä <mika.penttila@kolumbus.fi>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/init_32.c | 72 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 55 |
2 files changed, 58 insertions, 69 deletions
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d1bc04006d16..54aba3cf9efe 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <asm/pgalloc.h> | 46 | #include <asm/pgalloc.h> |
47 | #include <asm/sections.h> | 47 | #include <asm/sections.h> |
48 | #include <asm/paravirt.h> | 48 | #include <asm/paravirt.h> |
49 | #include <asm/setup.h> | ||
49 | 50 | ||
50 | unsigned int __VMALLOC_RESERVE = 128 << 20; | 51 | unsigned int __VMALLOC_RESERVE = 128 << 20; |
51 | 52 | ||
@@ -328,44 +329,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; | |||
328 | 329 | ||
329 | void __init native_pagetable_setup_start(pgd_t *base) | 330 | void __init native_pagetable_setup_start(pgd_t *base) |
330 | { | 331 | { |
331 | #ifdef CONFIG_X86_PAE | 332 | unsigned long pfn, va; |
332 | int i; | 333 | pgd_t *pgd; |
334 | pud_t *pud; | ||
335 | pmd_t *pmd; | ||
336 | pte_t *pte; | ||
333 | 337 | ||
334 | /* | 338 | /* |
335 | * Init entries of the first-level page table to the | 339 | * Remove any mappings which extend past the end of physical |
336 | * zero page, if they haven't already been set up. | 340 | * memory from the boot time page table: |
337 | * | ||
338 | * In a normal native boot, we'll be running on a | ||
339 | * pagetable rooted in swapper_pg_dir, but not in PAE | ||
340 | * mode, so this will end up clobbering the mappings | ||
341 | * for the lower 24Mbytes of the address space, | ||
342 | * without affecting the kernel address space. | ||
343 | */ | 341 | */ |
344 | for (i = 0; i < USER_PTRS_PER_PGD; i++) | 342 | for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { |
345 | set_pgd(&base[i], | 343 | va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); |
346 | __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); | 344 | pgd = base + pgd_index(va); |
347 | 345 | if (!pgd_present(*pgd)) | |
348 | /* Make sure kernel address space is empty so that a pagetable | 346 | break; |
349 | will be allocated for it. */ | 347 | |
350 | memset(&base[USER_PTRS_PER_PGD], 0, | 348 | pud = pud_offset(pgd, va); |
351 | KERNEL_PGD_PTRS * sizeof(pgd_t)); | 349 | pmd = pmd_offset(pud, va); |
352 | #else | 350 | if (!pmd_present(*pmd)) |
351 | break; | ||
352 | |||
353 | pte = pte_offset_kernel(pmd, va); | ||
354 | if (!pte_present(*pte)) | ||
355 | break; | ||
356 | |||
357 | pte_clear(NULL, va, pte); | ||
358 | } | ||
353 | paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT); | 359 | paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT); |
354 | #endif | ||
355 | } | 360 | } |
356 | 361 | ||
357 | void __init native_pagetable_setup_done(pgd_t *base) | 362 | void __init native_pagetable_setup_done(pgd_t *base) |
358 | { | 363 | { |
359 | #ifdef CONFIG_X86_PAE | ||
360 | /* | ||
361 | * Add low memory identity-mappings - SMP needs it when | ||
362 | * starting up on an AP from real-mode. In the non-PAE | ||
363 | * case we already have these mappings through head.S. | ||
364 | * All user-space mappings are explicitly cleared after | ||
365 | * SMP startup. | ||
366 | */ | ||
367 | set_pgd(&base[0], base[USER_PTRS_PER_PGD]); | ||
368 | #endif | ||
369 | } | 364 | } |
370 | 365 | ||
371 | /* | 366 | /* |
@@ -374,9 +369,8 @@ void __init native_pagetable_setup_done(pgd_t *base) | |||
374 | * the boot process. | 369 | * the boot process. |
375 | * | 370 | * |
376 | * If we're booting on native hardware, this will be a pagetable | 371 | * If we're booting on native hardware, this will be a pagetable |
377 | * constructed in arch/i386/kernel/head.S, and not running in PAE mode | 372 | * constructed in arch/x86/kernel/head_32.S. The root of the |
378 | * (even if we'll end up running in PAE). The root of the pagetable | 373 | * pagetable will be swapper_pg_dir. |
379 | * will be swapper_pg_dir. | ||
380 | * | 374 | * |
381 | * If we're booting paravirtualized under a hypervisor, then there are | 375 | * If we're booting paravirtualized under a hypervisor, then there are |
382 | * more options: we may already be running PAE, and the pagetable may | 376 | * more options: we may already be running PAE, and the pagetable may |
@@ -537,14 +531,6 @@ void __init paging_init(void) | |||
537 | 531 | ||
538 | load_cr3(swapper_pg_dir); | 532 | load_cr3(swapper_pg_dir); |
539 | 533 | ||
540 | #ifdef CONFIG_X86_PAE | ||
541 | /* | ||
542 | * We will bail out later - printk doesn't work right now so | ||
543 | * the user would just see a hanging kernel. | ||
544 | */ | ||
545 | if (cpu_has_pae) | ||
546 | set_in_cr4(X86_CR4_PAE); | ||
547 | #endif | ||
548 | __flush_tlb_all(); | 534 | __flush_tlb_all(); |
549 | 535 | ||
550 | kmap_init(); | 536 | kmap_init(); |
@@ -675,10 +661,6 @@ void __init mem_init(void) | |||
675 | BUG_ON((unsigned long)high_memory > VMALLOC_START); | 661 | BUG_ON((unsigned long)high_memory > VMALLOC_START); |
676 | #endif /* double-sanity-check paranoia */ | 662 | #endif /* double-sanity-check paranoia */ |
677 | 663 | ||
678 | #ifdef CONFIG_X86_PAE | ||
679 | if (!cpu_has_pae) | ||
680 | panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); | ||
681 | #endif | ||
682 | if (boot_cpu_data.wp_works_ok < 0) | 664 | if (boot_cpu_data.wp_works_ok < 0) |
683 | test_wp_bit(); | 665 | test_wp_bit(); |
684 | 666 | ||
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index ee6648fe6b15..1106b7f477bd 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -260,41 +260,46 @@ static int __init early_ioremap_debug_setup(char *str) | |||
260 | early_param("early_ioremap_debug", early_ioremap_debug_setup); | 260 | early_param("early_ioremap_debug", early_ioremap_debug_setup); |
261 | 261 | ||
262 | static __initdata int after_paging_init; | 262 | static __initdata int after_paging_init; |
263 | static __initdata unsigned long bm_pte[1024] | 263 | static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] |
264 | __attribute__((aligned(PAGE_SIZE))); | 264 | __attribute__((aligned(PAGE_SIZE))); |
265 | 265 | ||
266 | static inline unsigned long * __init early_ioremap_pgd(unsigned long addr) | 266 | static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) |
267 | { | 267 | { |
268 | return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023); | 268 | pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)]; |
269 | pud_t *pud = pud_offset(pgd, addr); | ||
270 | pmd_t *pmd = pmd_offset(pud, addr); | ||
271 | |||
272 | return pmd; | ||
269 | } | 273 | } |
270 | 274 | ||
271 | static inline unsigned long * __init early_ioremap_pte(unsigned long addr) | 275 | static inline pte_t * __init early_ioremap_pte(unsigned long addr) |
272 | { | 276 | { |
273 | return bm_pte + ((addr >> PAGE_SHIFT) & 1023); | 277 | return &bm_pte[pte_index(addr)]; |
274 | } | 278 | } |
275 | 279 | ||
276 | void __init early_ioremap_init(void) | 280 | void __init early_ioremap_init(void) |
277 | { | 281 | { |
278 | unsigned long *pgd; | 282 | pmd_t *pmd; |
279 | 283 | ||
280 | if (early_ioremap_debug) | 284 | if (early_ioremap_debug) |
281 | printk(KERN_INFO "early_ioremap_init()\n"); | 285 | printk(KERN_INFO "early_ioremap_init()\n"); |
282 | 286 | ||
283 | pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN)); | 287 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); |
284 | *pgd = __pa(bm_pte) | _PAGE_TABLE; | ||
285 | memset(bm_pte, 0, sizeof(bm_pte)); | 288 | memset(bm_pte, 0, sizeof(bm_pte)); |
289 | set_pmd(pmd, __pmd(__pa(bm_pte) | _PAGE_TABLE)); | ||
290 | |||
286 | /* | 291 | /* |
287 | * The boot-ioremap range spans multiple pgds, for which | 292 | * The boot-ioremap range spans multiple pmds, for which |
288 | * we are not prepared: | 293 | * we are not prepared: |
289 | */ | 294 | */ |
290 | if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) { | 295 | if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { |
291 | WARN_ON(1); | 296 | WARN_ON(1); |
292 | printk(KERN_WARNING "pgd %p != %p\n", | 297 | printk(KERN_WARNING "pmd %p != %p\n", |
293 | pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))); | 298 | pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); |
294 | printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", | 299 | printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", |
295 | fix_to_virt(FIX_BTMAP_BEGIN)); | 300 | fix_to_virt(FIX_BTMAP_BEGIN)); |
296 | printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n", | 301 | printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n", |
297 | fix_to_virt(FIX_BTMAP_END)); | 302 | fix_to_virt(FIX_BTMAP_END)); |
298 | 303 | ||
299 | printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END); | 304 | printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END); |
300 | printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n", | 305 | printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n", |
@@ -304,28 +309,29 @@ void __init early_ioremap_init(void) | |||
304 | 309 | ||
305 | void __init early_ioremap_clear(void) | 310 | void __init early_ioremap_clear(void) |
306 | { | 311 | { |
307 | unsigned long *pgd; | 312 | pmd_t *pmd; |
308 | 313 | ||
309 | if (early_ioremap_debug) | 314 | if (early_ioremap_debug) |
310 | printk(KERN_INFO "early_ioremap_clear()\n"); | 315 | printk(KERN_INFO "early_ioremap_clear()\n"); |
311 | 316 | ||
312 | pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN)); | 317 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); |
313 | *pgd = 0; | 318 | pmd_clear(pmd); |
314 | paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT); | 319 | paravirt_release_pt(__pa(pmd) >> PAGE_SHIFT); |
315 | __flush_tlb_all(); | 320 | __flush_tlb_all(); |
316 | } | 321 | } |
317 | 322 | ||
318 | void __init early_ioremap_reset(void) | 323 | void __init early_ioremap_reset(void) |
319 | { | 324 | { |
320 | enum fixed_addresses idx; | 325 | enum fixed_addresses idx; |
321 | unsigned long *pte, phys, addr; | 326 | unsigned long addr, phys; |
327 | pte_t *pte; | ||
322 | 328 | ||
323 | after_paging_init = 1; | 329 | after_paging_init = 1; |
324 | for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { | 330 | for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { |
325 | addr = fix_to_virt(idx); | 331 | addr = fix_to_virt(idx); |
326 | pte = early_ioremap_pte(addr); | 332 | pte = early_ioremap_pte(addr); |
327 | if (*pte & _PAGE_PRESENT) { | 333 | if (pte_present(*pte)) { |
328 | phys = *pte & PAGE_MASK; | 334 | phys = pte_val(*pte) & PAGE_MASK; |
329 | set_fixmap(idx, phys); | 335 | set_fixmap(idx, phys); |
330 | } | 336 | } |
331 | } | 337 | } |
@@ -334,7 +340,8 @@ void __init early_ioremap_reset(void) | |||
334 | static void __init __early_set_fixmap(enum fixed_addresses idx, | 340 | static void __init __early_set_fixmap(enum fixed_addresses idx, |
335 | unsigned long phys, pgprot_t flags) | 341 | unsigned long phys, pgprot_t flags) |
336 | { | 342 | { |
337 | unsigned long *pte, addr = __fix_to_virt(idx); | 343 | unsigned long addr = __fix_to_virt(idx); |
344 | pte_t *pte; | ||
338 | 345 | ||
339 | if (idx >= __end_of_fixed_addresses) { | 346 | if (idx >= __end_of_fixed_addresses) { |
340 | BUG(); | 347 | BUG(); |
@@ -342,9 +349,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, | |||
342 | } | 349 | } |
343 | pte = early_ioremap_pte(addr); | 350 | pte = early_ioremap_pte(addr); |
344 | if (pgprot_val(flags)) | 351 | if (pgprot_val(flags)) |
345 | *pte = (phys & PAGE_MASK) | pgprot_val(flags); | 352 | set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); |
346 | else | 353 | else |
347 | *pte = 0; | 354 | pte_clear(NULL, addr, pte); |
348 | __flush_tlb_one(addr); | 355 | __flush_tlb_one(addr); |
349 | } | 356 | } |
350 | 357 | ||