diff options
-rw-r--r-- | arch/x86/mm/init_32.c | 13 | ||||
-rw-r--r-- | arch/x86/mm/pgtable_32.c | 68 | ||||
-rw-r--r-- | include/asm-x86/pgalloc_32.h | 22 | ||||
-rw-r--r-- | include/asm-x86/pgtable-3level.h | 39 | ||||
-rw-r--r-- | include/asm-x86/pgtable_32.h | 3 |
5 files changed, 47 insertions, 98 deletions
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 02d269c07b96..da524fb22422 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -704,19 +704,6 @@ int arch_add_memory(int nid, u64 start, u64 size) | |||
704 | } | 704 | } |
705 | #endif | 705 | #endif |
706 | 706 | ||
707 | struct kmem_cache *pmd_cache; | ||
708 | |||
709 | void __init pgtable_cache_init(void) | ||
710 | { | ||
711 | if (PTRS_PER_PMD > 1) { | ||
712 | pmd_cache = kmem_cache_create("pmd", | ||
713 | PTRS_PER_PMD*sizeof(pmd_t), | ||
714 | PTRS_PER_PMD*sizeof(pmd_t), | ||
715 | SLAB_PANIC, | ||
716 | pmd_ctor); | ||
717 | } | ||
718 | } | ||
719 | |||
720 | /* | 707 | /* |
721 | * This function cannot be __init, since exceptions don't work in that | 708 | * This function cannot be __init, since exceptions don't work in that |
722 | * section. Put this after the callers, so that it cannot be inlined. | 709 | * section. Put this after the callers, so that it cannot be inlined. |
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 3a6c9200058d..5ca3552474ae 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -195,11 +195,6 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
195 | return pte; | 195 | return pte; |
196 | } | 196 | } |
197 | 197 | ||
198 | void pmd_ctor(struct kmem_cache *cache, void *pmd) | ||
199 | { | ||
200 | memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); | ||
201 | } | ||
202 | |||
203 | /* | 198 | /* |
204 | * List of all pgd's needed for non-PAE so it can invalidate entries | 199 | * List of all pgd's needed for non-PAE so it can invalidate entries |
205 | * in both cached and uncached pgd's; not needed for PAE since the | 200 | * in both cached and uncached pgd's; not needed for PAE since the |
@@ -285,7 +280,6 @@ static void pgd_dtor(void *pgd) | |||
285 | if (SHARED_KERNEL_PMD) | 280 | if (SHARED_KERNEL_PMD) |
286 | return; | 281 | return; |
287 | 282 | ||
288 | paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); | ||
289 | spin_lock_irqsave(&pgd_lock, flags); | 283 | spin_lock_irqsave(&pgd_lock, flags); |
290 | pgd_list_del(pgd); | 284 | pgd_list_del(pgd); |
291 | spin_unlock_irqrestore(&pgd_lock, flags); | 285 | spin_unlock_irqrestore(&pgd_lock, flags); |
@@ -367,84 +361,22 @@ static void pgd_mop_up_pmds(pgd_t *pgd) | |||
367 | } | 361 | } |
368 | #endif /* CONFIG_X86_PAE */ | 362 | #endif /* CONFIG_X86_PAE */ |
369 | 363 | ||
370 | /* If we allocate a pmd for part of the kernel address space, then | ||
371 | make sure its initialized with the appropriate kernel mappings. | ||
372 | Otherwise use a cached zeroed pmd. */ | ||
373 | static pmd_t *pmd_cache_alloc(int idx) | ||
374 | { | ||
375 | pmd_t *pmd; | ||
376 | |||
377 | if (idx >= USER_PTRS_PER_PGD) { | ||
378 | pmd = (pmd_t *)__get_free_page(GFP_KERNEL); | ||
379 | |||
380 | if (pmd) | ||
381 | memcpy(pmd, | ||
382 | (void *)pgd_page_vaddr(swapper_pg_dir[idx]), | ||
383 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
384 | } else | ||
385 | pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | ||
386 | |||
387 | return pmd; | ||
388 | } | ||
389 | |||
390 | static void pmd_cache_free(pmd_t *pmd, int idx) | ||
391 | { | ||
392 | if (idx >= USER_PTRS_PER_PGD) | ||
393 | free_page((unsigned long)pmd); | ||
394 | else | ||
395 | kmem_cache_free(pmd_cache, pmd); | ||
396 | } | ||
397 | |||
398 | pgd_t *pgd_alloc(struct mm_struct *mm) | 364 | pgd_t *pgd_alloc(struct mm_struct *mm) |
399 | { | 365 | { |
400 | int i; | ||
401 | pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); | 366 | pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); |
402 | 367 | ||
403 | if (PTRS_PER_PMD == 1 || !pgd) | ||
404 | return pgd; | ||
405 | |||
406 | mm->pgd = pgd; /* so that alloc_pd can use it */ | 368 | mm->pgd = pgd; /* so that alloc_pd can use it */ |
407 | 369 | ||
408 | for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { | ||
409 | pmd_t *pmd = pmd_cache_alloc(i); | ||
410 | |||
411 | if (!pmd) | ||
412 | goto out_oom; | ||
413 | |||
414 | paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT); | ||
415 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); | ||
416 | } | ||
417 | if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { | 370 | if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { |
418 | quicklist_free(0, pgd_dtor, pgd); | 371 | quicklist_free(0, pgd_dtor, pgd); |
419 | pgd = NULL; | 372 | pgd = NULL; |
420 | } | 373 | } |
421 | 374 | ||
422 | return pgd; | 375 | return pgd; |
423 | |||
424 | out_oom: | ||
425 | for (i--; i >= 0; i--) { | ||
426 | pgd_t pgdent = pgd[i]; | ||
427 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | ||
428 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
429 | pmd_cache_free(pmd, i); | ||
430 | } | ||
431 | quicklist_free(0, pgd_dtor, pgd); | ||
432 | return NULL; | ||
433 | } | 376 | } |
434 | 377 | ||
435 | void pgd_free(pgd_t *pgd) | 378 | void pgd_free(pgd_t *pgd) |
436 | { | 379 | { |
437 | int i; | ||
438 | |||
439 | /* in the PAE case user pgd entries are overwritten before usage */ | ||
440 | if (PTRS_PER_PMD > 1) | ||
441 | for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { | ||
442 | pgd_t pgdent = pgd[i]; | ||
443 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | ||
444 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
445 | pmd_cache_free(pmd, i); | ||
446 | } | ||
447 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ | ||
448 | pgd_mop_up_pmds(pgd); | 380 | pgd_mop_up_pmds(pgd); |
449 | quicklist_free(0, pgd_dtor, pgd); | 381 | quicklist_free(0, pgd_dtor, pgd); |
450 | } | 382 | } |
diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h index 3482c3427897..0caa37a9a25f 100644 --- a/include/asm-x86/pgalloc_32.h +++ b/include/asm-x86/pgalloc_32.h | |||
@@ -63,21 +63,35 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) | |||
63 | */ | 63 | */ |
64 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) | 64 | static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) |
65 | { | 65 | { |
66 | BUG(); | 66 | return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); |
67 | return (pmd_t *)2; | ||
68 | } | 67 | } |
69 | 68 | ||
70 | static inline void pmd_free(pmd_t *pmd) | 69 | static inline void pmd_free(pmd_t *pmd) |
71 | { | 70 | { |
71 | BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); | ||
72 | free_page((unsigned long)pmd); | ||
72 | } | 73 | } |
73 | 74 | ||
74 | static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) | 75 | static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) |
75 | { | 76 | { |
77 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
78 | tlb_remove_page(tlb, virt_to_page(pmd)); | ||
76 | } | 79 | } |
77 | 80 | ||
78 | static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) | 81 | static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) |
79 | { | 82 | { |
80 | BUG(); | 83 | paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT); |
84 | |||
85 | /* Note: almost everything apart from _PAGE_PRESENT is | ||
86 | reserved at the pmd (PDPT) level. */ | ||
87 | set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); | ||
88 | |||
89 | /* | ||
90 | * Pentium-II erratum A13: in PAE mode we explicitly have to flush | ||
91 | * the TLB via cr3 if the top-level pgd is changed... | ||
92 | */ | ||
93 | if (mm == current->active_mm) | ||
94 | write_cr3(read_cr3()); | ||
81 | } | 95 | } |
82 | #endif /* CONFIG_X86_PAE */ | 96 | #endif /* CONFIG_X86_PAE */ |
83 | 97 | ||
diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h index 62a1ffbc8784..ed4c6f0e57ec 100644 --- a/include/asm-x86/pgtable-3level.h +++ b/include/asm-x86/pgtable-3level.h | |||
@@ -15,9 +15,19 @@ | |||
15 | #define pgd_ERROR(e) \ | 15 | #define pgd_ERROR(e) \ |
16 | printk("%s:%d: bad pgd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pgd_val(e)) | 16 | printk("%s:%d: bad pgd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pgd_val(e)) |
17 | 17 | ||
18 | #define pud_none(pud) 0 | 18 | |
19 | #define pud_bad(pud) 0 | 19 | static inline int pud_none(pud_t pud) |
20 | #define pud_present(pud) 1 | 20 | { |
21 | return pud_val(pud) == 0; | ||
22 | } | ||
23 | static inline int pud_bad(pud_t pud) | ||
24 | { | ||
25 | return (pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; | ||
26 | } | ||
27 | static inline int pud_present(pud_t pud) | ||
28 | { | ||
29 | return pud_val(pud) & _PAGE_PRESENT; | ||
30 | } | ||
21 | 31 | ||
22 | /* Rules for using set_pte: the pte being assigned *must* be | 32 | /* Rules for using set_pte: the pte being assigned *must* be |
23 | * either not present or in a state where the hardware will | 33 | * either not present or in a state where the hardware will |
@@ -58,7 +68,7 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) | |||
58 | } | 68 | } |
59 | static inline void native_set_pud(pud_t *pudp, pud_t pud) | 69 | static inline void native_set_pud(pud_t *pudp, pud_t pud) |
60 | { | 70 | { |
61 | *pudp = pud; | 71 | set_64bit((unsigned long long *)(pudp),native_pud_val(pud)); |
62 | } | 72 | } |
63 | 73 | ||
64 | /* | 74 | /* |
@@ -81,13 +91,20 @@ static inline void native_pmd_clear(pmd_t *pmd) | |||
81 | *(tmp + 1) = 0; | 91 | *(tmp + 1) = 0; |
82 | } | 92 | } |
83 | 93 | ||
84 | /* | 94 | static inline void pud_clear(pud_t *pudp) |
85 | * Pentium-II erratum A13: in PAE mode we explicitly have to flush | 95 | { |
86 | * the TLB via cr3 if the top-level pgd is changed... | 96 | set_pud(pudp, __pud(0)); |
87 | * We do not let the generic code free and clear pgd entries due to | 97 | |
88 | * this erratum. | 98 | /* |
89 | */ | 99 | * Pentium-II erratum A13: in PAE mode we explicitly have to flush |
90 | static inline void pud_clear (pud_t * pud) { } | 100 | * the TLB via cr3 if the top-level pgd is changed... |
101 | * | ||
102 | * XXX I don't think we need to worry about this here, since | ||
103 | * when clearing the pud, the calling code needs to flush the | ||
104 | * tlb anyway. But do it now for safety's sake. - jsgf | ||
105 | */ | ||
106 | write_cr3(read_cr3()); | ||
107 | } | ||
91 | 108 | ||
92 | #define pud_page(pud) \ | 109 | #define pud_page(pud) \ |
93 | ((struct page *) __va(pud_val(pud) & PAGE_MASK)) | 110 | ((struct page *) __va(pud_val(pud) & PAGE_MASK)) |
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index ca7b150ca8b7..7b61cb5989b0 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h | |||
@@ -31,8 +31,7 @@ extern spinlock_t pgd_lock; | |||
31 | extern struct page *pgd_list; | 31 | extern struct page *pgd_list; |
32 | void check_pgt_cache(void); | 32 | void check_pgt_cache(void); |
33 | 33 | ||
34 | void pmd_ctor(struct kmem_cache *, void *); | 34 | static inline void pgtable_cache_init(void) {} |
35 | void pgtable_cache_init(void); | ||
36 | void paging_init(void); | 35 | void paging_init(void); |
37 | 36 | ||
38 | 37 | ||