diff options
-rw-r--r-- | arch/i386/kernel/paravirt.c | 1 | ||||
-rw-r--r-- | arch/i386/mm/fault.c | 5 | ||||
-rw-r--r-- | arch/i386/mm/init.c | 18 | ||||
-rw-r--r-- | arch/i386/mm/pageattr.c | 2 | ||||
-rw-r--r-- | arch/i386/mm/pgtable.c | 88 | ||||
-rw-r--r-- | include/asm-i386/paravirt.h | 1 | ||||
-rw-r--r-- | include/asm-i386/pgtable-2level-defs.h | 2 | ||||
-rw-r--r-- | include/asm-i386/pgtable-2level.h | 2 | ||||
-rw-r--r-- | include/asm-i386/pgtable-3level-defs.h | 6 | ||||
-rw-r--r-- | include/asm-i386/pgtable-3level.h | 2 | ||||
-rw-r--r-- | include/asm-i386/pgtable.h | 2 |
11 files changed, 101 insertions, 28 deletions
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 47d075bdfb95..2040a831d5b3 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c | |||
@@ -132,6 +132,7 @@ struct paravirt_ops paravirt_ops = { | |||
132 | .name = "bare hardware", | 132 | .name = "bare hardware", |
133 | .paravirt_enabled = 0, | 133 | .paravirt_enabled = 0, |
134 | .kernel_rpl = 0, | 134 | .kernel_rpl = 0, |
135 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | ||
135 | 136 | ||
136 | .patch = native_patch, | 137 | .patch = native_patch, |
137 | .banner = default_banner, | 138 | .banner = default_banner, |
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index c6a0a06258e6..f534c29e80b2 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c | |||
@@ -603,7 +603,6 @@ do_sigbus: | |||
603 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); | 603 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); |
604 | } | 604 | } |
605 | 605 | ||
606 | #ifndef CONFIG_X86_PAE | ||
607 | void vmalloc_sync_all(void) | 606 | void vmalloc_sync_all(void) |
608 | { | 607 | { |
609 | /* | 608 | /* |
@@ -616,6 +615,9 @@ void vmalloc_sync_all(void) | |||
616 | static unsigned long start = TASK_SIZE; | 615 | static unsigned long start = TASK_SIZE; |
617 | unsigned long address; | 616 | unsigned long address; |
618 | 617 | ||
618 | if (SHARED_KERNEL_PMD) | ||
619 | return; | ||
620 | |||
619 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); | 621 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); |
620 | for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { | 622 | for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { |
621 | if (!test_bit(pgd_index(address), insync)) { | 623 | if (!test_bit(pgd_index(address), insync)) { |
@@ -638,4 +640,3 @@ void vmalloc_sync_all(void) | |||
638 | start = address + PGDIR_SIZE; | 640 | start = address + PGDIR_SIZE; |
639 | } | 641 | } |
640 | } | 642 | } |
641 | #endif | ||
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index e8545dcf06c5..dbe16f63a566 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c | |||
@@ -745,6 +745,8 @@ struct kmem_cache *pmd_cache; | |||
745 | 745 | ||
746 | void __init pgtable_cache_init(void) | 746 | void __init pgtable_cache_init(void) |
747 | { | 747 | { |
748 | size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); | ||
749 | |||
748 | if (PTRS_PER_PMD > 1) { | 750 | if (PTRS_PER_PMD > 1) { |
749 | pmd_cache = kmem_cache_create("pmd", | 751 | pmd_cache = kmem_cache_create("pmd", |
750 | PTRS_PER_PMD*sizeof(pmd_t), | 752 | PTRS_PER_PMD*sizeof(pmd_t), |
@@ -754,13 +756,23 @@ void __init pgtable_cache_init(void) | |||
754 | NULL); | 756 | NULL); |
755 | if (!pmd_cache) | 757 | if (!pmd_cache) |
756 | panic("pgtable_cache_init(): cannot create pmd cache"); | 758 | panic("pgtable_cache_init(): cannot create pmd cache"); |
759 | |||
760 | if (!SHARED_KERNEL_PMD) { | ||
761 | /* If we're in PAE mode and have a non-shared | ||
762 | kernel pmd, then the pgd size must be a | ||
763 | page size. This is because the pgd_list | ||
764 | links through the page structure, so there | ||
765 | can only be one pgd per page for this to | ||
766 | work. */ | ||
767 | pgd_size = PAGE_SIZE; | ||
768 | } | ||
757 | } | 769 | } |
758 | pgd_cache = kmem_cache_create("pgd", | 770 | pgd_cache = kmem_cache_create("pgd", |
759 | PTRS_PER_PGD*sizeof(pgd_t), | 771 | pgd_size, |
760 | PTRS_PER_PGD*sizeof(pgd_t), | 772 | pgd_size, |
761 | 0, | 773 | 0, |
762 | pgd_ctor, | 774 | pgd_ctor, |
763 | PTRS_PER_PMD == 1 ? pgd_dtor : NULL); | 775 | (!SHARED_KERNEL_PMD) ? pgd_dtor : NULL); |
764 | if (!pgd_cache) | 776 | if (!pgd_cache) |
765 | panic("pgtable_cache_init(): Cannot create pgd cache"); | 777 | panic("pgtable_cache_init(): Cannot create pgd cache"); |
766 | } | 778 | } |
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index ea6b6d4a0a2a..47bd477c8ecc 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c | |||
@@ -91,7 +91,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) | |||
91 | unsigned long flags; | 91 | unsigned long flags; |
92 | 92 | ||
93 | set_pte_atomic(kpte, pte); /* change init_mm */ | 93 | set_pte_atomic(kpte, pte); /* change init_mm */ |
94 | if (PTRS_PER_PMD > 1) | 94 | if (SHARED_KERNEL_PMD) |
95 | return; | 95 | return; |
96 | 96 | ||
97 | spin_lock_irqsave(&pgd_lock, flags); | 97 | spin_lock_irqsave(&pgd_lock, flags); |
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 99c09edc3dbb..9a96c1647428 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c | |||
@@ -232,42 +232,92 @@ static inline void pgd_list_del(pgd_t *pgd) | |||
232 | set_page_private(next, (unsigned long)pprev); | 232 | set_page_private(next, (unsigned long)pprev); |
233 | } | 233 | } |
234 | 234 | ||
235 | #if (PTRS_PER_PMD == 1) | ||
236 | /* Non-PAE pgd constructor */ | ||
235 | void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | 237 | void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) |
236 | { | 238 | { |
237 | unsigned long flags; | 239 | unsigned long flags; |
238 | 240 | ||
239 | if (PTRS_PER_PMD == 1) { | 241 | /* !PAE, no pagetable sharing */ |
240 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | 242 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); |
241 | spin_lock_irqsave(&pgd_lock, flags); | 243 | |
242 | } | 244 | spin_lock_irqsave(&pgd_lock, flags); |
243 | 245 | ||
246 | /* must happen under lock */ | ||
244 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | 247 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, |
245 | swapper_pg_dir + USER_PTRS_PER_PGD, | 248 | swapper_pg_dir + USER_PTRS_PER_PGD, |
246 | KERNEL_PGD_PTRS); | 249 | KERNEL_PGD_PTRS); |
247 | |||
248 | if (PTRS_PER_PMD > 1) | ||
249 | return; | ||
250 | |||
251 | /* must happen under lock */ | ||
252 | paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | 250 | paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, |
253 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | 251 | __pa(swapper_pg_dir) >> PAGE_SHIFT, |
254 | USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); | 252 | USER_PTRS_PER_PGD, |
255 | 253 | KERNEL_PGD_PTRS); | |
256 | pgd_list_add(pgd); | 254 | pgd_list_add(pgd); |
257 | spin_unlock_irqrestore(&pgd_lock, flags); | 255 | spin_unlock_irqrestore(&pgd_lock, flags); |
258 | } | 256 | } |
257 | #else /* PTRS_PER_PMD > 1 */ | ||
258 | /* PAE pgd constructor */ | ||
259 | void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | ||
260 | { | ||
261 | /* PAE, kernel PMD may be shared */ | ||
262 | |||
263 | if (SHARED_KERNEL_PMD) { | ||
264 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | ||
265 | swapper_pg_dir + USER_PTRS_PER_PGD, | ||
266 | KERNEL_PGD_PTRS); | ||
267 | } else { | ||
268 | unsigned long flags; | ||
269 | |||
270 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | ||
271 | spin_lock_irqsave(&pgd_lock, flags); | ||
272 | pgd_list_add(pgd); | ||
273 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
274 | } | ||
275 | } | ||
276 | #endif /* PTRS_PER_PMD */ | ||
259 | 277 | ||
260 | /* never called when PTRS_PER_PMD > 1 */ | ||
261 | void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) | 278 | void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) |
262 | { | 279 | { |
263 | unsigned long flags; /* can be called from interrupt context */ | 280 | unsigned long flags; /* can be called from interrupt context */ |
264 | 281 | ||
282 | BUG_ON(SHARED_KERNEL_PMD); | ||
283 | |||
265 | paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); | 284 | paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); |
266 | spin_lock_irqsave(&pgd_lock, flags); | 285 | spin_lock_irqsave(&pgd_lock, flags); |
267 | pgd_list_del(pgd); | 286 | pgd_list_del(pgd); |
268 | spin_unlock_irqrestore(&pgd_lock, flags); | 287 | spin_unlock_irqrestore(&pgd_lock, flags); |
269 | } | 288 | } |
270 | 289 | ||
290 | #define UNSHARED_PTRS_PER_PGD \ | ||
291 | (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) | ||
292 | |||
293 | /* If we allocate a pmd for part of the kernel address space, then | ||
294 | make sure it's initialized with the appropriate kernel mappings. | ||
295 | Otherwise use a cached zeroed pmd. */ | ||
296 | static pmd_t *pmd_cache_alloc(int idx) | ||
297 | { | ||
298 | pmd_t *pmd; | ||
299 | |||
300 | if (idx >= USER_PTRS_PER_PGD) { | ||
301 | pmd = (pmd_t *)__get_free_page(GFP_KERNEL); | ||
302 | |||
303 | if (pmd) | ||
304 | memcpy(pmd, | ||
305 | (void *)pgd_page_vaddr(swapper_pg_dir[idx]), | ||
306 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
307 | } else | ||
308 | pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | ||
309 | |||
310 | return pmd; | ||
311 | } | ||
312 | |||
313 | static void pmd_cache_free(pmd_t *pmd, int idx) | ||
314 | { | ||
315 | if (idx >= USER_PTRS_PER_PGD) | ||
316 | free_page((unsigned long)pmd); | ||
317 | else | ||
318 | kmem_cache_free(pmd_cache, pmd); | ||
319 | } | ||
320 | |||
271 | pgd_t *pgd_alloc(struct mm_struct *mm) | 321 | pgd_t *pgd_alloc(struct mm_struct *mm) |
272 | { | 322 | { |
273 | int i; | 323 | int i; |
@@ -276,10 +326,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
276 | if (PTRS_PER_PMD == 1 || !pgd) | 326 | if (PTRS_PER_PMD == 1 || !pgd) |
277 | return pgd; | 327 | return pgd; |
278 | 328 | ||
279 | for (i = 0; i < USER_PTRS_PER_PGD; ++i) { | 329 | for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { |
280 | pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | 330 | pmd_t *pmd = pmd_cache_alloc(i); |
331 | |||
281 | if (!pmd) | 332 | if (!pmd) |
282 | goto out_oom; | 333 | goto out_oom; |
334 | |||
283 | paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); | 335 | paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); |
284 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); | 336 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); |
285 | } | 337 | } |
@@ -290,7 +342,7 @@ out_oom: | |||
290 | pgd_t pgdent = pgd[i]; | 342 | pgd_t pgdent = pgd[i]; |
291 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | 343 | void* pmd = (void *)__va(pgd_val(pgdent)-1); |
292 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | 344 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); |
293 | kmem_cache_free(pmd_cache, pmd); | 345 | pmd_cache_free(pmd, i); |
294 | } | 346 | } |
295 | kmem_cache_free(pgd_cache, pgd); | 347 | kmem_cache_free(pgd_cache, pgd); |
296 | return NULL; | 348 | return NULL; |
@@ -302,11 +354,11 @@ void pgd_free(pgd_t *pgd) | |||
302 | 354 | ||
303 | /* in the PAE case user pgd entries are overwritten before usage */ | 355 | /* in the PAE case user pgd entries are overwritten before usage */ |
304 | if (PTRS_PER_PMD > 1) | 356 | if (PTRS_PER_PMD > 1) |
305 | for (i = 0; i < USER_PTRS_PER_PGD; ++i) { | 357 | for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { |
306 | pgd_t pgdent = pgd[i]; | 358 | pgd_t pgdent = pgd[i]; |
307 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | 359 | void* pmd = (void *)__va(pgd_val(pgdent)-1); |
308 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | 360 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); |
309 | kmem_cache_free(pmd_cache, pmd); | 361 | pmd_cache_free(pmd, i); |
310 | } | 362 | } |
311 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ | 363 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ |
312 | kmem_cache_free(pgd_cache, pgd); | 364 | kmem_cache_free(pgd_cache, pgd); |
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index c49b44cdd8ee..f93599dc7756 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h | |||
@@ -35,6 +35,7 @@ struct desc_struct; | |||
35 | struct paravirt_ops | 35 | struct paravirt_ops |
36 | { | 36 | { |
37 | unsigned int kernel_rpl; | 37 | unsigned int kernel_rpl; |
38 | int shared_kernel_pmd; | ||
38 | int paravirt_enabled; | 39 | int paravirt_enabled; |
39 | const char *name; | 40 | const char *name; |
40 | 41 | ||
diff --git a/include/asm-i386/pgtable-2level-defs.h b/include/asm-i386/pgtable-2level-defs.h index 02518079f816..0f71c9f13da4 100644 --- a/include/asm-i386/pgtable-2level-defs.h +++ b/include/asm-i386/pgtable-2level-defs.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _I386_PGTABLE_2LEVEL_DEFS_H | 1 | #ifndef _I386_PGTABLE_2LEVEL_DEFS_H |
2 | #define _I386_PGTABLE_2LEVEL_DEFS_H | 2 | #define _I386_PGTABLE_2LEVEL_DEFS_H |
3 | 3 | ||
4 | #define SHARED_KERNEL_PMD 0 | ||
5 | |||
4 | /* | 6 | /* |
5 | * traditional i386 two-level paging structure: | 7 | * traditional i386 two-level paging structure: |
6 | */ | 8 | */ |
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 043a2bcfa86a..781fe4bcc962 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h | |||
@@ -82,6 +82,4 @@ static inline int pte_exec_kernel(pte_t pte) | |||
82 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) | 82 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) |
83 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) | 83 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) |
84 | 84 | ||
85 | void vmalloc_sync_all(void); | ||
86 | |||
87 | #endif /* _I386_PGTABLE_2LEVEL_H */ | 85 | #endif /* _I386_PGTABLE_2LEVEL_H */ |
diff --git a/include/asm-i386/pgtable-3level-defs.h b/include/asm-i386/pgtable-3level-defs.h index eb3a1ea88671..c0df89f66e8b 100644 --- a/include/asm-i386/pgtable-3level-defs.h +++ b/include/asm-i386/pgtable-3level-defs.h | |||
@@ -1,6 +1,12 @@ | |||
1 | #ifndef _I386_PGTABLE_3LEVEL_DEFS_H | 1 | #ifndef _I386_PGTABLE_3LEVEL_DEFS_H |
2 | #define _I386_PGTABLE_3LEVEL_DEFS_H | 2 | #define _I386_PGTABLE_3LEVEL_DEFS_H |
3 | 3 | ||
4 | #ifdef CONFIG_PARAVIRT | ||
5 | #define SHARED_KERNEL_PMD (paravirt_ops.shared_kernel_pmd) | ||
6 | #else | ||
7 | #define SHARED_KERNEL_PMD 1 | ||
8 | #endif | ||
9 | |||
4 | /* | 10 | /* |
5 | * PGDIR_SHIFT determines what a top-level page table entry can map | 11 | * PGDIR_SHIFT determines what a top-level page table entry can map |
6 | */ | 12 | */ |
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index be6017f37a91..664bfee5a2f2 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h | |||
@@ -200,6 +200,4 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) | |||
200 | 200 | ||
201 | #define __pmd_free_tlb(tlb, x) do { } while (0) | 201 | #define __pmd_free_tlb(tlb, x) do { } while (0) |
202 | 202 | ||
203 | #define vmalloc_sync_all() ((void)0) | ||
204 | |||
205 | #endif /* _I386_PGTABLE_3LEVEL_H */ | 203 | #endif /* _I386_PGTABLE_3LEVEL_H */ |
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 0790ad6ed440..5b88a6a1278e 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h | |||
@@ -243,6 +243,8 @@ static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; re | |||
243 | static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } | 243 | static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } |
244 | static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } | 244 | static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; } |
245 | 245 | ||
246 | extern void vmalloc_sync_all(void); | ||
247 | |||
246 | #ifdef CONFIG_X86_PAE | 248 | #ifdef CONFIG_X86_PAE |
247 | # include <asm/pgtable-3level.h> | 249 | # include <asm/pgtable-3level.h> |
248 | #else | 250 | #else |