 arch/i386/kernel/paravirt.c            |  1
 arch/i386/mm/fault.c                   |  5
 arch/i386/mm/init.c                    | 18
 arch/i386/mm/pageattr.c                |  2
 arch/i386/mm/pgtable.c                 | 88
 include/asm-i386/paravirt.h            |  1
 include/asm-i386/pgtable-2level-defs.h |  2
 include/asm-i386/pgtable-2level.h      |  2
 include/asm-i386/pgtable-3level-defs.h |  6
 include/asm-i386/pgtable-3level.h      |  2
 include/asm-i386/pgtable.h             |  2
 11 files changed, 101 insertions(+), 28 deletions(-)
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 47d075bdfb95..2040a831d5b3 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -132,6 +132,7 @@ struct paravirt_ops paravirt_ops = {
132 .name = "bare hardware", 132 .name = "bare hardware",
133 .paravirt_enabled = 0, 133 .paravirt_enabled = 0,
134 .kernel_rpl = 0, 134 .kernel_rpl = 0,
135 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
135 136
136 .patch = native_patch, 137 .patch = native_patch,
137 .banner = default_banner, 138 .banner = default_banner,
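
The flag defaults to 1, so bare hardware keeps the traditional PAE behaviour of sharing the kernel pmd across all pgds. A backend whose pagetable model needs per-pgd kernel pmds would clear the flag during its early setup. A minimal sketch, assuming a hypothetical example_hv_probe() hook (no such backend is part of this patch):

/* Hypothetical backend setup (sketch): opt out of kernel pmd sharing. */
static int __init example_hv_probe(void)
{
	paravirt_ops.name = "example hypervisor";
	paravirt_ops.paravirt_enabled = 1;
	paravirt_ops.shared_kernel_pmd = 0;	/* per-pgd kernel pmds */
	return 0;
}
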
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index c6a0a06258e6..f534c29e80b2 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -603,7 +603,6 @@ do_sigbus:
 	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
 }
 
-#ifndef CONFIG_X86_PAE
 void vmalloc_sync_all(void)
 {
 	/*
@@ -616,6 +615,9 @@ void vmalloc_sync_all(void)
 	static unsigned long start = TASK_SIZE;
 	unsigned long address;
 
+	if (SHARED_KERNEL_PMD)
+		return;
+
 	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
 	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
 		if (!test_bit(pgd_index(address), insync)) {
@@ -638,4 +640,3 @@ void vmalloc_sync_all(void)
 			start = address + PGDIR_SIZE;
 	}
 }
-#endif
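
With the #ifndef CONFIG_X86_PAE guard gone, vmalloc_sync_all() is always built, and the decision moves to run time: when the kernel pmd is shared, new kernel mappings are visible to every pgd automatically, so the function returns early. Otherwise each loop iteration propagates init_mm's entry for the faulting region into every pgd on pgd_list. A heavily abridged sketch of that propagation step, using names from the surrounding file (the real code works through a pmd-level helper):

/* Sketch: copy init_mm's kernel mapping for 'address' into one pgd. */
static void vmalloc_sync_one_sketch(pgd_t *pgd_base, unsigned long address)
{
	pgd_t *pgd = pgd_base + pgd_index(address);

	if (!pgd_present(*pgd))
		set_pgd(pgd, *pgd_offset_k(address));
}
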
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index e8545dcf06c5..dbe16f63a566 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -745,6 +745,8 @@ struct kmem_cache *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
+	size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
+
 	if (PTRS_PER_PMD > 1) {
 		pmd_cache = kmem_cache_create("pmd",
 					PTRS_PER_PMD*sizeof(pmd_t),
@@ -754,13 +756,23 @@ void __init pgtable_cache_init(void)
 					NULL);
 		if (!pmd_cache)
 			panic("pgtable_cache_init(): cannot create pmd cache");
+
+		if (!SHARED_KERNEL_PMD) {
+			/* If we're in PAE mode and have a non-shared
+			   kernel pmd, then the pgd size must be a
+			   page size.  This is because the pgd_list
+			   links through the page structure, so there
+			   can only be one pgd per page for this to
+			   work. */
+			pgd_size = PAGE_SIZE;
+		}
 	}
 	pgd_cache = kmem_cache_create("pgd",
-				PTRS_PER_PGD*sizeof(pgd_t),
-				PTRS_PER_PGD*sizeof(pgd_t),
+				pgd_size,
+				pgd_size,
 				0,
 				pgd_ctor,
-				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+				(!SHARED_KERNEL_PMD) ? pgd_dtor : NULL);
 	if (!pgd_cache)
 		panic("pgtable_cache_init(): Cannot create pgd cache");
 }
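
The size switch matters because pgd_list threads pgds together through their struct page, so an unshared-pmd pgd must be the only object in its page. A standalone demo of the size computation (plain C, not kernel code; the PAE constants are illustrative assumptions):

#include <stdio.h>

#define PTRS_PER_PGD	4
#define PAGE_SIZE	4096
typedef unsigned long long pgd_t;	/* 8-byte entries under PAE */

int main(void)
{
	int shared_kernel_pmd = 0;	/* a backend opted out of sharing */
	size_t pgd_size = PTRS_PER_PGD * sizeof(pgd_t);	/* 32 bytes */

	/* one pgd per page, so pgd_list can link through struct page */
	if (!shared_kernel_pmd)
		pgd_size = PAGE_SIZE;

	printf("pgd slab object size: %zu bytes\n", pgd_size);
	return 0;
}
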
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index ea6b6d4a0a2a..47bd477c8ecc 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -91,7 +91,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	unsigned long flags;
 
 	set_pte_atomic(kpte, pte);	/* change init_mm */
-	if (PTRS_PER_PMD > 1)
+	if (SHARED_KERNEL_PMD)
 		return;
 
 	spin_lock_irqsave(&pgd_lock, flags);
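
The old guard skipped the pgd_list walk whenever the kernel was PAE, which silently assumed PAE implies a shared kernel pmd. The new guard tests the actual sharing policy. A standalone demo of where the two predicates diverge (plain C, not kernel code; values are illustrative):

#include <stdio.h>

static int old_skip(int ptrs_per_pmd)      { return ptrs_per_pmd > 1; }
static int new_skip(int shared_kernel_pmd) { return shared_kernel_pmd; }

int main(void)
{
	/* PAE (PTRS_PER_PMD == 512) with an unshared kernel pmd: */
	printf("old predicate skips propagation: %d (wrong)\n", old_skip(512));
	printf("new predicate skips propagation: %d (right)\n", new_skip(0));
	return 0;
}
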
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 99c09edc3dbb..9a96c1647428 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -232,42 +232,92 @@ static inline void pgd_list_del(pgd_t *pgd)
 	set_page_private(next, (unsigned long)pprev);
 }
 
+#if (PTRS_PER_PMD == 1)
+/* Non-PAE pgd constructor */
 void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
 {
 	unsigned long flags;
 
-	if (PTRS_PER_PMD == 1) {
-		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-		spin_lock_irqsave(&pgd_lock, flags);
-	}
+	/* !PAE, no pagetable sharing */
+	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
 
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	/* must happen under lock */
 	clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
 			swapper_pg_dir + USER_PTRS_PER_PGD,
 			KERNEL_PGD_PTRS);
-
-	if (PTRS_PER_PMD > 1)
-		return;
-
-	/* must happen under lock */
 	paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
 			__pa(swapper_pg_dir) >> PAGE_SHIFT,
-			USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
-
+			USER_PTRS_PER_PGD,
+			KERNEL_PGD_PTRS);
 	pgd_list_add(pgd);
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
+#else  /* PTRS_PER_PMD > 1 */
+/* PAE pgd constructor */
+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
+{
+	/* PAE, kernel PMD may be shared */
+
+	if (SHARED_KERNEL_PMD) {
+		clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+				swapper_pg_dir + USER_PTRS_PER_PGD,
+				KERNEL_PGD_PTRS);
+	} else {
+		unsigned long flags;
+
+		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+		spin_lock_irqsave(&pgd_lock, flags);
+		pgd_list_add(pgd);
+		spin_unlock_irqrestore(&pgd_lock, flags);
+	}
+}
+#endif /* PTRS_PER_PMD */
 
-/* never called when PTRS_PER_PMD > 1 */
 void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
 {
 	unsigned long flags; /* can be called from interrupt context */
 
+	BUG_ON(SHARED_KERNEL_PMD);
+
 	paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
 	spin_lock_irqsave(&pgd_lock, flags);
 	pgd_list_del(pgd);
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+#define UNSHARED_PTRS_PER_PGD \
+	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
+
+/* If we allocate a pmd for part of the kernel address space, then
+   make sure its initialized with the appropriate kernel mappings.
+   Otherwise use a cached zeroed pmd. */
+static pmd_t *pmd_cache_alloc(int idx)
+{
+	pmd_t *pmd;
+
+	if (idx >= USER_PTRS_PER_PGD) {
+		pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
+
+		if (pmd)
+			memcpy(pmd,
+			       (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
+			       sizeof(pmd_t) * PTRS_PER_PMD);
+	} else
+		pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+
+	return pmd;
+}
+
+static void pmd_cache_free(pmd_t *pmd, int idx)
+{
+	if (idx >= USER_PTRS_PER_PGD)
+		free_page((unsigned long)pmd);
+	else
+		kmem_cache_free(pmd_cache, pmd);
+}
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	int i;
@@ -276,10 +326,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	if (PTRS_PER_PMD == 1 || !pgd)
 		return pgd;
 
-	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
-		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+	for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
+		pmd_t *pmd = pmd_cache_alloc(i);
+
 		if (!pmd)
 			goto out_oom;
+
 		paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
 		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
 	}
@@ -290,7 +342,7 @@ out_oom:
 		pgd_t pgdent = pgd[i];
 		void* pmd = (void *)__va(pgd_val(pgdent)-1);
 		paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-		kmem_cache_free(pmd_cache, pmd);
+		pmd_cache_free(pmd, i);
 	}
 	kmem_cache_free(pgd_cache, pgd);
 	return NULL;
@@ -302,11 +354,11 @@ void pgd_free(pgd_t *pgd)
 
 	/* in the PAE case user pgd entries are overwritten before usage */
 	if (PTRS_PER_PMD > 1)
-		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+		for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
 			pgd_t pgdent = pgd[i];
 			void* pmd = (void *)__va(pgd_val(pgdent)-1);
 			paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-			kmem_cache_free(pmd_cache, pmd);
+			pmd_cache_free(pmd, i);
 		}
 	/* in the non-PAE case, free_pgtables() clears user pgd entries */
 	kmem_cache_free(pgd_cache, pgd);
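
UNSHARED_PTRS_PER_PGD is the number of pmds pgd_alloc() must allocate per pgd: only the user pmds when the kernel pmd is shared, all of them when it is not (the kernel ones are seeded from swapper_pg_dir by pmd_cache_alloc()). A standalone demo (plain C, not kernel code), assuming the usual PAE constants for a 3G/1G split:

#include <stdio.h>

#define PTRS_PER_PGD		4
#define USER_PTRS_PER_PGD	3

#define UNSHARED_PTRS_PER_PGD(shared) \
	((shared) ? USER_PTRS_PER_PGD : PTRS_PER_PGD)

int main(void)
{
	printf("shared kernel pmd:   %d pmds per pgd\n",
	       UNSHARED_PTRS_PER_PGD(1));
	printf("unshared kernel pmd: %d pmds per pgd\n",
	       UNSHARED_PTRS_PER_PGD(0));
	return 0;
}
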
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index c49b44cdd8ee..f93599dc7756 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -35,6 +35,7 @@ struct desc_struct;
 struct paravirt_ops
 {
 	unsigned int kernel_rpl;
+	int shared_kernel_pmd;
 	int paravirt_enabled;
 	const char *name;
 
diff --git a/include/asm-i386/pgtable-2level-defs.h b/include/asm-i386/pgtable-2level-defs.h
index 02518079f816..0f71c9f13da4 100644
--- a/include/asm-i386/pgtable-2level-defs.h
+++ b/include/asm-i386/pgtable-2level-defs.h
@@ -1,6 +1,8 @@
 #ifndef _I386_PGTABLE_2LEVEL_DEFS_H
 #define _I386_PGTABLE_2LEVEL_DEFS_H
 
+#define SHARED_KERNEL_PMD 0
+
 /*
  * traditional i386 two-level paging structure:
  */
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
index 043a2bcfa86a..781fe4bcc962 100644
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -82,6 +82,4 @@ static inline int pte_exec_kernel(pte_t pte)
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-void vmalloc_sync_all(void);
-
 #endif /* _I386_PGTABLE_2LEVEL_H */
diff --git a/include/asm-i386/pgtable-3level-defs.h b/include/asm-i386/pgtable-3level-defs.h
index eb3a1ea88671..c0df89f66e8b 100644
--- a/include/asm-i386/pgtable-3level-defs.h
+++ b/include/asm-i386/pgtable-3level-defs.h
@@ -1,6 +1,12 @@
 #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
 #define _I386_PGTABLE_3LEVEL_DEFS_H
 
+#ifdef CONFIG_PARAVIRT
+#define SHARED_KERNEL_PMD	(paravirt_ops.shared_kernel_pmd)
+#else
+#define SHARED_KERNEL_PMD	1
+#endif
+
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
  */
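
On native builds SHARED_KERNEL_PMD folds to the constant 1, so the new branches compile away; under CONFIG_PARAVIRT it reads a per-boot flag. A standalone illustration of this constant-vs-flag pattern (plain C, not kernel code; the CONFIG_PARAVIRT define is an assumption for the demo):

#include <stdio.h>

#define CONFIG_PARAVIRT 1	/* pretend this is a paravirt build */

struct paravirt_ops { int shared_kernel_pmd; };
static struct paravirt_ops paravirt_ops = { .shared_kernel_pmd = 1 };

#ifdef CONFIG_PARAVIRT
#define SHARED_KERNEL_PMD (paravirt_ops.shared_kernel_pmd)
#else
#define SHARED_KERNEL_PMD 1
#endif

int main(void)
{
	printf("native default: SHARED_KERNEL_PMD = %d\n", SHARED_KERNEL_PMD);
	paravirt_ops.shared_kernel_pmd = 0;	/* a backend opting out */
	printf("after opt-out:  SHARED_KERNEL_PMD = %d\n", SHARED_KERNEL_PMD);
	return 0;
}
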
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index be6017f37a91..664bfee5a2f2 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -200,6 +200,4 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 
 #define __pmd_free_tlb(tlb, x)		do { } while (0)
 
-#define vmalloc_sync_all() ((void)0)
-
 #endif /* _I386_PGTABLE_3LEVEL_H */
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 0790ad6ed440..5b88a6a1278e 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -243,6 +243,8 @@ static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; re
 static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
 static inline pte_t pte_mkhuge(pte_t pte)  { (pte).pte_low |= _PAGE_PSE; return pte; }
 
+extern void vmalloc_sync_all(void);
+
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else