path: root/arch/x86/mm/pgtable_32.c
author	Jeremy Fitzhardinge <jeremy@goop.org>	2008-01-30 07:33:40 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-01-30 07:33:40 -0500
commit	8fe3deef013bebdbed1f75ae59ef9707fb6e5cc7
tree	c743ab7d0cf74c46f57b36571691d69c1ca4389b /arch/x86/mm/pgtable_32.c
parent	1c70e9bd832642b712181e32d1bbf2436058a3df
x86: preallocate pmds at pgd creation time
In PAE mode, an update to the pgd requires a cr3 reload to make sure the
processor notices the changes. Since this also has the side-effect of flushing
the tlb, it's an expensive operation which we want to avoid where possible.

This patch mitigates the cost of installing the initial set of pmds on process
creation by preallocating them when the pgd is allocated. This avoids up to
three tlb flushes during exec, as exec creates the new process address space
while its pagetable is already in active use.

The pmds will be freed as part of the normal pagetable teardown in
free_pgtables, which is called from munmap and process exit. However,
free_pgtables only frees the parts of the pagetable which actually contain
mappings, so stray preallocated pmds may still be attached to the pgd at
pgd_free time. We must mop them up there to prevent a memory leak.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: William Irwin <wli@holomorphy.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
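For orientation before reading the hunks: the sketch below condenses where the
two new helpers, pgd_prepopulate_pmd() and pgd_mop_up_pmds(), hook into the pgd
lifecycle. It is an illustration only, not the literal patch;
alloc_and_init_pgd() is a hypothetical stand-in for the existing quicklist
allocation and PAE kernel-pmd setup, and the paravirt hooks and error paths are
elided.

/* Illustrative sketch only -- see the actual hunks below.
 * alloc_and_init_pgd() is a hypothetical placeholder for the existing
 * allocation path (quicklist allocation plus kernel-pmd setup). */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd = alloc_and_init_pgd(mm);

	/* New: hang a pmd off every user pgd slot up front, so populating
	 * the fresh address space during exec never has to write a
	 * top-level entry (and thus never forces a cr3 reload). */
	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
		quicklist_free(0, pgd_dtor, pgd);	/* prepopulation ran out of memory */
		pgd = NULL;
	}
	return pgd;
}

void pgd_free(pgd_t *pgd)
{
	/* ... existing per-pmd teardown ... */

	/* New: reclaim any preallocated pmds that never got a vma and
	 * were therefore skipped by free_pgtables(). */
	pgd_mop_up_pmds(pgd);
	quicklist_free(0, pgd_dtor, pgd);
}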
Diffstat (limited to 'arch/x86/mm/pgtable_32.c')
-rw-r--r--	arch/x86/mm/pgtable_32.c	70
1 file changed, 70 insertions, 0 deletions
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index f85ee44720d2..33ddddfc26b0 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -294,6 +294,70 @@ static void pgd_dtor(void *pgd)
 #define UNSHARED_PTRS_PER_PGD \
 	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
 
+#ifdef CONFIG_X86_PAE
+/*
+ * Mop up any pmd pages which may still be attached to the pgd.
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
+ * preallocate which never got a corresponding vma will need to be
+ * freed manually.
+ */
+static void pgd_mop_up_pmds(pgd_t *pgdp)
+{
+	int i;
+
+	for(i = 0; i < USER_PTRS_PER_PGD; i++) {
+		pgd_t pgd = pgdp[i];
+
+		if (pgd_val(pgd) != 0) {
+			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+			pgdp[i] = native_make_pgd(0);
+
+			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
+			pmd_free(pmd);
+		}
+	}
+}
+
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update.  Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	pud_t *pud;
+	unsigned long addr;
+	int i;
+
+	pud = pud_offset(pgd, 0);
+	for (addr = i = 0; i < USER_PTRS_PER_PGD; i++, pud++, addr += PUD_SIZE) {
+		pmd_t *pmd = pmd_alloc_one(mm, addr);
+
+		if (!pmd) {
+			pgd_mop_up_pmds(pgd);
+			return 0;
+		}
+
+		pud_populate(mm, pud, pmd);
+	}
+
+	return 1;
+}
+#else  /* !CONFIG_X86_PAE */
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	return 1;
+}
+
+static void pgd_mop_up_pmds(pgd_t *pgd)
+{
+}
+#endif	/* CONFIG_X86_PAE */
+
 /* If we allocate a pmd for part of the kernel address space, then
    make sure its initialized with the appropriate kernel mappings.
    Otherwise use a cached zeroed pmd.  */
@@ -341,6 +405,11 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 		paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
 		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
 	}
+	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+		quicklist_free(0, pgd_dtor, pgd);
+		pgd = NULL;
+	}
+
 	return pgd;
 
 out_oom:
@@ -367,6 +436,7 @@ void pgd_free(pgd_t *pgd)
 			pmd_cache_free(pmd, i);
 		}
 	/* in the non-PAE case, free_pgtables() clears user pgd entries */
+	pgd_mop_up_pmds(pgd);
 	quicklist_free(0, pgd_dtor, pgd);
 }
 