aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Campbell <ian.campbell@citrix.com>2010-02-17 05:38:10 -0500
committerIngo Molnar <mingo@elte.hu>2010-02-25 04:28:19 -0500
commit14315592009c17035cac81f4954d5a1f4d71e489 (patch)
tree2e67453497e0b0ba08be2aeafe971430a5a28f67
parente808bae2407a087bfd40200a27587898e5a9909d (diff)
x86, mm: Allow highmem user page tables to be disabled at boot time
Distros generally (I looked at Debian, RHEL5 and SLES11) seem to enable CONFIG_HIGHPTE for any x86 configuration which has highmem enabled. This means that the overhead applies even to machines which have a fairly modest amount of high memory and which therefore do not really benefit from allocating PTEs in high memory but still pay the price of the additional mapping operations. Running kernbench on a 4G box I found that with CONFIG_HIGHPTE=y but no actual highptes being allocated there was a reduction in system time used from 59.737s to 55.9s. With CONFIG_HIGHPTE=y and highmem PTEs being allocated: Average Optimal load -j 4 Run (std deviation): Elapsed Time 175.396 (0.238914) User Time 515.983 (5.85019) System Time 59.737 (1.26727) Percent CPU 263.8 (71.6796) Context Switches 39989.7 (4672.64) Sleeps 42617.7 (246.307) With CONFIG_HIGHPTE=y but with no highmem PTEs being allocated: Average Optimal load -j 4 Run (std deviation): Elapsed Time 174.278 (0.831968) User Time 515.659 (6.07012) System Time 55.9 (1.07799) Percent CPU 263.8 (71.266) Context Switches 39929.6 (4485.13) Sleeps 42583.7 (373.039) This patch allows the user to control the allocation of PTEs in highmem from the command line ("userpte=nohigh") but retains the status-quo as the default. It is possible that some simple heuristic could be developed which allows auto-tuning of this option however I don't have a sufficiently large machine available to me to perform any particularly meaningful experiments. We could probably handwave up an argument for a threshold at 16G of total RAM. Assuming 768M of lowmem we have 196608 potential lowmem PTE pages. Each page can map 2M of RAM in a PAE-enabled configuration, meaning a maximum of 384G of RAM could potentially be mapped using lowmem PTEs. 
Even allowing a generous factor of 10 to account for other required lowmem allocations, generous slop to account for page sharing (which reduces the total amount of RAM mappable by a given number of PT pages) and other inaccuracies in the estimations, it would seem that even a 32G machine would not have a particularly pressing need for highmem PTEs. I think 32G could be considered to be at the upper bound of what might be sensible on a 32 bit machine (although I think in practice 64G is still supported). It seems questionable whether HIGHPTE is even a win for any amount of RAM you would sensibly run a 32 bit kernel on rather than going 64 bit. Signed-off-by: Ian Campbell <ian.campbell@citrix.com> LKML-Reference: <1266403090-20162-1-git-send-email-ian.campbell@citrix.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r--Documentation/kernel-parameters.txt7
-rw-r--r--arch/x86/include/asm/pgalloc.h5
-rw-r--r--arch/x86/mm/pgtable.c31
3 files changed, 38 insertions, 5 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 736d4560288..67c69ffe7b7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2694,6 +2694,13 @@ and is between 256 and 4096 characters. It is defined in the file
2694 medium is write-protected). 2694 medium is write-protected).
2695 Example: quirks=0419:aaf5:rl,0421:0433:rc 2695 Example: quirks=0419:aaf5:rl,0421:0433:rc
2696 2696
2697 userpte=
2698 [X86] Flags controlling user PTE allocations.
2699
2700 nohigh = do not allocate PTE pages in
2701 HIGHMEM regardless of setting
2702 of CONFIG_HIGHPTE.
2703
2697 vdso= [X86,SH] 2704 vdso= [X86,SH]
2698 vdso=2: enable compat VDSO (default with COMPAT_VDSO) 2705 vdso=2: enable compat VDSO (default with COMPAT_VDSO)
2699 vdso=1: enable VDSO (default) 2706 vdso=1: enable VDSO (default)
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 0e8c2a0fd92..271de94c381 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -23,6 +23,11 @@ static inline void paravirt_release_pud(unsigned long pfn) {}
23#endif 23#endif
24 24
25/* 25/*
26 * Flags to use when allocating a user page table page.
27 */
28extern gfp_t __userpte_alloc_gfp;
29
30/*
26 * Allocate and free page tables. 31 * Allocate and free page tables.
27 */ 32 */
28extern pgd_t *pgd_alloc(struct mm_struct *); 33extern pgd_t *pgd_alloc(struct mm_struct *);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ed34f5e3599..c9ba9deafe8 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,6 +6,14 @@
6 6
7#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO 7#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
8 8
9#ifdef CONFIG_HIGHPTE
10#define PGALLOC_USER_GFP __GFP_HIGHMEM
11#else
12#define PGALLOC_USER_GFP 0
13#endif
14
15gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
16
9pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 17pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
10{ 18{
11 return (pte_t *)__get_free_page(PGALLOC_GFP); 19 return (pte_t *)__get_free_page(PGALLOC_GFP);
@@ -15,16 +23,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
15{ 23{
16 struct page *pte; 24 struct page *pte;
17 25
18#ifdef CONFIG_HIGHPTE 26 pte = alloc_pages(__userpte_alloc_gfp, 0);
19 pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
20#else
21 pte = alloc_pages(PGALLOC_GFP, 0);
22#endif
23 if (pte) 27 if (pte)
24 pgtable_page_ctor(pte); 28 pgtable_page_ctor(pte);
25 return pte; 29 return pte;
26} 30}
27 31
32static int __init setup_userpte(char *arg)
33{
34 if (!arg)
35 return -EINVAL;
36
37 /*
38 * "userpte=nohigh" disables allocation of user pagetables in
39 * high memory.
40 */
41 if (strcmp(arg, "nohigh") == 0)
42 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
43 else
44 return -EINVAL;
45 return 0;
46}
47early_param("userpte", setup_userpte);
48
28void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) 49void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
29{ 50{
30 pgtable_page_dtor(pte); 51 pgtable_page_dtor(pte);