aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorChen, Kenneth W <kenneth.w.chen@intel.com>2006-12-06 23:32:03 -0500
committerLinus Torvalds <torvalds@woody.osdl.org>2006-12-07 11:39:21 -0500
commit39dde65c9940c97fcd178a3d2b1c57ed8b7b68aa (patch)
tree750818d68ac7381f80fec31491e1d1c78df4b9f6 /arch
parente1dbeda60a7ea9e82a908d93c07308d104d50d79 (diff)
[PATCH] shared page table for hugetlb page
Following up with the work on shared page table done by Dave McCracken. This set of patch target shared page table for hugetlb memory only. The shared page table is particular useful in the situation of large number of independent processes sharing large shared memory segments. In the normal page case, the amount of memory saved from process' page table is quite significant. For hugetlb, the saving on page table memory is not the primary objective (as hugetlb itself already cuts down page table overhead significantly), instead, the purpose of using shared page table on hugetlb is to allow faster TLB refill and smaller cache pollution upon TLB miss. With PT sharing, pte entries are shared among hundreds of processes, the cache consumption used by all the page table is smaller and in return, application gets much higher cache hit ratio. One other effect is that cache hit ratio with hardware page walker hitting on pte in cache will be higher and this helps to reduce tlb miss latency. These two effects contribute to higher application performance. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Acked-by: Hugh Dickins <hugh@veritas.com> Cc: Dave McCracken <dmccr@us.ibm.com> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Adam Litke <agl@us.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: "David S. Miller" <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/i386/mm/hugetlbpage.c112
-rw-r--r--arch/ia64/mm/hugetlbpage.c5
-rw-r--r--arch/powerpc/mm/hugetlbpage.c5
-rw-r--r--arch/sh/mm/hugetlbpage.c5
-rw-r--r--arch/sh64/mm/hugetlbpage.c5
-rw-r--r--arch/sparc64/mm/hugetlbpage.c5
6 files changed, 136 insertions, 1 deletions
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 1719a8141f81..34728e4afe48 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -17,6 +17,113 @@
17#include <asm/tlb.h> 17#include <asm/tlb.h>
18#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
19 19
20static unsigned long page_table_shareable(struct vm_area_struct *svma,
21 struct vm_area_struct *vma,
22 unsigned long addr, pgoff_t idx)
23{
24 unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
25 svma->vm_start;
26 unsigned long sbase = saddr & PUD_MASK;
27 unsigned long s_end = sbase + PUD_SIZE;
28
29 /*
30 * match the virtual addresses, permission and the alignment of the
31 * page table page.
32 */
33 if (pmd_index(addr) != pmd_index(saddr) ||
34 vma->vm_flags != svma->vm_flags ||
35 sbase < svma->vm_start || svma->vm_end < s_end)
36 return 0;
37
38 return saddr;
39}
40
41static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
42{
43 unsigned long base = addr & PUD_MASK;
44 unsigned long end = base + PUD_SIZE;
45
46 /*
47 * check on proper vm_flags and page table alignment
48 */
49 if (vma->vm_flags & VM_MAYSHARE &&
50 vma->vm_start <= base && end <= vma->vm_end)
51 return 1;
52 return 0;
53}
54
55/*
56 * search for a shareable pmd page for hugetlb.
57 */
58static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
59{
60 struct vm_area_struct *vma = find_vma(mm, addr);
61 struct address_space *mapping = vma->vm_file->f_mapping;
62 pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
63 vma->vm_pgoff;
64 struct prio_tree_iter iter;
65 struct vm_area_struct *svma;
66 unsigned long saddr;
67 pte_t *spte = NULL;
68
69 if (!vma_shareable(vma, addr))
70 return;
71
72 spin_lock(&mapping->i_mmap_lock);
73 vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
74 if (svma == vma)
75 continue;
76
77 saddr = page_table_shareable(svma, vma, addr, idx);
78 if (saddr) {
79 spte = huge_pte_offset(svma->vm_mm, saddr);
80 if (spte) {
81 get_page(virt_to_page(spte));
82 break;
83 }
84 }
85 }
86
87 if (!spte)
88 goto out;
89
90 spin_lock(&mm->page_table_lock);
91 if (pud_none(*pud))
92 pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK);
93 else
94 put_page(virt_to_page(spte));
95 spin_unlock(&mm->page_table_lock);
96out:
97 spin_unlock(&mapping->i_mmap_lock);
98}
99
100/*
101 * unmap huge page backed by shared pte.
102 *
103 * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
104 * indicated by page_count > 1, unmap is achieved by clearing pud and
105 * decrementing the ref count. If count == 1, the pte page is not shared.
106 *
107 * called with vma->vm_mm->page_table_lock held.
108 *
109 * returns: 1 successfully unmapped a shared pte page
110 * 0 the underlying pte page is not shared, or it is the last user
111 */
112int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
113{
114 pgd_t *pgd = pgd_offset(mm, *addr);
115 pud_t *pud = pud_offset(pgd, *addr);
116
117 BUG_ON(page_count(virt_to_page(ptep)) == 0);
118 if (page_count(virt_to_page(ptep)) == 1)
119 return 0;
120
121 pud_clear(pud);
122 put_page(virt_to_page(ptep));
123 *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
124 return 1;
125}
126
20pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) 127pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
21{ 128{
22 pgd_t *pgd; 129 pgd_t *pgd;
@@ -25,8 +132,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
25 132
26 pgd = pgd_offset(mm, addr); 133 pgd = pgd_offset(mm, addr);
27 pud = pud_alloc(mm, pgd, addr); 134 pud = pud_alloc(mm, pgd, addr);
28 if (pud) 135 if (pud) {
136 if (pud_none(*pud))
137 huge_pmd_share(mm, addr, pud);
29 pte = (pte_t *) pmd_alloc(mm, pud, addr); 138 pte = (pte_t *) pmd_alloc(mm, pud, addr);
139 }
30 BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); 140 BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
31 141
32 return pte; 142 return pte;
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index f3a9585e98a8..0c7e94edc20e 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -64,6 +64,11 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
64 return pte; 64 return pte;
65} 65}
66 66
67int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
68{
69 return 0;
70}
71
67#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } 72#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
68 73
69/* 74/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 506d89768d45..424a8f57e155 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -146,6 +146,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
146 return hugepte_offset(hpdp, addr); 146 return hugepte_offset(hpdp, addr);
147} 147}
148 148
149int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
150{
151 return 0;
152}
153
149static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) 154static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
150{ 155{
151 pte_t *hugepte = hugepd_page(*hpdp); 156 pte_t *hugepte = hugepd_page(*hpdp);
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 329059d6b54a..cf2c2ee35a37 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -63,6 +63,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
63 return pte; 63 return pte;
64} 64}
65 65
66int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
67{
68 return 0;
69}
70
66struct page *follow_huge_addr(struct mm_struct *mm, 71struct page *follow_huge_addr(struct mm_struct *mm,
67 unsigned long address, int write) 72 unsigned long address, int write)
68{ 73{
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
index 187cf01750b8..4b455f611146 100644
--- a/arch/sh64/mm/hugetlbpage.c
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -53,6 +53,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
53 return pte; 53 return pte;
54} 54}
55 55
56int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
57{
58 return 0;
59}
60
56void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 61void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
57 pte_t *ptep, pte_t entry) 62 pte_t *ptep, pte_t entry)
58{ 63{
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index 53b9b1f528e5..33fd0b265e70 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -235,6 +235,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
235 return pte; 235 return pte;
236} 236}
237 237
238int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
239{
240 return 0;
241}
242
238void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 243void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
239 pte_t *ptep, pte_t entry) 244 pte_t *ptep, pte_t entry)
240{ 245{