aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
author: David Gibson <david@gibson.dropbear.id.au> 2009-10-26 15:24:31 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2009-10-30 02:20:58 -0400
commit: a4fe3ce7699bfe1bd88f816b55d42d8fe1dac655 (patch)
tree: b72c982ffbb9f05d78a952288d60c4dc2d31a4d9 /arch/powerpc/kernel
parent: a0668cdc154e54bf0c85182e0535eea237d53146 (diff)
powerpc/mm: Allow more flexible layouts for hugepage pagetables
Currently each available hugepage size uses a slightly different pagetable layout: that is, the bottom level table of pointers to hugepages is a different size, and may branch off from the normal page tables at a different level. Every hugepage-aware path that needs to walk the pagetables must therefore look up the hugepage size from the slice info first, and work out the correct way to walk the pagetables accordingly. Future hardware is likely to add more possible hugepage sizes, more layout options and more mess. This patch therefore reworks the handling of hugepage pagetables to reduce this complexity. In the new scheme, instead of having to consult the slice mask, pagetable walking code can check a flag in the PGD/PUD/PMD entries to see where to branch off to hugepage pagetables, and the entry also contains the information (essentially hugepage shift) necessary to then interpret that table without recourse to the slice mask. This scheme can be extended neatly to handle multiple levels of self-describing "special" hugepage pagetables, although for now we assume only one level exists. This approach means that only the pagetable allocation path needs to know how the pagetables should be set out. All other (hugepage) pagetable walking paths can just interpret the structure as they go. There already was a flag bit in PGD/PUD/PMD entries for hugepage directory pointers, but it was only used for debug. We alter that flag bit to instead be a 0 in the MSB to indicate a hugepage pagetable pointer (normally it would be 1 since the pointer lies in the linear mapping). This means that asm pagetable walking can test for (and punt on) hugepage pointers with the same test that checks for unpopulated page directory entries (beq becomes bge), since hugepage pointers will always be positive, and normal pointers always negative. While we're at it, we get rid of the confusing (and grep-defeating) #defining of hugepte_shift to be the same thing as mmu_huge_psizes.
Signed-off-by: David Gibson <dwg@au1.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/perf_callchain.c20
1 files changed, 5 insertions, 15 deletions
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index 0a03cf70d247..936f04dbfc6f 100644
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -119,13 +119,6 @@ static void perf_callchain_kernel(struct pt_regs *regs,
119} 119}
120 120
121#ifdef CONFIG_PPC64 121#ifdef CONFIG_PPC64
122
123#ifdef CONFIG_HUGETLB_PAGE
124#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize])
125#else
126#define is_huge_psize(pagesize) 0
127#endif
128
129/* 122/*
130 * On 64-bit we don't want to invoke hash_page on user addresses from 123 * On 64-bit we don't want to invoke hash_page on user addresses from
131 * interrupt context, so if the access faults, we read the page tables 124 * interrupt context, so if the access faults, we read the page tables
@@ -135,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
135{ 128{
136 pgd_t *pgdir; 129 pgd_t *pgdir;
137 pte_t *ptep, pte; 130 pte_t *ptep, pte;
138 int pagesize; 131 unsigned shift;
139 unsigned long addr = (unsigned long) ptr; 132 unsigned long addr = (unsigned long) ptr;
140 unsigned long offset; 133 unsigned long offset;
141 unsigned long pfn; 134 unsigned long pfn;
@@ -145,17 +138,14 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
145 if (!pgdir) 138 if (!pgdir)
146 return -EFAULT; 139 return -EFAULT;
147 140
148 pagesize = get_slice_psize(current->mm, addr); 141 ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
142 if (!shift)
143 shift = PAGE_SHIFT;
149 144
150 /* align address to page boundary */ 145 /* align address to page boundary */
151 offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1); 146 offset = addr & ((1UL << shift) - 1);
152 addr -= offset; 147 addr -= offset;
153 148
154 if (is_huge_psize(pagesize))
155 ptep = huge_pte_offset(current->mm, addr);
156 else
157 ptep = find_linux_pte(pgdir, addr);
158
159 if (ptep == NULL) 149 if (ptep == NULL)
160 return -EFAULT; 150 return -EFAULT;
161 pte = *ptep; 151 pte = *ptep;