author		Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>	2013-04-28 05:37:33 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2013-04-30 02:00:07 -0400
commit		5c1f6ee9a31cbdac90bbb8ae1ba4475031ac74b4
tree		9b68dca4f1968199567b2fe631f346795d8eee69 /arch/powerpc/include
parent		d614bb041209fd7cb5e4b35e11a7b2f6ee8f62b8
powerpc: Reduce PTE table memory wastage
We allocate one page for the last level of the Linux page table. With THP and a large page size of 16MB, that wastes a large part of the page: with a 64K base page size, mapping a 16MB area needs only 2K of PTE space. This patch reduces the wastage by sharing the page allocated for the last level of the Linux page table among multiple pmd entries. We call the smaller chunks PTE page fragments, and the allocated page a PTE page.

To support systems that don't have 64K HPTE support, we add another 2K to each PTE page fragment: the second half of the fragment stores the slot and secondary-bit information of the HPTEs. With this we have a 4K PTE fragment.

We use a simple approach to share the PTE page: on allocation, we bump the PTE page refcount to 16 and hand the page out to the next 16 pte alloc requests. This should help the NUMA locality of the PTE page fragments, assuming that the pte alloc requests that immediately follow will mostly come from the same node. We don't try to reuse freed PTE page fragments, so some space can still be wasted.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
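The sharing scheme is easy to see in code. The sketch below is illustrative only: the signature matches the page_table_alloc() declared in this patch, but the body is a simplification of the real allocator (the parameter names, the exact locking, and the pgtable_page_ctor() handling for user page tables are assumptions, not the literal implementation):

pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
	void *ret;
	struct page *page;

	/* Fast path: carve the next 4K fragment from the cached PTE page. */
	spin_lock(&mm->page_table_lock);
	ret = mm->context.pte_frag;
	if (ret) {
		void *next = ret + PTE_FRAG_SIZE;
		/* All PTE_FRAG_NR fragments handed out? Drop the cache. */
		if (((unsigned long)next & ~PAGE_MASK) == 0)
			next = NULL;
		mm->context.pte_frag = next;
		spin_unlock(&mm->page_table_lock);
		return (pte_t *)ret;
	}
	spin_unlock(&mm->page_table_lock);

	/*
	 * Slow path: take a fresh 64K page and pre-bias its refcount to
	 * PTE_FRAG_NR, so that every outstanding fragment owns one
	 * reference.  page_table_free() drops one reference per fragment;
	 * the page goes back to the buddy allocator with the last of them.
	 */
	page = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
	if (!page)
		return NULL;
	atomic_set(&page->_count, PTE_FRAG_NR);

	ret = page_address(page);
	spin_lock(&mm->page_table_lock);
	mm->context.pte_frag = ret + PTE_FRAG_SIZE;
	spin_unlock(&mm->page_table_lock);
	return (pte_t *)ret;
}

Keeping the cursor in mm->context.pte_frag per mm is what gives the locality property: consecutive pte_alloc_one() calls for the same mm are served from the same page.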
Diffstat (limited to 'arch/powerpc/include')
-rw-r--r--	arch/powerpc/include/asm/mmu-book3e.h	4
-rw-r--r--	arch/powerpc/include/asm/mmu-hash64.h	4
-rw-r--r--	arch/powerpc/include/asm/page.h	4
-rw-r--r--	arch/powerpc/include/asm/pgalloc-64.h	82
4 files changed, 37 insertions, 57 deletions
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 99d43e0c1e4a..8bd560c5fe84 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -231,6 +231,10 @@ typedef struct {
 	u64 high_slices_psize;	/* 4 bits per slice for now */
 	u16 user_psize;		/* page size index */
 #endif
+#ifdef CONFIG_PPC_64K_PAGES
+	/* for 4K PTE fragment support */
+	void *pte_frag;
+#endif
 } mm_context_t;
 
 /* Page size definitions, common between 32 and 64-bit
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 05895cff1345..de9e577f329c 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -516,6 +516,10 @@ typedef struct {
 	unsigned long acop;	/* mask of enabled coprocessor types */
 	unsigned int cop_pid;	/* pid value used with coprocessors */
 #endif /* CONFIG_PPC_ICSWX */
+#ifdef CONFIG_PPC_64K_PAGES
+	/* for 4K PTE fragment support */
+	void *pte_frag;
+#endif
 } mm_context_t;
 
 
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 711e83a79e3d..988c812aab5b 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -393,7 +393,11 @@ void arch_free_page(struct page *page, int order);
 
 struct vm_area_struct;
 
+#ifdef CONFIG_PPC_64K_PAGES
+typedef pte_t *pgtable_t;
+#else
 typedef struct page *pgtable_t;
+#endif
 
 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index d39012352f94..91acb12bac92 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -152,6 +152,23 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 }
 
 #else /* if CONFIG_PPC_64K_PAGES */
+/*
+ * we support 16 fragments per PTE page.
+ */
+#define PTE_FRAG_NR	16
+/*
+ * We use a 2K PTE page fragment and another 2K for storing
+ * real_pte_t hash index
+ */
+#define PTE_FRAG_SIZE_SHIFT  12
+#define PTE_FRAG_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
+
+extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int);
+extern void page_table_free(struct mm_struct *, unsigned long *, int);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+#ifdef CONFIG_SMP
+extern void __tlb_remove_table(void *_table);
+#endif
 
 #define pud_populate(mm, pud, pmd)	pud_set(pud, (unsigned long)pmd)
 
@@ -164,90 +181,42 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte_page)
 {
-	pmd_populate_kernel(mm, pmd, page_address(pte_page));
+	pmd_set(pmd, (unsigned long)pte_page);
 }
 
 static inline pgtable_t pmd_pgtable(pmd_t pmd)
 {
-	return pmd_page(pmd);
+	return (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE);
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+	return (pte_t *)page_table_alloc(mm, address, 1);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 				      unsigned long address)
 {
-	struct page *page;
-	pte_t *pte;
-
-	pte = pte_alloc_one_kernel(mm, address);
-	if (!pte)
-		return NULL;
-	page = virt_to_page(pte);
-	pgtable_page_ctor(page);
-	return page;
+	return (pgtable_t)page_table_alloc(mm, address, 0);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	free_page((unsigned long)pte);
+	page_table_free(mm, (unsigned long *)pte, 1);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
-	pgtable_page_dtor(ptepage);
-	__free_page(ptepage);
+	page_table_free(mm, (unsigned long *)ptepage, 0);
 }
 
-static inline void pgtable_free(void *table, unsigned index_size)
-{
-	if (!index_size)
-		free_page((unsigned long)table);
-	else {
-		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
-		kmem_cache_free(PGT_CACHE(index_size), table);
-	}
-}
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
-				    void *table, int shift)
-{
-	unsigned long pgf = (unsigned long)table;
-	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-	pgf |= shift;
-	tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
-	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
-	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
-	pgtable_free(table, shift);
-}
-#else /* !CONFIG_SMP */
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
-				    void *table, int shift)
-{
-	pgtable_free(table, shift);
-}
-#endif /* CONFIG_SMP */
-
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
-	struct page *page = page_address(table);
-
 	tlb_flush_pgtable(tlb, address);
-	pgtable_page_dtor(page);
-	pgtable_free_tlb(tlb, page, 0);
+	pgtable_free_tlb(tlb, table, 0);
 }
-
 #endif /* CONFIG_PPC_64K_PAGES */
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -261,7 +230,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
 }
 
-
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
 	pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE)
 #ifndef CONFIG_PPC_64K_PAGES
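As a sanity check on the sizes used above (PTE_FRAG_SIZE, PTE_FRAG_NR, and the 16MB figure from the commit message), the following standalone program works through the arithmetic; the 64K page size, 8-byte pte_t, and 256 PTEs per fragment are assumed values for the configuration this patch targets:

#include <assert.h>

int main(void)
{
	unsigned long sizeof_pte = 8;		/* sizeof(pte_t) on ppc64 (assumed) */
	unsigned long ptrs_per_pte = 256;	/* PTRS_PER_PTE with 64K pages (assumed) */
	unsigned long page_size = 64 * 1024;	/* 64K base page */

	/* PTE_FRAG_SIZE = 2 * PTRS_PER_PTE * sizeof(pte_t): 2K of PTEs
	 * plus 2K of HPTE slot/secondary-bit information. */
	unsigned long frag_size = 2 * ptrs_per_pte * sizeof_pte;
	assert(frag_size == 4096);		/* 1 << PTE_FRAG_SIZE_SHIFT */

	/* PTE_FRAG_NR fragments share one PTE page. */
	assert(page_size / frag_size == 16);

	/* One fragment's worth of PTEs maps 256 * 64K = 16MB, i.e. one
	 * huge page, so THP wastes nothing beyond the fragment itself. */
	assert(ptrs_per_pte * page_size == 16UL * 1024 * 1024);
	return 0;
}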