aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm/pgtable_64.c
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2013-04-28 05:37:33 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2013-04-30 02:00:07 -0400
commit5c1f6ee9a31cbdac90bbb8ae1ba4475031ac74b4 (patch)
tree9b68dca4f1968199567b2fe631f346795d8eee69 /arch/powerpc/mm/pgtable_64.c
parentd614bb041209fd7cb5e4b35e11a7b2f6ee8f62b8 (diff)
powerpc: Reduce PTE table memory wastage
We allocate one page for the last level of the linux page table. With THP and a large page size of 16MB, that would mean we are wasting a large part of that page. To map a 16MB area, we only need a PTE space of 2K with 64K page size. This patch reduces the space wastage by sharing the page allocated for the last level of the linux page table with multiple pmd entries. We call these smaller chunks PTE page fragments and the allocated page a PTE page. In order to support systems which don't have 64K HPTE support, we also add another 2K to the PTE page fragment. The second half of the PTE fragment is used for storing slot and secondary bit information of an HPTE. With this we now have a 4K PTE fragment. We use a simple approach to share the PTE page. On allocation, we bump the PTE page refcount to 16 and share the PTE page with the next 16 pte alloc requests. This should help in the node locality of the PTE page fragment, assuming that the immediate pte alloc requests will mostly come from the same NUMA node. We don't try to reuse the freed PTE page fragment. Hence we could be wasting some space. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Acked-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm/pgtable_64.c')
-rw-r--r--arch/powerpc/mm/pgtable_64.c118
1 files changed, 118 insertions, 0 deletions
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 654258f165ae..a854096e1023 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -337,3 +337,121 @@ EXPORT_SYMBOL(__ioremap_at);
337EXPORT_SYMBOL(iounmap); 337EXPORT_SYMBOL(iounmap);
338EXPORT_SYMBOL(__iounmap); 338EXPORT_SYMBOL(__iounmap);
339EXPORT_SYMBOL(__iounmap_at); 339EXPORT_SYMBOL(__iounmap_at);
340
341#ifdef CONFIG_PPC_64K_PAGES
342static pte_t *get_from_cache(struct mm_struct *mm)
343{
344 void *pte_frag, *ret;
345
346 spin_lock(&mm->page_table_lock);
347 ret = mm->context.pte_frag;
348 if (ret) {
349 pte_frag = ret + PTE_FRAG_SIZE;
350 /*
351 * If we have taken up all the fragments mark PTE page NULL
352 */
353 if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
354 pte_frag = NULL;
355 mm->context.pte_frag = pte_frag;
356 }
357 spin_unlock(&mm->page_table_lock);
358 return (pte_t *)ret;
359}
360
361static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
362{
363 void *ret = NULL;
364 struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
365 __GFP_REPEAT | __GFP_ZERO);
366 if (!page)
367 return NULL;
368
369 ret = page_address(page);
370 spin_lock(&mm->page_table_lock);
371 /*
372 * If we find pgtable_page set, we return
373 * the allocated page with single fragement
374 * count.
375 */
376 if (likely(!mm->context.pte_frag)) {
377 atomic_set(&page->_count, PTE_FRAG_NR);
378 mm->context.pte_frag = ret + PTE_FRAG_SIZE;
379 }
380 spin_unlock(&mm->page_table_lock);
381
382 if (!kernel)
383 pgtable_page_ctor(page);
384
385 return (pte_t *)ret;
386}
387
388pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
389{
390 pte_t *pte;
391
392 pte = get_from_cache(mm);
393 if (pte)
394 return pte;
395
396 return __alloc_for_cache(mm, kernel);
397}
398
/*
 * Release one PTE fragment.
 *
 * Drops a reference on the page containing @table.  Only when that was
 * the last reference is the page itself released, after running
 * pgtable_page_dtor() for user (!kernel) page tables.  @mm is not used
 * here.
 */
void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
{
	struct page *pte_page = virt_to_page(table);

	/* Other fragments of this page still hold references. */
	if (!put_page_testzero(pte_page))
		return;

	if (!kernel)
		pgtable_page_dtor(pte_page);
	free_hot_cold_page(pte_page, 0);
}
408
409#ifdef CONFIG_SMP
/*
 * Release one PTE fragment on the deferred table-free path (called from
 * __tlb_remove_table()).
 *
 * Drops a reference on the page containing @table; on the last
 * reference the page table destructor is run and the page is freed.
 */
static void page_table_free_rcu(void *table)
{
	struct page *pte_page = virt_to_page(table);

	/* Other fragments of this page still hold references. */
	if (!put_page_testzero(pte_page))
		return;

	pgtable_page_dtor(pte_page);
	free_hot_cold_page(pte_page, 0);
}
418
419void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
420{
421 unsigned long pgf = (unsigned long)table;
422
423 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
424 pgf |= shift;
425 tlb_remove_table(tlb, (void *)pgf);
426}
427
428void __tlb_remove_table(void *_table)
429{
430 void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
431 unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
432
433 if (!shift)
434 /* PTE page needs special handling */
435 page_table_free_rcu(table);
436 else {
437 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
438 kmem_cache_free(PGT_CACHE(shift), table);
439 }
440}
441#else
442void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
443{
444 if (!shift) {
445 /* PTE page needs special handling */
446 struct page *page = virt_to_page(table);
447 if (put_page_testzero(page)) {
448 pgtable_page_dtor(page);
449 free_hot_cold_page(page, 0);
450 }
451 } else {
452 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
453 kmem_cache_free(PGT_CACHE(shift), table);
454 }
455}
456#endif
457#endif /* CONFIG_PPC_64K_PAGES */