author		Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>	2013-04-28 05:37:33 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2013-04-30 02:00:07 -0400
commit		5c1f6ee9a31cbdac90bbb8ae1ba4475031ac74b4 (patch)
tree		9b68dca4f1968199567b2fe631f346795d8eee69 /arch/powerpc/mm
parent		d614bb041209fd7cb5e4b35e11a7b2f6ee8f62b8 (diff)
powerpc: Reduce PTE table memory wastage
We allocate one page for the last level of the Linux page table. With THP and
a large page size of 16MB, that means we waste a large part of that page: to
map a 16MB area we only need 2K of PTE space with a 64K page size. This patch
reduces the space wastage by sharing the page allocated for the last level of
the Linux page table between multiple pmd entries. We call these smaller
chunks PTE page fragments, and the allocated page a PTE page.
In order to support systems which don't have 64K HPTE support, we also add
another 2K to the PTE page fragment. The second half of the fragment is used
for storing the slot and secondary bit information of an HPTE. With this we
now have a 4K PTE fragment.
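The sizing constants this relies on are defined outside arch/powerpc/mm and are
not part of the diff below; a rough sketch of the presumed arithmetic, assuming
a 64K base page size and 8-byte PTEs:

/*
 * 16MB / 64K = 256 PTEs, at 8 bytes each = 2K of real PTE space;
 * add 2K for the HPTE slot/secondary bits -> a 4K fragment, so a
 * 64K PTE page holds 64K / 4K = 16 fragments.
 */
#define PTE_FRAG_SIZE_SHIFT	12	/* assumed: 4K fragments */
#define PTE_FRAG_SIZE		(1UL << PTE_FRAG_SIZE_SHIFT)
#define PTE_FRAG_NR		(PAGE_SIZE >> PTE_FRAG_SIZE_SHIFT)	/* 16 with 64K pages */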
We use a simple approach to share the PTE page. On allocation, we bump the
PTE page refcount to 16 and hand the page out to the next 16 pte alloc
requests. This should help the node locality of the PTE page fragments,
assuming that the immediately following pte alloc requests will mostly come
from the same NUMA node. We don't try to reuse freed PTE page fragments, so
we could be wasting some space.
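In effect, consecutive PTE-table allocations from one mm come out of the same
shared page; a hypothetical illustration of the page_table_alloc() interface
added by this patch (mm and addr stand in for a caller's context):

	pte_t *a = page_table_alloc(mm, addr, 0);	/* allocates a fresh PTE page, returns fragment 0 */
	pte_t *b = page_table_alloc(mm, addr, 0);	/* same page, next 4K fragment */
	/* ... after 16 allocations the cached page is exhausted and a new one is allocated */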
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--	arch/powerpc/mm/mmu_context_hash64.c	 37
-rw-r--r--	arch/powerpc/mm/pgtable_64.c		118
2 files changed, 155 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index d1d1b92c5b99..178876aef40f 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 
 #include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
 
 #include "icswx.h"
 
@@ -85,6 +86,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	spin_lock_init(mm->context.cop_lockp);
 #endif /* CONFIG_PPC_ICSWX */
 
+#ifdef CONFIG_PPC_64K_PAGES
+	mm->context.pte_frag = NULL;
+#endif
 	return 0;
 }
 
@@ -96,13 +100,46 @@ void __destroy_context(int context_id)
 }
 EXPORT_SYMBOL_GPL(__destroy_context);
 
+#ifdef CONFIG_PPC_64K_PAGES
+static void destroy_pagetable_page(struct mm_struct *mm)
+{
+	int count;
+	void *pte_frag;
+	struct page *page;
+
+	pte_frag = mm->context.pte_frag;
+	if (!pte_frag)
+		return;
+
+	page = virt_to_page(pte_frag);
+	/* drop all the pending references */
+	count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
+	/* We allow PTE_FRAG_NR fragments from a PTE page */
+	count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
+	if (!count) {
+		pgtable_page_dtor(page);
+		free_hot_cold_page(page, 0);
+	}
+}
+
+#else
+static inline void destroy_pagetable_page(struct mm_struct *mm)
+{
+	return;
+}
+#endif
+
+
 void destroy_context(struct mm_struct *mm)
 {
+
 #ifdef CONFIG_PPC_ICSWX
 	drop_cop(mm->context.acop, mm);
 	kfree(mm->context.cop_lockp);
 	mm->context.cop_lockp = NULL;
 #endif /* CONFIG_PPC_ICSWX */
+
+	destroy_pagetable_page(mm);
 	__destroy_context(mm->context.id);
 	subpage_prot_free(mm);
 	mm->context.id = MMU_NO_CONTEXT;
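destroy_pagetable_page() above derives the number of handed-out fragments from
the cursor's offset within the PTE page; a worked example with the assumed 4K
fragment size, supposing 5 of the 16 fragments were in use when the mm is torn
down:

	unsigned long off = (unsigned long)pte_frag & ~PAGE_MASK;	/* 5 * 4K = 20K into the page */
	int count   = off >> PTE_FRAG_SIZE_SHIFT;			/* 5 fragments handed out */
	int pending = PTE_FRAG_NR - count;				/* 16 - 5 = 11 never claimed */
	/* atomic_sub_return(pending, &page->_count) then drops the references reserved up front */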
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 654258f165ae..a854096e1023 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -337,3 +337,121 @@ EXPORT_SYMBOL(__ioremap_at);
 EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(__iounmap);
 EXPORT_SYMBOL(__iounmap_at);
+
+#ifdef CONFIG_PPC_64K_PAGES
+static pte_t *get_from_cache(struct mm_struct *mm)
+{
+	void *pte_frag, *ret;
+
+	spin_lock(&mm->page_table_lock);
+	ret = mm->context.pte_frag;
+	if (ret) {
+		pte_frag = ret + PTE_FRAG_SIZE;
+		/*
+		 * If we have taken up all the fragments mark PTE page NULL
+		 */
+		if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
+			pte_frag = NULL;
+		mm->context.pte_frag = pte_frag;
+	}
+	spin_unlock(&mm->page_table_lock);
+	return (pte_t *)ret;
+}
+
+static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
+{
+	void *ret = NULL;
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+				       __GFP_REPEAT | __GFP_ZERO);
+	if (!page)
+		return NULL;
+
+	ret = page_address(page);
+	spin_lock(&mm->page_table_lock);
+	/*
+	 * If we find pgtable_page set, we return
+	 * the allocated page with single fragement
+	 * count.
+	 */
+	if (likely(!mm->context.pte_frag)) {
+		atomic_set(&page->_count, PTE_FRAG_NR);
+		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	if (!kernel)
+		pgtable_page_ctor(page);
+
+	return (pte_t *)ret;
+}
+
+pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+{
+	pte_t *pte;
+
+	pte = get_from_cache(mm);
+	if (pte)
+		return pte;
+
+	return __alloc_for_cache(mm, kernel);
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
+{
+	struct page *page = virt_to_page(table);
+	if (put_page_testzero(page)) {
+		if (!kernel)
+			pgtable_page_dtor(page);
+		free_hot_cold_page(page, 0);
+	}
+}
+
+#ifdef CONFIG_SMP
+static void page_table_free_rcu(void *table)
+{
+	struct page *page = virt_to_page(table);
+	if (put_page_testzero(page)) {
+		pgtable_page_dtor(page);
+		free_hot_cold_page(page, 0);
+	}
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+	unsigned long pgf = (unsigned long)table;
+
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	if (!shift)
+		/* PTE page needs special handling */
+		page_table_free_rcu(table);
+	else {
+		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(shift), table);
+	}
+}
+#else
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+	if (!shift) {
+		/* PTE page needs special handling */
+		struct page *page = virt_to_page(table);
+		if (put_page_testzero(page)) {
+			pgtable_page_dtor(page);
+			free_hot_cold_page(page, 0);
+		}
+	} else {
+		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(shift), table);
+	}
+}
+#endif
+#endif /* CONFIG_PPC_64K_PAGES */
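The callers of page_table_alloc()/page_table_free() live in the pgalloc
headers, outside this diffstat; presumably the 64K-page pte_alloc hooks are
wired up roughly as in the sketch below (not shown by this diff, signatures
following the generic pgalloc interface of that era):

static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
					  unsigned long address)
{
	return (pte_t *)page_table_alloc(mm, address, 1);	/* kernel table */
}

static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
				      unsigned long address)
{
	return (pgtable_t)page_table_alloc(mm, address, 0);	/* user table */
}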