author    David Gibson <david@gibson.dropbear.id.au>  2009-10-28 12:27:18 -0400
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>  2009-10-30 02:20:57 -0400
commit    a0668cdc154e54bf0c85182e0535eea237d53146 (patch)
tree      84efcadf011e16c240ac9b1c948141fc1cc7d324
parent    f71dc176aa06359681c30ba6877ffccab6fba3a6 (diff)
powerpc/mm: Cleanup management of kmem_caches for pagetables
Currently we have a fair bit of rather fiddly code to manage the
various kmem_caches used to store page tables of various levels.  We
generally have two caches holding some combination of PGD, PUD and PMD
tables, plus several more for the special hugepage pagetables.

This patch cleans this all up by taking a different approach.  Rather
than the caches being designated as for PUDs or for hugeptes for 16M
pages, the caches are simply allocated to be a specific size.  Thus
sharing of caches between different types/levels of pagetables happens
naturally.

The pagetable size, where needed, is passed around encoded in the same
way as {PGD,PUD,PMD}_INDEX_SIZE; that is n where the pagetable contains
2^n pointers.

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
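The low-bit encoding is the subtle part of this scheme, so here is a rough
user-space sketch of it (not part of the patch; pack_table(), unpack_table()
and the stand-alone main() are hypothetical names for illustration).  Because
every table is aligned to at least MAX_PGTABLE_INDEX_SIZE + 1 bytes, the
index size fits in the low bits of the table pointer, which is how
pgtable_free_tlb() batches pointer+size pairs for RCU freeing:

/* Sketch of the pointer/index-size packing; user-space, illustrative only. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_PGTABLE_INDEX_SIZE 0xf	/* mask: one less than a power of two */

/* Stash the index size in the low bits of an aligned table pointer. */
static unsigned long pack_table(void *table, unsigned shift)
{
	assert(shift <= MAX_PGTABLE_INDEX_SIZE);
	assert(((uintptr_t)table & MAX_PGTABLE_INDEX_SIZE) == 0); /* aligned */
	return (unsigned long)table | shift;
}

/* Recover both halves, as pte_free_rcu_callback() does. */
static void unpack_table(unsigned long pgf, void **table, unsigned *shift)
{
	*table = (void *)(pgf & ~(unsigned long)MAX_PGTABLE_INDEX_SIZE);
	*shift = pgf & MAX_PGTABLE_INDEX_SIZE;
}

int main(void)
{
	unsigned shift = 9;			/* a table of 2^9 pointers */
	size_t size = sizeof(void *) << shift;	/* cf. table_size below */
	void *table = aligned_alloc(size, size);/* >= MAX_PGTABLE_INDEX_SIZE + 1 */
	unsigned long pgf;
	unsigned s;
	void *t;

	if (!table)
		return 1;
	pgf = pack_table(table, shift);
	unpack_table(pgf, &t, &s);
	assert(t == table && s == shift);
	printf("table %p carries index size %u in its low bits\n", t, s);
	free(table);
	return 0;
}

This alignment requirement is also why pgtable_cache_add() in the patch
forces each cache's alignment up to at least MAX_PGTABLE_INDEX_SIZE + 1.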
-rw-r--r--  arch/powerpc/include/asm/pgalloc-32.h     |  10
-rw-r--r--  arch/powerpc/include/asm/pgalloc-64.h     |  60
-rw-r--r--  arch/powerpc/include/asm/pgalloc.h        |  30
-rw-r--r--  arch/powerpc/include/asm/pgtable-ppc64.h  |   1
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c             |  51
-rw-r--r--  arch/powerpc/mm/init_64.c                 |  70
-rw-r--r--  arch/powerpc/mm/pgtable.c                 |  25
7 files changed, 125 insertions(+), 122 deletions(-)
diff --git a/arch/powerpc/include/asm/pgalloc-32.h b/arch/powerpc/include/asm/pgalloc-32.h
index c9500d666a1d..580cf73b96e8 100644
--- a/arch/powerpc/include/asm/pgalloc-32.h
+++ b/arch/powerpc/include/asm/pgalloc-32.h
@@ -3,7 +3,8 @@
 
 #include <linux/threads.h>
 
-#define PTE_NONCACHE_NUM	0  /* dummy for now to share code w/ppc64 */
+/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
+#define MAX_PGTABLE_INDEX_SIZE	0
 
 extern void __bad_pte(pmd_t *pmd);
 
@@ -36,11 +37,10 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
 extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
 
-static inline void pgtable_free(pgtable_free_t pgf)
+static inline void pgtable_free(void *table, unsigned index_size)
 {
-	void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
-
-	free_page((unsigned long)p);
+	BUG_ON(index_size); /* 32-bit doesn't use this */
+	free_page((unsigned long)table);
 }
 
 #define check_pgt_cache()	do { } while (0)
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index e6f069c4f713..5c1cd73dafa8 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -11,27 +11,39 @@
 #include <linux/cpumask.h>
 #include <linux/percpu.h>
 
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation.  For PTE pages (which are linked to a struct
+ * page for now, and drawn from the main get_free_pages() pool), the
+ * allocation size will be (2^index_size * sizeof(pointer)) and
+ * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer.  In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value.  This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE	0xf
+
 #ifndef CONFIG_PPC_SUBPAGE_PROT
 static inline void subpage_prot_free(pgd_t *pgd) {}
 #endif
 
 extern struct kmem_cache *pgtable_cache[];
-
-#define PGD_CACHE_NUM		0
-#define PUD_CACHE_NUM		1
-#define PMD_CACHE_NUM		1
-#define HUGEPTE_CACHE_NUM	2
-#define PTE_NONCACHE_NUM	7  /* from GFP rather than kmem_cache */
+#define PGT_CACHE(shift) (pgtable_cache[(shift)-1])
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
+	return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 	subpage_prot_free(pgd);
-	kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
+	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
 }
 
 #ifndef CONFIG_PPC_64K_PAGES
@@ -40,13 +52,13 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
 				GFP_KERNEL|__GFP_REPEAT);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-	kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
+	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -78,13 +90,13 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
+	return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE),
 				GFP_KERNEL|__GFP_REPEAT);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
+	kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -107,24 +119,22 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 	return page;
 }
 
-static inline void pgtable_free(pgtable_free_t pgf)
+static inline void pgtable_free(void *table, unsigned index_size)
 {
-	void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
-	int cachenum = pgf.val & PGF_CACHENUM_MASK;
-
-	if (cachenum == PTE_NONCACHE_NUM)
-		free_page((unsigned long)p);
-	else
-		kmem_cache_free(pgtable_cache[cachenum], p);
+	if (!index_size)
+		free_page((unsigned long)table);
+	else {
+		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(index_size), table);
+	}
 }
 
-#define __pmd_free_tlb(tlb, pmd,addr)		      \
-	pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
-		PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
+#define __pmd_free_tlb(tlb, pmd, addr)		      \
+	pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE)
 #ifndef CONFIG_PPC_64K_PAGES
 #define __pud_free_tlb(tlb, pud, addr)		      \
-	pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
-		PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
+	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
+
 #endif /* CONFIG_PPC_64K_PAGES */
 
 #define check_pgt_cache()	do { } while (0)
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index f2e812de7c3c..abe8532bd14e 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -24,25 +24,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 	__free_page(ptepage);
 }
 
-typedef struct pgtable_free {
-	unsigned long val;
-} pgtable_free_t;
-
-/* This needs to be big enough to allow for MMU_PAGE_COUNT + 2 to be stored
- * and small enough to fit in the low bits of any naturally aligned page
- * table cache entry. Arbitrarily set to 0x1f, that should give us some
- * room to grow
- */
-#define PGF_CACHENUM_MASK	0x1f
-
-static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
-						unsigned long mask)
-{
-	BUG_ON(cachenum > PGF_CACHENUM_MASK);
-
-	return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum};
-}
-
 #ifdef CONFIG_PPC64
 #include <asm/pgalloc-64.h>
 #else
@@ -50,12 +31,12 @@ static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
 #endif
 
 #ifdef CONFIG_SMP
-extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
 extern void pte_free_finish(void);
 #else /* CONFIG_SMP */
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
 {
-	pgtable_free(pgf);
+	pgtable_free(table, shift);
 }
 static inline void pte_free_finish(void) { }
 #endif /* !CONFIG_SMP */
@@ -63,12 +44,9 @@ static inline void pte_free_finish(void) { }
 static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
 				  unsigned long address)
 {
-	pgtable_free_t	pgf = pgtable_free_cache(page_address(ptepage),
-						PTE_NONCACHE_NUM,
-						PTE_TABLE_SIZE-1);
 	tlb_flush_pgtable(tlb, address);
 	pgtable_page_dtor(ptepage);
-	pgtable_free_tlb(tlb, pgf);
+	pgtable_free_tlb(tlb, page_address(ptepage), 0);
 }
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 806abe7a3fa5..8697d6555090 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -354,6 +354,7 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 #define pgoff_to_pte(off)	((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
 #define PTE_FILE_MAX_BITS	(BITS_PER_LONG - PTE_RPN_SHIFT)
 
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
 /*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 3d542a9732ae..7230d7a4fbd9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -43,26 +43,14 @@ static unsigned nr_gpages;
 unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
 
 #define hugepte_shift			mmu_huge_psizes
-#define PTRS_PER_HUGEPTE(psize)		(1 << hugepte_shift[psize])
-#define HUGEPTE_TABLE_SIZE(psize)	(sizeof(pte_t) << hugepte_shift[psize])
+#define HUGEPTE_INDEX_SIZE(psize)	(mmu_huge_psizes[(psize)])
+#define PTRS_PER_HUGEPTE(psize)		(1 << mmu_huge_psizes[psize])
 
 #define HUGEPD_SHIFT(psize)		(mmu_psize_to_shift(psize) \
-					 + hugepte_shift[psize])
+					 + HUGEPTE_INDEX_SIZE(psize))
 #define HUGEPD_SIZE(psize)		(1UL << HUGEPD_SHIFT(psize))
 #define HUGEPD_MASK(psize)		(~(HUGEPD_SIZE(psize)-1))
 
-/* Subtract one from array size because we don't need a cache for 4K since
- * is not a huge page size */
-#define HUGE_PGTABLE_INDEX(psize)	(HUGEPTE_CACHE_NUM + psize - 1)
-#define HUGEPTE_CACHE_NAME(psize)	(huge_pgtable_cache_name[psize])
-
-static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
-	[MMU_PAGE_64K]	= "hugepte_cache_64K",
-	[MMU_PAGE_1M]	= "hugepte_cache_1M",
-	[MMU_PAGE_16M]	= "hugepte_cache_16M",
-	[MMU_PAGE_16G]	= "hugepte_cache_16G",
-};
-
 /* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
  * will choke on pointers to hugepte tables, which is handy for
  * catching screwups early. */
@@ -114,15 +102,15 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 			   unsigned long address, unsigned int psize)
 {
-	pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)],
+	pte_t *new = kmem_cache_zalloc(PGT_CACHE(hugepte_shift[psize]),
 				      GFP_KERNEL|__GFP_REPEAT);
 
 	if (! new)
 		return -ENOMEM;
 
 	spin_lock(&mm->page_table_lock);
 	if (!hugepd_none(*hpdp))
-		kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new);
+		kmem_cache_free(PGT_CACHE(hugepte_shift[psize]), new);
 	else
 		hpdp->pd = (unsigned long)new | HUGEPD_OK;
 	spin_unlock(&mm->page_table_lock);
@@ -271,9 +259,7 @@ static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
 
 	hpdp->pd = 0;
 	tlb->need_flush = 1;
-	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
-						 HUGEPTE_CACHE_NUM+psize-1,
-						 PGF_CACHENUM_MASK));
+	pgtable_free_tlb(tlb, hugepte, hugepte_shift[psize]);
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -698,8 +684,6 @@ static void __init set_huge_psize(int psize)
 	if (mmu_huge_psizes[psize] ||
 	    mmu_psize_defs[psize].shift == PAGE_SHIFT)
 		return;
-	if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL))
-		return;
 	hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
 
 	switch (mmu_psize_defs[psize].shift) {
@@ -753,9 +737,9 @@ static int __init hugetlbpage_init(void)
 	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
 		return -ENODEV;
 
-	/* Add supported huge page sizes.  Need to change HUGE_MAX_HSTATE
-	 * and adjust PTE_NONCACHE_NUM if the number of supported huge page
-	 * sizes changes.
+	/* Add supported huge page sizes.  Need to change
+	 * HUGE_MAX_HSTATE if the number of supported huge page sizes
+	 * changes.
 	 */
 	set_huge_psize(MMU_PAGE_16M);
 	set_huge_psize(MMU_PAGE_16G);
@@ -769,16 +753,11 @@ static int __init hugetlbpage_init(void)
 
 	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
 		if (mmu_huge_psizes[psize]) {
-			pgtable_cache[HUGE_PGTABLE_INDEX(psize)] =
-				kmem_cache_create(
-						HUGEPTE_CACHE_NAME(psize),
-						HUGEPTE_TABLE_SIZE(psize),
-						HUGEPTE_TABLE_SIZE(psize),
-						0,
-						NULL);
-			if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)])
-				panic("hugetlbpage_init(): could not create %s"\
-				      "\n", HUGEPTE_CACHE_NAME(psize));
+			pgtable_cache_add(hugepte_shift[psize], NULL);
+			if (!PGT_CACHE(hugepte_shift[psize]))
+				panic("hugetlbpage_init(): could not create "
+				      "pgtable cache for %d bit pagesize\n",
+				      mmu_psize_to_shift(psize));
 		}
 	}
 
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 335c578b9cc3..82ac61dcd3af 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -119,30 +119,58 @@ static void pmd_ctor(void *addr)
 	memset(addr, 0, PMD_TABLE_SIZE);
 }
 
-static const unsigned int pgtable_cache_size[2] = {
-	PGD_TABLE_SIZE, PMD_TABLE_SIZE
-};
-static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
-#ifdef CONFIG_PPC_64K_PAGES
-	"pgd_cache", "pmd_cache",
-#else
-	"pgd_cache", "pud_pmd_cache",
-#endif /* CONFIG_PPC_64K_PAGES */
-};
-
-#ifdef CONFIG_HUGETLB_PAGE
-/* Hugepages need an extra cache per hugepagesize, initialized in
- * hugetlbpage.c.  We can't put into the tables above, because HPAGE_SHIFT
- * is not compile time constant. */
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT];
-#else
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
-#endif
+struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
+
+/*
+ * Create a kmem_cache() for pagetables.  This is not used for PTE
+ * pages - they're linked to struct page, come from the normal free
+ * pages pool and have a different entry size (see real_pte_t) to
+ * everything else.  Caches created by this function are used for all
+ * the higher level pagetables, and for hugepage pagetables.
+ */
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
+{
+	char *name;
+	unsigned long table_size = sizeof(void *) << shift;
+	unsigned long align = table_size;
+
+	/* When batching pgtable pointers for RCU freeing, we store
+	 * the index size in the low bits.  Table alignment must be
+	 * big enough to fit it */
+	unsigned long minalign = MAX_PGTABLE_INDEX_SIZE + 1;
+	struct kmem_cache *new;
+
+	/* It would be nice if this was a BUILD_BUG_ON(), but at the
+	 * moment, gcc doesn't seem to recognize is_power_of_2 as a
+	 * constant expression, so so much for that. */
+	BUG_ON(!is_power_of_2(minalign));
+	BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
+
+	if (PGT_CACHE(shift))
+		return; /* Already have a cache of this size */
+
+	align = max_t(unsigned long, align, minalign);
+	name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
+	new = kmem_cache_create(name, table_size, align, 0, ctor);
+	PGT_CACHE(shift) = new;
+
+	pr_debug("Allocated pgtable cache for order %d\n", shift);
+}
+
 
 void pgtable_cache_init(void)
 {
-	pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor);
-	pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor);
+	pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
+	pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
+	if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
+		panic("Couldn't allocate pgtable caches");
+
+	/* In all current configs, when the PUD index exists it's the
+	 * same size as either the pgd or pmd index.  Verify that the
+	 * initialization above has also created a PUD cache.  This
+	 * will need re-examination if we add new possibilities for
+	 * the pagetable layout. */
+	BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 53040931de32..99df697c601a 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -49,12 +49,12 @@ struct pte_freelist_batch
 {
 	struct rcu_head	rcu;
 	unsigned int	index;
-	pgtable_free_t	tables[0];
+	unsigned long	tables[0];
 };
 
 #define PTE_FREELIST_SIZE \
 	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
-	  / sizeof(pgtable_free_t))
+	  / sizeof(unsigned long))
 
 static void pte_free_smp_sync(void *arg)
 {
@@ -64,13 +64,13 @@ static void pte_free_smp_sync(void *arg)
 /* This is only called when we are critically out of memory
  * (and fail to get a page in pte_free_tlb).
  */
-static void pgtable_free_now(pgtable_free_t pgf)
+static void pgtable_free_now(void *table, unsigned shift)
 {
 	pte_freelist_forced_free++;
 
 	smp_call_function(pte_free_smp_sync, NULL, 1);
 
-	pgtable_free(pgf);
+	pgtable_free(table, shift);
 }
 
 static void pte_free_rcu_callback(struct rcu_head *head)
@@ -79,8 +79,12 @@ static void pte_free_rcu_callback(struct rcu_head *head)
 		container_of(head, struct pte_freelist_batch, rcu);
 	unsigned int	i;
 
-	for (i = 0; i < batch->index; i++)
-		pgtable_free(batch->tables[i]);
+	for (i = 0; i < batch->index; i++) {
+		void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
+		unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
+
+		pgtable_free(table, shift);
+	}
 
 	free_page((unsigned long)batch);
 }
@@ -91,25 +95,28 @@ static void pte_free_submit(struct pte_freelist_batch *batch)
 	call_rcu(&batch->rcu, pte_free_rcu_callback);
 }
 
-void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
 {
 	/* This is safe since tlb_gather_mmu has disabled preemption */
 	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+	unsigned long pgf;
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){
-		pgtable_free(pgf);
+		pgtable_free(table, shift);
 		return;
 	}
 
 	if (*batchp == NULL) {
 		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
 		if (*batchp == NULL) {
-			pgtable_free_now(pgf);
+			pgtable_free_now(table, shift);
 			return;
 		}
 		(*batchp)->index = 0;
 	}
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf = (unsigned long)table | shift;
 	(*batchp)->tables[(*batchp)->index++] = pgf;
 	if ((*batchp)->index == PTE_FREELIST_SIZE) {
 		pte_free_submit(*batchp);