author      David Gibson <david@gibson.dropbear.id.au>            2009-10-28 12:27:18 -0400
committer   Benjamin Herrenschmidt <benh@kernel.crashing.org>     2009-10-30 02:20:57 -0400
commit      a0668cdc154e54bf0c85182e0535eea237d53146 (patch)
tree        84efcadf011e16c240ac9b1c948141fc1cc7d324 /arch/powerpc/mm
parent      f71dc176aa06359681c30ba6877ffccab6fba3a6 (diff)
powerpc/mm: Cleanup management of kmem_caches for pagetables
Currently we have a fair bit of rather fiddly code to manage the
various kmem_caches used to store page tables of various levels. We
generally have two caches holding some combination of PGD, PUD and PMD
tables, plus several more for the special hugepage pagetables.
This patch cleans this all up by taking a different approach. Rather
than the caches being designated as for PUDs or for hugeptes for 16M
pages, the caches are simply allocated to be a specific size. Thus
sharing of caches between different types/levels of pagetables happens
naturally. The pagetable size, where needed, is passed around encoded
in the same way as {PGD,PUD,PMD}_INDEX_SIZE; that is n where the
pagetable contains 2^n pointers.
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--   arch/powerpc/mm/hugetlbpage.c   |  51
-rw-r--r--   arch/powerpc/mm/init_64.c       |  70
-rw-r--r--   arch/powerpc/mm/pgtable.c       |  25
3 files changed, 80 insertions, 66 deletions
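
The commit message above comes down to one mechanism: a page-table cache is looked up purely by its index size (the n in "2^n pointers"), so any two levels, or any hugepage geometry, with the same n transparently share one kmem_cache. Below is a minimal standalone sketch of that idea in plain userspace C; it is not the kernel code, and the helper names and example index sizes are made up for illustration (the patch itself uses PGT_CACHE(shift) backed by pgtable_cache_add(), shown in the init_64.c hunk further down).

```c
#include <stdio.h>
#include <stdlib.h>

#define MAX_INDEX_SIZE 15	/* assumed upper bound on n, where a table holds 2^n pointers */

struct toy_cache {
	size_t object_size;	/* sizeof(void *) << shift, as in the real patch */
};

/* one slot per distinct index size; stands in for pgtable_cache[] / PGT_CACHE() */
static struct toy_cache *caches[MAX_INDEX_SIZE + 1];

/* create-on-demand lookup keyed only by index size (cf. pgtable_cache_add) */
static struct toy_cache *toy_cache_for(unsigned int shift)
{
	if (!caches[shift]) {
		caches[shift] = calloc(1, sizeof(*caches[shift]));
		if (!caches[shift])
			exit(EXIT_FAILURE);
		caches[shift]->object_size = sizeof(void *) << shift;
	}
	return caches[shift];
}

int main(void)
{
	/* made-up index sizes: two levels with 2^12 entries, one with 2^7 */
	struct toy_cache *pgd = toy_cache_for(12);
	struct toy_cache *pud = toy_cache_for(12);
	struct toy_cache *pmd = toy_cache_for(7);

	printf("pgd and pud share a cache: %s\n", pgd == pud ? "yes" : "no");
	printf("pmd table size: %zu bytes\n", pmd->object_size);
	return 0;
}
```

The point is that the cache key is the table geometry itself, which is what removes the old per-level and per-hugepage-size bookkeeping.
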
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 3d542a9732ae..7230d7a4fbd9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -43,26 +43,14 @@ static unsigned nr_gpages;
 unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
 
 #define hugepte_shift             mmu_huge_psizes
-#define PTRS_PER_HUGEPTE(psize)   (1 << hugepte_shift[psize])
-#define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize])
+#define HUGEPTE_INDEX_SIZE(psize) (mmu_huge_psizes[(psize)])
+#define PTRS_PER_HUGEPTE(psize)   (1 << mmu_huge_psizes[psize])
 
 #define HUGEPD_SHIFT(psize)       (mmu_psize_to_shift(psize) \
-                                   + hugepte_shift[psize])
+                                   + HUGEPTE_INDEX_SIZE(psize))
 #define HUGEPD_SIZE(psize)        (1UL << HUGEPD_SHIFT(psize))
 #define HUGEPD_MASK(psize)        (~(HUGEPD_SIZE(psize)-1))
 
-/* Subtract one from array size because we don't need a cache for 4K since
- * is not a huge page size */
-#define HUGE_PGTABLE_INDEX(psize) (HUGEPTE_CACHE_NUM + psize - 1)
-#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize])
-
-static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
-        [MMU_PAGE_64K]  = "hugepte_cache_64K",
-        [MMU_PAGE_1M]   = "hugepte_cache_1M",
-        [MMU_PAGE_16M]  = "hugepte_cache_16M",
-        [MMU_PAGE_16G]  = "hugepte_cache_16G",
-};
-
 /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad()
  * will choke on pointers to hugepte tables, which is handy for
  * catching screwups early. */
@@ -114,15 +102,15 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
                           unsigned long address, unsigned int psize)
 {
-        pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)],
+        pte_t *new = kmem_cache_zalloc(PGT_CACHE(hugepte_shift[psize]),
                                        GFP_KERNEL|__GFP_REPEAT);
 
         if (! new)
                 return -ENOMEM;
 
         spin_lock(&mm->page_table_lock);
         if (!hugepd_none(*hpdp))
-                kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new);
+                kmem_cache_free(PGT_CACHE(hugepte_shift[psize]), new);
         else
                 hpdp->pd = (unsigned long)new | HUGEPD_OK;
         spin_unlock(&mm->page_table_lock);
@@ -271,9 +259,7 @@ static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
 
         hpdp->pd = 0;
         tlb->need_flush = 1;
-        pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
-                                                 HUGEPTE_CACHE_NUM+psize-1,
-                                                 PGF_CACHENUM_MASK));
+        pgtable_free_tlb(tlb, hugepte, hugepte_shift[psize]);
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -698,8 +684,6 @@ static void __init set_huge_psize(int psize)
         if (mmu_huge_psizes[psize] ||
             mmu_psize_defs[psize].shift == PAGE_SHIFT)
                 return;
-        if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL))
-                return;
         hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
 
         switch (mmu_psize_defs[psize].shift) {
@@ -753,9 +737,9 @@ static int __init hugetlbpage_init(void)
         if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                 return -ENODEV;
 
-        /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE
-         * and adjust PTE_NONCACHE_NUM if the number of supported huge page
-         * sizes changes.
+        /* Add supported huge page sizes. Need to change
+         * HUGE_MAX_HSTATE if the number of supported huge page sizes
+         * changes.
          */
         set_huge_psize(MMU_PAGE_16M);
         set_huge_psize(MMU_PAGE_16G);
@@ -769,16 +753,11 @@ static int __init hugetlbpage_init(void)
 
         for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
                 if (mmu_huge_psizes[psize]) {
-                        pgtable_cache[HUGE_PGTABLE_INDEX(psize)] =
-                                kmem_cache_create(
-                                        HUGEPTE_CACHE_NAME(psize),
-                                        HUGEPTE_TABLE_SIZE(psize),
-                                        HUGEPTE_TABLE_SIZE(psize),
-                                        0,
-                                        NULL);
-                        if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)])
-                                panic("hugetlbpage_init(): could not create %s"\
-                                        "\n", HUGEPTE_CACHE_NAME(psize));
+                        pgtable_cache_add(hugepte_shift[psize], NULL);
+                        if (!PGT_CACHE(hugepte_shift[psize]))
+                                panic("hugetlbpage_init(): could not create "
+                                      "pgtable cache for %d bit pagesize\n",
+                                      mmu_psize_to_shift(psize));
                 }
         }
 
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 335c578b9cc3..82ac61dcd3af 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -119,30 +119,58 @@ static void pmd_ctor(void *addr)
         memset(addr, 0, PMD_TABLE_SIZE);
 }
 
-static const unsigned int pgtable_cache_size[2] = {
-        PGD_TABLE_SIZE, PMD_TABLE_SIZE
-};
-static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
-#ifdef CONFIG_PPC_64K_PAGES
-        "pgd_cache", "pmd_cache",
-#else
-        "pgd_cache", "pud_pmd_cache",
-#endif /* CONFIG_PPC_64K_PAGES */
-};
-
-#ifdef CONFIG_HUGETLB_PAGE
-/* Hugepages need an extra cache per hugepagesize, initialized in
- * hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT
- * is not compile time constant. */
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT];
-#else
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
-#endif
+struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
+
+/*
+ * Create a kmem_cache() for pagetables. This is not used for PTE
+ * pages - they're linked to struct page, come from the normal free
+ * pages pool and have a different entry size (see real_pte_t) to
+ * everything else. Caches created by this function are used for all
+ * the higher level pagetables, and for hugepage pagetables.
+ */
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
+{
+        char *name;
+        unsigned long table_size = sizeof(void *) << shift;
+        unsigned long align = table_size;
+
+        /* When batching pgtable pointers for RCU freeing, we store
+         * the index size in the low bits. Table alignment must be
+         * big enough to fit it */
+        unsigned long minalign = MAX_PGTABLE_INDEX_SIZE + 1;
+        struct kmem_cache *new;
+
+        /* It would be nice if this was a BUILD_BUG_ON(), but at the
+         * moment, gcc doesn't seem to recognize is_power_of_2 as a
+         * constant expression, so so much for that. */
+        BUG_ON(!is_power_of_2(minalign));
+        BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
+
+        if (PGT_CACHE(shift))
+                return; /* Already have a cache of this size */
+
+        align = max_t(unsigned long, align, minalign);
+        name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
+        new = kmem_cache_create(name, table_size, align, 0, ctor);
+        PGT_CACHE(shift) = new;
+
+        pr_debug("Allocated pgtable cache for order %d\n", shift);
+}
+
 
 void pgtable_cache_init(void)
 {
-        pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor);
-        pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor);
+        pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
+        pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
+        if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
+                panic("Couldn't allocate pgtable caches");
+
+        /* In all current configs, when the PUD index exists it's the
+         * same size as either the pgd or pmd index. Verify that the
+         * initialization above has also created a PUD cache. This
+         * will need re-examiniation if we add new possibilities for
+         * the pagetable layout. */
+        BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 53040931de32..99df697c601a 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -49,12 +49,12 @@ struct pte_freelist_batch
 {
         struct rcu_head rcu;
         unsigned int    index;
-        pgtable_free_t  tables[0];
+        unsigned long   tables[0];
 };
 
 #define PTE_FREELIST_SIZE \
         ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
-          / sizeof(pgtable_free_t))
+          / sizeof(unsigned long))
 
 static void pte_free_smp_sync(void *arg)
 {
@@ -64,13 +64,13 @@ static void pte_free_smp_sync(void *arg)
 /* This is only called when we are critically out of memory
  * (and fail to get a page in pte_free_tlb).
  */
-static void pgtable_free_now(pgtable_free_t pgf)
+static void pgtable_free_now(void *table, unsigned shift)
 {
         pte_freelist_forced_free++;
 
         smp_call_function(pte_free_smp_sync, NULL, 1);
 
-        pgtable_free(pgf);
+        pgtable_free(table, shift);
 }
 
 static void pte_free_rcu_callback(struct rcu_head *head)
@@ -79,8 +79,12 @@ static void pte_free_rcu_callback(struct rcu_head *head)
                 container_of(head, struct pte_freelist_batch, rcu);
         unsigned int i;
 
-        for (i = 0; i < batch->index; i++)
-                pgtable_free(batch->tables[i]);
+        for (i = 0; i < batch->index; i++) {
+                void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
+                unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
+
+                pgtable_free(table, shift);
+        }
 
         free_page((unsigned long)batch);
 }
@@ -91,25 +95,28 @@ static void pte_free_submit(struct pte_freelist_batch *batch)
         call_rcu(&batch->rcu, pte_free_rcu_callback);
 }
 
-void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
 {
         /* This is safe since tlb_gather_mmu has disabled preemption */
         struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+        unsigned long pgf;
 
         if (atomic_read(&tlb->mm->mm_users) < 2 ||
             cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){
-                pgtable_free(pgf);
+                pgtable_free(table, shift);
                 return;
         }
 
         if (*batchp == NULL) {
                 *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
                 if (*batchp == NULL) {
-                        pgtable_free_now(pgf);
+                        pgtable_free_now(table, shift);
                         return;
                 }
                 (*batchp)->index = 0;
         }
+        BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+        pgf = (unsigned long)table | shift;
         (*batchp)->tables[(*batchp)->index++] = pgf;
         if ((*batchp)->index == PTE_FREELIST_SIZE) {
                 pte_free_submit(*batchp);
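
A note on the pgtable.c changes: the deferred-free path can store each pending table as a single unsigned long because pgtable_cache_add() guarantees an alignment of at least MAX_PGTABLE_INDEX_SIZE + 1, leaving the low bits of the pointer free to carry the index size until the RCU callback runs. Below is a minimal standalone sketch of that packing in plain userspace C, assuming the mask is a small low-bit value such as 0xf; the helper names and the aligned_alloc() stand-in are illustrative, not the kernel's API.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Assumption for this sketch: the index size fits in the low 4 bits. */
#define MAX_PGTABLE_INDEX_SIZE 0xf

/* Pack a table pointer and its index size into one word (cf. pgtable_free_tlb). */
static uintptr_t pack(void *table, unsigned int shift)
{
	assert(shift <= MAX_PGTABLE_INDEX_SIZE);
	assert(((uintptr_t)table & MAX_PGTABLE_INDEX_SIZE) == 0); /* needs aligned tables */
	return (uintptr_t)table | shift;
}

/* Recover both halves (cf. pte_free_rcu_callback). */
static void unpack(uintptr_t pgf, void **table, unsigned int *shift)
{
	*table = (void *)(pgf & ~(uintptr_t)MAX_PGTABLE_INDEX_SIZE);
	*shift = pgf & MAX_PGTABLE_INDEX_SIZE;
}

int main(void)
{
	unsigned int shift = 9;	/* example: a table of 2^9 pointers */
	/* aligned_alloc() stands in for the aligned kmem_cache allocation */
	void *table = aligned_alloc(MAX_PGTABLE_INDEX_SIZE + 1, sizeof(void *) << shift);
	if (!table)
		return 1;

	uintptr_t pgf = pack(table, shift);
	void *t;
	unsigned int s;

	unpack(pgf, &t, &s);
	printf("round trip ok: %s\n", (t == table && s == shift) ? "yes" : "no");
	free(table);
	return 0;
}
```

Packing the shift into the pointer is what lets struct pte_freelist_batch shrink its tables[] array to plain unsigned longs in place of the old pgtable_free_t.
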