powerpc/mm: Cleanup management of kmem_caches for pagetables

Currently we have a fair bit of rather fiddly code to manage the various kmem_caches used to store page tables of various levels. We generally have two caches holding some combination of PGD, PUD and PMD tables, plus several more for the special hugepage pagetables.

This patch cleans this all up by taking a different approach. Rather than the caches being designated as for PUDs or for hugeptes for 16M pages, the caches are simply allocated to be a specific size. Thus sharing of caches between different types/levels of pagetables happens naturally. The pagetable size, where needed, is passed around encoded in the same way as {PGD,PUD,PMD}_INDEX_SIZE; that is n where the pagetable contains 2^n pointers.

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
author: David Gibson <david@gibson.dropbear.id.au> 2009-10-28 12:27:18 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2009-10-30 02:20:57 -0400
commit: a0668cdc154e54bf0c85182e0535eea237d53146 (patch)
tree: 84efcadf011e16c240ac9b1c948141fc1cc7d324 /arch/powerpc/mm/init_64.c
parent: f71dc176aa06359681c30ba6877ffccab6fba3a6 (diff)
1 files changed, 49 insertions, 21 deletions
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 335c578b9cc3..82ac61dcd3af 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -119,30 +119,58 @@ static void pmd_ctor(void *addr)
        memset(addr, 0, PMD_TABLE_SIZE);
 }
-static const unsigned int pgtable_cache_size[2] = {
+struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
-        PGD_TABLE_SIZE, PMD_TABLE_SIZE
-};
+/*
-static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+ * Create a kmem_cache() for pagetables.  This is not used for PTE
-#ifdef CONFIG_PPC_64K_PAGES
+ * pages - they're linked to struct page, come from the normal free
-        "pgd_cache", "pmd_cache",
+ * pages pool and have a different entry size (see real_pte_t) to
-#else
+ * everything else.  Caches created by this function are used for all
-        "pgd_cache", "pud_pmd_cache",
+ * the higher level pagetables, and for hugepage pagetables.
-#endif /* CONFIG_PPC_64K_PAGES */
+ */
-};
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
+{
-#ifdef CONFIG_HUGETLB_PAGE
+        char *name;
-/* Hugepages need an extra cache per hugepagesize, initialized in
+        unsigned long table_size = sizeof(void *) << shift;
- * hugetlbpage.c.  We can't put into the tables above, because HPAGE_SHIFT
+        unsigned long align = table_size;
- * is not compile time constant. */
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT];
+        /* When batching pgtable pointers for RCU freeing, we store
-#else
+         * the index size in the low bits.  Table alignment must be
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+         * big enough to fit it */
-#endif
+        unsigned long minalign = MAX_PGTABLE_INDEX_SIZE + 1;
+        struct kmem_cache *new;
+        /* It would be nice if this was a BUILD_BUG_ON(), but at the
+         * moment, gcc doesn't seem to recognize is_power_of_2 as a
+         * constant expression, so so much for that. */
+        BUG_ON(!is_power_of_2(minalign));
+        BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
+        if (PGT_CACHE(shift))
+                return; /* Already have a cache of this size */
+        align = max_t(unsigned long, align, minalign);
+        name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
+        new = kmem_cache_create(name, table_size, align, 0, ctor);
+        PGT_CACHE(shift) = new;
+        pr_debug("Allocated pgtable cache for order %d\n", shift);
+}
 void pgtable_cache_init(void)
 {
-        pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor);
+        pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
-        pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor);
+        pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
+        if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
+                panic("Couldn't allocate pgtable caches");
+        /* In all current configs, when the PUD index exists it's the
+         * same size as either the pgd or pmd index.  Verify that the
+         * initialization above has also created a PUD cache.  This
+         * will need re-examination if we add new possibilities for
+         * the pagetable layout. */
+        BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
author	David Gibson <david@gibson.dropbear.id.au>	2009-10-28 12:27:18 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2009-10-30 02:20:57 -0400
commit	a0668cdc154e54bf0c85182e0535eea237d53146 (patch)
tree	84efcadf011e16c240ac9b1c948141fc1cc7d324 /arch/powerpc/mm/init_64.c
parent	f71dc176aa06359681c30ba6877ffccab6fba3a6 (diff)

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 335c578b9cc3..82ac61dcd3af 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -119,30 +119,58 @@ static void pmd_ctor(void *addr)
119	memset(addr, 0, PMD_TABLE_SIZE);	119	memset(addr, 0, PMD_TABLE_SIZE);
120	}	120	}
121		121
122	static const unsigned int pgtable_cache_size[2] = {	122	struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
123	PGD_TABLE_SIZE, PMD_TABLE_SIZE	123
124	};	124	/*
125	static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {	125	* Create a kmem_cache() for pagetables. This is not used for PTE
126	#ifdef CONFIG_PPC_64K_PAGES	126	* pages - they're linked to struct page, come from the normal free
127	"pgd_cache", "pmd_cache",	127	* pages pool and have a different entry size (see real_pte_t) to
128	#else	128	* everything else. Caches created by this function are used for all
129	"pgd_cache", "pud_pmd_cache",	129	* the higher level pagetables, and for hugepage pagetables.
130	#endif /* CONFIG_PPC_64K_PAGES */	130	*/
131	};	131	void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
132		132	{
133	#ifdef CONFIG_HUGETLB_PAGE	133	char *name;
134	/* Hugepages need an extra cache per hugepagesize, initialized in	134	unsigned long table_size = sizeof(void *) << shift;
135	* hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT	135	unsigned long align = table_size;
136	* is not compile time constant. */	136
137	struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT];	137	/* When batching pgtable pointers for RCU freeing, we store
138	#else	138	* the index size in the low bits. Table alignment must be
139	struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];	139	* big enough to fit it */
140	#endif	140	unsigned long minalign = MAX_PGTABLE_INDEX_SIZE + 1;
		141	struct kmem_cache *new;
		142
		143	/* It would be nice if this was a BUILD_BUG_ON(), but at the
		144	* moment, gcc doesn't seem to recognize is_power_of_2 as a
		145	* constant expression, so so much for that. */
		146	BUG_ON(!is_power_of_2(minalign));
		147	BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
		148
		149	if (PGT_CACHE(shift))
		150	return; /* Already have a cache of this size */
		151
		152	align = max_t(unsigned long, align, minalign);
		153	name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
		154	new = kmem_cache_create(name, table_size, align, 0, ctor);
		155	PGT_CACHE(shift) = new;
		156
		157	pr_debug("Allocated pgtable cache for order %d\n", shift);
		158	}
		159
141		160
142	void pgtable_cache_init(void)	161	void pgtable_cache_init(void)
143	{	162	{
144	pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor);	163	pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
145	pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor);	164	pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
		165	if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
		166	panic("Couldn't allocate pgtable caches");
		167
		168	/* In all current configs, when the PUD index exists it's the
		169	* same size as either the pgd or pmd index. Verify that the
		170	* initialization above has also created a PUD cache. This
		171	* will need re-examination if we add new possibilities for
		172	* the pagetable layout. */
		173	BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
146	}	174	}
147		175
148	#ifdef CONFIG_SPARSEMEM_VMEMMAP	176	#ifdef CONFIG_SPARSEMEM_VMEMMAP