about summary refs log tree commit diff stats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
authorDavid Gibson <david@gibson.dropbear.id.au>2009-10-28 12:27:18 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-10-30 02:20:57 -0400
commita0668cdc154e54bf0c85182e0535eea237d53146 (patch)
tree84efcadf011e16c240ac9b1c948141fc1cc7d324 /arch/powerpc/mm
parentf71dc176aa06359681c30ba6877ffccab6fba3a6 (diff)
powerpc/mm: Cleanup management of kmem_caches for pagetables
Currently we have a fair bit of rather fiddly code to manage the various kmem_caches used to store page tables of various levels. We generally have two caches holding some combination of PGD, PUD and PMD tables, plus several more for the special hugepage pagetables. This patch cleans this all up by taking a different approach. Rather than the caches being designated as for PUDs or for hugeptes for 16M pages, the caches are simply allocated to be a specific size. Thus sharing of caches between different types/levels of pagetables happens naturally. The pagetable size, where needed, is passed around encoded in the same way as {PGD,PUD,PMD}_INDEX_SIZE; that is n where the pagetable contains 2^n pointers. Signed-off-by: David Gibson <dwg@au1.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/hugetlbpage.c51
-rw-r--r--arch/powerpc/mm/init_64.c70
-rw-r--r--arch/powerpc/mm/pgtable.c25
3 files changed, 80 insertions(+), 66 deletions(-)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 3d542a9732ae..7230d7a4fbd9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -43,26 +43,14 @@ static unsigned nr_gpages;
43unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ 43unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
44 44
45#define hugepte_shift mmu_huge_psizes 45#define hugepte_shift mmu_huge_psizes
46#define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize]) 46#define HUGEPTE_INDEX_SIZE(psize) (mmu_huge_psizes[(psize)])
47#define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize]) 47#define PTRS_PER_HUGEPTE(psize) (1 << mmu_huge_psizes[psize])
48 48
49#define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ 49#define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \
50 + hugepte_shift[psize]) 50 + HUGEPTE_INDEX_SIZE(psize))
51#define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) 51#define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize))
52#define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) 52#define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1))
53 53
54/* Subtract one from array size because we don't need a cache for 4K since
55 * is not a huge page size */
56#define HUGE_PGTABLE_INDEX(psize) (HUGEPTE_CACHE_NUM + psize - 1)
57#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize])
58
59static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
60 [MMU_PAGE_64K] = "hugepte_cache_64K",
61 [MMU_PAGE_1M] = "hugepte_cache_1M",
62 [MMU_PAGE_16M] = "hugepte_cache_16M",
63 [MMU_PAGE_16G] = "hugepte_cache_16G",
64};
65
66/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() 54/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad()
67 * will choke on pointers to hugepte tables, which is handy for 55 * will choke on pointers to hugepte tables, which is handy for
68 * catching screwups early. */ 56 * catching screwups early. */
@@ -114,15 +102,15 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
114static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, 102static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
115 unsigned long address, unsigned int psize) 103 unsigned long address, unsigned int psize)
116{ 104{
117 pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], 105 pte_t *new = kmem_cache_zalloc(PGT_CACHE(hugepte_shift[psize]),
118 GFP_KERNEL|__GFP_REPEAT); 106 GFP_KERNEL|__GFP_REPEAT);
119 107
120 if (! new) 108 if (! new)
121 return -ENOMEM; 109 return -ENOMEM;
122 110
123 spin_lock(&mm->page_table_lock); 111 spin_lock(&mm->page_table_lock);
124 if (!hugepd_none(*hpdp)) 112 if (!hugepd_none(*hpdp))
125 kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new); 113 kmem_cache_free(PGT_CACHE(hugepte_shift[psize]), new);
126 else 114 else
127 hpdp->pd = (unsigned long)new | HUGEPD_OK; 115 hpdp->pd = (unsigned long)new | HUGEPD_OK;
128 spin_unlock(&mm->page_table_lock); 116 spin_unlock(&mm->page_table_lock);
@@ -271,9 +259,7 @@ static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
271 259
272 hpdp->pd = 0; 260 hpdp->pd = 0;
273 tlb->need_flush = 1; 261 tlb->need_flush = 1;
274 pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, 262 pgtable_free_tlb(tlb, hugepte, hugepte_shift[psize]);
275 HUGEPTE_CACHE_NUM+psize-1,
276 PGF_CACHENUM_MASK));
277} 263}
278 264
279static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, 265static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
@@ -698,8 +684,6 @@ static void __init set_huge_psize(int psize)
698 if (mmu_huge_psizes[psize] || 684 if (mmu_huge_psizes[psize] ||
699 mmu_psize_defs[psize].shift == PAGE_SHIFT) 685 mmu_psize_defs[psize].shift == PAGE_SHIFT)
700 return; 686 return;
701 if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL))
702 return;
703 hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); 687 hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
704 688
705 switch (mmu_psize_defs[psize].shift) { 689 switch (mmu_psize_defs[psize].shift) {
@@ -753,9 +737,9 @@ static int __init hugetlbpage_init(void)
753 if (!cpu_has_feature(CPU_FTR_16M_PAGE)) 737 if (!cpu_has_feature(CPU_FTR_16M_PAGE))
754 return -ENODEV; 738 return -ENODEV;
755 739
756 /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE 740 /* Add supported huge page sizes. Need to change
757 * and adjust PTE_NONCACHE_NUM if the number of supported huge page 741 * HUGE_MAX_HSTATE if the number of supported huge page sizes
758 * sizes changes. 742 * changes.
759 */ 743 */
760 set_huge_psize(MMU_PAGE_16M); 744 set_huge_psize(MMU_PAGE_16M);
761 set_huge_psize(MMU_PAGE_16G); 745 set_huge_psize(MMU_PAGE_16G);
@@ -769,16 +753,11 @@ static int __init hugetlbpage_init(void)
769 753
770 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 754 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
771 if (mmu_huge_psizes[psize]) { 755 if (mmu_huge_psizes[psize]) {
772 pgtable_cache[HUGE_PGTABLE_INDEX(psize)] = 756 pgtable_cache_add(hugepte_shift[psize], NULL);
773 kmem_cache_create( 757 if (!PGT_CACHE(hugepte_shift[psize]))
774 HUGEPTE_CACHE_NAME(psize), 758 panic("hugetlbpage_init(): could not create "
775 HUGEPTE_TABLE_SIZE(psize), 759 "pgtable cache for %d bit pagesize\n",
776 HUGEPTE_TABLE_SIZE(psize), 760 mmu_psize_to_shift(psize));
777 0,
778 NULL);
779 if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)])
780 panic("hugetlbpage_init(): could not create %s"\
781 "\n", HUGEPTE_CACHE_NAME(psize));
782 } 761 }
783 } 762 }
784 763
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 335c578b9cc3..82ac61dcd3af 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -119,30 +119,58 @@ static void pmd_ctor(void *addr)
119 memset(addr, 0, PMD_TABLE_SIZE); 119 memset(addr, 0, PMD_TABLE_SIZE);
120} 120}
121 121
122static const unsigned int pgtable_cache_size[2] = { 122struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
123 PGD_TABLE_SIZE, PMD_TABLE_SIZE 123
124}; 124/*
125static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { 125 * Create a kmem_cache() for pagetables. This is not used for PTE
126#ifdef CONFIG_PPC_64K_PAGES 126 * pages - they're linked to struct page, come from the normal free
127 "pgd_cache", "pmd_cache", 127 * pages pool and have a different entry size (see real_pte_t) to
128#else 128 * everything else. Caches created by this function are used for all
129 "pgd_cache", "pud_pmd_cache", 129 * the higher level pagetables, and for hugepage pagetables.
130#endif /* CONFIG_PPC_64K_PAGES */ 130 */
131}; 131void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
132 132{
133#ifdef CONFIG_HUGETLB_PAGE 133 char *name;
134/* Hugepages need an extra cache per hugepagesize, initialized in 134 unsigned long table_size = sizeof(void *) << shift;
135 * hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT 135 unsigned long align = table_size;
136 * is not compile time constant. */ 136
137struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT]; 137 /* When batching pgtable pointers for RCU freeing, we store
138#else 138 * the index size in the low bits. Table alignment must be
139struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; 139 * big enough to fit it */
140#endif 140 unsigned long minalign = MAX_PGTABLE_INDEX_SIZE + 1;
141 struct kmem_cache *new;
142
143 /* It would be nice if this was a BUILD_BUG_ON(), but at the
144 * moment, gcc doesn't seem to recognize is_power_of_2 as a
145 * constant expression, so so much for that. */
146 BUG_ON(!is_power_of_2(minalign));
147 BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
148
149 if (PGT_CACHE(shift))
150 return; /* Already have a cache of this size */
151
152 align = max_t(unsigned long, align, minalign);
153 name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
154 new = kmem_cache_create(name, table_size, align, 0, ctor);
155 PGT_CACHE(shift) = new;
156
157 pr_debug("Allocated pgtable cache for order %d\n", shift);
158}
159
141 160
142void pgtable_cache_init(void) 161void pgtable_cache_init(void)
143{ 162{
144 pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor); 163 pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
145 pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor); 164 pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
165 if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
166 panic("Couldn't allocate pgtable caches");
167
168 /* In all current configs, when the PUD index exists it's the
169 * same size as either the pgd or pmd index. Verify that the
170 * initialization above has also created a PUD cache. This
171 * will need re-examiniation if we add new possibilities for
172 * the pagetable layout. */
173 BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
146} 174}
147 175
148#ifdef CONFIG_SPARSEMEM_VMEMMAP 176#ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 53040931de32..99df697c601a 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -49,12 +49,12 @@ struct pte_freelist_batch
49{ 49{
50 struct rcu_head rcu; 50 struct rcu_head rcu;
51 unsigned int index; 51 unsigned int index;
52 pgtable_free_t tables[0]; 52 unsigned long tables[0];
53}; 53};
54 54
55#define PTE_FREELIST_SIZE \ 55#define PTE_FREELIST_SIZE \
56 ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ 56 ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
57 / sizeof(pgtable_free_t)) 57 / sizeof(unsigned long))
58 58
59static void pte_free_smp_sync(void *arg) 59static void pte_free_smp_sync(void *arg)
60{ 60{
@@ -64,13 +64,13 @@ static void pte_free_smp_sync(void *arg)
64/* This is only called when we are critically out of memory 64/* This is only called when we are critically out of memory
65 * (and fail to get a page in pte_free_tlb). 65 * (and fail to get a page in pte_free_tlb).
66 */ 66 */
67static void pgtable_free_now(pgtable_free_t pgf) 67static void pgtable_free_now(void *table, unsigned shift)
68{ 68{
69 pte_freelist_forced_free++; 69 pte_freelist_forced_free++;
70 70
71 smp_call_function(pte_free_smp_sync, NULL, 1); 71 smp_call_function(pte_free_smp_sync, NULL, 1);
72 72
73 pgtable_free(pgf); 73 pgtable_free(table, shift);
74} 74}
75 75
76static void pte_free_rcu_callback(struct rcu_head *head) 76static void pte_free_rcu_callback(struct rcu_head *head)
@@ -79,8 +79,12 @@ static void pte_free_rcu_callback(struct rcu_head *head)
79 container_of(head, struct pte_freelist_batch, rcu); 79 container_of(head, struct pte_freelist_batch, rcu);
80 unsigned int i; 80 unsigned int i;
81 81
82 for (i = 0; i < batch->index; i++) 82 for (i = 0; i < batch->index; i++) {
83 pgtable_free(batch->tables[i]); 83 void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE);
84 unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE;
85
86 pgtable_free(table, shift);
87 }
84 88
85 free_page((unsigned long)batch); 89 free_page((unsigned long)batch);
86} 90}
@@ -91,25 +95,28 @@ static void pte_free_submit(struct pte_freelist_batch *batch)
91 call_rcu(&batch->rcu, pte_free_rcu_callback); 95 call_rcu(&batch->rcu, pte_free_rcu_callback);
92} 96}
93 97
94void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) 98void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
95{ 99{
96 /* This is safe since tlb_gather_mmu has disabled preemption */ 100 /* This is safe since tlb_gather_mmu has disabled preemption */
97 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); 101 struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
102 unsigned long pgf;
98 103
99 if (atomic_read(&tlb->mm->mm_users) < 2 || 104 if (atomic_read(&tlb->mm->mm_users) < 2 ||
100 cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){ 105 cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){
101 pgtable_free(pgf); 106 pgtable_free(table, shift);
102 return; 107 return;
103 } 108 }
104 109
105 if (*batchp == NULL) { 110 if (*batchp == NULL) {
106 *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); 111 *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
107 if (*batchp == NULL) { 112 if (*batchp == NULL) {
108 pgtable_free_now(pgf); 113 pgtable_free_now(table, shift);
109 return; 114 return;
110 } 115 }
111 (*batchp)->index = 0; 116 (*batchp)->index = 0;
112 } 117 }
118 BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
119 pgf = (unsigned long)table | shift;
113 (*batchp)->tables[(*batchp)->index++] = pgf; 120 (*batchp)->tables[(*batchp)->index++] = pgf;
114 if ((*batchp)->index == PTE_FREELIST_SIZE) { 121 if ((*batchp)->index == PTE_FREELIST_SIZE) {
115 pte_free_submit(*batchp); 122 pte_free_submit(*batchp);