Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c  295
-rw-r--r--  arch/powerpc/mm/init_64.c        7
2 files changed, 266 insertions, 36 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7370f9f33e29..266b8b2ceac9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -30,13 +30,66 @@
 #define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
 
+#ifdef CONFIG_PPC_64K_PAGES
+#define HUGEPTE_INDEX_SIZE	(PMD_SHIFT-HPAGE_SHIFT)
+#else
+#define HUGEPTE_INDEX_SIZE	(PUD_SHIFT-HPAGE_SHIFT)
+#endif
+#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
+#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << HUGEPTE_INDEX_SIZE)
+
+#define HUGEPD_SHIFT	(HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
+#define HUGEPD_SIZE	(1UL << HUGEPD_SHIFT)
+#define HUGEPD_MASK	(~(HUGEPD_SIZE-1))
+
+#define huge_pgtable_cache	(pgtable_cache[HUGEPTE_CACHE_NUM])
+
+/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
+ * will choke on pointers to hugepte tables, which is handy for
+ * catching screwups early. */
+#define HUGEPD_OK	0x1
+
+typedef struct { unsigned long pd; } hugepd_t;
+
+#define hugepd_none(hpd)	((hpd).pd == 0)
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+	BUG_ON(!(hpd.pd & HUGEPD_OK));
+	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
+}
+
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
+{
+	unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
+	pte_t *dir = hugepd_page(*hpdp);
+
+	return dir + idx;
+}
+
+static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
+			   unsigned long address)
+{
+	pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
+				      GFP_KERNEL|__GFP_REPEAT);
+
+	if (! new)
+		return -ENOMEM;
+
+	spin_lock(&mm->page_table_lock);
+	if (!hugepd_none(*hpdp))
+		kmem_cache_free(huge_pgtable_cache, new);
+	else
+		hpdp->pd = (unsigned long)new | HUGEPD_OK;
+	spin_unlock(&mm->page_table_lock);
+	return 0;
+}
+
 /* Modelled after find_linux_pte() */
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pg;
 	pud_t *pu;
-	pmd_t *pm;
-	pte_t *pt;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
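The hugepd_t introduced above packs a pointer to a hugepte table and a
validity tag into a single word: the table is more than 2-byte aligned, so
bit 0 is free to carry HUGEPD_OK, and hugepd_page() BUG()s if asked to
decode an entry without the tag. Note also the standard race-handling shape
of __hugepte_alloc(): allocate outside the lock, then back the allocation
out if another thread installed an entry first. A minimal standalone sketch
of the tagged-pointer trick (illustrative names, not kernel API):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define TAG_OK 0x1UL	/* plays the role of HUGEPD_OK */

typedef struct { unsigned long pd; } tagged_t;

/* Encode: the table pointer must have bit 0 clear (alignment). */
static tagged_t tag_encode(void *table)
{
	assert(((unsigned long)table & TAG_OK) == 0);
	return (tagged_t){ (unsigned long)table | TAG_OK };
}

/* Decode: check the tag (mirrors the BUG_ON in hugepd_page()),
 * then mask it off to recover the original pointer. */
static void *tag_decode(tagged_t t)
{
	assert(t.pd & TAG_OK);
	return (void *)(t.pd & ~TAG_OK);
}

int main(void)
{
	long *table = malloc(64 * sizeof(long));	/* stand-in hugepte table */
	tagged_t t = tag_encode(table);

	printf("raw=%p tagged=%#lx decoded=%p\n",
	       (void *)table, t.pd, tag_decode(t));
	free(table);
	return 0;
}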
@@ -46,26 +99,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	if (!pgd_none(*pg)) {
 		pu = pud_offset(pg, addr);
 		if (!pud_none(*pu)) {
-			pm = pmd_offset(pu, addr);
 #ifdef CONFIG_PPC_64K_PAGES
-			/* Currently, we use the normal PTE offset within full
-			 * size PTE pages, thus our huge PTEs are scattered in
-			 * the PTE page and we do waste some. We may change
-			 * that in the future, but the current mecanism keeps
-			 * things much simpler
-			 */
-			if (!pmd_none(*pm)) {
-				/* Note: pte_offset_* are all equivalent on
-				 * ppc64 as we don't have HIGHMEM
-				 */
-				pt = pte_offset_kernel(pm, addr);
-				return pt;
-			}
-#else /* CONFIG_PPC_64K_PAGES */
-			/* On 4k pages, we put huge PTEs in the PMD page */
-			pt = (pte_t *)pm;
-			return pt;
-#endif /* CONFIG_PPC_64K_PAGES */
+			pmd_t *pm;
+			pm = pmd_offset(pu, addr);
+			if (!pmd_none(*pm))
+				return hugepte_offset((hugepd_t *)pm, addr);
+#else
+			return hugepte_offset((hugepd_t *)pu, addr);
+#endif
 		}
 	}
 
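With hugepd entries in place, huge_pte_offset() no longer walks down to a
normal PTE page; hugepte_offset() picks the slot with one shift and one
mask. A worked example of that arithmetic under illustrative constants
(16M huge pages and an assumed 64-entry hugepte table; the real values
come from the #defines at the top of the patch):

#include <stdio.h>

/* Illustrative stand-ins for the patch's constants. */
#define HPAGE_SHIFT		24	/* 16M huge pages */
#define HUGEPTE_INDEX_SIZE	6	/* assumed: 64 PTEs per hugepte table */
#define PTRS_PER_HUGEPTE	(1UL << HUGEPTE_INDEX_SIZE)
#define HUGEPD_SHIFT		(HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)

int main(void)
{
	unsigned long addr = 0x12345678000UL;

	/* Same arithmetic as hugepte_offset(): huge-page number within
	 * the region covered by one hugepte table. */
	unsigned long idx = (addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE - 1);

	printf("hugepd region %#lx, slot %lu of %lu\n",
	       addr >> HUGEPD_SHIFT, idx, PTRS_PER_HUGEPTE);
	return 0;
}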
@@ -76,8 +117,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pg;
 	pud_t *pu;
-	pmd_t *pm;
-	pte_t *pt;
+	hugepd_t *hpdp = NULL;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
@@ -87,23 +127,182 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	pu = pud_alloc(mm, pg, addr);
 
 	if (pu) {
+#ifdef CONFIG_PPC_64K_PAGES
+		pmd_t *pm;
 		pm = pmd_alloc(mm, pu, addr);
-		if (pm) {
+		if (pm)
+			hpdp = (hugepd_t *)pm;
+#else
+		hpdp = (hugepd_t *)pu;
+#endif
+	}
+
+	if (! hpdp)
+		return NULL;
+
+	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
+		return NULL;
+
+	return hugepte_offset(hpdp, addr);
+}
+
+static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
+{
+	pte_t *hugepte = hugepd_page(*hpdp);
+
+	hpdp->pd = 0;
+	tlb->need_flush = 1;
+	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
+						 HUGEPTE_TABLE_SIZE-1));
+}
+
 #ifdef CONFIG_PPC_64K_PAGES
-			/* See comment in huge_pte_offset. Note that if we ever
-			 * want to put the page size in the PMD, we would have
-			 * to open code our own pte_alloc* function in order
-			 * to populate and set the size atomically
-			 */
-			pt = pte_alloc_map(mm, pm, addr);
-#else /* CONFIG_PPC_64K_PAGES */
-			pt = (pte_t *)pm;
-#endif /* CONFIG_PPC_64K_PAGES */
-			return pt;
-		}
+static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pmd_t *pmd;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*pmd))
+			continue;
+		free_hugepte_range(tlb, (hugepd_t *)pmd);
+	} while (pmd++, addr = next, addr != end);
+
+	start &= PUD_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PUD_MASK;
+		if (!ceiling)
+			return;
 	}
+	if (end - 1 > ceiling - 1)
+		return;
 
-	return NULL;
+	pmd = pmd_offset(pud, start);
+	pud_clear(pud);
+	pmd_free_tlb(tlb, pmd);
+}
+#endif
+
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pud_t *pud;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+#ifdef CONFIG_PPC_64K_PAGES
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+#else
+		if (pud_none(*pud))
+			continue;
+		free_hugepte_range(tlb, (hugepd_t *)pud);
+#endif
+	} while (pud++, addr = next, addr != end);
+
+	start &= PGDIR_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PGDIR_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pud = pud_offset(pgd, start);
+	pgd_clear(pgd);
+	pud_free_tlb(tlb, pud);
+}
+
+/*
+ * This function frees user-level page tables of a process.
+ *
+ * Must be called with pagetable lock held.
+ */
+void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+			    unsigned long addr, unsigned long end,
+			    unsigned long floor, unsigned long ceiling)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	unsigned long start;
+
+	/*
+	 * Comments below are taken from the normal free_pgd_range().  They
+	 * apply here too.  The tests against HUGEPD_MASK below are
+	 * essential, because we *don't* test for this at the bottom
+	 * level.  Without them we'll attempt to free a hugepte table
+	 * when we unmap just part of it, even if there are other
+	 * active mappings using it.
+	 *
+	 * The next few lines have given us lots of grief...
+	 *
+	 * Why are we testing HUGEPD* at this top level?  Because
+	 * often there will be no work to do at all, and we'd prefer
+	 * not to go all the way down to the bottom just to discover
+	 * that.
+	 *
+	 * Why all these "- 1"s?  Because 0 represents both the bottom
+	 * of the address space and the top of it (using -1 for the
+	 * top wouldn't help much: the masks would do the wrong thing).
+	 * The rule is that addr 0 and floor 0 refer to the bottom of
+	 * the address space, but end 0 and ceiling 0 refer to the top.
+	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
+	 * that end 0 case should be mythical).
+	 *
+	 * Wherever addr is brought up or ceiling brought down, we
+	 * must be careful to reject "the opposite 0" before it
+	 * confuses the subsequent tests.  But what about where end is
+	 * brought down by HUGEPD_SIZE below?  No, end can't go down to
+	 * 0 there.
+	 *
+	 * Whereas we round start (addr) and ceiling down, by different
+	 * masks at different levels, in order to test whether a table
+	 * now has no other vmas using it, so can be freed, we don't
+	 * bother to round floor or end up - the tests don't need that.
+	 */
+
+	addr &= HUGEPD_MASK;
+	if (addr < floor) {
+		addr += HUGEPD_SIZE;
+		if (!addr)
+			return;
+	}
+	if (ceiling) {
+		ceiling &= HUGEPD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		end -= HUGEPD_SIZE;
+	if (addr > end - 1)
+		return;
+
+	start = addr;
+	pgd = pgd_offset((*tlb)->mm, addr);
+	do {
+		BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+	} while (pgd++, addr = next, addr != end);
 }
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
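The entry tests at the top of hugetlb_free_pgd_range() are the part the
long comment agonizes over: round addr and ceiling to HUGEPD boundaries,
reject the wrapped-to-zero cases, and bail out unless a whole
hugepd-aligned span is actually free of other users. A standalone sketch
of just those tests, using an illustrative HUGEPD_SHIFT of 30 (1G spans;
the kernel derives the real value from HPAGE_SHIFT):

#include <stdio.h>

#define HUGEPD_SHIFT	30			/* illustrative only */
#define HUGEPD_SIZE	(1UL << HUGEPD_SHIFT)
#define HUGEPD_MASK	(~(HUGEPD_SIZE - 1))

/* Mirror of the early-exit tests in hugetlb_free_pgd_range(): returns 0
 * when the function would bail out without freeing any tables, 1 when a
 * whole hugepd-aligned span survives the floor/ceiling rounding. */
static int span_worth_freeing(unsigned long addr, unsigned long end,
			      unsigned long floor, unsigned long ceiling)
{
	addr &= HUGEPD_MASK;
	if (addr < floor) {
		addr += HUGEPD_SIZE;
		if (!addr)		/* wrapped: "the opposite 0" */
			return 0;
	}
	if (ceiling) {
		ceiling &= HUGEPD_MASK;
		if (!ceiling)
			return 0;
	}
	if (end - 1 > ceiling - 1)
		end -= HUGEPD_SIZE;
	if (addr > end - 1)
		return 0;
	return 1;
}

int main(void)
{
	/* Unmapping part of a span hemmed in by live neighbours: nothing
	 * may be freed. Prints 0. */
	printf("%d\n", span_worth_freeing(0x40001000UL, 0x40002000UL,
					  0x40001000UL, 0x40002000UL));
	/* A whole aligned span with nothing else around it: prints 1. */
	printf("%d\n", span_worth_freeing(0x40000000UL, 0x80000000UL, 0, 0));
	return 0;
}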
@@ -841,3 +1040,27 @@ repeat:
 out:
 	return err;
 }
+
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+{
+	memset(addr, 0, kmem_cache_size(cache));
+}
+
+static int __init hugetlbpage_init(void)
+{
+	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
+		return -ENODEV;
+
+	huge_pgtable_cache = kmem_cache_create("hugepte_cache",
+					       HUGEPTE_TABLE_SIZE,
+					       HUGEPTE_TABLE_SIZE,
+					       SLAB_HWCACHE_ALIGN |
+					       SLAB_MUST_HWCACHE_ALIGN,
+					       zero_ctor, NULL);
+	if (! huge_pgtable_cache)
+		panic("hugetlbpage_init(): could not create hugepte cache\n");
+
+	return 0;
+}
+
+module_init(hugetlbpage_init);
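hugetlbpage_init() can only size the new slab at boot, because
HUGEPTE_TABLE_SIZE depends on HPAGE_SHIFT, which is not a compile-time
constant. zero_ctor() guarantees every object comes back fully zeroed, so
a freshly allocated table immediately satisfies hugepd_none() for all of
its entries. A userspace sketch of that pre-zeroed-object contract (the
fake_cache names are illustrative, not kernel API):

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the hugepte slab: fixed-size objects handed out
 * pre-zeroed, the invariant that zero_ctor() provides. */
struct fake_cache {
	size_t objsize;		/* set at run time, like HUGEPTE_TABLE_SIZE */
};

static void *fake_cache_alloc(const struct fake_cache *c)
{
	return calloc(1, c->objsize);	/* calloc plays the zero_ctor role */
}

int main(void)
{
	struct fake_cache cache = { .objsize = 64 * sizeof(unsigned long) };
	unsigned long *table = fake_cache_alloc(&cache);

	/* Every slot reads as "none" (0) before any PTE is installed. */
	printf("slot 0 before any install: %lu\n", table[0]);
	free(table);
	return 0;
}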
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index babebd15bdc4..9e30f968c184 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -162,7 +162,14 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
 };
 #endif /* CONFIG_PPC_64K_PAGES */
 
+#ifdef CONFIG_HUGETLB_PAGE
+/* Hugepages need one extra cache, initialized in hugetlbpage.c.  We
+ * can't put it into the tables above, because HPAGE_SHIFT is not a
+ * compile-time constant. */
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1];
+#else
 kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+#endif
 
 void pgtable_cache_init(void)
 {
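The #ifdef above tacks one extra, late-initialized slot onto pgtable_cache
for the hugepte cache, since its object size can't join the static
pgtable_cache_size table. A compile-time sketch of the same
conditional-extra-slot pattern (the sizes and the placement of
HUGEPTE_CACHE_NUM in the spare last slot are assumptions for illustration):

#include <stdio.h>

#define CONFIG_HUGETLB_PAGE 1	/* illustrative; set by Kconfig in reality */

static const int pgtable_cache_size[2] = { 4096, 8192 };
#define NCACHES (sizeof(pgtable_cache_size) / sizeof(pgtable_cache_size[0]))

#ifdef CONFIG_HUGETLB_PAGE
/* One extra slot, filled in late (by hugetlbpage_init() in the patch),
 * because its object size is only known at run time. */
static void *pgtable_cache[NCACHES + 1];
#define HUGEPTE_CACHE_NUM NCACHES	/* assumed: the spare last slot */
#else
static void *pgtable_cache[NCACHES];
#endif

int main(void)
{
	printf("%zu slots, hugepte slot index = %zu\n",
	       sizeof(pgtable_cache) / sizeof(pgtable_cache[0]),
	       (size_t)HUGEPTE_CACHE_NUM);
	return 0;
}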