Diffstat (limited to 'arch/ppc64/mm')
-rw-r--r--  arch/ppc64/mm/hash_utils.c   |   2
-rw-r--r--  arch/ppc64/mm/hugetlbpage.c  | 187
-rw-r--r--  arch/ppc64/mm/imalloc.c      |   2
-rw-r--r--  arch/ppc64/mm/init.c         |  62
-rw-r--r--  arch/ppc64/mm/slb_low.S      |   2
-rw-r--r--  arch/ppc64/mm/tlb.c          |  95
6 files changed, 158 insertions, 192 deletions
diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c
index 623b5d130c31..65d6e8527948 100644
--- a/arch/ppc64/mm/hash_utils.c
+++ b/arch/ppc64/mm/hash_utils.c
@@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	int local = 0;
 	cpumask_t tmp;
 
-	if ((ea & ~REGION_MASK) > EADDR_MASK)
+	if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
 		return 1;
 
 	switch (REGION_ID(ea)) {
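
The hunk above swaps the old EADDR_MASK test for a bound derived from the four-level page table geometry. A minimal userspace sketch of the same arithmetic follows; REGION_SHIFT matches the ppc64 convention of using the top four address bits as a region ID, but the PGTABLE_RANGE value and the test addresses are illustrative assumptions, not the constants this patch defines.

/* Sketch of the hash_page() bounds check (assumed values, not the
 * kernel's real constants). The top nibble of a ppc64 effective
 * address selects a region; the remainder must fall inside the range
 * the four-level page tables can map.
 */
#include <stdio.h>

#define REGION_SHIFT   60
#define REGION_MASK    (0xfUL << REGION_SHIFT)
#define PGTABLE_RANGE  (1UL << 41)   /* assumed: bits mappable by 4 levels */

static int ea_in_range(unsigned long ea)
{
        /* Offset within the region must be below PGTABLE_RANGE. */
        return (ea & ~REGION_MASK) < PGTABLE_RANGE;
}

int main(void)
{
        printf("%d\n", ea_in_range(0x0000000010000000UL)); /* 1: in range */
        printf("%d\n", ea_in_range(0x0000100000000000UL)); /* 0: beyond range */
        return 0;
}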
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index f9524602818d..a13e44230a6f 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -27,124 +27,91 @@
 
 #include <linux/sysctl.h>
 
-#define HUGEPGDIR_SHIFT		(HPAGE_SHIFT + PAGE_SHIFT - 3)
-#define HUGEPGDIR_SIZE		(1UL << HUGEPGDIR_SHIFT)
-#define HUGEPGDIR_MASK		(~(HUGEPGDIR_SIZE-1))
-
-#define HUGEPTE_INDEX_SIZE	9
-#define HUGEPGD_INDEX_SIZE	10
-
-#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
-#define PTRS_PER_HUGEPGD	(1 << HUGEPGD_INDEX_SIZE)
-
-static inline int hugepgd_index(unsigned long addr)
-{
-	return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
-}
-
-static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
+/* Modelled after find_linux_pte() */
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
-	int index;
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;
 
-	if (! mm->context.huge_pgdir)
-		return NULL;
+	BUG_ON(! in_hugepage_area(mm->context, addr));
 
+	addr &= HPAGE_MASK;
+
+	pg = pgd_offset(mm, addr);
+	if (!pgd_none(*pg)) {
+		pu = pud_offset(pg, addr);
+		if (!pud_none(*pu)) {
+			pm = pmd_offset(pu, addr);
+			pt = (pte_t *)pm;
+			BUG_ON(!pmd_none(*pm)
+			       && !(pte_present(*pt) && pte_huge(*pt)));
+			return pt;
+		}
+	}
 
-	index = hugepgd_index(addr);
-	BUG_ON(index >= PTRS_PER_HUGEPGD);
-	return (pud_t *)(mm->context.huge_pgdir + index);
+	return NULL;
 }
 
-static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
-	int index;
-
-	if (pud_none(*dir))
-		return NULL;
-
-	index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
-	return (pte_t *)pud_page(*dir) + index;
-}
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;
 
-static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
-{
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
-	if (! mm->context.huge_pgdir) {
-		pgd_t *new;
-		spin_unlock(&mm->page_table_lock);
-		/* Don't use pgd_alloc(), because we want __GFP_REPEAT */
-		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-		spin_lock(&mm->page_table_lock);
-
-		/*
-		 * Because we dropped the lock, we should re-check the
-		 * entry, as somebody else could have populated it..
-		 */
-		if (mm->context.huge_pgdir)
-			pgd_free(new);
-		else
-			mm->context.huge_pgdir = new;
-	}
-	return hugepgd_offset(mm, addr);
-}
+	addr &= HPAGE_MASK;
 
-static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr)
-{
-	if (! pud_present(*dir)) {
-		pte_t *new;
+	pg = pgd_offset(mm, addr);
+	pu = pud_alloc(mm, pg, addr);
 
-		spin_unlock(&mm->page_table_lock);
-		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-		spin_lock(&mm->page_table_lock);
-		/*
-		 * Because we dropped the lock, we should re-check the
-		 * entry, as somebody else could have populated it..
-		 */
-		if (pud_present(*dir)) {
-			if (new)
-				kmem_cache_free(zero_cache, new);
-		} else {
-			struct page *ptepage;
-
-			if (! new)
-				return NULL;
-			ptepage = virt_to_page(new);
-			ptepage->mapping = (void *) mm;
-			ptepage->index = addr & HUGEPGDIR_MASK;
-			pud_populate(mm, dir, new);
+	if (pu) {
+		pm = pmd_alloc(mm, pu, addr);
+		if (pm) {
+			pt = (pte_t *)pm;
+			BUG_ON(!pmd_none(*pm)
+			       && !(pte_present(*pt) && pte_huge(*pt)));
+			return pt;
 		}
 	}
 
-	return hugepte_offset(dir, addr);
+	return NULL;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pud_t *pud;
+#define HUGEPTE_BATCH_SIZE	(HPAGE_SIZE / PMD_SIZE)
 
-	BUG_ON(! in_hugepage_area(mm->context, addr));
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte)
+{
+	int i;
 
-	pud = hugepgd_offset(mm, addr);
-	if (! pud)
-		return NULL;
+	if (pte_present(*ptep)) {
+		pte_clear(mm, addr, ptep);
+		flush_tlb_pending();
+	}
 
-	return hugepte_offset(pud, addr);
+	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
+		*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+		ptep++;
+	}
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
 {
-	pud_t *pud;
+	unsigned long old = pte_update(ptep, ~0UL);
+	int i;
 
-	BUG_ON(! in_hugepage_area(mm->context, addr));
+	if (old & _PAGE_HASHPTE)
+		hpte_update(mm, addr, old, 0);
 
-	pud = hugepgd_alloc(mm, addr);
-	if (! pud)
-		return NULL;
+	for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
+		ptep[i] = __pte(0);
 
-	return hugepte_alloc(mm, pud, addr);
+	return __pte(old);
 }
 
 /*
@@ -541,42 +508,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	}
 }
 
-void hugetlb_mm_free_pgd(struct mm_struct *mm)
-{
-	int i;
-	pgd_t *pgdir;
-
-	spin_lock(&mm->page_table_lock);
-
-	pgdir = mm->context.huge_pgdir;
-	if (! pgdir)
-		goto out;
-
-	mm->context.huge_pgdir = NULL;
-
-	/* cleanup any hugepte pages leftover */
-	for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
-		pud_t *pud = (pud_t *)(pgdir + i);
-
-		if (! pud_none(*pud)) {
-			pte_t *pte = (pte_t *)pud_page(*pud);
-			struct page *ptepage = virt_to_page(pte);
-
-			ptepage->mapping = NULL;
-
-			BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
-			kmem_cache_free(zero_cache, pte);
-		}
-		pud_clear(pud);
-	}
-
-	BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
-	kmem_cache_free(zero_cache, pgdir);
-
- out:
-	spin_unlock(&mm->page_table_lock);
-}
-
 int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		   unsigned long ea, unsigned long vsid, int local)
 {
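
With the separate hugepage directory gone, a huge page is represented by replicating one pte value across the pmd slots it spans (HUGEPTE_BATCH_SIZE of them), as set_huge_pte_at() above does. A self-contained sketch of that replication follows; the sizes (16MB huge pages, 2MB per pmd slot) are assumptions for illustration, not taken from the patch.

/* Sketch of the HUGEPTE_BATCH_SIZE replication in set_huge_pte_at():
 * one huge page covers several contiguous pmd slots, so the same pte
 * value is written into each of them.
 */
#include <stdio.h>

#define HPAGE_SIZE  (16UL << 20)    /* assumed 16MB huge page */
#define PMD_SIZE    (2UL << 20)     /* assumed 2MB per pmd slot */
#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)

typedef unsigned long pte_t;

static void set_huge_pte(pte_t *ptep, pte_t pte)
{
        /* Replicate the entry across every pmd slot the huge page spans. */
        for (unsigned long i = 0; i < HUGEPTE_BATCH_SIZE; i++)
                ptep[i] = pte;
}

int main(void)
{
        pte_t slots[HUGEPTE_BATCH_SIZE] = { 0 };

        set_huge_pte(slots, 0x1000UL);
        printf("replicated into %lu slots, slots[7]=%#lx\n",
               (unsigned long)HUGEPTE_BATCH_SIZE, slots[7]);
        return 0;
}

huge_ptep_get_and_clear() in the hunk above undoes the same fan-out: it captures and clears slot 0 via pte_update(), then zeroes slots 1 through HUGEPTE_BATCH_SIZE-1.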
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c
index b6e75b891ac0..c65b87b92756 100644
--- a/arch/ppc64/mm/imalloc.c
+++ b/arch/ppc64/mm/imalloc.c
@@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
 			break;
 		if ((unsigned long)tmp->addr >= ioremap_bot)
 			addr = tmp->size + (unsigned long) tmp->addr;
-		if (addr > IMALLOC_END-size)
+		if (addr >= IMALLOC_END-size)
 			return 1;
 	}
 	*im_addr = addr;
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index e58a24d42879..87f256df8de5 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -66,6 +66,14 @@
 #include <asm/vdso.h>
 #include <asm/imalloc.h>
 
+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
 int mem_init_done;
 unsigned long ioremap_bot = IMALLOC_BASE;
 static unsigned long phbs_io_bot = PHBS_IO_BASE;
@@ -226,7 +234,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size,
 	 * Before that, we map using addresses going
 	 * up from ioremap_bot.  imalloc will use
 	 * the addresses from ioremap_bot through
-	 * IMALLOC_END (0xE000001fffffffff)
+	 * IMALLOC_END
 	 *
 	 */
 	pa = addr & PAGE_MASK;
@@ -417,12 +425,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	int index;
 	int err;
 
-#ifdef CONFIG_HUGETLB_PAGE
-	/* We leave htlb_segs as it was, but for a fork, we need to
-	 * clear the huge_pgdir. */
-	mm->context.huge_pgdir = NULL;
-#endif
-
 again:
 	if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
 		return -ENOMEM;
@@ -453,8 +455,6 @@ void destroy_context(struct mm_struct *mm)
 	spin_unlock(&mmu_context_lock);
 
 	mm->context.id = NO_CONTEXT;
-
-	hugetlb_mm_free_pgd(mm);
 }
 
 /*
@@ -833,23 +833,43 @@ void __iomem * reserve_phb_iospace(unsigned long size)
 	return virt_addr;
 }
 
-kmem_cache_t *zero_cache;
-
-static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
 {
-	memset(pte, 0, PAGE_SIZE);
+	memset(addr, 0, kmem_cache_size(cache));
 }
 
+static const int pgtable_cache_size[2] = {
+	PTE_TABLE_SIZE, PMD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+	"pgd_pte_cache", "pud_pmd_cache",
+};
+
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+
 void pgtable_cache_init(void)
 {
-	zero_cache = kmem_cache_create("zero",
-				       PAGE_SIZE,
-				       0,
-				       SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
-				       zero_ctor,
-				       NULL);
-	if (!zero_cache)
-		panic("pgtable_cache_init(): could not create zero_cache!\n");
+	int i;
+
+	BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
+	BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
+	BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
+	BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
+
+	for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
+		int size = pgtable_cache_size[i];
+		const char *name = pgtable_cache_name[i];
+
+		pgtable_cache[i] = kmem_cache_create(name,
+						     size, size,
+						     SLAB_HWCACHE_ALIGN
+						     | SLAB_MUST_HWCACHE_ALIGN,
+						     zero_ctor,
+						     NULL);
+		if (! pgtable_cache[i])
+			panic("pgtable_cache_init(): could not create %s!\n",
+			      name);
+	}
 }
 
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
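
pgtable_cache_init() now builds one slab cache per distinct table size instead of a single page-sized zero_cache, and zero_ctor() zeroes exactly kmem_cache_size(cache) bytes. A userspace sketch of the pattern follows; the table sizes are assumed values, and table_alloc() is a hypothetical stand-in for kmem_cache_alloc() plus the zeroing constructor.

/* Sketch: per-size, pre-zeroed table caches (assumed sizes and names). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const size_t table_size[2] = { 4096, 8192 };  /* assumed sizes */
static const char *table_name[2] = { "pgd_pte_cache", "pud_pmd_cache" };

/* Stand-in for kmem_cache_alloc() with a zeroing constructor. */
static void *table_alloc(size_t i)
{
        void *p = malloc(table_size[i]);
        if (p)
                memset(p, 0, table_size[i]);  /* what zero_ctor() guarantees */
        return p;
}

int main(void)
{
        for (size_t i = 0; i < ARRAY_SIZE(table_size); i++) {
                void *t = table_alloc(i);
                printf("%s: %zu bytes at %p\n", table_name[i],
                       table_size[i], t);
                free(t);
        }
        return 0;
}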
diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S
index 8379d678f70f..f20fc52483a7 100644
--- a/arch/ppc64/mm/slb_low.S
+++ b/arch/ppc64/mm/slb_low.S
@@ -91,7 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
 0:	/* user address: proto-VSID = context<<15 | ESID */
 	li	r11,SLB_VSID_USER
 
-	srdi.	r9,r3,13
+	srdi.	r9,r3,USER_ESID_BITS
 	bne-	8f			/* invalid ea bits set */
 
 #ifdef CONFIG_HUGETLB_PAGE
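
In C terms, the assembly change above checks that no effective-address bits beyond the user ESID range are set before building the proto-VSID. A sketch follows: SID_SHIFT = 28 reflects ppc64's 256MB segments, but USER_ESID_BITS = 16 is an assumed example value; the point of the patch is only that the shift count is no longer hardcoded to 13.

/* Sketch of the slb_low.S validity test in C. */
#include <stdio.h>

#define SID_SHIFT       28          /* 256MB segments */
#define USER_ESID_BITS  16          /* assumed example value */

static int user_ea_valid(unsigned long ea)
{
        unsigned long esid = ea >> SID_SHIFT;   /* r3 in the asm */
        return (esid >> USER_ESID_BITS) == 0;   /* srdi. + bne- 8f */
}

int main(void)
{
        printf("%d\n", user_ea_valid(0x00000fffffffffffUL)); /* 1: valid */
        printf("%d\n", user_ea_valid(0x0001000000000000UL)); /* 0: too high */
        return 0;
}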
diff --git a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c
index 26f0172c4527..d8a6593a13f0 100644
--- a/arch/ppc64/mm/tlb.c
+++ b/arch/ppc64/mm/tlb.c
@@ -41,7 +41,58 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;
 
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
+struct pte_freelist_batch
+{
+	struct rcu_head	rcu;
+	unsigned int	index;
+	pgtable_free_t	tables[0];
+};
+
+DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+unsigned long pte_freelist_forced_free;
+
+#define PTE_FREELIST_SIZE \
+	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
+	  / sizeof(pgtable_free_t))
+
+#ifdef CONFIG_SMP
+static void pte_free_smp_sync(void *arg)
+{
+	/* Do nothing, just ensure we sync with all CPUs */
+}
+#endif
+
+/* This is only called when we are critically out of memory
+ * (and fail to get a page in pte_free_tlb).
+ */
+static void pgtable_free_now(pgtable_free_t pgf)
+{
+	pte_freelist_forced_free++;
+
+	smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+
+	pgtable_free(pgf);
+}
+
+static void pte_free_rcu_callback(struct rcu_head *head)
+{
+	struct pte_freelist_batch *batch =
+		container_of(head, struct pte_freelist_batch, rcu);
+	unsigned int i;
+
+	for (i = 0; i < batch->index; i++)
+		pgtable_free(batch->tables[i]);
+
+	free_page((unsigned long)batch);
+}
+
+static void pte_free_submit(struct pte_freelist_batch *batch)
+{
+	INIT_RCU_HEAD(&batch->rcu);
+	call_rcu(&batch->rcu, pte_free_rcu_callback);
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
 {
 	/* This is safe as we are holding page_table_lock */
 	cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
@@ -49,19 +100,19 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
-		pte_free(ptepage);
+		pgtable_free(pgf);
 		return;
 	}
 
 	if (*batchp == NULL) {
 		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
 		if (*batchp == NULL) {
-			pte_free_now(ptepage);
+			pgtable_free_now(pgf);
 			return;
 		}
 		(*batchp)->index = 0;
 	}
-	(*batchp)->pages[(*batchp)->index++] = ptepage;
+	(*batchp)->tables[(*batchp)->index++] = pgf;
 	if ((*batchp)->index == PTE_FREELIST_SIZE) {
 		pte_free_submit(*batchp);
 		*batchp = NULL;
@@ -132,42 +183,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
 	put_cpu();
 }
 
-#ifdef CONFIG_SMP
-static void pte_free_smp_sync(void *arg)
-{
-	/* Do nothing, just ensure we sync with all CPUs */
-}
-#endif
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-void pte_free_now(struct page *ptepage)
-{
-	pte_freelist_forced_free++;
-
-	smp_call_function(pte_free_smp_sync, NULL, 0, 1);
-
-	pte_free(ptepage);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
-	struct pte_freelist_batch *batch =
-		container_of(head, struct pte_freelist_batch, rcu);
-	unsigned int i;
-
-	for (i = 0; i < batch->index; i++)
-		pte_free(batch->pages[i]);
-	free_page((unsigned long)batch);
-}
-
-void pte_free_submit(struct pte_freelist_batch *batch)
-{
-	INIT_RCU_HEAD(&batch->rcu);
-	call_rcu(&batch->rcu, pte_free_rcu_callback);
-}
-
 void pte_free_finish(void)
 {
 	/* This is safe as we are holding page_table_lock */
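
The tlb.c rework generalizes the RCU-deferred freeing from struct page pointers to pgtable_free_t handles, so tables from different slab caches can share one batch. A userspace sketch of the batch-and-drain pattern follows; the pgtable_free_t layout (pointer plus cache index) and the batch capacity are illustrative assumptions, and batch_drain() is a stand-in for the RCU callback, not real RCU.

/* Sketch: queue pgtable_free_t handles into a batch, drain in one pass. */
#include <stdio.h>
#include <stdlib.h>

typedef struct { void *ptr; int cachenum; } pgtable_free_t;  /* assumed layout */

struct pte_freelist_batch {
        unsigned int index;
        pgtable_free_t tables[64];        /* PTE_FREELIST_SIZE stand-in */
};

static void pgtable_free(pgtable_free_t pgf)
{
        free(pgf.ptr);                    /* kernel: kmem_cache_free() */
}

/* Stand-in for the RCU callback: free everything the batch collected. */
static void batch_drain(struct pte_freelist_batch *batch)
{
        for (unsigned int i = 0; i < batch->index; i++)
                pgtable_free(batch->tables[i]);
        batch->index = 0;
}

int main(void)
{
        struct pte_freelist_batch batch = { .index = 0 };

        for (int i = 0; i < 3; i++) {
                pgtable_free_t pgf = { malloc(128), 0 };
                batch.tables[batch.index++] = pgf;   /* pgtable_free_tlb() */
        }
        batch_drain(&batch);                         /* RCU callback */
        printf("drained batch\n");
        return 0;
}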