author    Ingo Molnar <mingo@elte.hu>  2008-07-26 11:48:49 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-07-26 11:48:49 -0400
commit    c3cc99ff5d24e2eeaf7ec2032e720681916990e3 (patch)
tree      c3e74171bbbd2adde9d60b9db1c440415c8d2831 /arch/powerpc/mm
parent    38ffbe66d59051fd9cfcfc8545f164700e2fa3bc (diff)
parent    024e8ac04453b3525448c31ef39848cf675ba6db (diff)

Merge branch 'linus' into x86/xen
Diffstat (limited to 'arch/powerpc/mm')
 arch/powerpc/mm/fault.c         |  25
 arch/powerpc/mm/hash_utils_64.c |  51
 arch/powerpc/mm/hugetlbpage.c   | 341
 arch/powerpc/mm/init_64.c       |   8
 arch/powerpc/mm/numa.c          |   3
 arch/powerpc/mm/pgtable_32.c    |  22
 arch/powerpc/mm/pgtable_64.c    |  16
 arch/powerpc/mm/tlb_64.c        |   2
 8 files changed, 329 insertions(+), 139 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 1707d00331fc..565b7a237c84 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs)
 	return 0;
 }
 
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-static void do_dabr(struct pt_regs *regs, unsigned long address,
-		    unsigned long error_code)
-{
-	siginfo_t info;
-
-	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
-			11, SIGSEGV) == NOTIFY_STOP)
-		return;
-
-	if (debugger_dabr_match(regs))
-		return;
-
-	/* Clear the DABR */
-	set_dabr(0);
-
-	/* Deliver the signal to userspace */
-	info.si_signo = SIGTRAP;
-	info.si_errno = 0;
-	info.si_code = TRAP_HWBKPT;
-	info.si_addr = (void __user *)address;
-	force_sig_info(SIGTRAP, &info, current);
-}
-#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
-
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
  * for a data fault, SRR1 for an instruction fault. For 400-family processors
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8d3b58ebd38e..5ce5a4dcd008 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -68,6 +68,7 @@
 
 #define KB (1024)
 #define MB (1024*KB)
+#define GB (1024L*MB)
 
 /*
  * Note: pte   --> Linux PTE
@@ -102,7 +103,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M;
 int mmu_highuser_ssize = MMU_SEGSIZE_256M;
 u16 mmu_slb_size = 64;
 #ifdef CONFIG_HUGETLB_PAGE
-int mmu_huge_psize = MMU_PAGE_16M;
 unsigned int HPAGE_SHIFT;
 #endif
 #ifdef CONFIG_PPC_64K_PAGES
@@ -329,6 +329,44 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 	return 0;
 }
 
+/* Scan for 16G memory blocks that have been set aside for huge pages
+ * and reserve those blocks for 16G huge pages.
+ */
+static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
+					const char *uname, int depth,
+					void *data) {
+	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	unsigned long *addr_prop;
+	u32 *page_count_prop;
+	unsigned int expected_pages;
+	long unsigned int phys_addr;
+	long unsigned int block_size;
+
+	/* We are scanning "memory" nodes only */
+	if (type == NULL || strcmp(type, "memory") != 0)
+		return 0;
+
+	/* This property is the log base 2 of the number of virtual pages that
+	 * will represent this memory block. */
+	page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
+	if (page_count_prop == NULL)
+		return 0;
+	expected_pages = (1 << page_count_prop[0]);
+	addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
+	if (addr_prop == NULL)
+		return 0;
+	phys_addr = addr_prop[0];
+	block_size = addr_prop[1];
+	if (block_size != (16 * GB))
+		return 0;
+	printk(KERN_INFO "Huge page(16GB) memory: "
+			"addr = 0x%lX size = 0x%lX pages = %d\n",
+			phys_addr, block_size, expected_pages);
+	lmb_reserve(phys_addr, block_size * expected_pages);
+	add_gpage(phys_addr, block_size, expected_pages);
+	return 0;
+}
+
 static void __init htab_init_page_sizes(void)
 {
 	int rc;
@@ -418,15 +456,18 @@ static void __init htab_init_page_sizes(void)
 	       );
 
 #ifdef CONFIG_HUGETLB_PAGE
-	/* Init large page size. Currently, we pick 16M or 1M depending
+	/* Reserve 16G huge page memory sections for huge pages */
+	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+
+/* Set default large page size. Currently, we pick 16M or 1M depending
 	 * on what is available
 	 */
 	if (mmu_psize_defs[MMU_PAGE_16M].shift)
-		set_huge_psize(MMU_PAGE_16M);
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
 	/* With 4k/4level pagetables, we can't (for now) cope with a
 	 * huge page size < PMD_SIZE */
 	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
-		set_huge_psize(MMU_PAGE_1M);
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
 #endif /* CONFIG_HUGETLB_PAGE */
 }
 
@@ -847,7 +888,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 
 #ifdef CONFIG_HUGETLB_PAGE
 	/* Handle hugepage regions */
-	if (HPAGE_SHIFT && psize == mmu_huge_psize) {
+	if (HPAGE_SHIFT && mmu_huge_psizes[psize]) {
 		DBG_LOW(" -> huge page !\n");
 		return hash_huge_page(mm, access, ea, vsid, local, trap);
 	}
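
The new htab_dt_scan_hugepage_blocks() above treats "ibm,expected#pages" as a log2 value, so a property value of 2 describes 1 << 2 = 4 gigantic pages, and the lmb reservation covers block_size * expected_pages bytes. A minimal userspace sketch of that arithmetic, assuming a 64-bit host and a made-up property value:

	#include <stdio.h>

	#define KB (1024UL)
	#define MB (1024UL * KB)
	#define GB (1024UL * MB)

	int main(void)
	{
		unsigned int page_count_prop = 2; /* hypothetical "ibm,expected#pages" */
		unsigned long block_size = 16 * GB;
		unsigned int expected_pages = 1U << page_count_prop;

		/* mirrors lmb_reserve(phys_addr, block_size * expected_pages) */
		printf("reserve 0x%lx bytes for %u 16G pages\n",
		       block_size * expected_pages, expected_pages);
		return 0;
	}
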
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0d12fba31bc5..fb42c4dd3217 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,21 +24,43 @@
 #include <asm/cputable.h>
 #include <asm/spu.h>
 
-#define HPAGE_SHIFT_64K	16
-#define HPAGE_SHIFT_16M	24
+#define PAGE_SHIFT_64K	16
+#define PAGE_SHIFT_16M	24
+#define PAGE_SHIFT_16G	34
 
 #define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
+#define MAX_NUMBER_GPAGES	1024
 
-unsigned int hugepte_shift;
-#define PTRS_PER_HUGEPTE	(1 << hugepte_shift)
-#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << hugepte_shift)
+/* Tracks the 16G pages after the device tree is scanned and before the
+ * huge_boot_pages list is ready. */
+static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
+static unsigned nr_gpages;
 
-#define HUGEPD_SHIFT		(HPAGE_SHIFT + hugepte_shift)
-#define HUGEPD_SIZE		(1UL << HUGEPD_SHIFT)
-#define HUGEPD_MASK		(~(HUGEPD_SIZE-1))
+/* Array of valid huge page sizes - non-zero value(hugepte_shift) is
+ * stored for the huge page sizes that are valid.
+ */
+unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
+
+#define hugepte_shift			mmu_huge_psizes
+#define PTRS_PER_HUGEPTE(psize)		(1 << hugepte_shift[psize])
+#define HUGEPTE_TABLE_SIZE(psize)	(sizeof(pte_t) << hugepte_shift[psize])
+
+#define HUGEPD_SHIFT(psize)		(mmu_psize_to_shift(psize) \
+						+ hugepte_shift[psize])
+#define HUGEPD_SIZE(psize)		(1UL << HUGEPD_SHIFT(psize))
+#define HUGEPD_MASK(psize)		(~(HUGEPD_SIZE(psize)-1))
+
+/* Subtract one from array size because we don't need a cache for 4K since
+ * is not a huge page size */
+#define huge_pgtable_cache(psize)	(pgtable_cache[HUGEPTE_CACHE_NUM \
+						+ psize-1])
+#define HUGEPTE_CACHE_NAME(psize)	(huge_pgtable_cache_name[psize])
 
-#define huge_pgtable_cache	(pgtable_cache[HUGEPTE_CACHE_NUM])
+static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
+	"unused_4K", "hugepte_cache_64K", "unused_64K_AP",
+	"hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G"
+};
 
 /* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
  * will choke on pointers to hugepte tables, which is handy for
@@ -49,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t;
 
 #define hugepd_none(hpd)	((hpd).pd == 0)
 
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+	switch (shift) {
+#ifndef CONFIG_PPC_64K_PAGES
+	case PAGE_SHIFT_64K:
+		return MMU_PAGE_64K;
+#endif
+	case PAGE_SHIFT_16M:
+		return MMU_PAGE_16M;
+	case PAGE_SHIFT_16G:
+		return MMU_PAGE_16G;
+	}
+	return -1;
+}
+
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+	if (mmu_psize_defs[mmu_psize].shift)
+		return mmu_psize_defs[mmu_psize].shift;
+	BUG();
+}
+
 static inline pte_t *hugepd_page(hugepd_t hpd)
 {
 	BUG_ON(!(hpd.pd & HUGEPD_OK));
 	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
 }
 
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+				    struct hstate *hstate)
 {
-	unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
+	unsigned int shift = huge_page_shift(hstate);
+	int psize = shift_to_mmu_psize(shift);
+	unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
 	pte_t *dir = hugepd_page(*hpdp);
 
 	return dir + idx;
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
-			   unsigned long address)
+			   unsigned long address, unsigned int psize)
 {
-	pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
+	pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize),
 				      GFP_KERNEL|__GFP_REPEAT);
 
 	if (! new)
@@ -74,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 
 	spin_lock(&mm->page_table_lock);
 	if (!hugepd_none(*hpdp))
-		kmem_cache_free(huge_pgtable_cache, new);
+		kmem_cache_free(huge_pgtable_cache(psize), new);
 	else
 		hpdp->pd = (unsigned long)new | HUGEPD_OK;
 	spin_unlock(&mm->page_table_lock);
@@ -83,27 +130,60 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 
 /* Base page size affects how we walk hugetlb page tables */
 #ifdef CONFIG_PPC_64K_PAGES
-#define hpmd_offset(pud, addr)		pmd_offset(pud, addr)
-#define hpmd_alloc(mm, pud, addr)	pmd_alloc(mm, pud, addr)
+#define hpmd_offset(pud, addr, h)	pmd_offset(pud, addr)
+#define hpmd_alloc(mm, pud, addr, h)	pmd_alloc(mm, pud, addr)
 #else
 static inline
-pmd_t *hpmd_offset(pud_t *pud, unsigned long addr)
+pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
 {
-	if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+	if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
 		return pmd_offset(pud, addr);
 	else
 		return (pmd_t *) pud;
 }
 static inline
-pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr)
+pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
+		  struct hstate *hstate)
 {
-	if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+	if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
 		return pmd_alloc(mm, pud, addr);
 	else
 		return (pmd_t *) pud;
 }
 #endif
 
+/* Build list of addresses of gigantic pages.  This function is used in early
+ * boot before the buddy or bootmem allocator is setup.
+ */
+void add_gpage(unsigned long addr, unsigned long page_size,
+	unsigned long number_of_pages)
+{
+	if (!addr)
+		return;
+	while (number_of_pages > 0) {
+		gpage_freearray[nr_gpages] = addr;
+		nr_gpages++;
+		number_of_pages--;
+		addr += page_size;
+	}
+}
+
+/* Moves the gigantic page addresses from the temporary list to the
+ * huge_boot_pages list.
+ */
+int alloc_bootmem_huge_page(struct hstate *hstate)
+{
+	struct huge_bootmem_page *m;
+	if (nr_gpages == 0)
+		return 0;
+	m = phys_to_virt(gpage_freearray[--nr_gpages]);
+	gpage_freearray[nr_gpages] = 0;
+	list_add(&m->list, &huge_boot_pages);
+	m->hstate = hstate;
+	return 1;
+}
+
+
 /* Modelled after find_linux_pte() */
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
@@ -111,39 +191,52 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	pud_t *pu;
 	pmd_t *pm;
 
-	BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
+	unsigned int psize;
+	unsigned int shift;
+	unsigned long sz;
+	struct hstate *hstate;
+	psize = get_slice_psize(mm, addr);
+	shift = mmu_psize_to_shift(psize);
+	sz = ((1UL) << shift);
+	hstate = size_to_hstate(sz);
 
-	addr &= HPAGE_MASK;
+	addr &= hstate->mask;
 
 	pg = pgd_offset(mm, addr);
 	if (!pgd_none(*pg)) {
 		pu = pud_offset(pg, addr);
 		if (!pud_none(*pu)) {
-			pm = hpmd_offset(pu, addr);
+			pm = hpmd_offset(pu, addr, hstate);
 			if (!pmd_none(*pm))
-				return hugepte_offset((hugepd_t *)pm, addr);
+				return hugepte_offset((hugepd_t *)pm, addr,
+						      hstate);
 		}
 	}
 
 	return NULL;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
 {
 	pgd_t *pg;
 	pud_t *pu;
 	pmd_t *pm;
 	hugepd_t *hpdp = NULL;
+	struct hstate *hstate;
+	unsigned int psize;
+	hstate = size_to_hstate(sz);
 
-	BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
+	psize = get_slice_psize(mm, addr);
+	BUG_ON(!mmu_huge_psizes[psize]);
 
-	addr &= HPAGE_MASK;
+	addr &= hstate->mask;
 
 	pg = pgd_offset(mm, addr);
 	pu = pud_alloc(mm, pg, addr);
 
 	if (pu) {
-		pm = hpmd_alloc(mm, pu, addr);
+		pm = hpmd_alloc(mm, pu, addr, hstate);
 		if (pm)
 			hpdp = (hugepd_t *)pm;
 	}
@@ -151,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	if (! hpdp)
 		return NULL;
 
-	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
+	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
 		return NULL;
 
-	return hugepte_offset(hpdp, addr);
+	return hugepte_offset(hpdp, addr, hstate);
 }
 
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
@@ -162,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
+static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
+			       unsigned int psize)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);
 
 	hpdp->pd = 0;
 	tlb->need_flush = 1;
-	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
+	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
+						 HUGEPTE_CACHE_NUM+psize-1,
 						 PGF_CACHENUM_MASK));
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 				   unsigned long addr, unsigned long end,
-				   unsigned long floor, unsigned long ceiling)
+				   unsigned long floor, unsigned long ceiling,
+				   unsigned int psize)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -186,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd))
 			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pmd);
+		free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
 	} while (pmd++, addr = next, addr != end);
 
 	start &= PUD_MASK;
@@ -212,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 	pud_t *pud;
 	unsigned long next;
 	unsigned long start;
+	unsigned int shift;
+	unsigned int psize = get_slice_psize(tlb->mm, addr);
+	shift = mmu_psize_to_shift(psize);
 
 	start = addr;
 	pud = pud_offset(pgd, addr);
@@ -220,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 #ifdef CONFIG_PPC_64K_PAGES
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling,
+				       psize);
 #else
-		if (HPAGE_SHIFT == HPAGE_SHIFT_64K) {
+		if (shift == PAGE_SHIFT_64K) {
 			if (pud_none_or_clear_bad(pud))
 				continue;
-			hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+					       ceiling, psize);
 		} else {
 			if (pud_none(*pud))
 				continue;
-			free_hugepte_range(tlb, (hugepd_t *)pud);
+			free_hugepte_range(tlb, (hugepd_t *)pud, psize);
 		}
 #endif
 	} while (pud++, addr = next, addr != end);
@@ -255,7 +356,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  *
  * Must be called with pagetable lock held.
  */
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			    unsigned long addr, unsigned long end,
 			    unsigned long floor, unsigned long ceiling)
 {
@@ -297,31 +398,33 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
 	 * now has no other vmas using it, so can be freed, we don't
 	 * bother to round floor or end up - the tests don't need that.
 	 */
+	unsigned int psize = get_slice_psize(tlb->mm, addr);
 
-	addr &= HUGEPD_MASK;
+	addr &= HUGEPD_MASK(psize);
 	if (addr < floor) {
-		addr += HUGEPD_SIZE;
+		addr += HUGEPD_SIZE(psize);
 		if (!addr)
 			return;
 	}
 	if (ceiling) {
-		ceiling &= HUGEPD_MASK;
+		ceiling &= HUGEPD_MASK(psize);
 		if (!ceiling)
 			return;
 	}
 	if (end - 1 > ceiling - 1)
-		end -= HUGEPD_SIZE;
+		end -= HUGEPD_SIZE(psize);
 	if (addr > end - 1)
 		return;
 
 	start = addr;
-	pgd = pgd_offset((*tlb)->mm, addr);
+	pgd = pgd_offset(tlb->mm, addr);
 	do {
-		BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize);
+		psize = get_slice_psize(tlb->mm, addr);
+		BUG_ON(!mmu_huge_psizes[psize]);
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -334,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * necessary anymore if we make hpte_need_flush() get the
 	 * page size from the slices
 	 */
-	pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1);
+	unsigned int psize = get_slice_psize(mm, addr);
+	unsigned int shift = mmu_psize_to_shift(psize);
+	unsigned long sz = ((1UL) << shift);
+	struct hstate *hstate = size_to_hstate(sz);
+	pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
 	}
 	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 }
@@ -351,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
 	pte_t *ptep;
 	struct page *page;
+	unsigned int mmu_psize = get_slice_psize(mm, address);
 
-	if (get_slice_psize(mm, address) != mmu_huge_psize)
+	/* Verify it is a huge page else bail. */
+	if (!mmu_huge_psizes[mmu_psize])
 		return ERR_PTR(-EINVAL);
 
 	ptep = huge_pte_offset(mm, address);
 	page = pte_page(*ptep);
-	if (page)
-		page += (address % HPAGE_SIZE) / PAGE_SIZE;
+	if (page) {
+		unsigned int shift = mmu_psize_to_shift(mmu_psize);
+		unsigned long sz = ((1UL) << shift);
+		page += (address % sz) / PAGE_SIZE;
+	}
 
 	return page;
 }
@@ -368,6 +480,11 @@ int pmd_huge(pmd_t pmd)
 	return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int write)
@@ -381,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 			   unsigned long len, unsigned long pgoff,
 			   unsigned long flags)
 {
-	return slice_get_unmapped_area(addr, len, flags,
-				       mmu_huge_psize, 1, 0);
+	struct hstate *hstate = hstate_file(file);
+	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
+	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
 }
 
 /*
  * Called by asm hashtable.S for doing lazy icache flush
  */
 static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
-						  pte_t pte, int trap)
+					pte_t pte, int trap, unsigned long sz)
 {
 	struct page *page;
 	int i;
@@ -402,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
 	/* page is dirty */
 	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
 		if (trap == 0x400) {
-			for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++)
+			for (i = 0; i < (sz / PAGE_SIZE); i++)
 				__flush_dcache_icache(page_address(page+i));
 			set_bit(PG_arch_1, &page->flags);
 		} else {
@@ -418,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 {
 	pte_t *ptep;
 	unsigned long old_pte, new_pte;
-	unsigned long va, rflags, pa;
+	unsigned long va, rflags, pa, sz;
 	long slot;
 	int err = 1;
 	int ssize = user_segment_size(ea);
+	unsigned int mmu_psize;
+	int shift;
+	mmu_psize = get_slice_psize(mm, ea);
 
+	if (!mmu_huge_psizes[mmu_psize])
+		goto out;
 	ptep = huge_pte_offset(mm, ea);
 
 	/* Search the Linux page table for a match with va */
@@ -465,30 +588,32 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	rflags = 0x2 | (!(new_pte & _PAGE_RW));
 	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
 	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+	shift = mmu_psize_to_shift(mmu_psize);
+	sz = ((1UL) << shift);
 	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
 		/* No CPU has hugepages but lacks no execute, so we
 		 * don't need to worry about that case */
 		rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
-						       trap);
+						       trap, sz);
 
 	/* Check if pte already has an hpte (case 2) */
 	if (unlikely(old_pte & _PAGE_HASHPTE)) {
 		/* There MIGHT be an HPTE for this pte */
 		unsigned long hash, slot;
 
-		hash = hpt_hash(va, HPAGE_SHIFT, ssize);
+		hash = hpt_hash(va, shift, ssize);
 		if (old_pte & _PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += (old_pte & _PAGE_F_GIX) >> 12;
 
-		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize,
+		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
 					 ssize, local) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
 	if (likely(!(old_pte & _PAGE_HASHPTE))) {
-		unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize);
+		unsigned long hash = hpt_hash(va, shift, ssize);
 		unsigned long hpte_group;
 
 		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
@@ -509,7 +634,7 @@ repeat:
 
 		/* Insert into the hash table, primary slot */
 		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
-					  mmu_huge_psize, ssize);
+					  mmu_psize, ssize);
 
 		/* Primary is full, try the secondary */
 		if (unlikely(slot == -1)) {
@@ -517,7 +642,7 @@ repeat:
 				      HPTES_PER_GROUP) & ~0x7UL;
 			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
 						  HPTE_V_SECONDARY,
-						  mmu_huge_psize, ssize);
+						  mmu_psize, ssize);
 			if (slot == -1) {
 				if (mftb() & 0x1)
 					hpte_group = ((hash & htab_hash_mask) *
@@ -549,45 +674,54 @@ void set_huge_psize(int psize)
 {
 	/* Check that it is a page size supported by the hardware and
 	 * that it fits within pagetable limits. */
-	if (mmu_psize_defs[psize].shift && mmu_psize_defs[psize].shift < SID_SHIFT &&
+	if (mmu_psize_defs[psize].shift &&
+	    mmu_psize_defs[psize].shift < SID_SHIFT_1T &&
 	    (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
-	     mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) {
-		HPAGE_SHIFT = mmu_psize_defs[psize].shift;
-		mmu_huge_psize = psize;
-#ifdef CONFIG_PPC_64K_PAGES
-		hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
-#else
-		if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
-			hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
-		else
-			hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT);
-#endif
-
+	     mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
+	     mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
+		/* Return if huge page size has already been setup or is the
+		 * same as the base page size. */
+		if (mmu_huge_psizes[psize] ||
+		   mmu_psize_defs[psize].shift == PAGE_SHIFT)
+			return;
+		hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
+
+		switch (mmu_psize_defs[psize].shift) {
+		case PAGE_SHIFT_64K:
+			/* We only allow 64k hpages with 4k base page,
+			 * which was checked above, and always put them
+			 * at the PMD */
+			hugepte_shift[psize] = PMD_SHIFT;
+			break;
+		case PAGE_SHIFT_16M:
+			/* 16M pages can be at two different levels
+			 * of pagestables based on base page size */
+			if (PAGE_SHIFT == PAGE_SHIFT_64K)
+				hugepte_shift[psize] = PMD_SHIFT;
+			else /* 4k base page */
+				hugepte_shift[psize] = PUD_SHIFT;
+			break;
+		case PAGE_SHIFT_16G:
+			/* 16G pages are always at PGD level */
+			hugepte_shift[psize] = PGDIR_SHIFT;
+			break;
+		}
+		hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
 	} else
-		HPAGE_SHIFT = 0;
+		hugepte_shift[psize] = 0;
 }
 
 static int __init hugepage_setup_sz(char *str)
 {
 	unsigned long long size;
-	int mmu_psize = -1;
+	int mmu_psize;
 	int shift;
 
 	size = memparse(str, &str);
 
 	shift = __ffs(size);
-	switch (shift) {
-#ifndef CONFIG_PPC_64K_PAGES
-	case HPAGE_SHIFT_64K:
-		mmu_psize = MMU_PAGE_64K;
-		break;
-#endif
-	case HPAGE_SHIFT_16M:
-		mmu_psize = MMU_PAGE_16M;
-		break;
-	}
-
-	if (mmu_psize >=0 && mmu_psize_defs[mmu_psize].shift)
+	mmu_psize = shift_to_mmu_psize(shift);
+	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
 		set_huge_psize(mmu_psize);
 	else
 		printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
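
What set_huge_psize() now stores per size is the number of index bits in a hugepte table: the shift of the pagetable level the hugepage hangs off, minus the hugepage shift itself. A standalone sketch of that arithmetic; the PUD_SHIFT/PGDIR_SHIFT values below are assumed 4K-base-page figures, the real ones come from asm/pgtable-ppc64-4k.h:

	#include <stdio.h>

	#define PUD_SHIFT	28	/* assumed 4K base page layout */
	#define PGDIR_SHIFT	35	/* assumed 4K base page layout */
	#define PAGE_SHIFT_16M	24
	#define PAGE_SHIFT_16G	34

	int main(void)
	{
		/* 16M hugepages hang off the PUD on a 4K base kernel;
		 * 16G hugepages always hang off the PGD */
		unsigned int shift_16m = PUD_SHIFT - PAGE_SHIFT_16M;
		unsigned int shift_16g = PGDIR_SHIFT - PAGE_SHIFT_16G;

		/* PTRS_PER_HUGEPTE(psize) = 1 << hugepte_shift[psize] */
		printf("16M: %u hugeptes per table\n", 1U << shift_16m);
		printf("16G: %u hugeptes per table\n", 1U << shift_16g);
		return 0;
	}
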
@@ -603,16 +737,31 @@ static void zero_ctor(struct kmem_cache *cache, void *addr)
 
 static int __init hugetlbpage_init(void)
 {
+	unsigned int psize;
+
 	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
 		return -ENODEV;
-
-	huge_pgtable_cache = kmem_cache_create("hugepte_cache",
-					       HUGEPTE_TABLE_SIZE,
-					       HUGEPTE_TABLE_SIZE,
-					       0,
-					       zero_ctor);
-	if (! huge_pgtable_cache)
-		panic("hugetlbpage_init(): could not create hugepte cache\n");
+	/* Add supported huge page sizes.  Need to change HUGE_MAX_HSTATE
+	 * and adjust PTE_NONCACHE_NUM if the number of supported huge page
+	 * sizes changes.
+	 */
+	set_huge_psize(MMU_PAGE_16M);
+	set_huge_psize(MMU_PAGE_64K);
+	set_huge_psize(MMU_PAGE_16G);
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		if (mmu_huge_psizes[psize]) {
+			huge_pgtable_cache(psize) = kmem_cache_create(
+						HUGEPTE_CACHE_NAME(psize),
+						HUGEPTE_TABLE_SIZE(psize),
+						HUGEPTE_TABLE_SIZE(psize),
+						0,
+						zero_ctor);
+			if (!huge_pgtable_cache(psize))
+				panic("hugetlbpage_init(): could not create %s"\
+					"\n", HUGEPTE_CACHE_NAME(psize));
+		}
+	}
 
 	return 0;
 }
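
The hugepagesz= early parameter now funnels through shift_to_mmu_psize() instead of an open-coded switch. A userspace sketch of the round trip, using __builtin_ctzl() as a stand-in for the kernel's __ffs() and the MMU_PAGE_* numbering implied by the huge_pgtable_cache_name[] array above:

	#include <stdio.h>

	#define PAGE_SHIFT_64K	16
	#define PAGE_SHIFT_16M	24
	#define PAGE_SHIFT_16G	34

	/* numbering implied by huge_pgtable_cache_name[] above */
	enum { MMU_PAGE_64K = 1, MMU_PAGE_16M = 4, MMU_PAGE_16G = 5 };

	static int shift_to_mmu_psize(unsigned int shift)
	{
		switch (shift) {
		case PAGE_SHIFT_64K: return MMU_PAGE_64K;
		case PAGE_SHIFT_16M: return MMU_PAGE_16M;
		case PAGE_SHIFT_16G: return MMU_PAGE_16G;
		}
		return -1;	/* rejected: "Invalid huge page size" */
	}

	int main(void)
	{
		/* hugepagesz=16G: memparse() yields 1UL << 34 */
		unsigned long size = 1UL << 34;
		int shift = __builtin_ctzl(size);

		printf("shift %d -> psize %d\n", shift, shift_to_mmu_psize(shift));
		return 0;
	}
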
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6ef63caca682..a41bc5aa2043 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -153,10 +153,10 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
 };
 
 #ifdef CONFIG_HUGETLB_PAGE
-/* Hugepages need one extra cache, initialized in hugetlbpage.c.  We
- * can't put into the tables above, because HPAGE_SHIFT is not compile
- * time constant. */
-struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1];
+/* Hugepages need an extra cache per hugepagesize, initialized in
+ * hugetlbpage.c.  We can't put into the tables above, because HPAGE_SHIFT
+ * is not compile time constant. */
+struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT];
 #else
 struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
 #endif
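
Note the indexing contract this relies on: huge_pgtable_cache(psize) in hugetlbpage.c resolves to pgtable_cache[HUGEPTE_CACHE_NUM + psize - 1], so the slots used run from psize 1 upward and the enlarged array is never overrun. A toy bounds check with assumed values for the base cache count:

	#include <stdio.h>

	#define MMU_PAGE_COUNT		6
	#define PGTABLE_CACHE_SIZE	2	/* assumed number of base caches */
	#define HUGEPTE_CACHE_NUM	PGTABLE_CACHE_SIZE

	int main(void)
	{
		int array_size = PGTABLE_CACHE_SIZE + MMU_PAGE_COUNT;
		int psize;

		/* psize 0 (the 4K base size) never gets a cache, hence "- 1" */
		for (psize = 1; psize < MMU_PAGE_COUNT; psize++)
			printf("psize %d -> slot %d (array size %d)\n",
			       psize, HUGEPTE_CACHE_NUM + psize - 1, array_size);
		return 0;
	}
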
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index cf4bffba6f7c..d9a181351332 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -39,7 +39,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
 EXPORT_SYMBOL(numa_cpumask_lookup_table);
 EXPORT_SYMBOL(node_data);
 
-static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
 static int min_common_depth;
 static int n_mem_addr_cells, n_mem_size_cells;
 
@@ -816,7 +815,7 @@ void __init do_init_bootmem(void)
 		dbg("node %d\n", nid);
 		dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
 
-		NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+		NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
 		NODE_DATA(nid)->node_start_pfn = start_pfn;
 		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index c7584072dfcc..2001abdb1912 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -145,13 +145,20 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 void __iomem *
 ioremap(phys_addr_t addr, unsigned long size)
 {
-	return __ioremap(addr, size, _PAGE_NO_CACHE);
+	return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
 }
 EXPORT_SYMBOL(ioremap);
 
 void __iomem *
 ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
 {
+	/* writeable implies dirty for kernel addresses */
+	if (flags & _PAGE_RW)
+		flags |= _PAGE_DIRTY | _PAGE_HWWRITE;
+
+	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
+	flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC);
+
 	return __ioremap(addr, size, flags);
 }
 EXPORT_SYMBOL(ioremap_flags);
@@ -163,6 +170,14 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
 	phys_addr_t p;
 	int err;
 
+	/* Make sure we have the base flags */
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= _PAGE_KERNEL;
+
+	/* Non-cacheable page cannot be coherent */
+	if (flags & _PAGE_NO_CACHE)
+		flags &= ~_PAGE_COHERENT;
+
 	/*
 	 * Choose an address to map it to.
 	 * Once the vmalloc system is running, we use it.
@@ -219,11 +234,6 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
 		v = (ioremap_bot -= size);
 	}
 
-	if ((flags & _PAGE_PRESENT) == 0)
-		flags |= _PAGE_KERNEL;
-	if (flags & _PAGE_NO_CACHE)
-		flags |= _PAGE_GUARDED;
-
 	/*
 	 * Should check if it is a candidate for a BAT mapping
 	 */
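
Both ioremap_flags() variants now sanitize caller-supplied flags up front rather than patching them deep inside __ioremap(). A compilable sketch of the 32-bit policy; the _PAGE_* bit values here are invented for illustration, the real ones are MMU-specific and live in asm/pgtable-ppc32.h:

	#include <stdio.h>

	#define _PAGE_USER	0x001	/* hypothetical bit values */
	#define _PAGE_RW	0x002
	#define _PAGE_EXEC	0x004
	#define _PAGE_DIRTY	0x008
	#define _PAGE_HWWRITE	0x010
	#define _PAGE_HWEXEC	0x020

	static unsigned long sanitize_ioremap_flags(unsigned long flags)
	{
		/* writeable implies dirty for kernel addresses */
		if (flags & _PAGE_RW)
			flags |= _PAGE_DIRTY | _PAGE_HWWRITE;
		/* never let user or execute permission reach an MMIO mapping */
		flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC);
		return flags;
	}

	int main(void)
	{
		unsigned long in = _PAGE_RW | _PAGE_USER | _PAGE_EXEC;
		printf("flags 0x%03lx -> 0x%03lx\n", in, sanitize_ioremap_flags(in));
		return 0;
	}
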
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3ef0ad2f9ca0..365e61ae5dbc 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -107,9 +107,18 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
 {
 	unsigned long i;
 
+	/* Make sure we have the base flags */
 	if ((flags & _PAGE_PRESENT) == 0)
 		flags |= pgprot_val(PAGE_KERNEL);
 
+	/* Non-cacheable page cannot be coherent */
+	if (flags & _PAGE_NO_CACHE)
+		flags &= ~_PAGE_COHERENT;
+
+	/* We don't support the 4K PFN hack with ioremap */
+	if (flags & _PAGE_4K_PFN)
+		return NULL;
+
 	WARN_ON(pa & ~PAGE_MASK);
 	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
 	WARN_ON(size & ~PAGE_MASK);
@@ -190,6 +199,13 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
 			     unsigned long flags)
 {
+	/* writeable implies dirty for kernel addresses */
+	if (flags & _PAGE_RW)
+		flags |= _PAGE_DIRTY;
+
+	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
+	flags &= ~(_PAGE_USER | _PAGE_EXEC);
+
 	if (ppc_md.ioremap)
 		return ppc_md.ioremap(addr, size, flags);
 	return __ioremap(addr, size, flags);
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index a01b5c608ff9..409fcc7b63ce 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -147,7 +147,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	 */
 	if (huge) {
 #ifdef CONFIG_HUGETLB_PAGE
-		psize = mmu_huge_psize;
+		psize = get_slice_psize(mm, addr);
 #else
 		BUG();
 		psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */