Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')

 arch/powerpc/mm/hugetlbpage.c | 341 +++++++++++++++++++++++++++++-----------
 1 file changed, 245 insertions(+), 96 deletions(-)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0d12fba31bc5..fb42c4dd3217 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,21 +24,43 @@
 #include <asm/cputable.h>
 #include <asm/spu.h>
 
-#define HPAGE_SHIFT_64K 16
-#define HPAGE_SHIFT_16M 24
+#define PAGE_SHIFT_64K 16
+#define PAGE_SHIFT_16M 24
+#define PAGE_SHIFT_16G 34
 
 #define NUM_LOW_AREAS  (0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
+#define MAX_NUMBER_GPAGES 1024
 
-unsigned int hugepte_shift;
-#define PTRS_PER_HUGEPTE   (1 << hugepte_shift)
-#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << hugepte_shift)
+/* Tracks the 16G pages after the device tree is scanned and before the
+ * huge_boot_pages list is ready. */
+static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
+static unsigned nr_gpages;
 
-#define HUGEPD_SHIFT (HPAGE_SHIFT + hugepte_shift)
-#define HUGEPD_SIZE  (1UL << HUGEPD_SHIFT)
-#define HUGEPD_MASK  (~(HUGEPD_SIZE-1))
+/* Array of valid huge page sizes - a non-zero value (hugepte_shift) is
+ * stored for the huge page sizes that are valid.
+ */
+unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */
+
+#define hugepte_shift              mmu_huge_psizes
+#define PTRS_PER_HUGEPTE(psize)    (1 << hugepte_shift[psize])
+#define HUGEPTE_TABLE_SIZE(psize)  (sizeof(pte_t) << hugepte_shift[psize])
+
+#define HUGEPD_SHIFT(psize)        (mmu_psize_to_shift(psize) \
+                                           + hugepte_shift[psize])
+#define HUGEPD_SIZE(psize)         (1UL << HUGEPD_SHIFT(psize))
+#define HUGEPD_MASK(psize)         (~(HUGEPD_SIZE(psize)-1))
+
+/* Subtract one from array size because we don't need a cache for 4K since
+ * it is not a huge page size */
+#define huge_pgtable_cache(psize)  (pgtable_cache[HUGEPTE_CACHE_NUM \
+                                                         + psize-1])
+#define HUGEPTE_CACHE_NAME(psize)  (huge_pgtable_cache_name[psize])
 
-#define huge_pgtable_cache (pgtable_cache[HUGEPTE_CACHE_NUM])
+static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
+        "unused_4K", "hugepte_cache_64K", "unused_64K_AP",
+        "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G"
+};
 
 /* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
  * will choke on pointers to hugepte tables, which is handy for
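Note: the macros above replace the single global hugepage geometry with per-psize lookups. A standalone userspace sketch of the arithmetic follows; the 9/7/7/9 page-table index widths (and thus the PMD/PUD/PGDIR shifts) are assumptions taken from the 4K-base ppc64 headers of this era, not part of the patch:

    /* Standalone sketch -- not kernel code.  Assumes a 64-bit host and
     * the era's 4K-base layout: 9 PTE / 7 PMD / 7 PUD / 9 PGD index bits. */
    #include <stdio.h>

    #define PAGE_SHIFT   12
    #define PMD_SHIFT    (PAGE_SHIFT + 9)   /* 21 */
    #define PUD_SHIFT    (PMD_SHIFT + 7)    /* 28 */
    #define PGDIR_SHIFT  (PUD_SHIFT + 7)    /* 35 */

    int main(void)
    {
        /* 16M pages sit at the PUD level on a 4K base page */
        unsigned int shift_16m = 24;
        unsigned int hugepte_shift_16m = PUD_SHIFT - shift_16m;   /* 4 */

        /* 16G pages always sit at the PGD level */
        unsigned int shift_16g = 34;
        unsigned int hugepte_shift_16g = PGDIR_SHIFT - shift_16g; /* 1 */

        printf("16M: %u ptes per hugepte table, hugepd spans %lu MB\n",
               1u << hugepte_shift_16m,
               (1ul << (shift_16m + hugepte_shift_16m)) >> 20);
        printf("16G: %u ptes per hugepte table, hugepd spans %lu GB\n",
               1u << hugepte_shift_16g,
               (1ul << (shift_16g + hugepte_shift_16g)) >> 30);
        return 0;
    }

So on a 4K base page a 16M hugepd holds 16 huge ptes and spans 256 MB (one PUD slot), and a 16G hugepd holds 2 huge ptes and spans 32 GB (one PGD slot).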
@@ -49,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t;
 
 #define hugepd_none(hpd) ((hpd).pd == 0)
 
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+        switch (shift) {
+#ifndef CONFIG_PPC_64K_PAGES
+        case PAGE_SHIFT_64K:
+                return MMU_PAGE_64K;
+#endif
+        case PAGE_SHIFT_16M:
+                return MMU_PAGE_16M;
+        case PAGE_SHIFT_16G:
+                return MMU_PAGE_16G;
+        }
+        return -1;
+}
+
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+        if (mmu_psize_defs[mmu_psize].shift)
+                return mmu_psize_defs[mmu_psize].shift;
+        BUG();
+}
+
 static inline pte_t *hugepd_page(hugepd_t hpd)
 {
         BUG_ON(!(hpd.pd & HUGEPD_OK));
         return (pte_t *)(hpd.pd & ~HUGEPD_OK);
 }
 
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
+                                    struct hstate *hstate)
 {
-        unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
+        unsigned int shift = huge_page_shift(hstate);
+        int psize = shift_to_mmu_psize(shift);
+        unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
         pte_t *dir = hugepd_page(*hpdp);
 
         return dir + idx;
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
-                           unsigned long address)
+                           unsigned long address, unsigned int psize)
 {
-        pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
+        pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize),
                                       GFP_KERNEL|__GFP_REPEAT);
 
         if (! new)
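Note: the two new helpers are deliberately asymmetric: shift_to_mmu_psize() returns -1 so callers can reject an unsupported shift, while mmu_psize_to_shift() BUG()s because its argument is already trusted. A minimal userspace model (the shift table and enum are illustrative stand-ins, not the kernel's mmu_psize_defs[]):

    #include <stdio.h>

    enum { MMU_PAGE_4K, MMU_PAGE_64K, MMU_PAGE_16M, MMU_PAGE_16G,
           MMU_PAGE_COUNT };

    static const unsigned int psize_shift[MMU_PAGE_COUNT] = { 12, 16, 24, 34 };

    static int shift_to_mmu_psize(unsigned int shift)
    {
        int psize;

        for (psize = 0; psize < MMU_PAGE_COUNT; psize++)
            if (psize_shift[psize] == shift)
                return psize;
        return -1;  /* unsupported: caller must check, unlike BUG() */
    }

    int main(void)
    {
        int psize = shift_to_mmu_psize(24);

        printf("shift 24 -> psize %d -> %lu MB pages\n",
               psize, (1ul << psize_shift[psize]) >> 20);
        printf("shift 23 -> psize %d (rejected)\n", shift_to_mmu_psize(23));
        return 0;
    }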
@@ -74,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 
         spin_lock(&mm->page_table_lock);
         if (!hugepd_none(*hpdp))
-                kmem_cache_free(huge_pgtable_cache, new);
+                kmem_cache_free(huge_pgtable_cache(psize), new);
         else
                 hpdp->pd = (unsigned long)new | HUGEPD_OK;
         spin_unlock(&mm->page_table_lock);
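Note: __hugepte_alloc() keeps the usual allocate-then-check-under-lock idiom: the possibly-sleeping allocation happens before page_table_lock is taken, and the loser of any race simply frees its table. A single-threaded sketch of that control flow (illustrative names, not kernel code; compile with cc -pthread):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static void *slot;  /* stands in for hpdp->pd */

    static void install_table(void)
    {
        void *new = malloc(64);  /* allocate before taking the lock */

        pthread_mutex_lock(&lock);
        if (slot)                /* someone else won the race */
            free(new);
        else
            slot = new;
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        install_table();
        install_table();         /* second call discards its allocation */
        printf("slot = %p\n", slot);
        return 0;
    }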
@@ -83,27 +130,60 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 
 /* Base page size affects how we walk hugetlb page tables */
 #ifdef CONFIG_PPC_64K_PAGES
-#define hpmd_offset(pud, addr)    pmd_offset(pud, addr)
-#define hpmd_alloc(mm, pud, addr) pmd_alloc(mm, pud, addr)
+#define hpmd_offset(pud, addr, h)    pmd_offset(pud, addr)
+#define hpmd_alloc(mm, pud, addr, h) pmd_alloc(mm, pud, addr)
 #else
 static inline
-pmd_t *hpmd_offset(pud_t *pud, unsigned long addr)
+pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
 {
-        if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+        if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
                 return pmd_offset(pud, addr);
         else
                 return (pmd_t *) pud;
 }
 static inline
-pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr)
+pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
+                  struct hstate *hstate)
 {
-        if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+        if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
                 return pmd_alloc(mm, pud, addr);
         else
                 return (pmd_t *) pud;
 }
 #endif
 
+/* Build list of addresses of gigantic pages.  This function is used in early
+ * boot before the buddy or bootmem allocator is setup.
+ */
+void add_gpage(unsigned long addr, unsigned long page_size,
+               unsigned long number_of_pages)
+{
+        if (!addr)
+                return;
+        while (number_of_pages > 0) {
+                gpage_freearray[nr_gpages] = addr;
+                nr_gpages++;
+                number_of_pages--;
+                addr += page_size;
+        }
+}
+
+/* Moves the gigantic page addresses from the temporary list to the
+ * huge_boot_pages list.
+ */
+int alloc_bootmem_huge_page(struct hstate *hstate)
+{
+        struct huge_bootmem_page *m;
+        if (nr_gpages == 0)
+                return 0;
+        m = phys_to_virt(gpage_freearray[--nr_gpages]);
+        gpage_freearray[nr_gpages] = 0;
+        list_add(&m->list, &huge_boot_pages);
+        m->hstate = hstate;
+        return 1;
+}
+
+
 /* Modelled after find_linux_pte() */
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
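Note: gpage_freearray behaves as a LIFO stack: add_gpage() pushes the 16G page addresses found in the device tree, and alloc_bootmem_huge_page() later pops them one at a time onto huge_boot_pages. A userspace model of the push/pop handoff (fake addresses; assumes a 64-bit host):

    #include <stdio.h>

    #define MAX_NUMBER_GPAGES 1024

    static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
    static unsigned nr_gpages;

    static void add_gpage(unsigned long addr, unsigned long page_size,
                          unsigned long number_of_pages)
    {
        if (!addr)
            return;
        while (number_of_pages > 0) {
            gpage_freearray[nr_gpages++] = addr;
            number_of_pages--;
            addr += page_size;
        }
    }

    int main(void)
    {
        add_gpage(0x100000000ul, 1ul << 34, 2);  /* two 16G pages */
        while (nr_gpages)                        /* pop in LIFO order */
            printf("handing 16G page at 0x%lx to hugetlb\n",
                   gpage_freearray[--nr_gpages]);
        return 0;
    }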
@@ -111,39 +191,52 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
         pud_t *pu;
         pmd_t *pm;
 
-        BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
+        unsigned int psize;
+        unsigned int shift;
+        unsigned long sz;
+        struct hstate *hstate;
+        psize = get_slice_psize(mm, addr);
+        shift = mmu_psize_to_shift(psize);
+        sz = ((1UL) << shift);
+        hstate = size_to_hstate(sz);
 
-        addr &= HPAGE_MASK;
+        addr &= hstate->mask;
 
         pg = pgd_offset(mm, addr);
         if (!pgd_none(*pg)) {
                 pu = pud_offset(pg, addr);
                 if (!pud_none(*pu)) {
-                        pm = hpmd_offset(pu, addr);
+                        pm = hpmd_offset(pu, addr, hstate);
                         if (!pmd_none(*pm))
-                                return hugepte_offset((hugepd_t *)pm, addr);
+                                return hugepte_offset((hugepd_t *)pm, addr,
+                                                      hstate);
                 }
         }
 
         return NULL;
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+                        unsigned long addr, unsigned long sz)
 {
         pgd_t *pg;
         pud_t *pu;
         pmd_t *pm;
         hugepd_t *hpdp = NULL;
+        struct hstate *hstate;
+        unsigned int psize;
+        hstate = size_to_hstate(sz);
 
-        BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
+        psize = get_slice_psize(mm, addr);
+        BUG_ON(!mmu_huge_psizes[psize]);
 
-        addr &= HPAGE_MASK;
+        addr &= hstate->mask;
 
         pg = pgd_offset(mm, addr);
         pu = pud_alloc(mm, pg, addr);
 
         if (pu) {
-                pm = hpmd_alloc(mm, pu, addr);
+                pm = hpmd_alloc(mm, pu, addr, hstate);
                 if (pm)
                         hpdp = (hugepd_t *)pm;
         }
@@ -151,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
         if (! hpdp)
                 return NULL;
 
-        if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
+        if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
                 return NULL;
 
-        return hugepte_offset(hpdp, addr);
+        return hugepte_offset(hpdp, addr, hstate);
 }
 
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
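Note: with the hstate threaded through, hugepte_offset() indexes the hugepte table using the address bits between the huge page shift and the hugepd span. The arithmetic in isolation, for 16M-on-4K (the hugepte_shift of 4 follows from the geometry sketched after the first hunk):

    #include <stdio.h>

    int main(void)
    {
        unsigned int shift = 24;           /* huge_page_shift(hstate) */
        unsigned int hugepte_shift = 4;    /* mmu_huge_psizes[psize] */
        unsigned long addr = 0x13000000;   /* address in a 16M mapping */

        unsigned long idx = (addr >> shift) & ((1ul << hugepte_shift) - 1);
        printf("hugepte index %lu of %lu\n", idx, 1ul << hugepte_shift);
        return 0;
    }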
@@ -162,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
         return 0;
 }
 
-static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
+static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
+                               unsigned int psize)
 {
         pte_t *hugepte = hugepd_page(*hpdp);
 
         hpdp->pd = 0;
         tlb->need_flush = 1;
-        pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
+        pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
+                                                 HUGEPTE_CACHE_NUM+psize-1,
                                                  PGF_CACHENUM_MASK));
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
                                    unsigned long addr, unsigned long end,
-                                   unsigned long floor, unsigned long ceiling)
+                                   unsigned long floor, unsigned long ceiling,
+                                   unsigned int psize)
 {
         pmd_t *pmd;
         unsigned long next;
@@ -186,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
                 next = pmd_addr_end(addr, end);
                 if (pmd_none(*pmd))
                         continue;
-                free_hugepte_range(tlb, (hugepd_t *)pmd);
+                free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
         } while (pmd++, addr = next, addr != end);
 
         start &= PUD_MASK;
@@ -212,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
         pud_t *pud;
         unsigned long next;
         unsigned long start;
+        unsigned int shift;
+        unsigned int psize = get_slice_psize(tlb->mm, addr);
+        shift = mmu_psize_to_shift(psize);
 
         start = addr;
         pud = pud_offset(pgd, addr);
@@ -220,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 #ifdef CONFIG_PPC_64K_PAGES
                 if (pud_none_or_clear_bad(pud))
                         continue;
-                hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+                hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling,
+                                       psize);
 #else
-                if (HPAGE_SHIFT == HPAGE_SHIFT_64K) {
+                if (shift == PAGE_SHIFT_64K) {
                         if (pud_none_or_clear_bad(pud))
                                 continue;
-                        hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+                        hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+                                               ceiling, psize);
                 } else {
                         if (pud_none(*pud))
                                 continue;
-                        free_hugepte_range(tlb, (hugepd_t *)pud);
+                        free_hugepte_range(tlb, (hugepd_t *)pud, psize);
                 }
 #endif
         } while (pud++, addr = next, addr != end);
@@ -255,7 +356,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  *
  * Must be called with pagetable lock held.
  */
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
                             unsigned long addr, unsigned long end,
                             unsigned long floor, unsigned long ceiling)
 {
@@ -297,31 +398,33 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
          * now has no other vmas using it, so can be freed, we don't
          * bother to round floor or end up - the tests don't need that.
          */
+        unsigned int psize = get_slice_psize(tlb->mm, addr);
 
-        addr &= HUGEPD_MASK;
+        addr &= HUGEPD_MASK(psize);
         if (addr < floor) {
-                addr += HUGEPD_SIZE;
+                addr += HUGEPD_SIZE(psize);
                 if (!addr)
                         return;
         }
         if (ceiling) {
-                ceiling &= HUGEPD_MASK;
+                ceiling &= HUGEPD_MASK(psize);
                 if (!ceiling)
                         return;
         }
         if (end - 1 > ceiling - 1)
-                end -= HUGEPD_SIZE;
+                end -= HUGEPD_SIZE(psize);
         if (addr > end - 1)
                 return;
 
         start = addr;
-        pgd = pgd_offset((*tlb)->mm, addr);
+        pgd = pgd_offset(tlb->mm, addr);
         do {
-                BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize);
+                psize = get_slice_psize(tlb->mm, addr);
+                BUG_ON(!mmu_huge_psizes[psize]);
                 next = pgd_addr_end(addr, end);
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
-                hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+                hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
         } while (pgd++, addr = next, addr != end);
 }
 
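Note: the floor/ceiling clamping now rounds to per-psize hugepd spans instead of a single global HUGEPD_SIZE. A sketch of the rounding, instantiated for the 16M-on-4K geometry where a hugepd spans 1 << 28 bytes:

    #include <stdio.h>

    #define HUGEPD_SIZE (1ul << 28)
    #define HUGEPD_MASK (~(HUGEPD_SIZE - 1))

    int main(void)
    {
        unsigned long addr = 0x12345678ul, floor = 0x18000000ul;

        addr &= HUGEPD_MASK;        /* round down to a hugepd span */
        if (addr < floor)
            addr += HUGEPD_SIZE;    /* never free below the floor */
        printf("first span to free starts at 0x%lx\n", addr);
        return 0;
    }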
@@ -334,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                  * necessary anymore if we make hpte_need_flush() get the
                  * page size from the slices
                  */
-                pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1);
+                unsigned int psize = get_slice_psize(mm, addr);
+                unsigned int shift = mmu_psize_to_shift(psize);
+                unsigned long sz = ((1UL) << shift);
+                struct hstate *hstate = size_to_hstate(sz);
+                pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
         }
         *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 }
@@ -351,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
         pte_t *ptep;
         struct page *page;
+        unsigned int mmu_psize = get_slice_psize(mm, address);
 
-        if (get_slice_psize(mm, address) != mmu_huge_psize)
+        /* Verify it is a huge page else bail. */
+        if (!mmu_huge_psizes[mmu_psize])
                 return ERR_PTR(-EINVAL);
 
         ptep = huge_pte_offset(mm, address);
         page = pte_page(*ptep);
-        if (page)
-                page += (address % HPAGE_SIZE) / PAGE_SIZE;
+        if (page) {
+                unsigned int shift = mmu_psize_to_shift(mmu_psize);
+                unsigned long sz = ((1UL) << shift);
+                page += (address % sz) / PAGE_SIZE;
+        }
 
         return page;
 }
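Note: follow_huge_addr() now sizes its tail-page step per psize: it steps from the huge page's head page to the base page that actually contains the address. The arithmetic in isolation:

    #include <stdio.h>

    int main(void)
    {
        unsigned long address = 0x10234567ul;
        unsigned long sz = 1ul << 24;        /* 16M huge page */
        unsigned long page_size = 1ul << 12; /* 4K base page */

        printf("address is %lu base pages past the huge page head\n",
               (address % sz) / page_size);
        return 0;
    }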
@@ -368,6 +480,11 @@ int pmd_huge(pmd_t pmd)
         return 0;
 }
 
+int pud_huge(pud_t pud)
+{
+        return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                 pmd_t *pmd, int write)
@@ -381,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                         unsigned long len, unsigned long pgoff,
                                         unsigned long flags)
 {
-        return slice_get_unmapped_area(addr, len, flags,
-                                       mmu_huge_psize, 1, 0);
+        struct hstate *hstate = hstate_file(file);
+        int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
+        return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
 }
 
 /*
  * Called by asm hashtable.S for doing lazy icache flush
  */
 static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
-                                                  pte_t pte, int trap)
+                                        pte_t pte, int trap, unsigned long sz)
 {
         struct page *page;
         int i;
@@ -402,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
         /* page is dirty */
         if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
                 if (trap == 0x400) {
-                        for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++)
+                        for (i = 0; i < (sz / PAGE_SIZE); i++)
                                 __flush_dcache_icache(page_address(page+i));
                         set_bit(PG_arch_1, &page->flags);
                 } else {
@@ -418,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 {
         pte_t *ptep;
         unsigned long old_pte, new_pte;
-        unsigned long va, rflags, pa;
+        unsigned long va, rflags, pa, sz;
         long slot;
         int err = 1;
         int ssize = user_segment_size(ea);
+        unsigned int mmu_psize;
+        int shift;
+        mmu_psize = get_slice_psize(mm, ea);
 
+        if (!mmu_huge_psizes[mmu_psize])
+                goto out;
         ptep = huge_pte_offset(mm, ea);
 
         /* Search the Linux page table for a match with va */
@@ -465,30 +588,32 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
         rflags = 0x2 | (!(new_pte & _PAGE_RW));
         /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
         rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+        shift = mmu_psize_to_shift(mmu_psize);
+        sz = ((1UL) << shift);
         if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
                 /* No CPU has hugepages but lacks no execute, so we
                  * don't need to worry about that case */
                 rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
-                                                       trap);
+                                                       trap, sz);
 
         /* Check if pte already has an hpte (case 2) */
         if (unlikely(old_pte & _PAGE_HASHPTE)) {
                 /* There MIGHT be an HPTE for this pte */
                 unsigned long hash, slot;
 
-                hash = hpt_hash(va, HPAGE_SHIFT, ssize);
+                hash = hpt_hash(va, shift, ssize);
                 if (old_pte & _PAGE_F_SECOND)
                         hash = ~hash;
                 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                 slot += (old_pte & _PAGE_F_GIX) >> 12;
 
-                if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize,
+                if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
                                          ssize, local) == -1)
                         old_pte &= ~_PAGE_HPTEFLAGS;
         }
 
         if (likely(!(old_pte & _PAGE_HASHPTE))) {
-                unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize);
+                unsigned long hash = hpt_hash(va, shift, ssize);
                 unsigned long hpte_group;
 
                 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
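Note: passing the per-size shift into hpt_hash() means address bits below the huge page boundary no longer influence which hash group is chosen. A toy model of that effect only (this is NOT the kernel's hpt_hash(); the mixing function here is purely illustrative, and it assumes a 64-bit host):

    #include <stdio.h>

    static unsigned long toy_hash(unsigned long va, unsigned int shift)
    {
        /* drop the in-page bits, then mix with the segment bits */
        return (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
    }

    int main(void)
    {
        unsigned long va = 0x123f000000ul;

        printf("shift 24 (16M): hash 0x%lx\n", toy_hash(va, 24));
        printf("shift 16 (64K): hash 0x%lx\n", toy_hash(va, 16));
        return 0;
    }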
@@ -509,7 +634,7 @@ repeat:
 
                 /* Insert into the hash table, primary slot */
                 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
-                                          mmu_huge_psize, ssize);
+                                          mmu_psize, ssize);
 
                 /* Primary is full, try the secondary */
                 if (unlikely(slot == -1)) {
@@ -517,7 +642,7 @@ repeat:
                                               HPTES_PER_GROUP) & ~0x7UL;
                         slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
                                                   HPTE_V_SECONDARY,
-                                                  mmu_huge_psize, ssize);
+                                                  mmu_psize, ssize);
                         if (slot == -1) {
                                 if (mftb() & 0x1)
                                         hpte_group = ((hash & htab_hash_mask) *
@@ -549,45 +674,54 @@ void set_huge_psize(int psize)
 {
         /* Check that it is a page size supported by the hardware and
          * that it fits within pagetable limits. */
-        if (mmu_psize_defs[psize].shift && mmu_psize_defs[psize].shift < SID_SHIFT &&
+        if (mmu_psize_defs[psize].shift &&
+            mmu_psize_defs[psize].shift < SID_SHIFT_1T &&
             (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
-             mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) {
-                HPAGE_SHIFT = mmu_psize_defs[psize].shift;
-                mmu_huge_psize = psize;
-#ifdef CONFIG_PPC_64K_PAGES
-                hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
-#else
-                if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
-                        hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
-                else
-                        hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT);
-#endif
-
+             mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
+             mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
+                /* Return if huge page size has already been setup or is the
+                 * same as the base page size. */
+                if (mmu_huge_psizes[psize] ||
+                    mmu_psize_defs[psize].shift == PAGE_SHIFT)
+                        return;
+                hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);
+
+                switch (mmu_psize_defs[psize].shift) {
+                case PAGE_SHIFT_64K:
+                        /* We only allow 64k hpages with 4k base page,
+                         * which was checked above, and always put them
+                         * at the PMD */
+                        hugepte_shift[psize] = PMD_SHIFT;
+                        break;
+                case PAGE_SHIFT_16M:
+                        /* 16M pages can be at two different levels
+                         * of pagetables based on base page size */
+                        if (PAGE_SHIFT == PAGE_SHIFT_64K)
+                                hugepte_shift[psize] = PMD_SHIFT;
+                        else /* 4k base page */
+                                hugepte_shift[psize] = PUD_SHIFT;
+                        break;
+                case PAGE_SHIFT_16G:
+                        /* 16G pages are always at PGD level */
+                        hugepte_shift[psize] = PGDIR_SHIFT;
+                        break;
+                }
+                hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
         } else
-                HPAGE_SHIFT = 0;
+                hugepte_shift[psize] = 0;
 }
 
 static int __init hugepage_setup_sz(char *str)
 {
         unsigned long long size;
-        int mmu_psize = -1;
+        int mmu_psize;
         int shift;
 
         size = memparse(str, &str);
 
         shift = __ffs(size);
-        switch (shift) {
-#ifndef CONFIG_PPC_64K_PAGES
-        case HPAGE_SHIFT_64K:
-                mmu_psize = MMU_PAGE_64K;
-                break;
-#endif
-        case HPAGE_SHIFT_16M:
-                mmu_psize = MMU_PAGE_16M;
-                break;
-        }
-
-        if (mmu_psize >=0 && mmu_psize_defs[mmu_psize].shift)
+        mmu_psize = shift_to_mmu_psize(shift);
+        if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
                 set_huge_psize(mmu_psize);
         else
                 printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
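Note: hugepage_setup_sz() relies on memparse() to turn the hugepagesz= string into a byte count and on __ffs() to extract the shift of that power of two, which shift_to_mmu_psize() then validates. A sketch of the arithmetic, using GCC's __builtin_ctzll as a userspace stand-in for __ffs:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long size = 16ull << 20;  /* "16M" after memparse() */
        int shift = __builtin_ctzll(size);      /* __ffs(size) == 24 */

        printf("hugepagesz=16M -> shift %d -> MMU_PAGE_16M\n", shift);
        return 0;
    }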
@@ -603,16 +737,31 @@ static void zero_ctor(struct kmem_cache *cache, void *addr)
 
 static int __init hugetlbpage_init(void)
 {
+        unsigned int psize;
+
         if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                 return -ENODEV;
-
-        huge_pgtable_cache = kmem_cache_create("hugepte_cache",
-                                               HUGEPTE_TABLE_SIZE,
-                                               HUGEPTE_TABLE_SIZE,
-                                               0,
-                                               zero_ctor);
-        if (! huge_pgtable_cache)
-                panic("hugetlbpage_init(): could not create hugepte cache\n");
+        /* Add supported huge page sizes.  Need to change HUGE_MAX_HSTATE
+         * and adjust PTE_NONCACHE_NUM if the number of supported huge page
+         * sizes changes.
+         */
+        set_huge_psize(MMU_PAGE_16M);
+        set_huge_psize(MMU_PAGE_64K);
+        set_huge_psize(MMU_PAGE_16G);
+
+        for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+                if (mmu_huge_psizes[psize]) {
+                        huge_pgtable_cache(psize) = kmem_cache_create(
+                                                HUGEPTE_CACHE_NAME(psize),
+                                                HUGEPTE_TABLE_SIZE(psize),
+                                                HUGEPTE_TABLE_SIZE(psize),
+                                                0,
+                                                zero_ctor);
+                        if (!huge_pgtable_cache(psize))
+                                panic("hugetlbpage_init(): could not create %s"\
+                                      "\n", HUGEPTE_CACHE_NAME(psize));
+                }
+        }
 
         return 0;
 }
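Note: hugetlbpage_init() now creates one hugepte kmem cache per enabled psize, named from huge_pgtable_cache_name[] and stored at pgtable_cache[HUGEPTE_CACHE_NUM + psize - 1]. A sketch of that slot/name mapping; the HUGEPTE_CACHE_NUM value of 2 is an assumption, and the array order mirrors the MMU_PAGE_* enum:

    #include <stdio.h>

    #define HUGEPTE_CACHE_NUM 2  /* assumed slot after the pgd/pmd caches */

    static const char *huge_pgtable_cache_name[] = {
        "unused_4K", "hugepte_cache_64K", "unused_64K_AP",
        "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G"
    };

    int main(void)
    {
        int psize;

        for (psize = 1; psize < 6; psize++)  /* psize 0 (4K) has no cache */
            printf("psize %d -> pgtable_cache[%d] = \"%s\"\n", psize,
                   HUGEPTE_CACHE_NUM + psize - 1,
                   huge_pgtable_cache_name[psize]);
        return 0;
    }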