author:    Tejun Heo <tj@kernel.org>  2011-11-28 12:46:22 -0500
committer: Tejun Heo <tj@kernel.org>  2011-11-28 12:46:22 -0500
commit:    d4bbf7e7759afc172e2bfbc5c416324590049cdd (patch)
tree:      7eab5ee5481cd3dcf1162329fec827177640018a /arch/powerpc/mm/hugetlbpage.c
parent:    a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff)
parent:    401d0069cb344f401bc9d264c31db55876ff78c0 (diff)
Merge branch 'master' into x86/memblock
Conflicts & resolutions:
* arch/x86/xen/setup.c
dc91c728fd "xen: allow extra memory to be in multiple regions"
24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..."
conflicted on xen_add_extra_mem() updates.  The resolution is
trivial as the latter just wants to replace
memblock_x86_reserve_range() with memblock_reserve(); a minimal
sketch of that replacement follows the conflict list.
* drivers/pci/intel-iommu.c
166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/"
5dfe8660a3d "bootmem: Replace work_with_active_regions() with..."
conflicted as the former moved the file under drivers/iommu/.
Resolved by applying the changes from the latter to the moved
file.
* mm/Kconfig
6661672053a "memblock: add NO_BOOTMEM config symbol"
c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option"
conflicted trivially. Both added config options. Just
letting both add their own options resolves the conflict.
* mm/memblock.c
d1f0ece6cdc "mm/memblock.c: small function definition fixes"
ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()"
conflicted.  The former updates a function removed by the
latter.  Resolution is trivial.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
 -rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 401
 1 file changed, 354 insertions(+), 47 deletions(-)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0b9a5c1901b9..8558b572e55d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1,7 +1,8 @@
 /*
- * PPC64 (POWER4) Huge TLB Page Support for Kernel.
+ * PPC Huge TLB Page Support for Kernel.
  *
  * Copyright (C) 2003 David Gibson, IBM Corporation.
+ * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
  *
  * Based on the IA-32 version:
  * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
@@ -11,24 +12,40 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
+#include <linux/of_fdt.h>
+#include <linux/memblock.h>
+#include <linux/bootmem.h>
+#include <linux/moduleparam.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
+#include <asm/setup.h>
 
 #define PAGE_SHIFT_64K	16
 #define PAGE_SHIFT_16M	24
 #define PAGE_SHIFT_16G	34
 
-#define MAX_NUMBER_GPAGES	1024
+unsigned int HPAGE_SHIFT;
 
-/* Tracks the 16G pages after the device tree is scanned and before the
- * huge_boot_pages list is ready. */
-static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
+/*
+ * Tracks gpages after the device tree is scanned and before the
+ * huge_boot_pages list is ready.  On 64-bit implementations, this is
+ * just used to track 16G pages and so is a single array.  32-bit
+ * implementations may have more than one gpage size due to limitations
+ * of the memory allocators, so we need multiple arrays
+ */
+#ifdef CONFIG_PPC64
+#define MAX_NUMBER_GPAGES	1024
+static u64 gpage_freearray[MAX_NUMBER_GPAGES];
 static unsigned nr_gpages;
-
-/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
- * will choke on pointers to hugepte tables, which is handy for
- * catching screwups early. */
+#else
+#define MAX_NUMBER_GPAGES	128
+struct psize_gpages {
+	u64 gpage_list[MAX_NUMBER_GPAGES];
+	unsigned int nr_gpages;
+};
+static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
+#endif
 
 static inline int shift_to_mmu_psize(unsigned int shift)
 {
@@ -49,25 +66,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 
 #define hugepd_none(hpd)	((hpd).pd == 0)
 
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
-	BUG_ON(!hugepd_ok(hpd));
-	return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
-}
-
-static inline unsigned int hugepd_shift(hugepd_t hpd)
-{
-	return hpd.pd & HUGEPD_SHIFT_MASK;
-}
-
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift)
-{
-	unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
-	pte_t *dir = hugepd_page(*hpdp);
-
-	return dir + idx;
-}
-
 pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
 {
 	pgd_t *pg;
@@ -93,7 +91,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 			if (is_hugepd(pm))
 				hpdp = (hugepd_t *)pm;
 			else if (!pmd_none(*pm)) {
-				return pte_offset_map(pm, ea);
+				return pte_offset_kernel(pm, ea);
 			}
 		}
 	}
@@ -114,8 +112,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 			   unsigned long address, unsigned pdshift, unsigned pshift)
 {
-	pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift),
-				       GFP_KERNEL|__GFP_REPEAT);
+	struct kmem_cache *cachep;
+	pte_t *new;
+
+#ifdef CONFIG_PPC64
+	cachep = PGT_CACHE(pdshift - pshift);
+#else
+	int i;
+	int num_hugepd = 1 << (pshift - pdshift);
+	cachep = hugepte_cache;
+#endif
+
+	new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
 
 	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
 	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -124,10 +132,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 		return -ENOMEM;
 
 	spin_lock(&mm->page_table_lock);
+#ifdef CONFIG_PPC64
 	if (!hugepd_none(*hpdp))
-		kmem_cache_free(PGT_CACHE(pdshift - pshift), new);
+		kmem_cache_free(cachep, new);
 	else
-		hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift;
+		hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+#else
+	/*
+	 * We have multiple higher-level entries that point to the same
+	 * actual pte location.  Fill in each as we go and backtrack on error.
+	 * We need all of these so the DTLB pgtable walk code can find the
+	 * right higher-level entry without knowing if it's a hugepage or not.
+	 */
+	for (i = 0; i < num_hugepd; i++, hpdp++) {
+		if (unlikely(!hugepd_none(*hpdp)))
+			break;
+		else
+			hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
+	}
+	/* If we bailed from the for loop early, an error occurred, clean up */
+	if (i < num_hugepd) {
+		for (i = i - 1 ; i >= 0; i--, hpdp--)
+			hpdp->pd = 0;
+		kmem_cache_free(cachep, new);
+	}
+#endif
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
@@ -169,11 +198,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	return hugepte_offset(hpdp, addr, pdshift);
 }
 
+#ifdef CONFIG_PPC32
+/* Build list of addresses of gigantic pages.  This function is used in early
+ * boot before the buddy or bootmem allocator is setup.
+ */
+void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
+{
+	unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
+	int i;
+
+	if (addr == 0)
+		return;
+
+	gpage_freearray[idx].nr_gpages = number_of_pages;
+
+	for (i = 0; i < number_of_pages; i++) {
+		gpage_freearray[idx].gpage_list[i] = addr;
+		addr += page_size;
+	}
+}
+
+/*
+ * Moves the gigantic page addresses from the temporary list to the
+ * huge_boot_pages list.
+ */
+int alloc_bootmem_huge_page(struct hstate *hstate)
+{
+	struct huge_bootmem_page *m;
+	int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
+	int nr_gpages = gpage_freearray[idx].nr_gpages;
+
+	if (nr_gpages == 0)
+		return 0;
+
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * If gpages can be in highmem we can't use the trick of storing the
+	 * data structure in the page; allocate space for this
+	 */
+	m = alloc_bootmem(sizeof(struct huge_bootmem_page));
+	m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
+#else
+	m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
+#endif
+
+	list_add(&m->list, &huge_boot_pages);
+	gpage_freearray[idx].nr_gpages = nr_gpages;
+	gpage_freearray[idx].gpage_list[nr_gpages] = 0;
+	m->hstate = hstate;
+
+	return 1;
+}
+/*
+ * Scan the command line hugepagesz= options for gigantic pages; store those in
+ * a list that we use to allocate the memory once all options are parsed.
+ */
+
+unsigned long gpage_npages[MMU_PAGE_COUNT];
+
+static int __init do_gpage_early_setup(char *param, char *val)
+{
+	static phys_addr_t size;
+	unsigned long npages;
+
+	/*
+	 * The hugepagesz and hugepages cmdline options are interleaved.  We
+	 * use the size variable to keep track of whether or not this was done
+	 * properly and skip over instances where it is incorrect.  Other
+	 * command-line parsing code will issue warnings, so we don't need to.
+	 *
+	 */
+	if ((strcmp(param, "default_hugepagesz") == 0) ||
+	    (strcmp(param, "hugepagesz") == 0)) {
+		size = memparse(val, NULL);
+	} else if (strcmp(param, "hugepages") == 0) {
+		if (size != 0) {
+			if (sscanf(val, "%lu", &npages) <= 0)
+				npages = 0;
+			gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
+			size = 0;
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * This function allocates physical space for pages that are larger than the
+ * buddy allocator can handle.  We want to allocate these in highmem because
+ * the amount of lowmem is limited.  This means that this function MUST be
+ * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
+ * allocator to grab highmem.
+ */
+void __init reserve_hugetlb_gpages(void)
+{
+	static __initdata char cmdline[COMMAND_LINE_SIZE];
+	phys_addr_t size, base;
+	int i;
+
+	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
+	parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup);
+
+	/*
+	 * Walk gpage list in reverse, allocating larger page sizes first.
+	 * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
+	 * When we reach the point in the list where pages are no longer
+	 * considered gpages, we're done.
+	 */
+	for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
+		if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
+			continue;
+		else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
+			break;
+
+		size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
+		base = memblock_alloc_base(size * gpage_npages[i], size,
+					   MEMBLOCK_ALLOC_ANYWHERE);
+		add_gpage(base, size, gpage_npages[i]);
+	}
+}
+
+#else /* PPC64 */
+
 /* Build list of addresses of gigantic pages.  This function is used in early
  * boot before the buddy or bootmem allocator is setup.
  */
-void add_gpage(unsigned long addr, unsigned long page_size,
-	       unsigned long number_of_pages)
+void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
 {
 	if (!addr)
 		return;
@@ -199,19 +349,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
 	m->hstate = hstate;
 	return 1;
 }
+#endif
 
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
 
+#ifdef CONFIG_PPC32
+#define HUGEPD_FREELIST_SIZE \
+	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
+
+struct hugepd_freelist {
+	struct rcu_head	rcu;
+	unsigned int index;
+	void *ptes[0];
+};
+
+static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
+
+static void hugepd_free_rcu_callback(struct rcu_head *head)
+{
+	struct hugepd_freelist *batch =
+		container_of(head, struct hugepd_freelist, rcu);
+	unsigned int i;
+
+	for (i = 0; i < batch->index; i++)
+		kmem_cache_free(hugepte_cache, batch->ptes[i]);
+
+	free_page((unsigned long)batch);
+}
+
+static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
+{
+	struct hugepd_freelist **batchp;
+
+	batchp = &__get_cpu_var(hugepd_freelist_cur);
+
+	if (atomic_read(&tlb->mm->mm_users) < 2 ||
+	    cpumask_equal(mm_cpumask(tlb->mm),
+			  cpumask_of(smp_processor_id()))) {
+		kmem_cache_free(hugepte_cache, hugepte);
+		return;
+	}
+
+	if (*batchp == NULL) {
+		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
+		(*batchp)->index = 0;
+	}
+
+	(*batchp)->ptes[(*batchp)->index++] = hugepte;
+	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
+		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
+		*batchp = NULL;
+	}
+}
+#endif
+
 static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
 			      unsigned long start, unsigned long end,
 			      unsigned long floor, unsigned long ceiling)
 {
 	pte_t *hugepte = hugepd_page(*hpdp);
-	unsigned shift = hugepd_shift(*hpdp);
+	int i;
+
 	unsigned long pdmask = ~((1UL << pdshift) - 1);
+	unsigned int num_hugepd = 1;
+
+#ifdef CONFIG_PPC64
+	unsigned int shift = hugepd_shift(*hpdp);
+#else
+	/* Note: On 32-bit the hpdp may be the first of several */
+	num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
+#endif
 
 	start &= pdmask;
 	if (start < floor)
@@ -331,18 +547,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	 * too.
 	 */
 
-	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
+		pgd = pgd_offset(tlb->mm, addr);
 		if (!is_hugepd(pgd)) {
 			if (pgd_none_or_clear_bad(pgd))
 				continue;
 			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 		} else {
+#ifdef CONFIG_PPC32
+			/*
+			 * Increment next by the size of the huge mapping since
+			 * on 32-bit there may be more than one entry at the pgd
+			 * level for a single hugepage, but all of them point to
+			 * the same kmem cache that holds the hugepte.
+			 */
+			next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
+#endif
 			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
 					  addr, next, floor, ceiling);
 		}
-	} while (pgd++, addr = next, addr != end);
+	} while (addr = next, addr != end);
 }
 
 struct page *
@@ -390,7 +615,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
 {
 	unsigned long mask;
 	unsigned long pte_end;
-	struct page *head, *page;
+	struct page *head, *page, *tail;
 	pte_t pte;
 	int refs;
 
@@ -413,6 +638,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
 	head = pte_page(pte);
 
 	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+	tail = page;
 	do {
 		VM_BUG_ON(compound_head(page) != head);
 		pages[*nr] = page;
@@ -428,10 +654,20 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add
 
 	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
 		/* Could be optimized better */
-		while (*nr) {
-			put_page(page);
-			(*nr)--;
-		}
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+		return 0;
+	}
+
+	/*
+	 * Any tail page need their mapcount reference taken before we
+	 * return.
+	 */
+	while (refs--) {
+		if (PageTail(tail))
+			get_huge_page_tail(tail);
+		tail++;
 	}
 
 	return 1;
@@ -466,17 +702,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
 					unsigned long flags)
 {
+#ifdef CONFIG_PPC_MM_SLICES
 	struct hstate *hstate = hstate_file(file);
 	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
+#else
+	return get_unmapped_area(file, addr, len, pgoff, flags);
+#endif
 }
 
 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
+#ifdef CONFIG_PPC_MM_SLICES
 	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
 
 	return 1UL << mmu_psize_to_shift(psize);
+#else
+	if (!is_vm_hugetlb_page(vma))
+		return PAGE_SIZE;
+
+	return huge_page_size(hstate_vma(vma));
+#endif
+}
+
+static inline bool is_power_of_4(unsigned long x)
+{
+	if (is_power_of_2(x))
+		return (__ilog2(x) % 2) ? false : true;
+	return false;
 }
 
 static int __init add_huge_page_size(unsigned long long size)
@@ -486,9 +740,14 @@ static int __init add_huge_page_size(unsigned long long size)
 
 	/* Check that it is a page size supported by the hardware and
 	 * that it fits within pagetable and slice limits. */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	if ((size < PAGE_SIZE) || !is_power_of_4(size))
+		return -EINVAL;
+#else
 	if (!is_power_of_2(size)
 	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
 		return -EINVAL;
+#endif
 
 	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
 		return -EINVAL;
@@ -525,6 +784,46 @@ static int __init hugepage_setup_sz(char *str)
 }
 __setup("hugepagesz=", hugepage_setup_sz);
 
+#ifdef CONFIG_FSL_BOOKE
+struct kmem_cache *hugepte_cache;
+static int __init hugetlbpage_init(void)
+{
+	int psize;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+		unsigned shift;
+
+		if (!mmu_psize_defs[psize].shift)
+			continue;
+
+		shift = mmu_psize_to_shift(psize);
+
+		/* Don't treat normal page sizes as huge... */
+		if (shift != PAGE_SHIFT)
+			if (add_huge_page_size(1ULL << shift) < 0)
+				continue;
+	}
+
+	/*
+	 * Create a kmem cache for hugeptes.  The bottom bits in the pte have
+	 * size information encoded in them, so align them to allow this
+	 */
+	hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
+					  HUGEPD_SHIFT_MASK + 1, 0, NULL);
+	if (hugepte_cache == NULL)
+		panic("%s: Unable to create kmem cache for hugeptes\n",
+		      __func__);
+
+	/* Default hpage size = 4M */
+	if (mmu_psize_defs[MMU_PAGE_4M].shift)
+		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
+	else
+		panic("%s: Unable to set default huge page size\n", __func__);
+
+
+	return 0;
+}
+#else
 static int __init hugetlbpage_init(void)
 {
 	int psize;
@@ -567,15 +866,23 @@ static int __init hugetlbpage_init(void)
 
 	return 0;
 }
-
+#endif
 module_init(hugetlbpage_init);
 
 void flush_dcache_icache_hugepage(struct page *page)
 {
 	int i;
+	void *start;
 
 	BUG_ON(!PageCompound(page));
 
-	for (i = 0; i < (1UL << compound_order(page)); i++)
-		__flush_dcache_icache(page_address(page+i));
+	for (i = 0; i < (1UL << compound_order(page)); i++) {
+		if (!PageHighMem(page)) {
+			__flush_dcache_icache(page_address(page+i));
+		} else {
+			start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE);
+			__flush_dcache_icache(start);
+			kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
+		}
+	}
 }