author	Ingo Molnar <mingo@elte.hu>	2008-07-26 11:48:49 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-07-26 11:48:49 -0400
commit	c3cc99ff5d24e2eeaf7ec2032e720681916990e3 (patch)
tree	c3e74171bbbd2adde9d60b9db1c440415c8d2831 /arch/powerpc/mm
parent	38ffbe66d59051fd9cfcfc8545f164700e2fa3bc (diff)
parent	024e8ac04453b3525448c31ef39848cf675ba6db (diff)
Merge branch 'linus' into x86/xen
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--	arch/powerpc/mm/fault.c	25
-rw-r--r--	arch/powerpc/mm/hash_utils_64.c	51
-rw-r--r--	arch/powerpc/mm/hugetlbpage.c	341
-rw-r--r--	arch/powerpc/mm/init_64.c	8
-rw-r--r--	arch/powerpc/mm/numa.c	3
-rw-r--r--	arch/powerpc/mm/pgtable_32.c	22
-rw-r--r--	arch/powerpc/mm/pgtable_64.c	16
-rw-r--r--	arch/powerpc/mm/tlb_64.c	2
8 files changed, 329 insertions, 139 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 1707d00331fc..565b7a237c84 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs)
100 | return 0; | 100 | return 0; |
101 | } | 101 | } |
102 | 102 | ||
103 | #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) | ||
104 | static void do_dabr(struct pt_regs *regs, unsigned long address, | ||
105 | unsigned long error_code) | ||
106 | { | ||
107 | siginfo_t info; | ||
108 | |||
109 | if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, | ||
110 | 11, SIGSEGV) == NOTIFY_STOP) | ||
111 | return; | ||
112 | |||
113 | if (debugger_dabr_match(regs)) | ||
114 | return; | ||
115 | |||
116 | /* Clear the DABR */ | ||
117 | set_dabr(0); | ||
118 | |||
119 | /* Deliver the signal to userspace */ | ||
120 | info.si_signo = SIGTRAP; | ||
121 | info.si_errno = 0; | ||
122 | info.si_code = TRAP_HWBKPT; | ||
123 | info.si_addr = (void __user *)address; | ||
124 | force_sig_info(SIGTRAP, &info, current); | ||
125 | } | ||
126 | #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ | ||
127 | |||
128 | /* | 103 | /* |
129 | * For 600- and 800-family processors, the error_code parameter is DSISR | 104 | * For 600- and 800-family processors, the error_code parameter is DSISR |
130 | * for a data fault, SRR1 for an instruction fault. For 400-family processors | 105 | * for a data fault, SRR1 for an instruction fault. For 400-family processors |
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8d3b58ebd38e..5ce5a4dcd008 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -68,6 +68,7 @@
68 | 68 | ||
69 | #define KB (1024) | 69 | #define KB (1024) |
70 | #define MB (1024*KB) | 70 | #define MB (1024*KB) |
71 | #define GB (1024L*MB) | ||
71 | 72 | ||
72 | /* | 73 | /* |
73 | * Note: pte --> Linux PTE | 74 | * Note: pte --> Linux PTE |
@@ -102,7 +103,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M;
102 | int mmu_highuser_ssize = MMU_SEGSIZE_256M; | 103 | int mmu_highuser_ssize = MMU_SEGSIZE_256M; |
103 | u16 mmu_slb_size = 64; | 104 | u16 mmu_slb_size = 64; |
104 | #ifdef CONFIG_HUGETLB_PAGE | 105 | #ifdef CONFIG_HUGETLB_PAGE |
105 | int mmu_huge_psize = MMU_PAGE_16M; | ||
106 | unsigned int HPAGE_SHIFT; | 106 | unsigned int HPAGE_SHIFT; |
107 | #endif | 107 | #endif |
108 | #ifdef CONFIG_PPC_64K_PAGES | 108 | #ifdef CONFIG_PPC_64K_PAGES |
@@ -329,6 +329,44 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
329 | return 0; | 329 | return 0; |
330 | } | 330 | } |
331 | 331 | ||
332 | /* Scan for 16G memory blocks that have been set aside for huge pages | ||
333 | * and reserve those blocks for 16G huge pages. | ||
334 | */ | ||
335 | static int __init htab_dt_scan_hugepage_blocks(unsigned long node, | ||
336 | const char *uname, int depth, | ||
337 | void *data) { | ||
338 | char *type = of_get_flat_dt_prop(node, "device_type", NULL); | ||
339 | unsigned long *addr_prop; | ||
340 | u32 *page_count_prop; | ||
341 | unsigned int expected_pages; | ||
342 | long unsigned int phys_addr; | ||
343 | long unsigned int block_size; | ||
344 | |||
345 | /* We are scanning "memory" nodes only */ | ||
346 | if (type == NULL || strcmp(type, "memory") != 0) | ||
347 | return 0; | ||
348 | |||
349 | /* This property is the log base 2 of the number of virtual pages that | ||
350 | * will represent this memory block. */ | ||
351 | page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL); | ||
352 | if (page_count_prop == NULL) | ||
353 | return 0; | ||
354 | expected_pages = (1 << page_count_prop[0]); | ||
355 | addr_prop = of_get_flat_dt_prop(node, "reg", NULL); | ||
356 | if (addr_prop == NULL) | ||
357 | return 0; | ||
358 | phys_addr = addr_prop[0]; | ||
359 | block_size = addr_prop[1]; | ||
360 | if (block_size != (16 * GB)) | ||
361 | return 0; | ||
362 | printk(KERN_INFO "Huge page(16GB) memory: " | ||
363 | "addr = 0x%lX size = 0x%lX pages = %d\n", | ||
364 | phys_addr, block_size, expected_pages); | ||
365 | lmb_reserve(phys_addr, block_size * expected_pages); | ||
366 | add_gpage(phys_addr, block_size, expected_pages); | ||
367 | return 0; | ||
368 | } | ||
369 | |||
332 | static void __init htab_init_page_sizes(void) | 370 | static void __init htab_init_page_sizes(void) |
333 | { | 371 | { |
334 | int rc; | 372 | int rc; |
@@ -418,15 +456,18 @@ static void __init htab_init_page_sizes(void)
418 | ); | 456 | ); |
419 | 457 | ||
420 | #ifdef CONFIG_HUGETLB_PAGE | 458 | #ifdef CONFIG_HUGETLB_PAGE |
421 | /* Init large page size. Currently, we pick 16M or 1M depending | 459 | /* Reserve 16G huge page memory sections for huge pages */ |
460 | of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); | ||
461 | |||
462 | /* Set default large page size. Currently, we pick 16M or 1M depending | ||
422 | * on what is available | 463 | * on what is available |
423 | */ | 464 | */ |
424 | if (mmu_psize_defs[MMU_PAGE_16M].shift) | 465 | if (mmu_psize_defs[MMU_PAGE_16M].shift) |
425 | set_huge_psize(MMU_PAGE_16M); | 466 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; |
426 | /* With 4k/4level pagetables, we can't (for now) cope with a | 467 | /* With 4k/4level pagetables, we can't (for now) cope with a |
427 | * huge page size < PMD_SIZE */ | 468 | * huge page size < PMD_SIZE */ |
428 | else if (mmu_psize_defs[MMU_PAGE_1M].shift) | 469 | else if (mmu_psize_defs[MMU_PAGE_1M].shift) |
429 | set_huge_psize(MMU_PAGE_1M); | 470 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; |
430 | #endif /* CONFIG_HUGETLB_PAGE */ | 471 | #endif /* CONFIG_HUGETLB_PAGE */ |
431 | } | 472 | } |
432 | 473 | ||
@@ -847,7 +888,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
847 | 888 | ||
848 | #ifdef CONFIG_HUGETLB_PAGE | 889 | #ifdef CONFIG_HUGETLB_PAGE |
849 | /* Handle hugepage regions */ | 890 | /* Handle hugepage regions */ |
850 | if (HPAGE_SHIFT && psize == mmu_huge_psize) { | 891 | if (HPAGE_SHIFT && mmu_huge_psizes[psize]) { |
851 | DBG_LOW(" -> huge page !\n"); | 892 | DBG_LOW(" -> huge page !\n"); |
852 | return hash_huge_page(mm, access, ea, vsid, local, trap); | 893 | return hash_huge_page(mm, access, ea, vsid, local, trap); |
853 | } | 894 | } |
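Editor's note: the htab_dt_scan_hugepage_blocks() hunk above treats "ibm,expected#pages" as the log base 2 of the page count, so the reservation spans block_size * (1 << prop) bytes. A minimal userspace sketch of that arithmetic (the property value is invented, and a 64-bit long is assumed):

    #include <stdio.h>

    #define GB (1024UL * 1024 * 1024)

    int main(void)
    {
        unsigned int page_count_prop = 2;      /* log2 of the page count (example) */
        unsigned long block_size = 16 * GB;    /* size field of the memory node's reg */
        unsigned long expected_pages = 1UL << page_count_prop;

        /* mirrors lmb_reserve(phys_addr, block_size * expected_pages) */
        printf("reserve 0x%lx bytes for %lu gigantic pages\n",
               block_size * expected_pages, expected_pages);
        return 0;
    }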
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0d12fba31bc5..fb42c4dd3217 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,21 +24,43 @@
24 | #include <asm/cputable.h> | 24 | #include <asm/cputable.h> |
25 | #include <asm/spu.h> | 25 | #include <asm/spu.h> |
26 | 26 | ||
27 | #define HPAGE_SHIFT_64K 16 | 27 | #define PAGE_SHIFT_64K 16 |
28 | #define HPAGE_SHIFT_16M 24 | 28 | #define PAGE_SHIFT_16M 24 |
29 | #define PAGE_SHIFT_16G 34 | ||
29 | 30 | ||
30 | #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) | 31 | #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) |
31 | #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) | 32 | #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) |
33 | #define MAX_NUMBER_GPAGES 1024 | ||
32 | 34 | ||
33 | unsigned int hugepte_shift; | 35 | /* Tracks the 16G pages after the device tree is scanned and before the |
34 | #define PTRS_PER_HUGEPTE (1 << hugepte_shift) | 36 | * huge_boot_pages list is ready. */ |
35 | #define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << hugepte_shift) | 37 | static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; |
38 | static unsigned nr_gpages; | ||
36 | 39 | ||
37 | #define HUGEPD_SHIFT (HPAGE_SHIFT + hugepte_shift) | 40 | /* Array of valid huge page sizes - non-zero value(hugepte_shift) is |
38 | #define HUGEPD_SIZE (1UL << HUGEPD_SHIFT) | 41 | * stored for the huge page sizes that are valid. |
39 | #define HUGEPD_MASK (~(HUGEPD_SIZE-1)) | 42 | */ |
43 | unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ | ||
44 | |||
45 | #define hugepte_shift mmu_huge_psizes | ||
46 | #define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize]) | ||
47 | #define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize]) | ||
48 | |||
49 | #define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ | ||
50 | + hugepte_shift[psize]) | ||
51 | #define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) | ||
52 | #define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) | ||
53 | |||
54 | /* Subtract one from array size because we don't need a cache for 4K since | ||
55 | * it is not a huge page size */ | ||
56 | #define huge_pgtable_cache(psize) (pgtable_cache[HUGEPTE_CACHE_NUM \ | ||
57 | + psize-1]) | ||
58 | #define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) | ||
40 | 59 | ||
41 | #define huge_pgtable_cache (pgtable_cache[HUGEPTE_CACHE_NUM]) | 60 | static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { |
61 | "unused_4K", "hugepte_cache_64K", "unused_64K_AP", | ||
62 | "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G" | ||
63 | }; | ||
42 | 64 | ||
43 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() | 65 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() |
44 | * will choke on pointers to hugepte tables, which is handy for | 66 | * will choke on pointers to hugepte tables, which is handy for |
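Editor's note: the hunk above turns the single global hugepd geometry into per-size arrays. A standalone sketch of what PTRS_PER_HUGEPTE and HUGEPD_SIZE work out to for one size, assuming for illustration only a PUD_SHIFT of 30 and a 16M huge page (these are not the kernel's real constants):

    #include <stdio.h>

    int main(void)
    {
        unsigned int page_shift = 24;                 /* 16M huge page */
        unsigned int hugepte_shift = 30 - page_shift; /* PUD_SHIFT - page shift = 6 */

        unsigned long ptrs_per_hugepte = 1UL << hugepte_shift;           /* 64 ptes */
        unsigned long hugepd_size = 1UL << (page_shift + hugepte_shift); /* 1G span */

        printf("hugepte table: %lu entries covering 0x%lx bytes\n",
               ptrs_per_hugepte, hugepd_size);
        return 0;
    }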
@@ -49,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t;
49 | 71 | ||
50 | #define hugepd_none(hpd) ((hpd).pd == 0) | 72 | #define hugepd_none(hpd) ((hpd).pd == 0) |
51 | 73 | ||
74 | static inline int shift_to_mmu_psize(unsigned int shift) | ||
75 | { | ||
76 | switch (shift) { | ||
77 | #ifndef CONFIG_PPC_64K_PAGES | ||
78 | case PAGE_SHIFT_64K: | ||
79 | return MMU_PAGE_64K; | ||
80 | #endif | ||
81 | case PAGE_SHIFT_16M: | ||
82 | return MMU_PAGE_16M; | ||
83 | case PAGE_SHIFT_16G: | ||
84 | return MMU_PAGE_16G; | ||
85 | } | ||
86 | return -1; | ||
87 | } | ||
88 | |||
89 | static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) | ||
90 | { | ||
91 | if (mmu_psize_defs[mmu_psize].shift) | ||
92 | return mmu_psize_defs[mmu_psize].shift; | ||
93 | BUG(); | ||
94 | } | ||
95 | |||
52 | static inline pte_t *hugepd_page(hugepd_t hpd) | 96 | static inline pte_t *hugepd_page(hugepd_t hpd) |
53 | { | 97 | { |
54 | BUG_ON(!(hpd.pd & HUGEPD_OK)); | 98 | BUG_ON(!(hpd.pd & HUGEPD_OK)); |
55 | return (pte_t *)(hpd.pd & ~HUGEPD_OK); | 99 | return (pte_t *)(hpd.pd & ~HUGEPD_OK); |
56 | } | 100 | } |
57 | 101 | ||
58 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr) | 102 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, |
103 | struct hstate *hstate) | ||
59 | { | 104 | { |
60 | unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1)); | 105 | unsigned int shift = huge_page_shift(hstate); |
106 | int psize = shift_to_mmu_psize(shift); | ||
107 | unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1)); | ||
61 | pte_t *dir = hugepd_page(*hpdp); | 108 | pte_t *dir = hugepd_page(*hpdp); |
62 | 109 | ||
63 | return dir + idx; | 110 | return dir + idx; |
64 | } | 111 | } |
65 | 112 | ||
66 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, | 113 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, |
67 | unsigned long address) | 114 | unsigned long address, unsigned int psize) |
68 | { | 115 | { |
69 | pte_t *new = kmem_cache_alloc(huge_pgtable_cache, | 116 | pte_t *new = kmem_cache_alloc(huge_pgtable_cache(psize), |
70 | GFP_KERNEL|__GFP_REPEAT); | 117 | GFP_KERNEL|__GFP_REPEAT); |
71 | 118 | ||
72 | if (! new) | 119 | if (! new) |
@@ -74,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
74 | 121 | ||
75 | spin_lock(&mm->page_table_lock); | 122 | spin_lock(&mm->page_table_lock); |
76 | if (!hugepd_none(*hpdp)) | 123 | if (!hugepd_none(*hpdp)) |
77 | kmem_cache_free(huge_pgtable_cache, new); | 124 | kmem_cache_free(huge_pgtable_cache(psize), new); |
78 | else | 125 | else |
79 | hpdp->pd = (unsigned long)new | HUGEPD_OK; | 126 | hpdp->pd = (unsigned long)new | HUGEPD_OK; |
80 | spin_unlock(&mm->page_table_lock); | 127 | spin_unlock(&mm->page_table_lock); |
@@ -83,27 +130,60 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
83 | 130 | ||
84 | /* Base page size affects how we walk hugetlb page tables */ | 131 | /* Base page size affects how we walk hugetlb page tables */ |
85 | #ifdef CONFIG_PPC_64K_PAGES | 132 | #ifdef CONFIG_PPC_64K_PAGES |
86 | #define hpmd_offset(pud, addr) pmd_offset(pud, addr) | 133 | #define hpmd_offset(pud, addr, h) pmd_offset(pud, addr) |
87 | #define hpmd_alloc(mm, pud, addr) pmd_alloc(mm, pud, addr) | 134 | #define hpmd_alloc(mm, pud, addr, h) pmd_alloc(mm, pud, addr) |
88 | #else | 135 | #else |
89 | static inline | 136 | static inline |
90 | pmd_t *hpmd_offset(pud_t *pud, unsigned long addr) | 137 | pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate) |
91 | { | 138 | { |
92 | if (HPAGE_SHIFT == HPAGE_SHIFT_64K) | 139 | if (huge_page_shift(hstate) == PAGE_SHIFT_64K) |
93 | return pmd_offset(pud, addr); | 140 | return pmd_offset(pud, addr); |
94 | else | 141 | else |
95 | return (pmd_t *) pud; | 142 | return (pmd_t *) pud; |
96 | } | 143 | } |
97 | static inline | 144 | static inline |
98 | pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr) | 145 | pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr, |
146 | struct hstate *hstate) | ||
99 | { | 147 | { |
100 | if (HPAGE_SHIFT == HPAGE_SHIFT_64K) | 148 | if (huge_page_shift(hstate) == PAGE_SHIFT_64K) |
101 | return pmd_alloc(mm, pud, addr); | 149 | return pmd_alloc(mm, pud, addr); |
102 | else | 150 | else |
103 | return (pmd_t *) pud; | 151 | return (pmd_t *) pud; |
104 | } | 152 | } |
105 | #endif | 153 | #endif |
106 | 154 | ||
155 | /* Build list of addresses of gigantic pages. This function is used in early | ||
156 | * boot before the buddy or bootmem allocator is setup. | ||
157 | */ | ||
158 | void add_gpage(unsigned long addr, unsigned long page_size, | ||
159 | unsigned long number_of_pages) | ||
160 | { | ||
161 | if (!addr) | ||
162 | return; | ||
163 | while (number_of_pages > 0) { | ||
164 | gpage_freearray[nr_gpages] = addr; | ||
165 | nr_gpages++; | ||
166 | number_of_pages--; | ||
167 | addr += page_size; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | /* Moves the gigantic page addresses from the temporary list to the | ||
172 | * huge_boot_pages list. | ||
173 | */ | ||
174 | int alloc_bootmem_huge_page(struct hstate *hstate) | ||
175 | { | ||
176 | struct huge_bootmem_page *m; | ||
177 | if (nr_gpages == 0) | ||
178 | return 0; | ||
179 | m = phys_to_virt(gpage_freearray[--nr_gpages]); | ||
180 | gpage_freearray[nr_gpages] = 0; | ||
181 | list_add(&m->list, &huge_boot_pages); | ||
182 | m->hstate = hstate; | ||
183 | return 1; | ||
184 | } | ||
185 | |||
186 | |||
107 | /* Modelled after find_linux_pte() */ | 187 | /* Modelled after find_linux_pte() */ |
108 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 188 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
109 | { | 189 | { |
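Editor's note: add_gpage() and alloc_bootmem_huge_page() above use gpage_freearray as a plain LIFO stack: the early device-tree scan pushes the physical address of each gigantic page, and hstate setup later pops them one at a time. A compilable model of that discipline (the addresses are invented):

    #include <stdio.h>

    #define MAX_NUMBER_GPAGES 1024

    static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
    static unsigned nr_gpages;

    /* push one entry per page of the block, as add_gpage() does */
    static void add_gpage(unsigned long addr, unsigned long page_size,
                          unsigned long number_of_pages)
    {
        while (number_of_pages--) {
            gpage_freearray[nr_gpages++] = addr;
            addr += page_size;
        }
    }

    /* pop one recorded page, as alloc_bootmem_huge_page() does */
    static unsigned long take_gpage(void)
    {
        if (nr_gpages == 0)
            return 0;
        return gpage_freearray[--nr_gpages];
    }

    int main(void)
    {
        add_gpage(0x100000000UL, 1UL << 34, 2);   /* two 16G pages */
        printf("popped 0x%lx\n", take_gpage());   /* 0x500000000 */
        printf("popped 0x%lx\n", take_gpage());   /* 0x100000000 */
        return 0;
    }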
@@ -111,39 +191,52 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
111 | pud_t *pu; | 191 | pud_t *pu; |
112 | pmd_t *pm; | 192 | pmd_t *pm; |
113 | 193 | ||
114 | BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); | 194 | unsigned int psize; |
195 | unsigned int shift; | ||
196 | unsigned long sz; | ||
197 | struct hstate *hstate; | ||
198 | psize = get_slice_psize(mm, addr); | ||
199 | shift = mmu_psize_to_shift(psize); | ||
200 | sz = ((1UL) << shift); | ||
201 | hstate = size_to_hstate(sz); | ||
115 | 202 | ||
116 | addr &= HPAGE_MASK; | 203 | addr &= hstate->mask; |
117 | 204 | ||
118 | pg = pgd_offset(mm, addr); | 205 | pg = pgd_offset(mm, addr); |
119 | if (!pgd_none(*pg)) { | 206 | if (!pgd_none(*pg)) { |
120 | pu = pud_offset(pg, addr); | 207 | pu = pud_offset(pg, addr); |
121 | if (!pud_none(*pu)) { | 208 | if (!pud_none(*pu)) { |
122 | pm = hpmd_offset(pu, addr); | 209 | pm = hpmd_offset(pu, addr, hstate); |
123 | if (!pmd_none(*pm)) | 210 | if (!pmd_none(*pm)) |
124 | return hugepte_offset((hugepd_t *)pm, addr); | 211 | return hugepte_offset((hugepd_t *)pm, addr, |
212 | hstate); | ||
125 | } | 213 | } |
126 | } | 214 | } |
127 | 215 | ||
128 | return NULL; | 216 | return NULL; |
129 | } | 217 | } |
130 | 218 | ||
131 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 219 | pte_t *huge_pte_alloc(struct mm_struct *mm, |
220 | unsigned long addr, unsigned long sz) | ||
132 | { | 221 | { |
133 | pgd_t *pg; | 222 | pgd_t *pg; |
134 | pud_t *pu; | 223 | pud_t *pu; |
135 | pmd_t *pm; | 224 | pmd_t *pm; |
136 | hugepd_t *hpdp = NULL; | 225 | hugepd_t *hpdp = NULL; |
226 | struct hstate *hstate; | ||
227 | unsigned int psize; | ||
228 | hstate = size_to_hstate(sz); | ||
137 | 229 | ||
138 | BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); | 230 | psize = get_slice_psize(mm, addr); |
231 | BUG_ON(!mmu_huge_psizes[psize]); | ||
139 | 232 | ||
140 | addr &= HPAGE_MASK; | 233 | addr &= hstate->mask; |
141 | 234 | ||
142 | pg = pgd_offset(mm, addr); | 235 | pg = pgd_offset(mm, addr); |
143 | pu = pud_alloc(mm, pg, addr); | 236 | pu = pud_alloc(mm, pg, addr); |
144 | 237 | ||
145 | if (pu) { | 238 | if (pu) { |
146 | pm = hpmd_alloc(mm, pu, addr); | 239 | pm = hpmd_alloc(mm, pu, addr, hstate); |
147 | if (pm) | 240 | if (pm) |
148 | hpdp = (hugepd_t *)pm; | 241 | hpdp = (hugepd_t *)pm; |
149 | } | 242 | } |
@@ -151,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
151 | if (! hpdp) | 244 | if (! hpdp) |
152 | return NULL; | 245 | return NULL; |
153 | 246 | ||
154 | if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr)) | 247 | if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize)) |
155 | return NULL; | 248 | return NULL; |
156 | 249 | ||
157 | return hugepte_offset(hpdp, addr); | 250 | return hugepte_offset(hpdp, addr, hstate); |
158 | } | 251 | } |
159 | 252 | ||
160 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | 253 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) |
@@ -162,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
162 | return 0; | 255 | return 0; |
163 | } | 256 | } |
164 | 257 | ||
165 | static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) | 258 | static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp, |
259 | unsigned int psize) | ||
166 | { | 260 | { |
167 | pte_t *hugepte = hugepd_page(*hpdp); | 261 | pte_t *hugepte = hugepd_page(*hpdp); |
168 | 262 | ||
169 | hpdp->pd = 0; | 263 | hpdp->pd = 0; |
170 | tlb->need_flush = 1; | 264 | tlb->need_flush = 1; |
171 | pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM, | 265 | pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, |
266 | HUGEPTE_CACHE_NUM+psize-1, | ||
172 | PGF_CACHENUM_MASK)); | 267 | PGF_CACHENUM_MASK)); |
173 | } | 268 | } |
174 | 269 | ||
175 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | 270 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, |
176 | unsigned long addr, unsigned long end, | 271 | unsigned long addr, unsigned long end, |
177 | unsigned long floor, unsigned long ceiling) | 272 | unsigned long floor, unsigned long ceiling, |
273 | unsigned int psize) | ||
178 | { | 274 | { |
179 | pmd_t *pmd; | 275 | pmd_t *pmd; |
180 | unsigned long next; | 276 | unsigned long next; |
@@ -186,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
186 | next = pmd_addr_end(addr, end); | 282 | next = pmd_addr_end(addr, end); |
187 | if (pmd_none(*pmd)) | 283 | if (pmd_none(*pmd)) |
188 | continue; | 284 | continue; |
189 | free_hugepte_range(tlb, (hugepd_t *)pmd); | 285 | free_hugepte_range(tlb, (hugepd_t *)pmd, psize); |
190 | } while (pmd++, addr = next, addr != end); | 286 | } while (pmd++, addr = next, addr != end); |
191 | 287 | ||
192 | start &= PUD_MASK; | 288 | start &= PUD_MASK; |
@@ -212,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
212 | pud_t *pud; | 308 | pud_t *pud; |
213 | unsigned long next; | 309 | unsigned long next; |
214 | unsigned long start; | 310 | unsigned long start; |
311 | unsigned int shift; | ||
312 | unsigned int psize = get_slice_psize(tlb->mm, addr); | ||
313 | shift = mmu_psize_to_shift(psize); | ||
215 | 314 | ||
216 | start = addr; | 315 | start = addr; |
217 | pud = pud_offset(pgd, addr); | 316 | pud = pud_offset(pgd, addr); |
@@ -220,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
220 | #ifdef CONFIG_PPC_64K_PAGES | 319 | #ifdef CONFIG_PPC_64K_PAGES |
221 | if (pud_none_or_clear_bad(pud)) | 320 | if (pud_none_or_clear_bad(pud)) |
222 | continue; | 321 | continue; |
223 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); | 322 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling, |
323 | psize); | ||
224 | #else | 324 | #else |
225 | if (HPAGE_SHIFT == HPAGE_SHIFT_64K) { | 325 | if (shift == PAGE_SHIFT_64K) { |
226 | if (pud_none_or_clear_bad(pud)) | 326 | if (pud_none_or_clear_bad(pud)) |
227 | continue; | 327 | continue; |
228 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); | 328 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, |
329 | ceiling, psize); | ||
229 | } else { | 330 | } else { |
230 | if (pud_none(*pud)) | 331 | if (pud_none(*pud)) |
231 | continue; | 332 | continue; |
232 | free_hugepte_range(tlb, (hugepd_t *)pud); | 333 | free_hugepte_range(tlb, (hugepd_t *)pud, psize); |
233 | } | 334 | } |
234 | #endif | 335 | #endif |
235 | } while (pud++, addr = next, addr != end); | 336 | } while (pud++, addr = next, addr != end); |
@@ -255,7 +356,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
255 | * | 356 | * |
256 | * Must be called with pagetable lock held. | 357 | * Must be called with pagetable lock held. |
257 | */ | 358 | */ |
258 | void hugetlb_free_pgd_range(struct mmu_gather **tlb, | 359 | void hugetlb_free_pgd_range(struct mmu_gather *tlb, |
259 | unsigned long addr, unsigned long end, | 360 | unsigned long addr, unsigned long end, |
260 | unsigned long floor, unsigned long ceiling) | 361 | unsigned long floor, unsigned long ceiling) |
261 | { | 362 | { |
@@ -297,31 +398,33 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
297 | * now has no other vmas using it, so can be freed, we don't | 398 | * now has no other vmas using it, so can be freed, we don't |
298 | * bother to round floor or end up - the tests don't need that. | 399 | * bother to round floor or end up - the tests don't need that. |
299 | */ | 400 | */ |
401 | unsigned int psize = get_slice_psize(tlb->mm, addr); | ||
300 | 402 | ||
301 | addr &= HUGEPD_MASK; | 403 | addr &= HUGEPD_MASK(psize); |
302 | if (addr < floor) { | 404 | if (addr < floor) { |
303 | addr += HUGEPD_SIZE; | 405 | addr += HUGEPD_SIZE(psize); |
304 | if (!addr) | 406 | if (!addr) |
305 | return; | 407 | return; |
306 | } | 408 | } |
307 | if (ceiling) { | 409 | if (ceiling) { |
308 | ceiling &= HUGEPD_MASK; | 410 | ceiling &= HUGEPD_MASK(psize); |
309 | if (!ceiling) | 411 | if (!ceiling) |
310 | return; | 412 | return; |
311 | } | 413 | } |
312 | if (end - 1 > ceiling - 1) | 414 | if (end - 1 > ceiling - 1) |
313 | end -= HUGEPD_SIZE; | 415 | end -= HUGEPD_SIZE(psize); |
314 | if (addr > end - 1) | 416 | if (addr > end - 1) |
315 | return; | 417 | return; |
316 | 418 | ||
317 | start = addr; | 419 | start = addr; |
318 | pgd = pgd_offset((*tlb)->mm, addr); | 420 | pgd = pgd_offset(tlb->mm, addr); |
319 | do { | 421 | do { |
320 | BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); | 422 | psize = get_slice_psize(tlb->mm, addr); |
423 | BUG_ON(!mmu_huge_psizes[psize]); | ||
321 | next = pgd_addr_end(addr, end); | 424 | next = pgd_addr_end(addr, end); |
322 | if (pgd_none_or_clear_bad(pgd)) | 425 | if (pgd_none_or_clear_bad(pgd)) |
323 | continue; | 426 | continue; |
324 | hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling); | 427 | hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); |
325 | } while (pgd++, addr = next, addr != end); | 428 | } while (pgd++, addr = next, addr != end); |
326 | } | 429 | } |
327 | 430 | ||
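Editor's note: the floor/ceiling logic above rounds the range inward to whole hugepd-covered spans, so a table still shared with a neighbouring mapping is never freed. A sketch of the rounding with an invented 1G hugepd span:

    #include <stdio.h>

    #define HUGEPD_SIZE 0x40000000UL          /* invented: one hugepd covers 1G */
    #define HUGEPD_MASK (~(HUGEPD_SIZE - 1))

    int main(void)
    {
        unsigned long addr = 0x50000000UL;    /* start of the range being freed */
        unsigned long floor = 0x48000000UL;   /* nothing below this may be freed */

        addr &= HUGEPD_MASK;                  /* round down to 0x40000000 */
        if (addr < floor)
            addr += HUGEPD_SIZE;              /* skip the partially covered span */
        printf("first hugepd actually freed: 0x%lx\n", addr); /* 0x80000000 */
        return 0;
    }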
@@ -334,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
334 | * necessary anymore if we make hpte_need_flush() get the | 437 | * necessary anymore if we make hpte_need_flush() get the |
335 | * page size from the slices | 438 | * page size from the slices |
336 | */ | 439 | */ |
337 | pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1); | 440 | unsigned int psize = get_slice_psize(mm, addr); |
441 | unsigned int shift = mmu_psize_to_shift(psize); | ||
442 | unsigned long sz = ((1UL) << shift); | ||
443 | struct hstate *hstate = size_to_hstate(sz); | ||
444 | pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1); | ||
338 | } | 445 | } |
339 | *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); | 446 | *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); |
340 | } | 447 | } |
@@ -351,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
351 | { | 458 | { |
352 | pte_t *ptep; | 459 | pte_t *ptep; |
353 | struct page *page; | 460 | struct page *page; |
461 | unsigned int mmu_psize = get_slice_psize(mm, address); | ||
354 | 462 | ||
355 | if (get_slice_psize(mm, address) != mmu_huge_psize) | 463 | /* Verify it is a huge page else bail. */ |
464 | if (!mmu_huge_psizes[mmu_psize]) | ||
356 | return ERR_PTR(-EINVAL); | 465 | return ERR_PTR(-EINVAL); |
357 | 466 | ||
358 | ptep = huge_pte_offset(mm, address); | 467 | ptep = huge_pte_offset(mm, address); |
359 | page = pte_page(*ptep); | 468 | page = pte_page(*ptep); |
360 | if (page) | 469 | if (page) { |
361 | page += (address % HPAGE_SIZE) / PAGE_SIZE; | 470 | unsigned int shift = mmu_psize_to_shift(mmu_psize); |
471 | unsigned long sz = ((1UL) << shift); | ||
472 | page += (address % sz) / PAGE_SIZE; | ||
473 | } | ||
362 | 474 | ||
363 | return page; | 475 | return page; |
364 | } | 476 | } |
@@ -368,6 +480,11 @@ int pmd_huge(pmd_t pmd)
368 | return 0; | 480 | return 0; |
369 | } | 481 | } |
370 | 482 | ||
483 | int pud_huge(pud_t pud) | ||
484 | { | ||
485 | return 0; | ||
486 | } | ||
487 | |||
371 | struct page * | 488 | struct page * |
372 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 489 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
373 | pmd_t *pmd, int write) | 490 | pmd_t *pmd, int write) |
@@ -381,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
381 | unsigned long len, unsigned long pgoff, | 498 | unsigned long len, unsigned long pgoff, |
382 | unsigned long flags) | 499 | unsigned long flags) |
383 | { | 500 | { |
384 | return slice_get_unmapped_area(addr, len, flags, | 501 | struct hstate *hstate = hstate_file(file); |
385 | mmu_huge_psize, 1, 0); | 502 | int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); |
503 | return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); | ||
386 | } | 504 | } |
387 | 505 | ||
388 | /* | 506 | /* |
389 | * Called by asm hashtable.S for doing lazy icache flush | 507 | * Called by asm hashtable.S for doing lazy icache flush |
390 | */ | 508 | */ |
391 | static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, | 509 | static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, |
392 | pte_t pte, int trap) | 510 | pte_t pte, int trap, unsigned long sz) |
393 | { | 511 | { |
394 | struct page *page; | 512 | struct page *page; |
395 | int i; | 513 | int i; |
@@ -402,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
402 | /* page is dirty */ | 520 | /* page is dirty */ |
403 | if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { | 521 | if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { |
404 | if (trap == 0x400) { | 522 | if (trap == 0x400) { |
405 | for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) | 523 | for (i = 0; i < (sz / PAGE_SIZE); i++) |
406 | __flush_dcache_icache(page_address(page+i)); | 524 | __flush_dcache_icache(page_address(page+i)); |
407 | set_bit(PG_arch_1, &page->flags); | 525 | set_bit(PG_arch_1, &page->flags); |
408 | } else { | 526 | } else { |
@@ -418,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
418 | { | 536 | { |
419 | pte_t *ptep; | 537 | pte_t *ptep; |
420 | unsigned long old_pte, new_pte; | 538 | unsigned long old_pte, new_pte; |
421 | unsigned long va, rflags, pa; | 539 | unsigned long va, rflags, pa, sz; |
422 | long slot; | 540 | long slot; |
423 | int err = 1; | 541 | int err = 1; |
424 | int ssize = user_segment_size(ea); | 542 | int ssize = user_segment_size(ea); |
543 | unsigned int mmu_psize; | ||
544 | int shift; | ||
545 | mmu_psize = get_slice_psize(mm, ea); | ||
425 | 546 | ||
547 | if (!mmu_huge_psizes[mmu_psize]) | ||
548 | goto out; | ||
426 | ptep = huge_pte_offset(mm, ea); | 549 | ptep = huge_pte_offset(mm, ea); |
427 | 550 | ||
428 | /* Search the Linux page table for a match with va */ | 551 | /* Search the Linux page table for a match with va */ |
@@ -465,30 +588,32 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
465 | rflags = 0x2 | (!(new_pte & _PAGE_RW)); | 588 | rflags = 0x2 | (!(new_pte & _PAGE_RW)); |
466 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ | 589 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ |
467 | rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); | 590 | rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); |
591 | shift = mmu_psize_to_shift(mmu_psize); | ||
592 | sz = ((1UL) << shift); | ||
468 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | 593 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) |
469 | /* No CPU has hugepages but lacks no execute, so we | 594 | /* No CPU has hugepages but lacks no execute, so we |
470 | * don't need to worry about that case */ | 595 | * don't need to worry about that case */ |
471 | rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), | 596 | rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), |
472 | trap); | 597 | trap, sz); |
473 | 598 | ||
474 | /* Check if pte already has an hpte (case 2) */ | 599 | /* Check if pte already has an hpte (case 2) */ |
475 | if (unlikely(old_pte & _PAGE_HASHPTE)) { | 600 | if (unlikely(old_pte & _PAGE_HASHPTE)) { |
476 | /* There MIGHT be an HPTE for this pte */ | 601 | /* There MIGHT be an HPTE for this pte */ |
477 | unsigned long hash, slot; | 602 | unsigned long hash, slot; |
478 | 603 | ||
479 | hash = hpt_hash(va, HPAGE_SHIFT, ssize); | 604 | hash = hpt_hash(va, shift, ssize); |
480 | if (old_pte & _PAGE_F_SECOND) | 605 | if (old_pte & _PAGE_F_SECOND) |
481 | hash = ~hash; | 606 | hash = ~hash; |
482 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | 607 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; |
483 | slot += (old_pte & _PAGE_F_GIX) >> 12; | 608 | slot += (old_pte & _PAGE_F_GIX) >> 12; |
484 | 609 | ||
485 | if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, | 610 | if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, |
486 | ssize, local) == -1) | 611 | ssize, local) == -1) |
487 | old_pte &= ~_PAGE_HPTEFLAGS; | 612 | old_pte &= ~_PAGE_HPTEFLAGS; |
488 | } | 613 | } |
489 | 614 | ||
490 | if (likely(!(old_pte & _PAGE_HASHPTE))) { | 615 | if (likely(!(old_pte & _PAGE_HASHPTE))) { |
491 | unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize); | 616 | unsigned long hash = hpt_hash(va, shift, ssize); |
492 | unsigned long hpte_group; | 617 | unsigned long hpte_group; |
493 | 618 | ||
494 | pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; | 619 | pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; |
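Editor's note: the _PAGE_HASHPTE path above rebuilds the HPTE slot number from the hash: eight HPTEs per group, the group chosen by the (possibly inverted) hash, plus the index recovered from _PAGE_F_GIX. A sketch of that slot arithmetic with invented values:

    #include <stdio.h>

    #define HPTES_PER_GROUP 8

    int main(void)
    {
        unsigned long htab_hash_mask = 0xffffUL;  /* invented table geometry */
        unsigned long hash = 0x123456789abcdefUL; /* stand-in for hpt_hash() */
        int secondary = 1;                        /* as if _PAGE_F_SECOND were set */
        unsigned long gix = 3;                    /* from (pte & _PAGE_F_GIX) >> 12 */
        unsigned long slot;

        if (secondary)
            hash = ~hash;                         /* secondary group uses ~hash */
        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP + gix;
        printf("hpte slot = 0x%lx\n", slot);
        return 0;
    }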
@@ -509,7 +634,7 @@ repeat:
509 | 634 | ||
510 | /* Insert into the hash table, primary slot */ | 635 | /* Insert into the hash table, primary slot */ |
511 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, | 636 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, |
512 | mmu_huge_psize, ssize); | 637 | mmu_psize, ssize); |
513 | 638 | ||
514 | /* Primary is full, try the secondary */ | 639 | /* Primary is full, try the secondary */ |
515 | if (unlikely(slot == -1)) { | 640 | if (unlikely(slot == -1)) { |
@@ -517,7 +642,7 @@ repeat:
517 | HPTES_PER_GROUP) & ~0x7UL; | 642 | HPTES_PER_GROUP) & ~0x7UL; |
518 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, | 643 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, |
519 | HPTE_V_SECONDARY, | 644 | HPTE_V_SECONDARY, |
520 | mmu_huge_psize, ssize); | 645 | mmu_psize, ssize); |
521 | if (slot == -1) { | 646 | if (slot == -1) { |
522 | if (mftb() & 0x1) | 647 | if (mftb() & 0x1) |
523 | hpte_group = ((hash & htab_hash_mask) * | 648 | hpte_group = ((hash & htab_hash_mask) * |
@@ -549,45 +674,54 @@ void set_huge_psize(int psize)
549 | { | 674 | { |
550 | /* Check that it is a page size supported by the hardware and | 675 | /* Check that it is a page size supported by the hardware and |
551 | * that it fits within pagetable limits. */ | 676 | * that it fits within pagetable limits. */ |
552 | if (mmu_psize_defs[psize].shift && mmu_psize_defs[psize].shift < SID_SHIFT && | 677 | if (mmu_psize_defs[psize].shift && |
678 | mmu_psize_defs[psize].shift < SID_SHIFT_1T && | ||
553 | (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || | 679 | (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || |
554 | mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) { | 680 | mmu_psize_defs[psize].shift == PAGE_SHIFT_64K || |
555 | HPAGE_SHIFT = mmu_psize_defs[psize].shift; | 681 | mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) { |
556 | mmu_huge_psize = psize; | 682 | /* Return if huge page size has already been setup or is the |
557 | #ifdef CONFIG_PPC_64K_PAGES | 683 | * same as the base page size. */ |
558 | hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); | 684 | if (mmu_huge_psizes[psize] || |
559 | #else | 685 | mmu_psize_defs[psize].shift == PAGE_SHIFT) |
560 | if (HPAGE_SHIFT == HPAGE_SHIFT_64K) | 686 | return; |
561 | hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); | 687 | hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); |
562 | else | 688 | |
563 | hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT); | 689 | switch (mmu_psize_defs[psize].shift) { |
564 | #endif | 690 | case PAGE_SHIFT_64K: |
565 | 691 | /* We only allow 64k hpages with 4k base page, | |
692 | * which was checked above, and always put them | ||
693 | * at the PMD */ | ||
694 | hugepte_shift[psize] = PMD_SHIFT; | ||
695 | break; | ||
696 | case PAGE_SHIFT_16M: | ||
697 | /* 16M pages can be at two different levels | ||
698 | * of pagestables based on base page size */ | ||
699 | if (PAGE_SHIFT == PAGE_SHIFT_64K) | ||
700 | hugepte_shift[psize] = PMD_SHIFT; | ||
701 | else /* 4k base page */ | ||
702 | hugepte_shift[psize] = PUD_SHIFT; | ||
703 | break; | ||
704 | case PAGE_SHIFT_16G: | ||
705 | /* 16G pages are always at PGD level */ | ||
706 | hugepte_shift[psize] = PGDIR_SHIFT; | ||
707 | break; | ||
708 | } | ||
709 | hugepte_shift[psize] -= mmu_psize_defs[psize].shift; | ||
566 | } else | 710 | } else |
567 | HPAGE_SHIFT = 0; | 711 | hugepte_shift[psize] = 0; |
568 | } | 712 | } |
569 | 713 | ||
570 | static int __init hugepage_setup_sz(char *str) | 714 | static int __init hugepage_setup_sz(char *str) |
571 | { | 715 | { |
572 | unsigned long long size; | 716 | unsigned long long size; |
573 | int mmu_psize = -1; | 717 | int mmu_psize; |
574 | int shift; | 718 | int shift; |
575 | 719 | ||
576 | size = memparse(str, &str); | 720 | size = memparse(str, &str); |
577 | 721 | ||
578 | shift = __ffs(size); | 722 | shift = __ffs(size); |
579 | switch (shift) { | 723 | mmu_psize = shift_to_mmu_psize(shift); |
580 | #ifndef CONFIG_PPC_64K_PAGES | 724 | if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) |
581 | case HPAGE_SHIFT_64K: | ||
582 | mmu_psize = MMU_PAGE_64K; | ||
583 | break; | ||
584 | #endif | ||
585 | case HPAGE_SHIFT_16M: | ||
586 | mmu_psize = MMU_PAGE_16M; | ||
587 | break; | ||
588 | } | ||
589 | |||
590 | if (mmu_psize >=0 && mmu_psize_defs[mmu_psize].shift) | ||
591 | set_huge_psize(mmu_psize); | 725 | set_huge_psize(mmu_psize); |
592 | else | 726 | else |
593 | printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); | 727 | printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); |
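Editor's note: hugepage_setup_sz() now derives the psize from the size's shift instead of an open-coded switch. A userspace sketch of the option handling, with a simplified stand-in for the kernel's memparse() (only one trailing K/M/G suffix is handled):

    #include <stdio.h>
    #include <stdlib.h>

    /* crude memparse() substitute: a number plus one K/M/G suffix */
    static unsigned long long parse_size(const char *s)
    {
        char *end;
        unsigned long long v = strtoull(s, &end, 0);

        switch (*end) {
        case 'G': case 'g': v <<= 10; /* fall through */
        case 'M': case 'm': v <<= 10; /* fall through */
        case 'K': case 'k': v <<= 10;
        }
        return v;
    }

    int main(void)
    {
        unsigned long long size = parse_size("16G");
        int shift = __builtin_ctzll(size);  /* __ffs() equivalent (GCC builtin) */

        printf("hugepagesz=16G -> size 0x%llx, shift %d\n", size, shift);
        return 0;
    }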
@@ -603,16 +737,31 @@ static void zero_ctor(struct kmem_cache *cache, void *addr)
603 | 737 | ||
604 | static int __init hugetlbpage_init(void) | 738 | static int __init hugetlbpage_init(void) |
605 | { | 739 | { |
740 | unsigned int psize; | ||
741 | |||
606 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) | 742 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) |
607 | return -ENODEV; | 743 | return -ENODEV; |
608 | 744 | /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE | |
609 | huge_pgtable_cache = kmem_cache_create("hugepte_cache", | 745 | * and adjust PTE_NONCACHE_NUM if the number of supported huge page |
610 | HUGEPTE_TABLE_SIZE, | 746 | * sizes changes. |
611 | HUGEPTE_TABLE_SIZE, | 747 | */ |
612 | 0, | 748 | set_huge_psize(MMU_PAGE_16M); |
613 | zero_ctor); | 749 | set_huge_psize(MMU_PAGE_64K); |
614 | if (! huge_pgtable_cache) | 750 | set_huge_psize(MMU_PAGE_16G); |
615 | panic("hugetlbpage_init(): could not create hugepte cache\n"); | 751 | |
752 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { | ||
753 | if (mmu_huge_psizes[psize]) { | ||
754 | huge_pgtable_cache(psize) = kmem_cache_create( | ||
755 | HUGEPTE_CACHE_NAME(psize), | ||
756 | HUGEPTE_TABLE_SIZE(psize), | ||
757 | HUGEPTE_TABLE_SIZE(psize), | ||
758 | 0, | ||
759 | zero_ctor); | ||
760 | if (!huge_pgtable_cache(psize)) | ||
761 | panic("hugetlbpage_init(): could not create %s"\ | ||
762 | "\n", HUGEPTE_CACHE_NAME(psize)); | ||
763 | } | ||
764 | } | ||
616 | 765 | ||
617 | return 0; | 766 | return 0; |
618 | } | 767 | } |
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6ef63caca682..a41bc5aa2043 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -153,10 +153,10 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
153 | }; | 153 | }; |
154 | 154 | ||
155 | #ifdef CONFIG_HUGETLB_PAGE | 155 | #ifdef CONFIG_HUGETLB_PAGE |
156 | /* Hugepages need one extra cache, initialized in hugetlbpage.c. We | 156 | /* Hugepages need an extra cache per hugepagesize, initialized in |
157 | * can't put into the tables above, because HPAGE_SHIFT is not compile | 157 | * hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT |
158 | * time constant. */ | 158 | * is not compile time constant. */ |
159 | struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1]; | 159 | struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT]; |
160 | #else | 160 | #else |
161 | struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; | 161 | struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; |
162 | #endif | 162 | #endif |
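Editor's note: the change above grows pgtable_cache[] by MMU_PAGE_COUNT slots, and hugetlbpage.c addresses one cache per huge page size as pgtable_cache[HUGEPTE_CACHE_NUM + psize - 1]; the "- 1" exists because 4K (psize 0) never needs a hugepte cache. A toy model of that indexing, with invented slot counts:

    #include <stdio.h>

    #define MMU_PAGE_COUNT    6  /* invented for illustration */
    #define HUGEPTE_CACHE_NUM 2  /* fixed caches ahead of the hugepte slots */

    static const char *pgtable_cache[HUGEPTE_CACHE_NUM + MMU_PAGE_COUNT];

    int main(void)
    {
        int psize;

        /* skip psize 0 (the 4K base size), as huge_pgtable_cache() does */
        for (psize = 1; psize < MMU_PAGE_COUNT; psize++) {
            pgtable_cache[HUGEPTE_CACHE_NUM + psize - 1] = "hugepte cache";
            printf("psize %d -> pgtable_cache[%d]\n",
                   psize, HUGEPTE_CACHE_NUM + psize - 1);
        }
        return 0;
    }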
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index cf4bffba6f7c..d9a181351332 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -39,7 +39,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
39 | EXPORT_SYMBOL(numa_cpumask_lookup_table); | 39 | EXPORT_SYMBOL(numa_cpumask_lookup_table); |
40 | EXPORT_SYMBOL(node_data); | 40 | EXPORT_SYMBOL(node_data); |
41 | 41 | ||
42 | static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; | ||
43 | static int min_common_depth; | 42 | static int min_common_depth; |
44 | static int n_mem_addr_cells, n_mem_size_cells; | 43 | static int n_mem_addr_cells, n_mem_size_cells; |
45 | 44 | ||
@@ -816,7 +815,7 @@ void __init do_init_bootmem(void)
816 | dbg("node %d\n", nid); | 815 | dbg("node %d\n", nid); |
817 | dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); | 816 | dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); |
818 | 817 | ||
819 | NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; | 818 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; |
820 | NODE_DATA(nid)->node_start_pfn = start_pfn; | 819 | NODE_DATA(nid)->node_start_pfn = start_pfn; |
821 | NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; | 820 | NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; |
822 | 821 | ||
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index c7584072dfcc..2001abdb1912 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -145,13 +145,20 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage)
145 | void __iomem * | 145 | void __iomem * |
146 | ioremap(phys_addr_t addr, unsigned long size) | 146 | ioremap(phys_addr_t addr, unsigned long size) |
147 | { | 147 | { |
148 | return __ioremap(addr, size, _PAGE_NO_CACHE); | 148 | return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); |
149 | } | 149 | } |
150 | EXPORT_SYMBOL(ioremap); | 150 | EXPORT_SYMBOL(ioremap); |
151 | 151 | ||
152 | void __iomem * | 152 | void __iomem * |
153 | ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) | 153 | ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) |
154 | { | 154 | { |
155 | /* writeable implies dirty for kernel addresses */ | ||
156 | if (flags & _PAGE_RW) | ||
157 | flags |= _PAGE_DIRTY | _PAGE_HWWRITE; | ||
158 | |||
159 | /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ | ||
160 | flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC); | ||
161 | |||
155 | return __ioremap(addr, size, flags); | 162 | return __ioremap(addr, size, flags); |
156 | } | 163 | } |
157 | EXPORT_SYMBOL(ioremap_flags); | 164 | EXPORT_SYMBOL(ioremap_flags); |
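Editor's note: the new filtering in ioremap_flags() above marks writable kernel mappings dirty up front and strips user/exec permission bits before they can leak into an MMIO mapping. A standalone sketch of the same bit hygiene (the bit values are invented, not the real _PAGE_* masks):

    #include <stdio.h>

    /* invented bit positions, for illustration only */
    #define _PAGE_RW      0x001
    #define _PAGE_DIRTY   0x002
    #define _PAGE_HWWRITE 0x004
    #define _PAGE_USER    0x008
    #define _PAGE_EXEC    0x010
    #define _PAGE_HWEXEC  0x020

    static unsigned long sanitize_ioremap_flags(unsigned long flags)
    {
        /* writable implies dirty for kernel addresses */
        if (flags & _PAGE_RW)
            flags |= _PAGE_DIRTY | _PAGE_HWWRITE;
        /* never let user or exec permissions reach an MMIO mapping */
        return flags & ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC);
    }

    int main(void)
    {
        unsigned long in = _PAGE_RW | _PAGE_USER | _PAGE_EXEC;

        printf("0x%03lx -> 0x%03lx\n", in, sanitize_ioremap_flags(in));
        return 0;
    }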
@@ -163,6 +170,14 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
163 | phys_addr_t p; | 170 | phys_addr_t p; |
164 | int err; | 171 | int err; |
165 | 172 | ||
173 | /* Make sure we have the base flags */ | ||
174 | if ((flags & _PAGE_PRESENT) == 0) | ||
175 | flags |= _PAGE_KERNEL; | ||
176 | |||
177 | /* Non-cacheable page cannot be coherent */ | ||
178 | if (flags & _PAGE_NO_CACHE) | ||
179 | flags &= ~_PAGE_COHERENT; | ||
180 | |||
166 | /* | 181 | /* |
167 | * Choose an address to map it to. | 182 | * Choose an address to map it to. |
168 | * Once the vmalloc system is running, we use it. | 183 | * Once the vmalloc system is running, we use it. |
@@ -219,11 +234,6 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
219 | v = (ioremap_bot -= size); | 234 | v = (ioremap_bot -= size); |
220 | } | 235 | } |
221 | 236 | ||
222 | if ((flags & _PAGE_PRESENT) == 0) | ||
223 | flags |= _PAGE_KERNEL; | ||
224 | if (flags & _PAGE_NO_CACHE) | ||
225 | flags |= _PAGE_GUARDED; | ||
226 | |||
227 | /* | 237 | /* |
228 | * Should check if it is a candidate for a BAT mapping | 238 | * Should check if it is a candidate for a BAT mapping |
229 | */ | 239 | */ |
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3ef0ad2f9ca0..365e61ae5dbc 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -107,9 +107,18 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
107 | { | 107 | { |
108 | unsigned long i; | 108 | unsigned long i; |
109 | 109 | ||
110 | /* Make sure we have the base flags */ | ||
110 | if ((flags & _PAGE_PRESENT) == 0) | 111 | if ((flags & _PAGE_PRESENT) == 0) |
111 | flags |= pgprot_val(PAGE_KERNEL); | 112 | flags |= pgprot_val(PAGE_KERNEL); |
112 | 113 | ||
114 | /* Non-cacheable page cannot be coherent */ | ||
115 | if (flags & _PAGE_NO_CACHE) | ||
116 | flags &= ~_PAGE_COHERENT; | ||
117 | |||
118 | /* We don't support the 4K PFN hack with ioremap */ | ||
119 | if (flags & _PAGE_4K_PFN) | ||
120 | return NULL; | ||
121 | |||
113 | WARN_ON(pa & ~PAGE_MASK); | 122 | WARN_ON(pa & ~PAGE_MASK); |
114 | WARN_ON(((unsigned long)ea) & ~PAGE_MASK); | 123 | WARN_ON(((unsigned long)ea) & ~PAGE_MASK); |
115 | WARN_ON(size & ~PAGE_MASK); | 124 | WARN_ON(size & ~PAGE_MASK); |
@@ -190,6 +199,13 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
190 | void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, | 199 | void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, |
191 | unsigned long flags) | 200 | unsigned long flags) |
192 | { | 201 | { |
202 | /* writeable implies dirty for kernel addresses */ | ||
203 | if (flags & _PAGE_RW) | ||
204 | flags |= _PAGE_DIRTY; | ||
205 | |||
206 | /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ | ||
207 | flags &= ~(_PAGE_USER | _PAGE_EXEC); | ||
208 | |||
193 | if (ppc_md.ioremap) | 209 | if (ppc_md.ioremap) |
194 | return ppc_md.ioremap(addr, size, flags); | 210 | return ppc_md.ioremap(addr, size, flags); |
195 | return __ioremap(addr, size, flags); | 211 | return __ioremap(addr, size, flags); |
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index a01b5c608ff9..409fcc7b63ce 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -147,7 +147,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
147 | */ | 147 | */ |
148 | if (huge) { | 148 | if (huge) { |
149 | #ifdef CONFIG_HUGETLB_PAGE | 149 | #ifdef CONFIG_HUGETLB_PAGE |
150 | psize = mmu_huge_psize; | 150 | psize = get_slice_psize(mm, addr);
151 | #else | 151 | #else |
152 | BUG(); | 152 | BUG(); |
153 | psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ | 153 | psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ |
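Editor's note: with the global mmu_huge_psize gone, hpte_need_flush() asks the slice map which page size governs the faulting address, so differently sized huge pages can coexist in one address space. A toy model of a per-slice psize lookup (the slice width and map contents are invented):

    #include <stdio.h>

    #define SLICE_SHIFT 28  /* invented: 256M slices */

    /* page-size index per slice; 0 = base pages, 2 = some huge size */
    static const int slice_psize[8] = { 0, 0, 2, 0, 0, 0, 0, 0 };

    static int get_slice_psize(unsigned long addr)
    {
        return slice_psize[(addr >> SLICE_SHIFT) & 7];
    }

    int main(void)
    {
        printf("addr 0x%lx -> psize %d\n", 0x20000000UL,
               get_slice_psize(0x20000000UL));
        printf("addr 0x%lx -> psize %d\n", 0x00001000UL,
               get_slice_psize(0x00001000UL));
        return 0;
    }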