author		Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>	2015-11-30 22:36:53 -0500
committer	Michael Ellerman <mpe@ellerman.id.au>	2015-12-13 23:19:14 -0500
commit		e34aa03ca48d0c7982530436ce996f374b65913c (patch)
tree		6d66b8a7067eea95aebabb1d4770d66d0cda6049
parent		26a344aea48c99cfd80d292a470a480e1c2bd5d9 (diff)
powerpc/mm: Move THP headers around

We support THP only with book3s_64 and 64K page size. Move
THP details to hash64-64k.h to clarify the same.

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--	arch/powerpc/include/asm/book3s/64/hash-64k.h	126
-rw-r--r--	arch/powerpc/include/asm/book3s/64/hash.h	223
-rw-r--r--	arch/powerpc/include/asm/nohash/64/pgtable.h	253
-rw-r--r--	arch/powerpc/mm/hash_native_64.c	10
-rw-r--r--	arch/powerpc/mm/pgtable_64.c	2
-rw-r--r--	arch/powerpc/platforms/pseries/lpar.c	10
6 files changed, 201 insertions(+), 423 deletions(-)
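
The helpers this patch moves pack one byte per underlying HPTE into the
stashed pgtable_t, laid out as [ 1 bit secondary | 3 bit hidx | 1 bit valid |
000 ]. A minimal userspace sketch of that encoding: the three accessors are
copied from the patch, while main() is a hypothetical test harness, not
kernel code.

/* Sketch of the one-byte-per-HPTE encoding moved by this patch. */
#include <assert.h>
#include <stdio.h>

static unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
	return (hpte_slot_array[index] >> 3) & 0x1;
}

static unsigned int hpte_hash_index(unsigned char *hpte_slot_array, int index)
{
	return hpte_slot_array[index] >> 4;
}

static void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
				 unsigned int index, unsigned int hidx)
{
	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
}

int main(void)
{
	/* 16MB hugepage / 64K HPTEs = 256 slots; fits in a 4K pgtable_t */
	unsigned char slots[256] = { 0 };

	assert(!hpte_valid(slots, 7));		/* zero byte: not valid */
	mark_hpte_slot_valid(slots, 7, 0xc);	/* secondary=1, hidx=0b100 */
	assert(hpte_valid(slots, 7));
	assert(hpte_hash_index(slots, 7) == 0xc);
	assert((slots[7] & 0x7) == 0);		/* low three bits stay clear */
	printf("slot 7 encodes 0x%02x\n", slots[7]);
	return 0;
}

The low three bits staying zero is what lets collapse code distinguish these
bytes from a normal page-table pointer whose _PAGE_PRESENT bit would be set.
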
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 1857d19de18e..7570677c11c3 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -170,6 +170,132 @@ static inline int hugepd_ok(hugepd_t hpd)
 
 #endif /* CONFIG_HUGETLB_PAGE */
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
+					 unsigned long addr,
+					 pmd_t *pmdp,
+					 unsigned long clr,
+					 unsigned long set);
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	/*
+	 * The hpte hindex is stored in the pgtable whose address is in the
+	 * second half of the PMD
+	 *
+	 * Order this load with the test for pmd_trans_huge in the caller
+	 */
+	smp_rmb();
+	return *(char **)(pmdp + PTRS_PER_PMD);
+
+
+}
+/*
+ * The linux hugepage PMD now include the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
+ * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * The last three bits are intentionally left to zero. This memory location
+ * are also used as normal page PTE pointers. So if we have any pointers
+ * left around while we collapse a hugepage, we need to make sure
+ * _PAGE_PRESENT bit of that is zero when we look at them
+ */
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+	return (hpte_slot_array[index] >> 3) & 0x1;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
+{
+	return hpte_slot_array[index] >> 4;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+}
+
+/*
+ *
+ * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
+ * page. The hugetlbfs page table walking and mangling paths are totally
+ * separated form the core VM paths and they're differentiated by
+ * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
+ *
+ * pmd_trans_huge() is defined as false at build time if
+ * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
+ * time in such case.
+ *
+ * For ppc64 we need to differntiate from explicit hugepages from THP, because
+ * for THP we also track the subpage details at the pmd level. We don't do
+ * that for explicit huge pages.
+ *
+ */
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) & _PAGE_SPLITTING;
+	return 0;
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return ((pmd_val(pmd) & 0x3) != 0x0);
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
+}
+
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old;
+
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
+	return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pmd_t *pmdp)
+{
+
+	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
+		return;
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
+}
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */
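
The pmd_trans_huge()/pmd_large() pair consolidated above is what separates
THP from explicit hugetlbfs mappings: any huge-page leaf has its bottom two
bits non-zero, but only THP entries also carry _PAGE_THP_HUGE (aliased to
_PAGE_4K_PFN in hash.h below). A reduced sketch of that logic, using a
stand-in mask value since the real bit assignment lives in the headers:

/* Stand-in value for illustration only; the real _PAGE_THP_HUGE is
 * _PAGE_4K_PFN, defined by the 64K page headers. */
#define _PAGE_THP_HUGE	0x40000UL

static int pmd_trans_huge(unsigned long pmdv)
{
	/* huge-page leaf (bottom two bits != 00) that is also THP */
	return (pmdv & 0x3) && (pmdv & _PAGE_THP_HUGE);
}

static int pmd_large(unsigned long pmdv)
{
	/* any huge-page leaf, THP or explicit hugetlb */
	return (pmdv & 0x3) != 0;
}

So a hugetlbfs leaf entry (low bits set, THP bit clear) satisfies pmd_large()
but not pmd_trans_huge(), which keeps the subpage-tracking paths THP-only.
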
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 9c212449b2e8..42e1273adad1 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -2,6 +2,55 @@
 #define _ASM_POWERPC_BOOK3S_64_HASH_H
 #ifdef __KERNEL__
 
+/*
+ * Common bits between 4K and 64K pages in a linux-style PTE.
+ * These match the bits in the (hardware-defined) PowerPC PTE as closely
+ * as possible. Additional bits may be defined in pgtable-hash64-*.h
+ *
+ * Note: We only support user read/write permissions. Supervisor always
+ * have full read/write to pages above PAGE_OFFSET (pages below that
+ * always use the user access permissions).
+ *
+ * We could create separate kernel read-only if we used the 3 PP bits
+ * combinations that newer processors provide but we currently don't.
+ */
+#define _PAGE_PRESENT		0x00001 /* software: pte contains a translation */
+#define _PAGE_USER		0x00002 /* matches one of the PP bits */
+#define _PAGE_BIT_SWAP_TYPE	2
+#define _PAGE_EXEC		0x00004 /* No execute on POWER4 and newer (we invert) */
+#define _PAGE_GUARDED		0x00008
+/* We can derive Memory coherence from _PAGE_NO_CACHE */
+#define _PAGE_COHERENT		0x0
+#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
+#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
+#define _PAGE_DIRTY		0x00080 /* C: page changed */
+#define _PAGE_ACCESSED		0x00100 /* R: page referenced */
+#define _PAGE_RW		0x00200 /* software: user write access allowed */
+#define _PAGE_HASHPTE		0x00400 /* software: pte has an associated HPTE */
+#define _PAGE_BUSY		0x00800 /* software: PTE & hash are busy */
+#define _PAGE_F_GIX		0x07000 /* full page: hidx bits */
+#define _PAGE_F_GIX_SHIFT	12
+#define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
+#define _PAGE_SPECIAL		0x10000 /* software: special page */
+
+/*
+ * THP pages can't be special. So use the _PAGE_SPECIAL
+ */
+#define _PAGE_SPLITTING _PAGE_SPECIAL
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
+ */
+#define _PAGE_THP_HUGE  _PAGE_4K_PFN
+
+/*
+ * set of bits not changed in pmd_modify.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |	\
+			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
+			 _PAGE_THP_HUGE)
+
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
 #else
@@ -57,36 +106,6 @@
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 #endif /* CONFIG_PPC_MM_SLICES */
-/*
- * Common bits between 4K and 64K pages in a linux-style PTE.
- * These match the bits in the (hardware-defined) PowerPC PTE as closely
- * as possible. Additional bits may be defined in pgtable-hash64-*.h
- *
- * Note: We only support user read/write permissions. Supervisor always
- * have full read/write to pages above PAGE_OFFSET (pages below that
- * always use the user access permissions).
- *
- * We could create separate kernel read-only if we used the 3 PP bits
- * combinations that newer processors provide but we currently don't.
- */
-#define _PAGE_PRESENT		0x00001 /* software: pte contains a translation */
-#define _PAGE_USER		0x00002 /* matches one of the PP bits */
-#define _PAGE_BIT_SWAP_TYPE	2
-#define _PAGE_EXEC		0x00004 /* No execute on POWER4 and newer (we invert) */
-#define _PAGE_GUARDED		0x00008
-/* We can derive Memory coherence from _PAGE_NO_CACHE */
-#define _PAGE_COHERENT		0x0
-#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
-#define _PAGE_DIRTY		0x00080 /* C: page changed */
-#define _PAGE_ACCESSED		0x00100 /* R: page referenced */
-#define _PAGE_RW		0x00200 /* software: user write access allowed */
-#define _PAGE_HASHPTE		0x00400 /* software: pte has an associated HPTE */
-#define _PAGE_BUSY		0x00800 /* software: PTE & hash are busy */
-#define _PAGE_F_GIX		0x07000 /* full page: hidx bits */
-#define _PAGE_F_GIX_SHIFT	12
-#define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
-#define _PAGE_SPECIAL		0x10000 /* software: special page */
 
 /* No separate kernel read-only */
 #define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
@@ -105,24 +124,6 @@
 
 /* Hash table based platforms need atomic updates of the linux PTE */
 #define PTE_ATOMIC_UPDATES	1
-
-/*
- * THP pages can't be special. So use the _PAGE_SPECIAL
- */
-#define _PAGE_SPLITTING _PAGE_SPECIAL
-
-/*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE  _PAGE_4K_PFN
-
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |	\
-			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-			 _PAGE_THP_HUGE)
 #define _PTE_NONE_MASK	_PAGE_HPTEFLAGS
 /*
  * The mask convered by the RPN must be a ULL on 32-bit platforms with
@@ -231,11 +232,6 @@
 
 extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 			    pte_t *ptep, unsigned long pte, int huge);
-extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
-					 unsigned long addr,
-					 pmd_t *pmdp,
-					 unsigned long clr,
-					 unsigned long set);
 extern unsigned long htab_convert_pte_flags(unsigned long pteflags);
 /* Atomic PTE updates */
 static inline unsigned long pte_update(struct mm_struct *mm,
@@ -361,127 +357,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
 
-static inline char *get_hpte_slot_array(pmd_t *pmdp)
-{
-	/*
-	 * The hpte hindex is stored in the pgtable whose address is in the
-	 * second half of the PMD
-	 *
-	 * Order this load with the test for pmd_trans_huge in the caller
-	 */
-	smp_rmb();
-	return *(char **)(pmdp + PTRS_PER_PMD);
-
-
-}
-/*
- * The linux hugepage PMD now include the pmd entries followed by the address
- * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
- * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
- * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
- *
- * The last three bits are intentionally left to zero. This memory location
- * are also used as normal page PTE pointers. So if we have any pointers
- * left around while we collapse a hugepage, we need to make sure
- * _PAGE_PRESENT bit of that is zero when we look at them
- */
-static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
-{
-	return (hpte_slot_array[index] >> 3) & 0x1;
-}
-
-static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
-					   int index)
-{
-	return hpte_slot_array[index] >> 4;
-}
-
-static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
-					unsigned int index, unsigned int hidx)
-{
-	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/*
- *
- * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
- * page. The hugetlbfs page table walking and mangling paths are totally
- * separated form the core VM paths and they're differentiated by
- * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
- *
- * pmd_trans_huge() is defined as false at build time if
- * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
- * time in such case.
- *
- * For ppc64 we need to differntiate from explicit hugepages from THP, because
- * for THP we also track the subpage details at the pmd level. We don't do
- * that for explicit huge pages.
- *
- */
-static inline int pmd_trans_huge(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
-}
-
-static inline int pmd_trans_splitting(pmd_t pmd)
-{
-	if (pmd_trans_huge(pmd))
-		return pmd_val(pmd) & _PAGE_SPLITTING;
-	return 0;
-}
-
-#endif
-static inline int pmd_large(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pmd_val(pmd) & 0x3) != 0x0);
-}
-
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
-
-static inline pmd_t pmd_mksplitting(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
-}
-
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
-	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
-}
-
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pmd_t *pmdp)
-{
-	unsigned long old;
-
-	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
-	return ((old & _PAGE_ACCESSED) != 0);
-}
-
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pmd_t *pmdp)
-{
-
-	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
-}
-
 /* Generic accessors to PTE bits */
 static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
 static inline int pte_dirty(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_DIRTY); }
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index c24e03f22655..d635a924d652 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -154,6 +154,11 @@ static inline void pmd_clear(pmd_t *pmdp)
 	*pmdp = __pmd(0);
 }
 
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
 				 || (pmd_val(pmd) & PMD_BAD_BITS))
@@ -389,252 +394,4 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 #endif /* __ASSEMBLY__ */
 
-/*
- * THP pages can't be special. So use the _PAGE_SPECIAL
- */
-#define _PAGE_SPLITTING _PAGE_SPECIAL
-
-/*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE  _PAGE_4K_PFN
-
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |	\
-			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-			 _PAGE_THP_HUGE)
-
-#ifndef __ASSEMBLY__
-/*
- * The linux hugepage PMD now include the pmd entries followed by the address
- * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
- * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
- * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
- *
- * The last three bits are intentionally left to zero. This memory location
- * are also used as normal page PTE pointers. So if we have any pointers
- * left around while we collapse a hugepage, we need to make sure
- * _PAGE_PRESENT bit of that is zero when we look at them
- */
-static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
-{
-	return (hpte_slot_array[index] >> 3) & 0x1;
-}
-
-static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
-					   int index)
-{
-	return hpte_slot_array[index] >> 4;
-}
-
-static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
-					unsigned int index, unsigned int hidx)
-{
-	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
-}
-
-struct page *realmode_pfn_to_page(unsigned long pfn);
-
-static inline char *get_hpte_slot_array(pmd_t *pmdp)
-{
-	/*
-	 * The hpte hindex is stored in the pgtable whose address is in the
-	 * second half of the PMD
-	 *
-	 * Order this load with the test for pmd_trans_huge in the caller
-	 */
-	smp_rmb();
-	return *(char **)(pmdp + PTRS_PER_PMD);
-
-
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
-				   pmd_t *pmdp, unsigned long old_pmd);
-extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
-extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
-extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
-extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-		       pmd_t *pmdp, pmd_t pmd);
-extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
-				 pmd_t *pmd);
-/*
- *
- * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
- * page. The hugetlbfs page table walking and mangling paths are totally
- * separated form the core VM paths and they're differentiated by
- * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
- *
- * pmd_trans_huge() is defined as false at build time if
- * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
- * time in such case.
- *
- * For ppc64 we need to differntiate from explicit hugepages from THP, because
- * for THP we also track the subpage details at the pmd level. We don't do
- * that for explicit huge pages.
- *
- */
-static inline int pmd_trans_huge(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
-}
-
-static inline int pmd_trans_splitting(pmd_t pmd)
-{
-	if (pmd_trans_huge(pmd))
-		return pmd_val(pmd) & _PAGE_SPLITTING;
-	return 0;
-}
-
-extern int has_transparent_hugepage(void);
-#else
-static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
-					  unsigned long addr, pmd_t *pmdp,
-					  unsigned long old_pmd)
-{
-
-	WARN(1, "%s called with THP disabled\n", __func__);
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-static inline int pmd_large(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pmd_val(pmd) & 0x3) != 0x0);
-}
-
-static inline pte_t pmd_pte(pmd_t pmd)
-{
-	return __pte(pmd_val(pmd));
-}
-
-static inline pmd_t pte_pmd(pte_t pte)
-{
-	return __pmd(pte_val(pte));
-}
-
-static inline pte_t *pmdp_ptep(pmd_t *pmd)
-{
-	return (pte_t *)pmd;
-}
-
-#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
-#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
-#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
-#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
-#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
-#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
-#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
-
-#define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
-
-static inline pmd_t pmd_mkhuge(pmd_t pmd)
-{
-	/* Do nothing, mk_pmd() does this part.  */
-	return pmd;
-}
-
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
-
-static inline pmd_t pmd_mksplitting(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
-}
-
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
-	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
-}
-
-#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
-extern int pmdp_set_access_flags(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp,
-				 pmd_t entry, int dirty);
-
-extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
-					 unsigned long addr,
-					 pmd_t *pmdp,
-					 unsigned long clr,
-					 unsigned long set);
-
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pmd_t *pmdp)
-{
-	unsigned long old;
-
-	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
-	return ((old & _PAGE_ACCESSED) != 0);
-}
-
-#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-				     unsigned long address, pmd_t *pmdp);
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
-extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
-				     unsigned long addr, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pmd_t *pmdp)
-{
-
-	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
-}
-
-#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
-extern void pmdp_splitting_flush(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp);
-
-extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp);
-#define pmdp_collapse_flush pmdp_collapse_flush
-
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				       pgtable_t pgtable);
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_INVALIDATE
-extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
-			    pmd_t *pmdp);
-
-#define pmd_move_must_withdraw pmd_move_must_withdraw
-struct spinlock;
-static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
-					 struct spinlock *old_pmd_ptl)
-{
-	/*
-	 * Archs like ppc64 use pgtable to store per pmd
-	 * specific information. So when we switch the pmd,
-	 * we should also withdraw and deposit the pgtable
-	 */
-	return true;
-}
-#endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
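
Most of the pmd accessors deleted from the nohash header were thin wrappers
that reuse the pte accessors on the same bit layout, e.g. pmd_dirty(pmd) ==
pte_dirty(pmd_pte(pmd)); only pmd_pte() itself stays behind (added in the
first hunk of this file) because pmd_page() in pgtable_64.c below now uses
it. A reduced sketch of the wrapper pattern, with simplified stand-in types
and an illustrative flag value, not the kernel's real definitions:

/* Stand-ins for the kernel's pte_t/pmd_t; only the wrapper pattern
 * is taken from the removed code. */
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;

#define _PAGE_DIRTY	0x00080UL	/* illustrative value */

static pte_t pmd_pte(pmd_t pmd) { return (pte_t){ .pte = pmd.pmd }; }
static pmd_t pte_pmd(pte_t pte) { return (pmd_t){ .pmd = pte.pte }; }

static int pte_dirty(pte_t pte) { return !!(pte.pte & _PAGE_DIRTY); }
static pte_t pte_mkdirty(pte_t pte) { pte.pte |= _PAGE_DIRTY; return pte; }

/* mirrors the removed: #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) */
static int pmd_dirty(pmd_t pmd) { return pte_dirty(pmd_pte(pmd)); }

/* mirrors: #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) */
static pmd_t pmd_mkdirty(pmd_t pmd) { return pte_pmd(pte_mkdirty(pmd_pte(pmd))); }
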
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index c8822af10a58..8eaac81347fd 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -429,6 +429,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void native_hugepage_invalidate(unsigned long vsid,
 				       unsigned long addr,
 				       unsigned char *hpte_slot_array,
@@ -482,6 +483,15 @@ static void native_hugepage_invalidate(unsigned long vsid,
 	}
 	local_irq_restore(flags);
 }
+#else
+static void native_hugepage_invalidate(unsigned long vsid,
+				       unsigned long addr,
+				       unsigned char *hpte_slot_array,
+				       int psize, int ssize, int local)
+{
+	WARN(1, "%s called without THP support\n", __func__);
+}
+#endif
 
 static inline int __hpte_actual_psize(unsigned int lp, int psize)
 {
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3967e3cce03e..d42dd289abfe 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -359,7 +359,7 @@ struct page *pud_page(pud_t pud)
 struct page *pmd_page(pmd_t pmd)
 {
 	if (pmd_trans_huge(pmd) || pmd_huge(pmd))
-		return pfn_to_page(pmd_pfn(pmd));
+		return pte_page(pmd_pte(pmd));
 	return virt_to_page(pmd_page_vaddr(pmd));
 }
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b7a67e3d2201..6d46547871aa 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -396,6 +396,7 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	BUG_ON(lpar_rc != H_SUCCESS);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
  * to make sure that we avoid bouncing the hypervisor tlbie lock.
@@ -494,6 +495,15 @@ static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
 	__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
 					   index, psize, ssize);
 }
+#else
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+					     unsigned long addr,
+					     unsigned char *hpte_slot_array,
+					     int psize, int ssize, int local)
+{
+	WARN(1, "%s called without THP support\n", __func__);
+}
+#endif
 
 static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 					   int psize, int ssize)