diff options
author | Gerald Schaefer <gerald.schaefer@de.ibm.com> | 2016-07-04 08:47:01 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2016-07-06 02:46:43 -0400 |
commit | d08de8e2d86744f91d9d5d57c56ca2b6e33bf6ec (patch) | |
tree | 2bcf0bcb5ec30e48eb82fbc7e3a216cab8671f04 /arch/s390/mm | |
parent | 46210c440c7b2d32a8ee4e1f0248d0a0b4ad9fa5 (diff) |
s390/mm: add support for 2GB hugepages
This adds support for 2GB hugetlbfs pages on s390.
Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/gmap.c | 3 | ||||
-rw-r--r-- | arch/s390/mm/gup.c | 45 | ||||
-rw-r--r-- | arch/s390/mm/hugetlbpage.c | 129 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 39 |
4 files changed, 176 insertions, 40 deletions
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cace818d86eb..69466f6055c2 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c | |||
@@ -430,6 +430,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) | |||
430 | VM_BUG_ON(pgd_none(*pgd)); | 430 | VM_BUG_ON(pgd_none(*pgd)); |
431 | pud = pud_offset(pgd, vmaddr); | 431 | pud = pud_offset(pgd, vmaddr); |
432 | VM_BUG_ON(pud_none(*pud)); | 432 | VM_BUG_ON(pud_none(*pud)); |
433 | /* large puds cannot yet be handled */ | ||
434 | if (pud_large(*pud)) | ||
435 | return -EFAULT; | ||
433 | pmd = pmd_offset(pud, vmaddr); | 436 | pmd = pmd_offset(pud, vmaddr); |
434 | VM_BUG_ON(pmd_none(*pmd)); | 437 | VM_BUG_ON(pmd_none(*pmd)); |
435 | /* large pmds cannot yet be handled */ | 438 | /* large pmds cannot yet be handled */ |
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index a8a6765f1a51..adb0c34bf431 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c | |||
@@ -128,6 +128,44 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, | |||
128 | return 1; | 128 | return 1; |
129 | } | 129 | } |
130 | 130 | ||
131 | static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, | ||
132 | unsigned long end, int write, struct page **pages, int *nr) | ||
133 | { | ||
134 | struct page *head, *page; | ||
135 | unsigned long mask; | ||
136 | int refs; | ||
137 | |||
138 | mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID; | ||
139 | if ((pud_val(pud) & mask) != 0) | ||
140 | return 0; | ||
141 | VM_BUG_ON(!pfn_valid(pud_pfn(pud))); | ||
142 | |||
143 | refs = 0; | ||
144 | head = pud_page(pud); | ||
145 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); | ||
146 | do { | ||
147 | VM_BUG_ON_PAGE(compound_head(page) != head, page); | ||
148 | pages[*nr] = page; | ||
149 | (*nr)++; | ||
150 | page++; | ||
151 | refs++; | ||
152 | } while (addr += PAGE_SIZE, addr != end); | ||
153 | |||
154 | if (!page_cache_add_speculative(head, refs)) { | ||
155 | *nr -= refs; | ||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | if (unlikely(pud_val(pud) != pud_val(*pudp))) { | ||
160 | *nr -= refs; | ||
161 | while (refs--) | ||
162 | put_page(head); | ||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | return 1; | ||
167 | } | ||
168 | |||
131 | static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, | 169 | static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, |
132 | unsigned long end, int write, struct page **pages, int *nr) | 170 | unsigned long end, int write, struct page **pages, int *nr) |
133 | { | 171 | { |
@@ -144,7 +182,12 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, | |||
144 | next = pud_addr_end(addr, end); | 182 | next = pud_addr_end(addr, end); |
145 | if (pud_none(pud)) | 183 | if (pud_none(pud)) |
146 | return 0; | 184 | return 0; |
147 | if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) | 185 | if (unlikely(pud_large(pud))) { |
186 | if (!gup_huge_pud(pudp, pud, addr, next, write, pages, | ||
187 | nr)) | ||
188 | return 0; | ||
189 | } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages, | ||
190 | nr)) | ||
148 | return 0; | 191 | return 0; |
149 | } while (pudp++, addr = next, addr != end); | 192 | } while (pudp++, addr = next, addr != end); |
150 | 193 | ||
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 1b5e8983f4f3..e19d853883be 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c | |||
@@ -1,19 +1,22 @@ | |||
1 | /* | 1 | /* |
2 | * IBM System z Huge TLB Page Support for Kernel. | 2 | * IBM System z Huge TLB Page Support for Kernel. |
3 | * | 3 | * |
4 | * Copyright IBM Corp. 2007 | 4 | * Copyright IBM Corp. 2007,2016 |
5 | * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> | 5 | * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define KMSG_COMPONENT "hugetlb" | ||
9 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | ||
10 | |||
8 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
9 | #include <linux/hugetlb.h> | 12 | #include <linux/hugetlb.h> |
10 | 13 | ||
11 | static inline pmd_t __pte_to_pmd(pte_t pte) | 14 | static inline unsigned long __pte_to_rste(pte_t pte) |
12 | { | 15 | { |
13 | pmd_t pmd; | 16 | unsigned long rste; |
14 | 17 | ||
15 | /* | 18 | /* |
16 | * Convert encoding pte bits pmd bits | 19 | * Convert encoding pte bits pmd / pud bits |
17 | * lIR.uswrdy.p dy..R...I...wr | 20 | * lIR.uswrdy.p dy..R...I...wr |
18 | * empty 010.000000.0 -> 00..0...1...00 | 21 | * empty 010.000000.0 -> 00..0...1...00 |
19 | * prot-none, clean, old 111.000000.1 -> 00..1...1...00 | 22 | * prot-none, clean, old 111.000000.1 -> 00..1...1...00 |
@@ -33,25 +36,31 @@ static inline pmd_t __pte_to_pmd(pte_t pte) | |||
33 | * u unused, l large | 36 | * u unused, l large |
34 | */ | 37 | */ |
35 | if (pte_present(pte)) { | 38 | if (pte_present(pte)) { |
36 | pmd_val(pmd) = pte_val(pte) & PAGE_MASK; | 39 | rste = pte_val(pte) & PAGE_MASK; |
37 | pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4; | 40 | rste |= (pte_val(pte) & _PAGE_READ) >> 4; |
38 | pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4; | 41 | rste |= (pte_val(pte) & _PAGE_WRITE) >> 4; |
39 | pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5; | 42 | rste |= (pte_val(pte) & _PAGE_INVALID) >> 5; |
40 | pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT); | 43 | rste |= (pte_val(pte) & _PAGE_PROTECT); |
41 | pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; | 44 | rste |= (pte_val(pte) & _PAGE_DIRTY) << 10; |
42 | pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; | 45 | rste |= (pte_val(pte) & _PAGE_YOUNG) << 10; |
43 | pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; | 46 | rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; |
44 | } else | 47 | } else |
45 | pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; | 48 | rste = _SEGMENT_ENTRY_INVALID; |
46 | return pmd; | 49 | return rste; |
47 | } | 50 | } |
48 | 51 | ||
49 | static inline pte_t __pmd_to_pte(pmd_t pmd) | 52 | static inline pte_t __rste_to_pte(unsigned long rste) |
50 | { | 53 | { |
54 | int present; | ||
51 | pte_t pte; | 55 | pte_t pte; |
52 | 56 | ||
57 | if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) | ||
58 | present = pud_present(__pud(rste)); | ||
59 | else | ||
60 | present = pmd_present(__pmd(rste)); | ||
61 | |||
53 | /* | 62 | /* |
54 | * Convert encoding pmd bits pte bits | 63 | * Convert encoding pmd / pud bits pte bits |
55 | * dy..R...I...wr lIR.uswrdy.p | 64 | * dy..R...I...wr lIR.uswrdy.p |
56 | * empty 00..0...1...00 -> 010.000000.0 | 65 | * empty 00..0...1...00 -> 010.000000.0 |
57 | * prot-none, clean, old 00..1...1...00 -> 111.000000.1 | 66 | * prot-none, clean, old 00..1...1...00 -> 111.000000.1 |
@@ -70,16 +79,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) | |||
70 | * SW-bits: p present, y young, d dirty, r read, w write, s special, | 79 | * SW-bits: p present, y young, d dirty, r read, w write, s special, |
71 | * u unused, l large | 80 | * u unused, l large |
72 | */ | 81 | */ |
73 | if (pmd_present(pmd)) { | 82 | if (present) { |
74 | pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; | 83 | pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; |
75 | pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; | 84 | pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; |
76 | pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4; | 85 | pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4; |
77 | pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; | 86 | pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4; |
78 | pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; | 87 | pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5; |
79 | pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); | 88 | pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT); |
80 | pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10; | 89 | pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10; |
81 | pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10; | 90 | pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10; |
82 | pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; | 91 | pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; |
83 | } else | 92 | } else |
84 | pte_val(pte) = _PAGE_INVALID; | 93 | pte_val(pte) = _PAGE_INVALID; |
85 | return pte; | 94 | return pte; |
@@ -88,27 +97,33 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) | |||
88 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | 97 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, |
89 | pte_t *ptep, pte_t pte) | 98 | pte_t *ptep, pte_t pte) |
90 | { | 99 | { |
91 | pmd_t pmd = __pte_to_pmd(pte); | 100 | unsigned long rste = __pte_to_rste(pte); |
92 | 101 | ||
93 | pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; | 102 | /* Set correct table type for 2G hugepages */ |
94 | *(pmd_t *) ptep = pmd; | 103 | if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) |
104 | rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; | ||
105 | else | ||
106 | rste |= _SEGMENT_ENTRY_LARGE; | ||
107 | pte_val(*ptep) = rste; | ||
95 | } | 108 | } |
96 | 109 | ||
97 | pte_t huge_ptep_get(pte_t *ptep) | 110 | pte_t huge_ptep_get(pte_t *ptep) |
98 | { | 111 | { |
99 | pmd_t pmd = *(pmd_t *) ptep; | 112 | return __rste_to_pte(pte_val(*ptep)); |
100 | |||
101 | return __pmd_to_pte(pmd); | ||
102 | } | 113 | } |
103 | 114 | ||
104 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, | 115 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, |
105 | unsigned long addr, pte_t *ptep) | 116 | unsigned long addr, pte_t *ptep) |
106 | { | 117 | { |
118 | pte_t pte = huge_ptep_get(ptep); | ||
107 | pmd_t *pmdp = (pmd_t *) ptep; | 119 | pmd_t *pmdp = (pmd_t *) ptep; |
108 | pmd_t old; | 120 | pud_t *pudp = (pud_t *) ptep; |
109 | 121 | ||
110 | old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); | 122 | if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) |
111 | return __pmd_to_pte(old); | 123 | pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); |
124 | else | ||
125 | pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); | ||
126 | return pte; | ||
112 | } | 127 | } |
113 | 128 | ||
114 | pte_t *huge_pte_alloc(struct mm_struct *mm, | 129 | pte_t *huge_pte_alloc(struct mm_struct *mm, |
@@ -120,8 +135,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, | |||
120 | 135 | ||
121 | pgdp = pgd_offset(mm, addr); | 136 | pgdp = pgd_offset(mm, addr); |
122 | pudp = pud_alloc(mm, pgdp, addr); | 137 | pudp = pud_alloc(mm, pgdp, addr); |
123 | if (pudp) | 138 | if (pudp) { |
124 | pmdp = pmd_alloc(mm, pudp, addr); | 139 | if (sz == PUD_SIZE) |
140 | return (pte_t *) pudp; | ||
141 | else if (sz == PMD_SIZE) | ||
142 | pmdp = pmd_alloc(mm, pudp, addr); | ||
143 | } | ||
125 | return (pte_t *) pmdp; | 144 | return (pte_t *) pmdp; |
126 | } | 145 | } |
127 | 146 | ||
@@ -134,8 +153,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
134 | pgdp = pgd_offset(mm, addr); | 153 | pgdp = pgd_offset(mm, addr); |
135 | if (pgd_present(*pgdp)) { | 154 | if (pgd_present(*pgdp)) { |
136 | pudp = pud_offset(pgdp, addr); | 155 | pudp = pud_offset(pgdp, addr); |
137 | if (pud_present(*pudp)) | 156 | if (pud_present(*pudp)) { |
157 | if (pud_large(*pudp)) | ||
158 | return (pte_t *) pudp; | ||
138 | pmdp = pmd_offset(pudp, addr); | 159 | pmdp = pmd_offset(pudp, addr); |
160 | } | ||
139 | } | 161 | } |
140 | return (pte_t *) pmdp; | 162 | return (pte_t *) pmdp; |
141 | } | 163 | } |
@@ -147,5 +169,34 @@ int pmd_huge(pmd_t pmd) | |||
147 | 169 | ||
148 | int pud_huge(pud_t pud) | 170 | int pud_huge(pud_t pud) |
149 | { | 171 | { |
150 | return 0; | 172 | return pud_large(pud); |
173 | } | ||
174 | |||
175 | struct page * | ||
176 | follow_huge_pud(struct mm_struct *mm, unsigned long address, | ||
177 | pud_t *pud, int flags) | ||
178 | { | ||
179 | if (flags & FOLL_GET) | ||
180 | return NULL; | ||
181 | |||
182 | return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); | ||
183 | } | ||
184 | |||
185 | static __init int setup_hugepagesz(char *opt) | ||
186 | { | ||
187 | unsigned long size; | ||
188 | char *string = opt; | ||
189 | |||
190 | size = memparse(opt, &opt); | ||
191 | if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { | ||
192 | hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); | ||
193 | } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { | ||
194 | hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); | ||
195 | } else { | ||
196 | pr_err("hugepagesz= specifies an unsupported page size %s\n", | ||
197 | string); | ||
198 | return 0; | ||
199 | } | ||
200 | return 1; | ||
151 | } | 201 | } |
202 | __setup("hugepagesz=", setup_hugepagesz); | ||
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 74f8f2a8a4e8..b98d1a152d46 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -352,6 +352,45 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, | |||
352 | } | 352 | } |
353 | EXPORT_SYMBOL(pmdp_xchg_lazy); | 353 | EXPORT_SYMBOL(pmdp_xchg_lazy); |
354 | 354 | ||
355 | static inline pud_t pudp_flush_direct(struct mm_struct *mm, | ||
356 | unsigned long addr, pud_t *pudp) | ||
357 | { | ||
358 | pud_t old; | ||
359 | |||
360 | old = *pudp; | ||
361 | if (pud_val(old) & _REGION_ENTRY_INVALID) | ||
362 | return old; | ||
363 | if (!MACHINE_HAS_IDTE) { | ||
364 | /* | ||
365 | * Invalid bit position is the same for pmd and pud, so we can | ||
366 | * re-use __pmdp_csp() here | ||
367 | */ | ||
368 | __pmdp_csp((pmd_t *) pudp); | ||
369 | return old; | ||
370 | } | ||
371 | atomic_inc(&mm->context.flush_count); | ||
372 | if (MACHINE_HAS_TLB_LC && | ||
373 | cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) | ||
374 | __pudp_idte_local(addr, pudp); | ||
375 | else | ||
376 | __pudp_idte(addr, pudp); | ||
377 | atomic_dec(&mm->context.flush_count); | ||
378 | return old; | ||
379 | } | ||
380 | |||
381 | pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, | ||
382 | pud_t *pudp, pud_t new) | ||
383 | { | ||
384 | pud_t old; | ||
385 | |||
386 | preempt_disable(); | ||
387 | old = pudp_flush_direct(mm, addr, pudp); | ||
388 | *pudp = new; | ||
389 | preempt_enable(); | ||
390 | return old; | ||
391 | } | ||
392 | EXPORT_SYMBOL(pudp_xchg_direct); | ||
393 | |||
355 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 394 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
356 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, | 395 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, |
357 | pgtable_t pgtable) | 396 | pgtable_t pgtable) |