author    Gerald Schaefer <gerald.schaefer@de.ibm.com>    2012-09-10 10:43:26 -0400
committer Martin Schwidefsky <schwidefsky@de.ibm.com>     2012-09-17 03:44:24 -0400
commit    4db84d4f07b87c452c6000e0595dc9570ed21b41 (patch)
tree      c5e74358f74fd6fb8a2e4a9eec3c99d524c76018
parent    db2e1f43e7b2320d0450fe34e5ac4ac4d8c7708d (diff)
s390/mm: fix user access page-table walk code
The s390 page-table walk code, used for user copy and futex, currently
cannot handle huge pages. As far as user copy is concerned, that is not
really a problem because those functions will only be used on old
hardware that has no huge page support. But the futex code will also use
pagetable walk functions on current hardware when user space runs in
primary space mode. So, if a futex sits in a huge page, the futex
operation on it will result in a page fault loop or even data corruption.

This patch adds the code for resolving huge page mappings in the user
access pagetable walk code on s390.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r--    arch/s390/lib/uaccess_pt.c    142
1 file changed, 63 insertions(+), 79 deletions(-)
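To make the address arithmetic of the reworked follow_table() concrete, here is a minimal
user-space sketch (not part of the patch; the resolve() helper and all example addresses
are made up): for a huge page the segment origin taken from the pmd entry supplies the
upper bits and the low HPAGE_SHIFT bits of the user address survive as the offset, while
for a normal page only the low PAGE_SHIFT bits do.

/*
 * Illustrative user-space sketch (not kernel code): how a user virtual
 * address is combined with a mapping's base address. The constants and
 * addresses below are hypothetical; HPAGE_SHIFT = 20 mirrors the 1 MB
 * segments used for huge pages on s390.
 */
#include <stdio.h>

#define PAGE_SHIFT   12
#define PAGE_SIZE    (1UL << PAGE_SHIFT)
#define PAGE_MASK    (~(PAGE_SIZE - 1))
#define HPAGE_SHIFT  20
#define HPAGE_SIZE   (1UL << HPAGE_SHIFT)
#define HPAGE_MASK   (~(HPAGE_SIZE - 1))

/* Combine a mapping's base address with the offset kept from addr. */
static unsigned long resolve(unsigned long base, unsigned long addr,
                             unsigned long mask)
{
        return (base & mask) + (addr & ~mask);
}

int main(void)
{
        unsigned long uaddr = 0x80012345UL;     /* user virtual address    */
        unsigned long pmd_base = 0x40000000UL;  /* huge segment origin     */
        unsigned long pte_base = 0x7fe21000UL;  /* 4 KB page frame address */

        /* Huge page: the low 20 bits (0x12345) survive as the offset. */
        printf("huge page: %#lx\n", resolve(pmd_base, uaddr, HPAGE_MASK));
        /* Normal page: only the low 12 bits (0x345) survive. */
        printf("4 KB page: %#lx\n", resolve(pte_base, uaddr, PAGE_MASK));
        return 0;
}

Compiled with any C compiler, this prints 0x40012345 for the huge-page case and 0x7fe21345
for the 4 KB case: the same base-plus-offset form the patched walker returns instead of a
pte pointer.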
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 60ee2b883797..2d37bb861faf 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -2,69 +2,82 @@
  * User access functions based on page table walks for enhanced
  * system layout without hardware support.
  *
- * Copyright IBM Corp. 2006
+ * Copyright IBM Corp. 2006, 2012
  * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com)
  */
 
 #include <linux/errno.h>
 #include <linux/hardirq.h>
 #include <linux/mm.h>
+#include <linux/hugetlb.h>
 #include <asm/uaccess.h>
 #include <asm/futex.h>
 #include "uaccess.h"
 
-static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr)
+
+/*
+ * Returns kernel address for user virtual address. If the returned address is
+ * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address
+ * contains the (negative) exception code.
+ */
+static __always_inline unsigned long follow_table(struct mm_struct *mm,
+                                                  unsigned long addr, int write)
 {
         pgd_t *pgd;
         pud_t *pud;
         pmd_t *pmd;
+        pte_t *ptep;
 
         pgd = pgd_offset(mm, addr);
         if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-                return (pte_t *) 0x3a;
+                return -0x3aUL;
 
         pud = pud_offset(pgd, addr);
         if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-                return (pte_t *) 0x3b;
+                return -0x3bUL;
 
         pmd = pmd_offset(pud, addr);
-        if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-                return (pte_t *) 0x10;
+        if (pmd_none(*pmd))
+                return -0x10UL;
+        if (pmd_huge(*pmd)) {
+                if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO))
+                        return -0x04UL;
+                return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK);
+        }
+        if (unlikely(pmd_bad(*pmd)))
+                return -0x10UL;
+
+        ptep = pte_offset_map(pmd, addr);
+        if (!pte_present(*ptep))
+                return -0x11UL;
+        if (write && !pte_write(*ptep))
+                return -0x04UL;
 
-        return pte_offset_map(pmd, addr);
+        return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK);
 }
 
 static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr,
                                              size_t n, int write_user)
 {
         struct mm_struct *mm = current->mm;
-        unsigned long offset, pfn, done, size;
-        pte_t *pte;
+        unsigned long offset, done, size, kaddr;
         void *from, *to;
 
         done = 0;
 retry:
         spin_lock(&mm->page_table_lock);
         do {
-                pte = follow_table(mm, uaddr);
-                if ((unsigned long) pte < 0x1000)
+                kaddr = follow_table(mm, uaddr, write_user);
+                if (IS_ERR_VALUE(kaddr))
                         goto fault;
-                if (!pte_present(*pte)) {
-                        pte = (pte_t *) 0x11;
-                        goto fault;
-                } else if (write_user && !pte_write(*pte)) {
-                        pte = (pte_t *) 0x04;
-                        goto fault;
-                }
 
-                pfn = pte_pfn(*pte);
-                offset = uaddr & (PAGE_SIZE - 1);
+                offset = uaddr & ~PAGE_MASK;
                 size = min(n - done, PAGE_SIZE - offset);
                 if (write_user) {
-                        to = (void *)((pfn << PAGE_SHIFT) + offset);
+                        to = (void *) kaddr;
                         from = kptr + done;
                 } else {
-                        from = (void *)((pfn << PAGE_SHIFT) + offset);
+                        from = (void *) kaddr;
                         to = kptr + done;
                 }
                 memcpy(to, from, size);
@@ -75,7 +88,7 @@ retry:
         return n - done;
 fault:
         spin_unlock(&mm->page_table_lock);
-        if (__handle_fault(uaddr, (unsigned long) pte, write_user))
+        if (__handle_fault(uaddr, -kaddr, write_user))
                 return n - done;
         goto retry;
 }
@@ -84,27 +97,22 @@ fault:
  * Do DAT for user address by page table walk, return kernel address.
  * This function needs to be called with current->mm->page_table_lock held.
  */
-static __always_inline unsigned long __dat_user_addr(unsigned long uaddr)
+static __always_inline unsigned long __dat_user_addr(unsigned long uaddr,
+                                                     int write)
 {
         struct mm_struct *mm = current->mm;
-        unsigned long pfn;
-        pte_t *pte;
+        unsigned long kaddr;
         int rc;
 
 retry:
-        pte = follow_table(mm, uaddr);
-        if ((unsigned long) pte < 0x1000)
-                goto fault;
-        if (!pte_present(*pte)) {
-                pte = (pte_t *) 0x11;
+        kaddr = follow_table(mm, uaddr, write);
+        if (IS_ERR_VALUE(kaddr))
                 goto fault;
-        }
 
-        pfn = pte_pfn(*pte);
-        return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1));
+        return kaddr;
 fault:
         spin_unlock(&mm->page_table_lock);
-        rc = __handle_fault(uaddr, (unsigned long) pte, 0);
+        rc = __handle_fault(uaddr, -kaddr, write);
         spin_lock(&mm->page_table_lock);
         if (!rc)
                 goto retry;
@@ -159,11 +167,9 @@ static size_t clear_user_pt(size_t n, void __user *to)
 
 static size_t strnlen_user_pt(size_t count, const char __user *src)
 {
-        char *addr;
         unsigned long uaddr = (unsigned long) src;
         struct mm_struct *mm = current->mm;
-        unsigned long offset, pfn, done, len;
-        pte_t *pte;
+        unsigned long offset, done, len, kaddr;
         size_t len_str;
 
         if (segment_eq(get_fs(), KERNEL_DS))
@@ -172,19 +178,13 @@ static size_t strnlen_user_pt(size_t count, const char __user *src)
 retry:
         spin_lock(&mm->page_table_lock);
         do {
-                pte = follow_table(mm, uaddr);
-                if ((unsigned long) pte < 0x1000)
-                        goto fault;
-                if (!pte_present(*pte)) {
-                        pte = (pte_t *) 0x11;
+                kaddr = follow_table(mm, uaddr, 0);
+                if (IS_ERR_VALUE(kaddr))
                         goto fault;
-                }
 
-                pfn = pte_pfn(*pte);
-                offset = uaddr & (PAGE_SIZE-1);
-                addr = (char *)(pfn << PAGE_SHIFT) + offset;
+                offset = uaddr & ~PAGE_MASK;
                 len = min(count - done, PAGE_SIZE - offset);
-                len_str = strnlen(addr, len);
+                len_str = strnlen((char *) kaddr, len);
                 done += len_str;
                 uaddr += len_str;
         } while ((len_str == len) && (done < count));
@@ -192,7 +192,7 @@ retry:
         return done + 1;
 fault:
         spin_unlock(&mm->page_table_lock);
-        if (__handle_fault(uaddr, (unsigned long) pte, 0))
+        if (__handle_fault(uaddr, -kaddr, 0))
                 return 0;
         goto retry;
 }
@@ -225,11 +225,10 @@ static size_t copy_in_user_pt(size_t n, void __user *to,
                               const void __user *from)
 {
         struct mm_struct *mm = current->mm;
-        unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to,
-                      uaddr, done, size, error_code;
+        unsigned long offset_max, uaddr, done, size, error_code;
         unsigned long uaddr_from = (unsigned long) from;
         unsigned long uaddr_to = (unsigned long) to;
-        pte_t *pte_from, *pte_to;
+        unsigned long kaddr_to, kaddr_from;
         int write_user;
 
         if (segment_eq(get_fs(), KERNEL_DS)) {
@@ -242,38 +241,23 @@ retry:
         do {
                 write_user = 0;
                 uaddr = uaddr_from;
-                pte_from = follow_table(mm, uaddr_from);
-                error_code = (unsigned long) pte_from;
-                if (error_code < 0x1000)
-                        goto fault;
-                if (!pte_present(*pte_from)) {
-                        error_code = 0x11;
+                kaddr_from = follow_table(mm, uaddr_from, 0);
+                error_code = kaddr_from;
+                if (IS_ERR_VALUE(error_code))
                         goto fault;
-                }
 
                 write_user = 1;
                 uaddr = uaddr_to;
-                pte_to = follow_table(mm, uaddr_to);
-                error_code = (unsigned long) pte_to;
-                if (error_code < 0x1000)
-                        goto fault;
-                if (!pte_present(*pte_to)) {
-                        error_code = 0x11;
+                kaddr_to = follow_table(mm, uaddr_to, 1);
+                error_code = (unsigned long) kaddr_to;
+                if (IS_ERR_VALUE(error_code))
                         goto fault;
-                } else if (!pte_write(*pte_to)) {
-                        error_code = 0x04;
-                        goto fault;
-                }
 
-                pfn_from = pte_pfn(*pte_from);
-                pfn_to = pte_pfn(*pte_to);
-                offset_from = uaddr_from & (PAGE_SIZE-1);
-                offset_to = uaddr_from & (PAGE_SIZE-1);
-                offset_max = max(offset_from, offset_to);
+                offset_max = max(uaddr_from & ~PAGE_MASK,
+                                 uaddr_to & ~PAGE_MASK);
                 size = min(n - done, PAGE_SIZE - offset_max);
 
-                memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to,
-                       (void *)(pfn_from << PAGE_SHIFT) + offset_from, size);
+                memcpy((void *) kaddr_to, (void *) kaddr_from, size);
                 done += size;
                 uaddr_from += size;
                 uaddr_to += size;
@@ -282,7 +266,7 @@ retry:
         return n - done;
 fault:
         spin_unlock(&mm->page_table_lock);
-        if (__handle_fault(uaddr, error_code, write_user))
+        if (__handle_fault(uaddr, -error_code, write_user))
                 return n - done;
         goto retry;
 }
@@ -341,7 +325,7 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
                 return __futex_atomic_op_pt(op, uaddr, oparg, old);
         spin_lock(&current->mm->page_table_lock);
         uaddr = (u32 __force __user *)
-                __dat_user_addr((__force unsigned long) uaddr);
+                __dat_user_addr((__force unsigned long) uaddr, 1);
         if (!uaddr) {
                 spin_unlock(&current->mm->page_table_lock);
                 return -EFAULT;
@@ -378,7 +362,7 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
                 return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
         spin_lock(&current->mm->page_table_lock);
         uaddr = (u32 __force __user *)
-                __dat_user_addr((__force unsigned long) uaddr);
+                __dat_user_addr((__force unsigned long) uaddr, 1);
         if (!uaddr) {
                 spin_unlock(&current->mm->page_table_lock);
                 return -EFAULT;