aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/lib/uaccess_pt.c
diff options
context:
space:
mode:
authorGerald Schaefer <geraldsc@de.ibm.com>2007-02-05 15:18:17 -0500
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2007-02-05 15:18:17 -0500
commitc1821c2e9711adc3cd298a16b7237c92a2cee78d (patch)
tree9155b089db35a37d95863125ea4c5f918bd7801b /arch/s390/lib/uaccess_pt.c
parent86aa9fc2456d8a662f299a70bdb70987209170f0 (diff)
[S390] noexec protection
This provides a noexec protection on s390 hardware. Our hardware does not have any bits left in the pte for a hw noexec bit, so this is a different approach using shadow page tables and a special addressing mode that allows separate address spaces for code and data. As a special feature of our "secondary-space" addressing mode, separate page tables can be specified for the translation of data addresses (storage operands) and instruction addresses. The shadow page table is used for the instruction addresses and the standard page table for the data addresses. The shadow page table is linked to the standard page table by a pointer in page->lru.next of the struct page corresponding to the page that contains the standard page table (since page->private is not really private with the pte_lock and the page table pages are not in the LRU list). Depending on the software bits of a pte, it is either inserted into both page tables or just into the standard (data) page table. Pages of a vma that does not have the VM_EXEC bit set get mapped only in the data address space. Any try to execute code on such a page will cause a page translation exception. The standard reaction to this is a SIGSEGV with two exceptions: the two system call opcodes 0x0a77 (sys_sigreturn) and 0x0aad (sys_rt_sigreturn) are allowed. They are stored by the kernel to the signal stack frame. Unfortunately, the signal return mechanism cannot be modified to use an SA_RESTORER because the exception unwinding code depends on the system call opcode stored behind the signal stack frame. This feature requires that user space is executed in secondary-space mode and the kernel in home-space mode, which means that the addressing modes need to be switched and that the noexec protection only works for user space. After switching the addressing modes, we cannot use the mvcp/mvcs instructions anymore to copy between kernel and user space. A new mvcos instruction has been added to the z9 EC/BC hardware which allows to copy between arbitrary address spaces, but on older hardware the page tables need to be walked manually. Signed-off-by: Gerald Schaefer <geraldsc@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/lib/uaccess_pt.c')
-rw-r--r--arch/s390/lib/uaccess_pt.c320
1 files changed, 319 insertions, 1 deletions
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 24ead559c7b..637192fa7c9 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -1,7 +1,8 @@
1/* 1/*
2 * arch/s390/lib/uaccess_pt.c 2 * arch/s390/lib/uaccess_pt.c
3 * 3 *
4 * User access functions based on page table walks. 4 * User access functions based on page table walks for enhanced
5 * system layout without hardware support.
5 * 6 *
6 * Copyright IBM Corp. 2006 7 * Copyright IBM Corp. 2006
7 * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) 8 * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com)
@@ -134,6 +135,49 @@ fault:
134 goto retry; 135 goto retry;
135} 136}
136 137
138/*
139 * Do DAT for user address by page table walk, return kernel address.
140 * This function needs to be called with current->mm->page_table_lock held.
141 */
142static inline unsigned long __dat_user_addr(unsigned long uaddr)
143{
144 struct mm_struct *mm = current->mm;
145 unsigned long pfn, ret;
146 pgd_t *pgd;
147 pmd_t *pmd;
148 pte_t *pte;
149 int rc;
150
151 ret = 0;
152retry:
153 pgd = pgd_offset(mm, uaddr);
154 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
155 goto fault;
156
157 pmd = pmd_offset(pgd, uaddr);
158 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
159 goto fault;
160
161 pte = pte_offset_map(pmd, uaddr);
162 if (!pte || !pte_present(*pte))
163 goto fault;
164
165 pfn = pte_pfn(*pte);
166 if (!pfn_valid(pfn))
167 goto out;
168
169 ret = (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1));
170out:
171 return ret;
172fault:
173 spin_unlock(&mm->page_table_lock);
174 rc = __handle_fault(mm, uaddr, 0);
175 spin_lock(&mm->page_table_lock);
176 if (rc)
177 goto out;
178 goto retry;
179}
180
137size_t copy_from_user_pt(size_t n, const void __user *from, void *to) 181size_t copy_from_user_pt(size_t n, const void __user *from, void *to)
138{ 182{
139 size_t rc; 183 size_t rc;
@@ -156,3 +200,277 @@ size_t copy_to_user_pt(size_t n, void __user *to, const void *from)
156 } 200 }
157 return __user_copy_pt((unsigned long) to, (void *) from, n, 1); 201 return __user_copy_pt((unsigned long) to, (void *) from, n, 1);
158} 202}
203
204static size_t clear_user_pt(size_t n, void __user *to)
205{
206 long done, size, ret;
207
208 if (segment_eq(get_fs(), KERNEL_DS)) {
209 memset((void __kernel __force *) to, 0, n);
210 return 0;
211 }
212 done = 0;
213 do {
214 if (n - done > PAGE_SIZE)
215 size = PAGE_SIZE;
216 else
217 size = n - done;
218 ret = __user_copy_pt((unsigned long) to + done,
219 &empty_zero_page, size, 1);
220 done += size;
221 if (ret)
222 return ret + n - done;
223 } while (done < n);
224 return 0;
225}
226
227static size_t strnlen_user_pt(size_t count, const char __user *src)
228{
229 char *addr;
230 unsigned long uaddr = (unsigned long) src;
231 struct mm_struct *mm = current->mm;
232 unsigned long offset, pfn, done, len;
233 pgd_t *pgd;
234 pmd_t *pmd;
235 pte_t *pte;
236 size_t len_str;
237
238 if (segment_eq(get_fs(), KERNEL_DS))
239 return strnlen((const char __kernel __force *) src, count) + 1;
240 done = 0;
241retry:
242 spin_lock(&mm->page_table_lock);
243 do {
244 pgd = pgd_offset(mm, uaddr);
245 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
246 goto fault;
247
248 pmd = pmd_offset(pgd, uaddr);
249 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
250 goto fault;
251
252 pte = pte_offset_map(pmd, uaddr);
253 if (!pte || !pte_present(*pte))
254 goto fault;
255
256 pfn = pte_pfn(*pte);
257 if (!pfn_valid(pfn)) {
258 done = -1;
259 goto out;
260 }
261
262 offset = uaddr & (PAGE_SIZE-1);
263 addr = (char *)(pfn << PAGE_SHIFT) + offset;
264 len = min(count - done, PAGE_SIZE - offset);
265 len_str = strnlen(addr, len);
266 done += len_str;
267 uaddr += len_str;
268 } while ((len_str == len) && (done < count));
269out:
270 spin_unlock(&mm->page_table_lock);
271 return done + 1;
272fault:
273 spin_unlock(&mm->page_table_lock);
274 if (__handle_fault(mm, uaddr, 0)) {
275 return 0;
276 }
277 goto retry;
278}
279
280static size_t strncpy_from_user_pt(size_t count, const char __user *src,
281 char *dst)
282{
283 size_t n = strnlen_user_pt(count, src);
284
285 if (!n)
286 return -EFAULT;
287 if (n > count)
288 n = count;
289 if (segment_eq(get_fs(), KERNEL_DS)) {
290 memcpy(dst, (const char __kernel __force *) src, n);
291 if (dst[n-1] == '\0')
292 return n-1;
293 else
294 return n;
295 }
296 if (__user_copy_pt((unsigned long) src, dst, n, 0))
297 return -EFAULT;
298 if (dst[n-1] == '\0')
299 return n-1;
300 else
301 return n;
302}
303
304static size_t copy_in_user_pt(size_t n, void __user *to,
305 const void __user *from)
306{
307 struct mm_struct *mm = current->mm;
308 unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to,
309 uaddr, done, size;
310 unsigned long uaddr_from = (unsigned long) from;
311 unsigned long uaddr_to = (unsigned long) to;
312 pgd_t *pgd_from, *pgd_to;
313 pmd_t *pmd_from, *pmd_to;
314 pte_t *pte_from, *pte_to;
315 int write_user;
316
317 done = 0;
318retry:
319 spin_lock(&mm->page_table_lock);
320 do {
321 pgd_from = pgd_offset(mm, uaddr_from);
322 if (pgd_none(*pgd_from) || unlikely(pgd_bad(*pgd_from))) {
323 uaddr = uaddr_from;
324 write_user = 0;
325 goto fault;
326 }
327 pgd_to = pgd_offset(mm, uaddr_to);
328 if (pgd_none(*pgd_to) || unlikely(pgd_bad(*pgd_to))) {
329 uaddr = uaddr_to;
330 write_user = 1;
331 goto fault;
332 }
333
334 pmd_from = pmd_offset(pgd_from, uaddr_from);
335 if (pmd_none(*pmd_from) || unlikely(pmd_bad(*pmd_from))) {
336 uaddr = uaddr_from;
337 write_user = 0;
338 goto fault;
339 }
340 pmd_to = pmd_offset(pgd_to, uaddr_to);
341 if (pmd_none(*pmd_to) || unlikely(pmd_bad(*pmd_to))) {
342 uaddr = uaddr_to;
343 write_user = 1;
344 goto fault;
345 }
346
347 pte_from = pte_offset_map(pmd_from, uaddr_from);
348 if (!pte_from || !pte_present(*pte_from)) {
349 uaddr = uaddr_from;
350 write_user = 0;
351 goto fault;
352 }
353 pte_to = pte_offset_map(pmd_to, uaddr_to);
354 if (!pte_to || !pte_present(*pte_to) || !pte_write(*pte_to)) {
355 uaddr = uaddr_to;
356 write_user = 1;
357 goto fault;
358 }
359
360 pfn_from = pte_pfn(*pte_from);
361 if (!pfn_valid(pfn_from))
362 goto out;
363 pfn_to = pte_pfn(*pte_to);
364 if (!pfn_valid(pfn_to))
365 goto out;
366
367 offset_from = uaddr_from & (PAGE_SIZE-1);
368 offset_to = uaddr_from & (PAGE_SIZE-1);
369 offset_max = max(offset_from, offset_to);
370 size = min(n - done, PAGE_SIZE - offset_max);
371
372 memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to,
373 (void *)(pfn_from << PAGE_SHIFT) + offset_from, size);
374 done += size;
375 uaddr_from += size;
376 uaddr_to += size;
377 } while (done < n);
378out:
379 spin_unlock(&mm->page_table_lock);
380 return n - done;
381fault:
382 spin_unlock(&mm->page_table_lock);
383 if (__handle_fault(mm, uaddr, write_user))
384 return n - done;
385 goto retry;
386}
387
388#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \
389 asm volatile("0: l %1,0(%6)\n" \
390 "1: " insn \
391 "2: cs %1,%2,0(%6)\n" \
392 "3: jl 1b\n" \
393 " lhi %0,0\n" \
394 "4:\n" \
395 EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
396 : "=d" (ret), "=&d" (oldval), "=&d" (newval), \
397 "=m" (*uaddr) \
398 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
399 "m" (*uaddr) : "cc" );
400
401int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
402{
403 int oldval = 0, newval, ret;
404
405 spin_lock(&current->mm->page_table_lock);
406 uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
407 if (!uaddr) {
408 spin_unlock(&current->mm->page_table_lock);
409 return -EFAULT;
410 }
411 get_page(virt_to_page(uaddr));
412 spin_unlock(&current->mm->page_table_lock);
413 switch (op) {
414 case FUTEX_OP_SET:
415 __futex_atomic_op("lr %2,%5\n",
416 ret, oldval, newval, uaddr, oparg);
417 break;
418 case FUTEX_OP_ADD:
419 __futex_atomic_op("lr %2,%1\nar %2,%5\n",
420 ret, oldval, newval, uaddr, oparg);
421 break;
422 case FUTEX_OP_OR:
423 __futex_atomic_op("lr %2,%1\nor %2,%5\n",
424 ret, oldval, newval, uaddr, oparg);
425 break;
426 case FUTEX_OP_ANDN:
427 __futex_atomic_op("lr %2,%1\nnr %2,%5\n",
428 ret, oldval, newval, uaddr, oparg);
429 break;
430 case FUTEX_OP_XOR:
431 __futex_atomic_op("lr %2,%1\nxr %2,%5\n",
432 ret, oldval, newval, uaddr, oparg);
433 break;
434 default:
435 ret = -ENOSYS;
436 }
437 put_page(virt_to_page(uaddr));
438 *old = oldval;
439 return ret;
440}
441
442int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
443{
444 int ret;
445
446 spin_lock(&current->mm->page_table_lock);
447 uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
448 if (!uaddr) {
449 spin_unlock(&current->mm->page_table_lock);
450 return -EFAULT;
451 }
452 get_page(virt_to_page(uaddr));
453 spin_unlock(&current->mm->page_table_lock);
454 asm volatile(" cs %1,%4,0(%5)\n"
455 "0: lr %0,%1\n"
456 "1:\n"
457 EX_TABLE(0b,1b)
458 : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
459 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
460 : "cc", "memory" );
461 put_page(virt_to_page(uaddr));
462 return ret;
463}
464
465struct uaccess_ops uaccess_pt = {
466 .copy_from_user = copy_from_user_pt,
467 .copy_from_user_small = copy_from_user_pt,
468 .copy_to_user = copy_to_user_pt,
469 .copy_to_user_small = copy_to_user_pt,
470 .copy_in_user = copy_in_user_pt,
471 .clear_user = clear_user_pt,
472 .strnlen_user = strnlen_user_pt,
473 .strncpy_from_user = strncpy_from_user_pt,
474 .futex_atomic_op = futex_atomic_op_pt,
475 .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt,
476};