diff options
Diffstat (limited to 'arch/s390/mm/fault.c')
-rw-r--r-- | arch/s390/mm/fault.c | 331 |
1 files changed, 162 insertions, 169 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 7462aebd3eb6..2b76a879a7b5 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c | |||
@@ -26,9 +26,9 @@ | |||
26 | #include <linux/module.h> | 26 | #include <linux/module.h> |
27 | #include <linux/hardirq.h> | 27 | #include <linux/hardirq.h> |
28 | #include <linux/kprobes.h> | 28 | #include <linux/kprobes.h> |
29 | #include <linux/uaccess.h> | ||
29 | 30 | ||
30 | #include <asm/system.h> | 31 | #include <asm/system.h> |
31 | #include <asm/uaccess.h> | ||
32 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
33 | #include <asm/kdebug.h> | 33 | #include <asm/kdebug.h> |
34 | #include <asm/s390_ext.h> | 34 | #include <asm/s390_ext.h> |
@@ -63,21 +63,25 @@ int unregister_page_fault_notifier(struct notifier_block *nb) | |||
63 | return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); | 63 | return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); |
64 | } | 64 | } |
65 | 65 | ||
66 | static inline int notify_page_fault(enum die_val val, const char *str, | 66 | static int __kprobes __notify_page_fault(struct pt_regs *regs, long err) |
67 | struct pt_regs *regs, long err, int trap, int sig) | ||
68 | { | 67 | { |
69 | struct die_args args = { | 68 | struct die_args args = { .str = "page fault", |
70 | .regs = regs, | 69 | .trapnr = 14, |
71 | .str = str, | 70 | .signr = SIGSEGV }; |
72 | .err = err, | 71 | args.regs = regs; |
73 | .trapnr = trap, | 72 | args.err = err; |
74 | .signr = sig | 73 | return atomic_notifier_call_chain(&notify_page_fault_chain, |
75 | }; | 74 | DIE_PAGE_FAULT, &args); |
76 | return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); | 75 | } |
76 | |||
77 | static inline int notify_page_fault(struct pt_regs *regs, long err) | ||
78 | { | ||
79 | if (unlikely(kprobe_running())) | ||
80 | return __notify_page_fault(regs, err); | ||
81 | return NOTIFY_DONE; | ||
77 | } | 82 | } |
78 | #else | 83 | #else |
79 | static inline int notify_page_fault(enum die_val val, const char *str, | 84 | static inline int notify_page_fault(struct pt_regs *regs, long err) |
80 | struct pt_regs *regs, long err, int trap, int sig) | ||
81 | { | 85 | { |
82 | return NOTIFY_DONE; | 86 | return NOTIFY_DONE; |
83 | } | 87 | } |
@@ -170,74 +174,127 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, | |||
170 | force_sig_info(SIGSEGV, &si, current); | 174 | force_sig_info(SIGSEGV, &si, current); |
171 | } | 175 | } |
172 | 176 | ||
177 | static void do_no_context(struct pt_regs *regs, unsigned long error_code, | ||
178 | unsigned long address) | ||
179 | { | ||
180 | const struct exception_table_entry *fixup; | ||
181 | |||
182 | /* Are we prepared to handle this kernel fault? */ | ||
183 | fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK); | ||
184 | if (fixup) { | ||
185 | regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; | ||
186 | return; | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * Oops. The kernel tried to access some bad page. We'll have to | ||
191 | * terminate things with extreme prejudice. | ||
192 | */ | ||
193 | if (check_space(current) == 0) | ||
194 | printk(KERN_ALERT "Unable to handle kernel pointer dereference" | ||
195 | " at virtual kernel address %p\n", (void *)address); | ||
196 | else | ||
197 | printk(KERN_ALERT "Unable to handle kernel paging request" | ||
198 | " at virtual user address %p\n", (void *)address); | ||
199 | |||
200 | die("Oops", regs, error_code); | ||
201 | do_exit(SIGKILL); | ||
202 | } | ||
203 | |||
204 | static void do_low_address(struct pt_regs *regs, unsigned long error_code) | ||
205 | { | ||
206 | /* Low-address protection hit in kernel mode means | ||
207 | NULL pointer write access in kernel mode. */ | ||
208 | if (regs->psw.mask & PSW_MASK_PSTATE) { | ||
209 | /* Low-address protection hit in user mode 'cannot happen'. */ | ||
210 | die ("Low-address protection", regs, error_code); | ||
211 | do_exit(SIGKILL); | ||
212 | } | ||
213 | |||
214 | do_no_context(regs, error_code, 0); | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * We ran out of memory, or some other thing happened to us that made | ||
219 | * us unable to handle the page fault gracefully. | ||
220 | */ | ||
221 | static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code, | ||
222 | unsigned long address) | ||
223 | { | ||
224 | struct task_struct *tsk = current; | ||
225 | struct mm_struct *mm = tsk->mm; | ||
226 | |||
227 | up_read(&mm->mmap_sem); | ||
228 | if (is_init(tsk)) { | ||
229 | yield(); | ||
230 | down_read(&mm->mmap_sem); | ||
231 | return 1; | ||
232 | } | ||
233 | printk("VM: killing process %s\n", tsk->comm); | ||
234 | if (regs->psw.mask & PSW_MASK_PSTATE) | ||
235 | do_exit(SIGKILL); | ||
236 | do_no_context(regs, error_code, address); | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | static void do_sigbus(struct pt_regs *regs, unsigned long error_code, | ||
241 | unsigned long address) | ||
242 | { | ||
243 | struct task_struct *tsk = current; | ||
244 | struct mm_struct *mm = tsk->mm; | ||
245 | |||
246 | up_read(&mm->mmap_sem); | ||
247 | /* | ||
248 | * Send a sigbus, regardless of whether we were in kernel | ||
249 | * or user mode. | ||
250 | */ | ||
251 | tsk->thread.prot_addr = address; | ||
252 | tsk->thread.trap_no = error_code; | ||
253 | force_sig(SIGBUS, tsk); | ||
254 | |||
255 | /* Kernel mode? Handle exceptions or die */ | ||
256 | if (!(regs->psw.mask & PSW_MASK_PSTATE)) | ||
257 | do_no_context(regs, error_code, address); | ||
258 | } | ||
259 | |||
173 | #ifdef CONFIG_S390_EXEC_PROTECT | 260 | #ifdef CONFIG_S390_EXEC_PROTECT |
174 | extern long sys_sigreturn(struct pt_regs *regs); | 261 | extern long sys_sigreturn(struct pt_regs *regs); |
175 | extern long sys_rt_sigreturn(struct pt_regs *regs); | 262 | extern long sys_rt_sigreturn(struct pt_regs *regs); |
176 | extern long sys32_sigreturn(struct pt_regs *regs); | 263 | extern long sys32_sigreturn(struct pt_regs *regs); |
177 | extern long sys32_rt_sigreturn(struct pt_regs *regs); | 264 | extern long sys32_rt_sigreturn(struct pt_regs *regs); |
178 | 265 | ||
179 | static inline void do_sigreturn(struct mm_struct *mm, struct pt_regs *regs, | 266 | static int signal_return(struct mm_struct *mm, struct pt_regs *regs, |
180 | int rt) | 267 | unsigned long address, unsigned long error_code) |
181 | { | 268 | { |
269 | u16 instruction; | ||
270 | int rc, compat; | ||
271 | |||
272 | pagefault_disable(); | ||
273 | rc = __get_user(instruction, (u16 __user *) regs->psw.addr); | ||
274 | pagefault_enable(); | ||
275 | if (rc) | ||
276 | return -EFAULT; | ||
277 | |||
182 | up_read(&mm->mmap_sem); | 278 | up_read(&mm->mmap_sem); |
183 | clear_tsk_thread_flag(current, TIF_SINGLE_STEP); | 279 | clear_tsk_thread_flag(current, TIF_SINGLE_STEP); |
184 | #ifdef CONFIG_COMPAT | 280 | #ifdef CONFIG_COMPAT |
185 | if (test_tsk_thread_flag(current, TIF_31BIT)) { | 281 | compat = test_tsk_thread_flag(current, TIF_31BIT); |
186 | if (rt) | 282 | if (compat && instruction == 0x0a77) |
187 | sys32_rt_sigreturn(regs); | 283 | sys32_sigreturn(regs); |
188 | else | 284 | else if (compat && instruction == 0x0aad) |
189 | sys32_sigreturn(regs); | 285 | sys32_rt_sigreturn(regs); |
190 | return; | ||
191 | } | ||
192 | #endif /* CONFIG_COMPAT */ | ||
193 | if (rt) | ||
194 | sys_rt_sigreturn(regs); | ||
195 | else | 286 | else |
287 | #endif | ||
288 | if (instruction == 0x0a77) | ||
196 | sys_sigreturn(regs); | 289 | sys_sigreturn(regs); |
197 | return; | 290 | else if (instruction == 0x0aad) |
198 | } | 291 | sys_rt_sigreturn(regs); |
199 | |||
200 | static int signal_return(struct mm_struct *mm, struct pt_regs *regs, | ||
201 | unsigned long address, unsigned long error_code) | ||
202 | { | ||
203 | pgd_t *pgd; | ||
204 | pmd_t *pmd; | ||
205 | pte_t *pte; | ||
206 | u16 *instruction; | ||
207 | unsigned long pfn, uaddr = regs->psw.addr; | ||
208 | |||
209 | spin_lock(&mm->page_table_lock); | ||
210 | pgd = pgd_offset(mm, uaddr); | ||
211 | if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) | ||
212 | goto out_fault; | ||
213 | pmd = pmd_offset(pgd, uaddr); | ||
214 | if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) | ||
215 | goto out_fault; | ||
216 | pte = pte_offset_map(pmd_offset(pgd_offset(mm, uaddr), uaddr), uaddr); | ||
217 | if (!pte || !pte_present(*pte)) | ||
218 | goto out_fault; | ||
219 | pfn = pte_pfn(*pte); | ||
220 | if (!pfn_valid(pfn)) | ||
221 | goto out_fault; | ||
222 | spin_unlock(&mm->page_table_lock); | ||
223 | |||
224 | instruction = (u16 *) ((pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE-1))); | ||
225 | if (*instruction == 0x0a77) | ||
226 | do_sigreturn(mm, regs, 0); | ||
227 | else if (*instruction == 0x0aad) | ||
228 | do_sigreturn(mm, regs, 1); | ||
229 | else { | 292 | else { |
230 | printk("- XXX - do_exception: task = %s, primary, NO EXEC " | ||
231 | "-> SIGSEGV\n", current->comm); | ||
232 | up_read(&mm->mmap_sem); | ||
233 | current->thread.prot_addr = address; | 293 | current->thread.prot_addr = address; |
234 | current->thread.trap_no = error_code; | 294 | current->thread.trap_no = error_code; |
235 | do_sigsegv(regs, error_code, SEGV_MAPERR, address); | 295 | do_sigsegv(regs, error_code, SEGV_MAPERR, address); |
236 | } | 296 | } |
237 | return 0; | 297 | return 0; |
238 | out_fault: | ||
239 | spin_unlock(&mm->page_table_lock); | ||
240 | return -EFAULT; | ||
241 | } | 298 | } |
242 | #endif /* CONFIG_S390_EXEC_PROTECT */ | 299 | #endif /* CONFIG_S390_EXEC_PROTECT */ |
243 | 300 | ||
@@ -253,49 +310,23 @@ out_fault: | |||
253 | * 3b Region third trans. -> Not present (nullification) | 310 | * 3b Region third trans. -> Not present (nullification) |
254 | */ | 311 | */ |
255 | static inline void | 312 | static inline void |
256 | do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) | 313 | do_exception(struct pt_regs *regs, unsigned long error_code, int write) |
257 | { | 314 | { |
258 | struct task_struct *tsk; | 315 | struct task_struct *tsk; |
259 | struct mm_struct *mm; | 316 | struct mm_struct *mm; |
260 | struct vm_area_struct * vma; | 317 | struct vm_area_struct *vma; |
261 | unsigned long address; | 318 | unsigned long address; |
262 | const struct exception_table_entry *fixup; | ||
263 | int si_code; | ||
264 | int space; | 319 | int space; |
320 | int si_code; | ||
265 | 321 | ||
266 | tsk = current; | 322 | if (notify_page_fault(regs, error_code) == NOTIFY_STOP) |
267 | mm = tsk->mm; | ||
268 | |||
269 | if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | ||
270 | SIGSEGV) == NOTIFY_STOP) | ||
271 | return; | 323 | return; |
272 | 324 | ||
273 | /* | 325 | tsk = current; |
274 | * Check for low-address protection. This needs to be treated | 326 | mm = tsk->mm; |
275 | * as a special case because the translation exception code | ||
276 | * field is not guaranteed to contain valid data in this case. | ||
277 | */ | ||
278 | if (is_protection && !(S390_lowcore.trans_exc_code & 4)) { | ||
279 | |||
280 | /* Low-address protection hit in kernel mode means | ||
281 | NULL pointer write access in kernel mode. */ | ||
282 | if (!(regs->psw.mask & PSW_MASK_PSTATE)) { | ||
283 | address = 0; | ||
284 | space = 0; | ||
285 | goto no_context; | ||
286 | } | ||
287 | |||
288 | /* Low-address protection hit in user mode 'cannot happen'. */ | ||
289 | die ("Low-address protection", regs, error_code); | ||
290 | do_exit(SIGKILL); | ||
291 | } | ||
292 | 327 | ||
293 | /* | 328 | /* get the failing address and the affected space */ |
294 | * get the failing address | 329 | address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; |
295 | * more specific the segment and page table portion of | ||
296 | * the address | ||
297 | */ | ||
298 | address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; | ||
299 | space = check_space(tsk); | 330 | space = check_space(tsk); |
300 | 331 | ||
301 | /* | 332 | /* |
@@ -313,7 +344,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) | |||
313 | */ | 344 | */ |
314 | local_irq_enable(); | 345 | local_irq_enable(); |
315 | 346 | ||
316 | down_read(&mm->mmap_sem); | 347 | down_read(&mm->mmap_sem); |
317 | 348 | ||
318 | si_code = SEGV_MAPERR; | 349 | si_code = SEGV_MAPERR; |
319 | vma = find_vma(mm, address); | 350 | vma = find_vma(mm, address); |
@@ -330,19 +361,19 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) | |||
330 | return; | 361 | return; |
331 | #endif | 362 | #endif |
332 | 363 | ||
333 | if (vma->vm_start <= address) | 364 | if (vma->vm_start <= address) |
334 | goto good_area; | 365 | goto good_area; |
335 | if (!(vma->vm_flags & VM_GROWSDOWN)) | 366 | if (!(vma->vm_flags & VM_GROWSDOWN)) |
336 | goto bad_area; | 367 | goto bad_area; |
337 | if (expand_stack(vma, address)) | 368 | if (expand_stack(vma, address)) |
338 | goto bad_area; | 369 | goto bad_area; |
339 | /* | 370 | /* |
340 | * Ok, we have a good vm_area for this memory access, so | 371 | * Ok, we have a good vm_area for this memory access, so |
341 | * we can handle it.. | 372 | * we can handle it.. |
342 | */ | 373 | */ |
343 | good_area: | 374 | good_area: |
344 | si_code = SEGV_ACCERR; | 375 | si_code = SEGV_ACCERR; |
345 | if (!is_protection) { | 376 | if (!write) { |
346 | /* page not present, check vm flags */ | 377 | /* page not present, check vm flags */ |
347 | if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) | 378 | if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) |
348 | goto bad_area; | 379 | goto bad_area; |
@@ -357,7 +388,7 @@ survive: | |||
357 | * make sure we exit gracefully rather than endlessly redo | 388 | * make sure we exit gracefully rather than endlessly redo |
358 | * the fault. | 389 | * the fault. |
359 | */ | 390 | */ |
360 | switch (handle_mm_fault(mm, vma, address, is_protection)) { | 391 | switch (handle_mm_fault(mm, vma, address, write)) { |
361 | case VM_FAULT_MINOR: | 392 | case VM_FAULT_MINOR: |
362 | tsk->min_flt++; | 393 | tsk->min_flt++; |
363 | break; | 394 | break; |
@@ -365,9 +396,12 @@ survive: | |||
365 | tsk->maj_flt++; | 396 | tsk->maj_flt++; |
366 | break; | 397 | break; |
367 | case VM_FAULT_SIGBUS: | 398 | case VM_FAULT_SIGBUS: |
368 | goto do_sigbus; | 399 | do_sigbus(regs, error_code, address); |
400 | return; | ||
369 | case VM_FAULT_OOM: | 401 | case VM_FAULT_OOM: |
370 | goto out_of_memory; | 402 | if (do_out_of_memory(regs, error_code, address)) |
403 | goto survive; | ||
404 | return; | ||
371 | default: | 405 | default: |
372 | BUG(); | 406 | BUG(); |
373 | } | 407 | } |
@@ -385,75 +419,34 @@ survive: | |||
385 | * Fix it, but check if it's kernel or user first.. | 419 | * Fix it, but check if it's kernel or user first.. |
386 | */ | 420 | */ |
387 | bad_area: | 421 | bad_area: |
388 | up_read(&mm->mmap_sem); | 422 | up_read(&mm->mmap_sem); |
389 | 423 | ||
390 | /* User mode accesses just cause a SIGSEGV */ | 424 | /* User mode accesses just cause a SIGSEGV */ |
391 | if (regs->psw.mask & PSW_MASK_PSTATE) { | 425 | if (regs->psw.mask & PSW_MASK_PSTATE) { |
392 | tsk->thread.prot_addr = address; | 426 | tsk->thread.prot_addr = address; |
393 | tsk->thread.trap_no = error_code; | 427 | tsk->thread.trap_no = error_code; |
394 | do_sigsegv(regs, error_code, si_code, address); | 428 | do_sigsegv(regs, error_code, si_code, address); |
395 | return; | 429 | return; |
396 | } | 430 | } |
397 | 431 | ||
398 | no_context: | 432 | no_context: |
399 | /* Are we prepared to handle this kernel fault? */ | 433 | do_no_context(regs, error_code, address); |
400 | fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK); | ||
401 | if (fixup) { | ||
402 | regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; | ||
403 | return; | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * Oops. The kernel tried to access some bad page. We'll have to | ||
408 | * terminate things with extreme prejudice. | ||
409 | */ | ||
410 | if (space == 0) | ||
411 | printk(KERN_ALERT "Unable to handle kernel pointer dereference" | ||
412 | " at virtual kernel address %p\n", (void *)address); | ||
413 | else | ||
414 | printk(KERN_ALERT "Unable to handle kernel paging request" | ||
415 | " at virtual user address %p\n", (void *)address); | ||
416 | |||
417 | die("Oops", regs, error_code); | ||
418 | do_exit(SIGKILL); | ||
419 | |||
420 | |||
421 | /* | ||
422 | * We ran out of memory, or some other thing happened to us that made | ||
423 | * us unable to handle the page fault gracefully. | ||
424 | */ | ||
425 | out_of_memory: | ||
426 | up_read(&mm->mmap_sem); | ||
427 | if (is_init(tsk)) { | ||
428 | yield(); | ||
429 | down_read(&mm->mmap_sem); | ||
430 | goto survive; | ||
431 | } | ||
432 | printk("VM: killing process %s\n", tsk->comm); | ||
433 | if (regs->psw.mask & PSW_MASK_PSTATE) | ||
434 | do_exit(SIGKILL); | ||
435 | goto no_context; | ||
436 | |||
437 | do_sigbus: | ||
438 | up_read(&mm->mmap_sem); | ||
439 | |||
440 | /* | ||
441 | * Send a sigbus, regardless of whether we were in kernel | ||
442 | * or user mode. | ||
443 | */ | ||
444 | tsk->thread.prot_addr = address; | ||
445 | tsk->thread.trap_no = error_code; | ||
446 | force_sig(SIGBUS, tsk); | ||
447 | |||
448 | /* Kernel mode? Handle exceptions or die */ | ||
449 | if (!(regs->psw.mask & PSW_MASK_PSTATE)) | ||
450 | goto no_context; | ||
451 | } | 434 | } |
452 | 435 | ||
453 | void __kprobes do_protection_exception(struct pt_regs *regs, | 436 | void __kprobes do_protection_exception(struct pt_regs *regs, |
454 | unsigned long error_code) | 437 | unsigned long error_code) |
455 | { | 438 | { |
439 | /* Protection exception is suppressing, decrement psw address. */ ||
456 | regs->psw.addr -= (error_code >> 16); | 440 | regs->psw.addr -= (error_code >> 16); |
441 | /* | ||
442 | * Check for low-address protection. This needs to be treated | ||
443 | * as a special case because the translation exception code | ||
444 | * field is not guaranteed to contain valid data in this case. | ||
445 | */ | ||
446 | if (unlikely(!(S390_lowcore.trans_exc_code & 4))) { | ||
447 | do_low_address(regs, error_code); | ||
448 | return; | ||
449 | } | ||
457 | do_exception(regs, 4, 1); | 450 | do_exception(regs, 4, 1); |
458 | } | 451 | } |
459 | 452 | ||