diff options
Diffstat (limited to 'arch/s390/mm/fault.c')
-rw-r--r-- | arch/s390/mm/fault.c | 187 |
1 files changed, 98 insertions, 89 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ab988135e5c6..a0f9e730f26a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c | |||
@@ -225,33 +225,6 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code, | |||
225 | force_sig_info(SIGBUS, &si, tsk); | 225 | force_sig_info(SIGBUS, &si, tsk); |
226 | } | 226 | } |
227 | 227 | ||
228 | #ifdef CONFIG_S390_EXEC_PROTECT | ||
229 | static noinline int signal_return(struct pt_regs *regs, long int_code, | ||
230 | unsigned long trans_exc_code) | ||
231 | { | ||
232 | u16 instruction; | ||
233 | int rc; | ||
234 | |||
235 | rc = __get_user(instruction, (u16 __user *) regs->psw.addr); | ||
236 | |||
237 | if (!rc && instruction == 0x0a77) { | ||
238 | clear_tsk_thread_flag(current, TIF_PER_TRAP); | ||
239 | if (is_compat_task()) | ||
240 | sys32_sigreturn(); | ||
241 | else | ||
242 | sys_sigreturn(); | ||
243 | } else if (!rc && instruction == 0x0aad) { | ||
244 | clear_tsk_thread_flag(current, TIF_PER_TRAP); | ||
245 | if (is_compat_task()) | ||
246 | sys32_rt_sigreturn(); | ||
247 | else | ||
248 | sys_rt_sigreturn(); | ||
249 | } else | ||
250 | do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); | ||
251 | return 0; | ||
252 | } | ||
253 | #endif /* CONFIG_S390_EXEC_PROTECT */ | ||
254 | |||
255 | static noinline void do_fault_error(struct pt_regs *regs, long int_code, | 228 | static noinline void do_fault_error(struct pt_regs *regs, long int_code, |
256 | unsigned long trans_exc_code, int fault) | 229 | unsigned long trans_exc_code, int fault) |
257 | { | 230 | { |
@@ -259,13 +232,6 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, | |||
259 | 232 | ||
260 | switch (fault) { | 233 | switch (fault) { |
261 | case VM_FAULT_BADACCESS: | 234 | case VM_FAULT_BADACCESS: |
262 | #ifdef CONFIG_S390_EXEC_PROTECT | ||
263 | if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && | ||
264 | (trans_exc_code & 3) == 0) { | ||
265 | signal_return(regs, int_code, trans_exc_code); | ||
266 | break; | ||
267 | } | ||
268 | #endif /* CONFIG_S390_EXEC_PROTECT */ | ||
269 | case VM_FAULT_BADMAP: | 235 | case VM_FAULT_BADMAP: |
270 | /* Bad memory access. Check if it is kernel or user space. */ | 236 | /* Bad memory access. Check if it is kernel or user space. */ |
271 | if (regs->psw.mask & PSW_MASK_PSTATE) { | 237 | if (regs->psw.mask & PSW_MASK_PSTATE) { |
@@ -414,11 +380,6 @@ void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, | |||
414 | int access, fault; | 380 | int access, fault; |
415 | 381 | ||
416 | access = VM_READ | VM_EXEC | VM_WRITE; | 382 | access = VM_READ | VM_EXEC | VM_WRITE; |
417 | #ifdef CONFIG_S390_EXEC_PROTECT | ||
418 | if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && | ||
419 | (trans_exc_code & 3) == 0) | ||
420 | access = VM_EXEC; | ||
421 | #endif | ||
422 | fault = do_exception(regs, access, trans_exc_code); | 383 | fault = do_exception(regs, access, trans_exc_code); |
423 | if (unlikely(fault)) | 384 | if (unlikely(fault)) |
424 | do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); | 385 | do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); |
@@ -491,22 +452,28 @@ static int __init nopfault(char *str) | |||
491 | 452 | ||
492 | __setup("nopfault", nopfault); | 453 | __setup("nopfault", nopfault); |
493 | 454 | ||
494 | typedef struct { | 455 | struct pfault_refbk { |
495 | __u16 refdiagc; | 456 | u16 refdiagc; |
496 | __u16 reffcode; | 457 | u16 reffcode; |
497 | __u16 refdwlen; | 458 | u16 refdwlen; |
498 | __u16 refversn; | 459 | u16 refversn; |
499 | __u64 refgaddr; | 460 | u64 refgaddr; |
500 | __u64 refselmk; | 461 | u64 refselmk; |
501 | __u64 refcmpmk; | 462 | u64 refcmpmk; |
502 | __u64 reserved; | 463 | u64 reserved; |
503 | } __attribute__ ((packed, aligned(8))) pfault_refbk_t; | 464 | } __attribute__ ((packed, aligned(8))); |
504 | 465 | ||
505 | int pfault_init(void) | 466 | int pfault_init(void) |
506 | { | 467 | { |
507 | pfault_refbk_t refbk = | 468 | struct pfault_refbk refbk = { |
508 | { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48, | 469 | .refdiagc = 0x258, |
509 | __PF_RES_FIELD }; | 470 | .reffcode = 0, |
471 | .refdwlen = 5, | ||
472 | .refversn = 2, | ||
473 | .refgaddr = __LC_CURRENT_PID, | ||
474 | .refselmk = 1ULL << 48, | ||
475 | .refcmpmk = 1ULL << 48, | ||
476 | .reserved = __PF_RES_FIELD }; | ||
510 | int rc; | 477 | int rc; |
511 | 478 | ||
512 | if (!MACHINE_IS_VM || pfault_disable) | 479 | if (!MACHINE_IS_VM || pfault_disable) |
@@ -524,8 +491,12 @@ int pfault_init(void) | |||
524 | 491 | ||
525 | void pfault_fini(void) | 492 | void pfault_fini(void) |
526 | { | 493 | { |
527 | pfault_refbk_t refbk = | 494 | struct pfault_refbk refbk = { |
528 | { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; | 495 | .refdiagc = 0x258, |
496 | .reffcode = 1, | ||
497 | .refdwlen = 5, | ||
498 | .refversn = 2, | ||
499 | }; | ||
529 | 500 | ||
530 | if (!MACHINE_IS_VM || pfault_disable) | 501 | if (!MACHINE_IS_VM || pfault_disable) |
531 | return; | 502 | return; |
@@ -537,11 +508,15 @@ void pfault_fini(void) | |||
537 | : : "a" (&refbk), "m" (refbk) : "cc"); | 508 | : : "a" (&refbk), "m" (refbk) : "cc"); |
538 | } | 509 | } |
539 | 510 | ||
511 | static DEFINE_SPINLOCK(pfault_lock); | ||
512 | static LIST_HEAD(pfault_list); | ||
513 | |||
540 | static void pfault_interrupt(unsigned int ext_int_code, | 514 | static void pfault_interrupt(unsigned int ext_int_code, |
541 | unsigned int param32, unsigned long param64) | 515 | unsigned int param32, unsigned long param64) |
542 | { | 516 | { |
543 | struct task_struct *tsk; | 517 | struct task_struct *tsk; |
544 | __u16 subcode; | 518 | __u16 subcode; |
519 | pid_t pid; | ||
545 | 520 | ||
546 | /* | 521 | /* |
547 | * Get the external interruption subcode & pfault | 522 | * Get the external interruption subcode & pfault |
@@ -553,44 +528,79 @@ static void pfault_interrupt(unsigned int ext_int_code, | |||
553 | if ((subcode & 0xff00) != __SUBCODE_MASK) | 528 | if ((subcode & 0xff00) != __SUBCODE_MASK) |
554 | return; | 529 | return; |
555 | kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; | 530 | kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; |
556 | 531 | if (subcode & 0x0080) { | |
557 | /* | 532 | /* Get the token (= pid of the affected task). */ |
558 | * Get the token (= address of the task structure of the affected task). | 533 | pid = sizeof(void *) == 4 ? param32 : param64; |
559 | */ | 534 | rcu_read_lock(); |
560 | #ifdef CONFIG_64BIT | 535 | tsk = find_task_by_pid_ns(pid, &init_pid_ns); |
561 | tsk = (struct task_struct *) param64; | 536 | if (tsk) |
562 | #else | 537 | get_task_struct(tsk); |
563 | tsk = (struct task_struct *) param32; | 538 | rcu_read_unlock(); |
564 | #endif | 539 | if (!tsk) |
565 | 540 | return; | |
541 | } else { | ||
542 | tsk = current; | ||
543 | } | ||
544 | spin_lock(&pfault_lock); | ||
566 | if (subcode & 0x0080) { | 545 | if (subcode & 0x0080) { |
567 | /* signal bit is set -> a page has been swapped in by VM */ | 546 | /* signal bit is set -> a page has been swapped in by VM */ |
568 | if (xchg(&tsk->thread.pfault_wait, -1) != 0) { | 547 | if (tsk->thread.pfault_wait == 1) { |
569 | /* Initial interrupt was faster than the completion | 548 | /* Initial interrupt was faster than the completion |
570 | * interrupt. pfault_wait is valid. Set pfault_wait | 549 | * interrupt. pfault_wait is valid. Set pfault_wait |
571 | * back to zero and wake up the process. This can | 550 | * back to zero and wake up the process. This can |
572 | * safely be done because the task is still sleeping | 551 | * safely be done because the task is still sleeping |
573 | * and can't produce new pfaults. */ | 552 | * and can't produce new pfaults. */ |
574 | tsk->thread.pfault_wait = 0; | 553 | tsk->thread.pfault_wait = 0; |
554 | list_del(&tsk->thread.list); | ||
575 | wake_up_process(tsk); | 555 | wake_up_process(tsk); |
576 | put_task_struct(tsk); | 556 | } else { |
557 | /* Completion interrupt was faster than initial | ||
558 | * interrupt. Set pfault_wait to -1 so the initial | ||
559 | * interrupt doesn't put the task to sleep. */ | ||
560 | tsk->thread.pfault_wait = -1; | ||
577 | } | 561 | } |
562 | put_task_struct(tsk); | ||
578 | } else { | 563 | } else { |
579 | /* signal bit not set -> a real page is missing. */ | 564 | /* signal bit not set -> a real page is missing. */ |
580 | get_task_struct(tsk); | 565 | if (tsk->thread.pfault_wait == -1) { |
581 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
582 | if (xchg(&tsk->thread.pfault_wait, 1) != 0) { | ||
583 | /* Completion interrupt was faster than the initial | 566 | /* Completion interrupt was faster than the initial |
584 | * interrupt (swapped in a -1 for pfault_wait). Set | 567 | * interrupt (pfault_wait == -1). Set pfault_wait |
585 | * pfault_wait back to zero and exit. This can be | 568 | * back to zero and exit. */ |
586 | * done safely because tsk is running in kernel | ||
587 | * mode and can't produce new pfaults. */ | ||
588 | tsk->thread.pfault_wait = 0; | 569 | tsk->thread.pfault_wait = 0; |
589 | set_task_state(tsk, TASK_RUNNING); | 570 | } else { |
590 | put_task_struct(tsk); | 571 | /* Initial interrupt arrived before completion |
591 | } else | 572 | * interrupt. Let the task sleep. */ |
573 | tsk->thread.pfault_wait = 1; | ||
574 | list_add(&tsk->thread.list, &pfault_list); | ||
575 | set_task_state(tsk, TASK_UNINTERRUPTIBLE); | ||
592 | set_tsk_need_resched(tsk); | 576 | set_tsk_need_resched(tsk); |
577 | } | ||
578 | } | ||
579 | spin_unlock(&pfault_lock); | ||
580 | } | ||
581 | |||
582 | static int __cpuinit pfault_cpu_notify(struct notifier_block *self, | ||
583 | unsigned long action, void *hcpu) | ||
584 | { | ||
585 | struct thread_struct *thread, *next; | ||
586 | struct task_struct *tsk; | ||
587 | |||
588 | switch (action) { | ||
589 | case CPU_DEAD: | ||
590 | case CPU_DEAD_FROZEN: | ||
591 | spin_lock_irq(&pfault_lock); | ||
592 | list_for_each_entry_safe(thread, next, &pfault_list, list) { | ||
593 | thread->pfault_wait = 0; | ||
594 | list_del(&thread->list); | ||
595 | tsk = container_of(thread, struct task_struct, thread); | ||
596 | wake_up_process(tsk); | ||
597 | } | ||
598 | spin_unlock_irq(&pfault_lock); | ||
599 | break; | ||
600 | default: | ||
601 | break; | ||
593 | } | 602 | } |
603 | return NOTIFY_OK; | ||
594 | } | 604 | } |
595 | 605 | ||
596 | static int __init pfault_irq_init(void) | 606 | static int __init pfault_irq_init(void) |
@@ -599,22 +609,21 @@ static int __init pfault_irq_init(void) | |||
599 | 609 | ||
600 | if (!MACHINE_IS_VM) | 610 | if (!MACHINE_IS_VM) |
601 | return 0; | 611 | return 0; |
602 | /* | ||
603 | * Try to get pfault pseudo page faults going. | ||
604 | */ | ||
605 | rc = register_external_interrupt(0x2603, pfault_interrupt); | 612 | rc = register_external_interrupt(0x2603, pfault_interrupt); |
606 | if (rc) { | 613 | if (rc) |
607 | pfault_disable = 1; | 614 | goto out_extint; |
608 | return rc; | 615 | rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; |
609 | } | 616 | if (rc) |
610 | if (pfault_init() == 0) | 617 | goto out_pfault; |
611 | return 0; | 618 | hotcpu_notifier(pfault_cpu_notify, 0); |
619 | return 0; | ||
612 | 620 | ||
613 | /* Tough luck, no pfault. */ | 621 | out_pfault: |
614 | pfault_disable = 1; | ||
615 | unregister_external_interrupt(0x2603, pfault_interrupt); | 622 | unregister_external_interrupt(0x2603, pfault_interrupt); |
616 | return 0; | 623 | out_extint: |
624 | pfault_disable = 1; | ||
625 | return rc; | ||
617 | } | 626 | } |
618 | early_initcall(pfault_irq_init); | 627 | early_initcall(pfault_irq_init); |
619 | 628 | ||
620 | #endif | 629 | #endif /* CONFIG_PFAULT */ |