diff options
author | Peter Zijlstra <peterz@infradead.org> | 2016-03-22 16:42:53 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2016-04-15 12:16:37 -0400 |
commit | 0227f7c42d9e01b00ea8cbd635aaf92a09b54abc (patch) | |
tree | 7b796ca59c6d703b857475cab4e07595cc2dd15c /arch/s390/mm | |
parent | 5a3b7b112884f80ff19b18028fabeb4f9c035518 (diff) |
s390: Clarify pagefault interrupt
While looking at set_task_state() users I stumbled over the s390 pfault
interrupt code. Since Heiko provided a great explanation on how it
worked, I figured we ought to preserve this.
Also make a few little tweaks to the code to aid in readability and
explicitly comment the unusual blocking scheme.
Based-on-text-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/fault.c | 41 |
1 files changed, 33 insertions, 8 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index cce577feab1e..7a3144017301 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c | |||
@@ -631,6 +631,29 @@ void pfault_fini(void) | |||
631 | static DEFINE_SPINLOCK(pfault_lock); | 631 | static DEFINE_SPINLOCK(pfault_lock); |
632 | static LIST_HEAD(pfault_list); | 632 | static LIST_HEAD(pfault_list); |
633 | 633 | ||
634 | #define PF_COMPLETE 0x0080 | ||
635 | |||
636 | /* | ||
637 | * The mechanism of our pfault code: if Linux is running as a guest and a user | ||
638 | * space process accesses a page that the host has paged out, we get a pfault | ||
639 | * interrupt. | ||
640 | * | ||
641 | * This allows us, within the guest, to schedule a different process. Without | ||
642 | * this mechanism the host would have to suspend the whole virtual cpu until | ||
643 | * the page has been paged in. | ||
644 | * | ||
645 | * So when we get such an interrupt then we set the state of the current task | ||
646 | * to uninterruptible and also set the need_resched flag. Both happen within | ||
647 | * interrupt context(!). If we later on want to return to user space we | ||
648 | * recognize the need_resched flag and then call schedule(). It's not very | ||
649 | * obvious how this works... | ||
650 | * | ||
651 | * Of course we have a lot of additional fun with the completion interrupt (-> | ||
652 | * host signals that a page of a process has been paged in and the process can | ||
653 | * continue to run). This interrupt can arrive on any cpu and, since we have | ||
654 | * virtual cpus, actually appear before the interrupt that signals that a page | ||
655 | * is missing. | ||
656 | */ | ||
634 | static void pfault_interrupt(struct ext_code ext_code, | 657 | static void pfault_interrupt(struct ext_code ext_code, |
635 | unsigned int param32, unsigned long param64) | 658 | unsigned int param32, unsigned long param64) |
636 | { | 659 | { |
@@ -639,10 +662,9 @@ static void pfault_interrupt(struct ext_code ext_code, | |||
639 | pid_t pid; | 662 | pid_t pid; |
640 | 663 | ||
641 | /* | 664 | /* |
642 | * Get the external interruption subcode & pfault | 665 | * Get the external interruption subcode & pfault initial/completion |
643 | * initial/completion signal bit. VM stores this | 666 | * signal bit. VM stores this in the 'cpu address' field associated |
644 | * in the 'cpu address' field associated with the | 667 | * with the external interrupt. |
645 | * external interrupt. | ||
646 | */ | 668 | */ |
647 | subcode = ext_code.subcode; | 669 | subcode = ext_code.subcode; |
648 | if ((subcode & 0xff00) != __SUBCODE_MASK) | 670 | if ((subcode & 0xff00) != __SUBCODE_MASK) |
@@ -658,7 +680,7 @@ static void pfault_interrupt(struct ext_code ext_code, | |||
658 | if (!tsk) | 680 | if (!tsk) |
659 | return; | 681 | return; |
660 | spin_lock(&pfault_lock); | 682 | spin_lock(&pfault_lock); |
661 | if (subcode & 0x0080) { | 683 | if (subcode & PF_COMPLETE) { |
662 | /* signal bit is set -> a page has been swapped in by VM */ | 684 | /* signal bit is set -> a page has been swapped in by VM */ |
663 | if (tsk->thread.pfault_wait == 1) { | 685 | if (tsk->thread.pfault_wait == 1) { |
664 | /* Initial interrupt was faster than the completion | 686 | /* Initial interrupt was faster than the completion |
@@ -687,8 +709,7 @@ static void pfault_interrupt(struct ext_code ext_code, | |||
687 | goto out; | 709 | goto out; |
688 | if (tsk->thread.pfault_wait == 1) { | 710 | if (tsk->thread.pfault_wait == 1) { |
689 | /* Already on the list with a reference: put to sleep */ | 711 | /* Already on the list with a reference: put to sleep */ |
690 | __set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 712 | goto block; |
691 | set_tsk_need_resched(tsk); | ||
692 | } else if (tsk->thread.pfault_wait == -1) { | 713 | } else if (tsk->thread.pfault_wait == -1) { |
693 | /* Completion interrupt was faster than the initial | 714 | /* Completion interrupt was faster than the initial |
694 | * interrupt (pfault_wait == -1). Set pfault_wait | 715 | * interrupt (pfault_wait == -1). Set pfault_wait |
@@ -703,7 +724,11 @@ static void pfault_interrupt(struct ext_code ext_code, | |||
703 | get_task_struct(tsk); | 724 | get_task_struct(tsk); |
704 | tsk->thread.pfault_wait = 1; | 725 | tsk->thread.pfault_wait = 1; |
705 | list_add(&tsk->thread.list, &pfault_list); | 726 | list_add(&tsk->thread.list, &pfault_list); |
706 | __set_task_state(tsk, TASK_UNINTERRUPTIBLE); | 727 | block: |
728 | /* Since this must be a userspace fault, there | ||
729 | * is no kernel task state to trample. Rely on the | ||
730 | * return to userspace schedule() to block. */ | ||
731 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
707 | set_tsk_need_resched(tsk); | 732 | set_tsk_need_resched(tsk); |
708 | } | 733 | } |
709 | } | 734 | } |