aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/mm
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2016-03-22 16:42:53 -0400
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2016-04-15 12:16:37 -0400
commit0227f7c42d9e01b00ea8cbd635aaf92a09b54abc (patch)
tree7b796ca59c6d703b857475cab4e07595cc2dd15c /arch/s390/mm
parent5a3b7b112884f80ff19b18028fabeb4f9c035518 (diff)
s390: Clarify pagefault interrupt
While looking at set_task_state() users I stumbled over the s390 pfault interrupt code. Since Heiko provided a great explanation on how it worked, I figured we ought to preserve this. Also make a few little tweaks to the code to aid in readability and explicitly comment the unusual blocking scheme. Based-on-text-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--arch/s390/mm/fault.c41
1 file changed, 33 insertions, 8 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index cce577feab1e..7a3144017301 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -631,6 +631,29 @@ void pfault_fini(void)
631static DEFINE_SPINLOCK(pfault_lock); 631static DEFINE_SPINLOCK(pfault_lock);
632static LIST_HEAD(pfault_list); 632static LIST_HEAD(pfault_list);
633 633
634#define PF_COMPLETE 0x0080
635
636/*
637 * The mechanism of our pfault code: if Linux is running as guest, runs a user
638 * space process and the user space process accesses a page that the host has
639 * paged out we get a pfault interrupt.
640 *
641 * This allows us, within the guest, to schedule a different process. Without
642 * this mechanism the host would have to suspend the whole virtual cpu until
643 * the page has been paged in.
644 *
645 * So when we get such an interrupt then we set the state of the current task
646 * to uninterruptible and also set the need_resched flag. Both happen within 669
647 * interrupt context(!). If we later on want to return to user space we
648 * recognize the need_resched flag and then call schedule(). It's not very
649 * obvious how this works...
650 *
651 * Of course we have a lot of additional fun with the completion interrupt (->
652 * host signals that a page of a process has been paged in and the process can
653 * continue to run). This interrupt can arrive on any cpu and, since we have
654 * virtual cpus, actually appear before the interrupt that signals that a page
655 * is missing.
656 */
634static void pfault_interrupt(struct ext_code ext_code, 657static void pfault_interrupt(struct ext_code ext_code,
635 unsigned int param32, unsigned long param64) 658 unsigned int param32, unsigned long param64)
636{ 659{
@@ -639,10 +662,9 @@ static void pfault_interrupt(struct ext_code ext_code,
639 pid_t pid; 662 pid_t pid;
640 663
641 /* 664 /*
642 * Get the external interruption subcode & pfault 665 * Get the external interruption subcode & pfault initial/completion
643 * initial/completion signal bit. VM stores this 666 * signal bit. VM stores this in the 'cpu address' field associated
644 * in the 'cpu address' field associated with the 667 * with the external interrupt.
645 * external interrupt.
646 */ 668 */
647 subcode = ext_code.subcode; 669 subcode = ext_code.subcode;
648 if ((subcode & 0xff00) != __SUBCODE_MASK) 670 if ((subcode & 0xff00) != __SUBCODE_MASK)
@@ -658,7 +680,7 @@ static void pfault_interrupt(struct ext_code ext_code,
658 if (!tsk) 680 if (!tsk)
659 return; 681 return;
660 spin_lock(&pfault_lock); 682 spin_lock(&pfault_lock);
661 if (subcode & 0x0080) { 683 if (subcode & PF_COMPLETE) {
662 /* signal bit is set -> a page has been swapped in by VM */ 684 /* signal bit is set -> a page has been swapped in by VM */
663 if (tsk->thread.pfault_wait == 1) { 685 if (tsk->thread.pfault_wait == 1) {
664 /* Initial interrupt was faster than the completion 686 /* Initial interrupt was faster than the completion
@@ -687,8 +709,7 @@ static void pfault_interrupt(struct ext_code ext_code,
687 goto out; 709 goto out;
688 if (tsk->thread.pfault_wait == 1) { 710 if (tsk->thread.pfault_wait == 1) {
689 /* Already on the list with a reference: put to sleep */ 711 /* Already on the list with a reference: put to sleep */
690 __set_task_state(tsk, TASK_UNINTERRUPTIBLE); 712 goto block;
691 set_tsk_need_resched(tsk);
692 } else if (tsk->thread.pfault_wait == -1) { 713 } else if (tsk->thread.pfault_wait == -1) {
693 /* Completion interrupt was faster than the initial 714 /* Completion interrupt was faster than the initial
694 * interrupt (pfault_wait == -1). Set pfault_wait 715 * interrupt (pfault_wait == -1). Set pfault_wait
@@ -703,7 +724,11 @@ static void pfault_interrupt(struct ext_code ext_code,
703 get_task_struct(tsk); 724 get_task_struct(tsk);
704 tsk->thread.pfault_wait = 1; 725 tsk->thread.pfault_wait = 1;
705 list_add(&tsk->thread.list, &pfault_list); 726 list_add(&tsk->thread.list, &pfault_list);
706 __set_task_state(tsk, TASK_UNINTERRUPTIBLE); 727block:
728 /* Since this must be a userspace fault, there
729 * is no kernel task state to trample. Rely on the
730 * return to userspace schedule() to block. */
731 __set_current_state(TASK_UNINTERRUPTIBLE);
707 set_tsk_need_resched(tsk); 732 set_tsk_need_resched(tsk);
708 } 733 }
709 } 734 }