author	Heiko Carstens <heiko.carstens@de.ibm.com>	2011-05-23 04:24:34 -0400
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2011-05-23 04:24:29 -0400
commit	f2db2e6cb3f5f766cbb3788af44705685ff2445a (patch)
tree	11fbf5522f332e13f9bfb6cf4552513e4d865003 /arch/s390/mm
parent	b456d94a9757db54eca4677c1b3a13e7170c9bb3 (diff)
[S390] pfault: cpu hotplug vs missing completion interrupts
On cpu hot remove a PFAULT CANCEL command is sent to the hypervisor
which in turn will cancel all outstanding pfault requests that have
been issued on that cpu (the same happens with a SIGP cpu reset).

The result is that we end up with uninterruptible processes where
the interrupt that would wake up these processes never arrives.

In order to solve this all processes which wait for a pfault
completion interrupt get woken up after a cpu hot remove. The worst
case that could happen is that they fault again and in turn need to
wait again.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
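The handshake the patch reworks drives thread.pfault_wait through three
states: 0 (no request pending), 1 (initial interrupt seen, task sleeping on
pfault_list), and -1 (the completion interrupt overtook the initial one).
Below is a minimal userspace sketch of that state machine plus the new
CPU_DEAD cleanup; it is an illustration only, not kernel code — the task
struct, list walk, and the sleep/wake stubs are hypothetical stand-ins for
thread_struct, pfault_list, and wake_up_process().

/*
 * Minimal userspace model of the pfault_wait handshake; an illustration
 * only, assuming simplified tasks and list handling (sleep_task/wake_task
 * stand in for the scheduler, "next" for thread.list/pfault_list).
 */
#include <stdio.h>
#include <stddef.h>

struct task {
	const char *name;
	int pfault_wait;	/* 0 idle, 1 sleeping, -1 completion first */
	struct task *next;	/* chained on pfault_list while sleeping */
};

static struct task *pfault_list;

static void sleep_task(struct task *t) { printf("%s sleeps\n", t->name); }
static void wake_task(struct task *t)  { printf("%s woken\n", t->name); }

/* Initial interrupt: a real page is missing, the task should wait. */
static void initial_interrupt(struct task *t)
{
	if (t->pfault_wait == -1) {
		t->pfault_wait = 0;	/* completion already arrived */
	} else {
		t->pfault_wait = 1;
		t->next = pfault_list;
		pfault_list = t;
		sleep_task(t);
	}
}

/* Completion interrupt: the page was swapped in, wake the waiter. */
static void completion_interrupt(struct task *t)
{
	if (t->pfault_wait == 1) {
		struct task **p = &pfault_list;

		while (*p && *p != t)	/* unlink from the wait list */
			p = &(*p)->next;
		if (*p)
			*p = t->next;
		t->pfault_wait = 0;
		wake_task(t);
	} else {
		t->pfault_wait = -1;	/* beat the initial interrupt */
	}
}

/*
 * CPU hot remove: outstanding requests were cancelled, so their
 * completion interrupts never arrive. Wake every waiter; worst case
 * a task faults again and simply waits again.
 */
static void cpu_dead(void)
{
	while (pfault_list) {
		struct task *t = pfault_list;

		pfault_list = t->next;
		t->pfault_wait = 0;
		wake_task(t);
	}
}

int main(void)
{
	struct task a = { "task-a", 0, NULL };
	struct task b = { "task-b", 0, NULL };

	initial_interrupt(&a);		/* task-a sleeps on the list */
	completion_interrupt(&b);	/* overtakes: task-b marked -1 */
	initial_interrupt(&b);		/* sees -1, never sleeps */
	cpu_dead();			/* wakes task-a despite the lost irq */
	return 0;
}

The -1 state exists because the two external interrupts are delivered
independently, so the completion can arrive first; without it the task
would go to sleep with no interrupt left to wake it — the same stuck
situation the CPU_DEAD handler repairs for cancelled requests.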
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--	arch/s390/mm/fault.c	89
1 file changed, 65 insertions(+), 24 deletions(-)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 177745c520ca..1ca656478326 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -466,7 +466,7 @@ typedef struct {
 int pfault_init(void)
 {
 	pfault_refbk_t refbk =
-		{ 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48,
+		{ 0x258, 0, 5, 2, __LC_CURRENT_PID, 1ULL << 48, 1ULL << 48,
 		  __PF_RES_FIELD };
 	int rc;
 
@@ -498,11 +498,15 @@ void pfault_fini(void)
 		: : "a" (&refbk), "m" (refbk) : "cc");
 }
 
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
 static void pfault_interrupt(unsigned int ext_int_code,
 			     unsigned int param32, unsigned long param64)
 {
 	struct task_struct *tsk;
 	__u16 subcode;
+	pid_t pid;
 
 	/*
 	 * Get the external interruption subcode & pfault
@@ -514,44 +518,79 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	if ((subcode & 0xff00) != __SUBCODE_MASK)
 		return;
 	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
-
-	/*
-	 * Get the token (= address of the task structure of the affected task).
-	 */
-#ifdef CONFIG_64BIT
-	tsk = (struct task_struct *) param64;
-#else
-	tsk = (struct task_struct *) param32;
-#endif
-
+	if (subcode & 0x0080) {
+		/* Get the token (= pid of the affected task). */
+		pid = sizeof(void *) == 4 ? param32 : param64;
+		rcu_read_lock();
+		tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+		if (tsk)
+			get_task_struct(tsk);
+		rcu_read_unlock();
+		if (!tsk)
+			return;
+	} else {
+		tsk = current;
+	}
+	spin_lock(&pfault_lock);
 	if (subcode & 0x0080) {
 		/* signal bit is set -> a page has been swapped in by VM */
-		if (xchg(&tsk->thread.pfault_wait, -1) != 0) {
+		if (tsk->thread.pfault_wait == 1) {
 			/* Initial interrupt was faster than the completion
 			 * interrupt. pfault_wait is valid. Set pfault_wait
 			 * back to zero and wake up the process. This can
 			 * safely be done because the task is still sleeping
 			 * and can't produce new pfaults. */
 			tsk->thread.pfault_wait = 0;
+			list_del(&tsk->thread.list);
 			wake_up_process(tsk);
-			put_task_struct(tsk);
+		} else {
+			/* Completion interrupt was faster than initial
+			 * interrupt. Set pfault_wait to -1 so the initial
+			 * interrupt doesn't put the task to sleep. */
+			tsk->thread.pfault_wait = -1;
 		}
+		put_task_struct(tsk);
 	} else {
 		/* signal bit not set -> a real page is missing. */
-		get_task_struct(tsk);
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		if (xchg(&tsk->thread.pfault_wait, 1) != 0) {
+		if (tsk->thread.pfault_wait == -1) {
 			/* Completion interrupt was faster than the initial
-			 * interrupt (swapped in a -1 for pfault_wait). Set
-			 * pfault_wait back to zero and exit. This can be
-			 * done safely because tsk is running in kernel
-			 * mode and can't produce new pfaults. */
+			 * interrupt (pfault_wait == -1). Set pfault_wait
+			 * back to zero and exit. */
 			tsk->thread.pfault_wait = 0;
-			set_task_state(tsk, TASK_RUNNING);
-			put_task_struct(tsk);
-		} else
+		} else {
+			/* Initial interrupt arrived before completion
+			 * interrupt. Let the task sleep. */
+			tsk->thread.pfault_wait = 1;
+			list_add(&tsk->thread.list, &pfault_list);
+			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 			set_tsk_need_resched(tsk);
+		}
+	}
+	spin_unlock(&pfault_lock);
+}
+
+static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
+				       unsigned long action, void *hcpu)
+{
+	struct thread_struct *thread, *next;
+	struct task_struct *tsk;
+
+	switch (action) {
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		spin_lock_irq(&pfault_lock);
+		list_for_each_entry_safe(thread, next, &pfault_list, list) {
+			thread->pfault_wait = 0;
+			list_del(&thread->list);
+			tsk = container_of(thread, struct task_struct, thread);
+			wake_up_process(tsk);
+		}
+		spin_unlock_irq(&pfault_lock);
+		break;
+	default:
+		break;
 	}
+	return NOTIFY_OK;
 }
 
 static int __init pfault_irq_init(void)
@@ -568,8 +607,10 @@ static int __init pfault_irq_init(void)
 		pfault_disable = 1;
 		return rc;
 	}
-	if (pfault_init() == 0)
+	if (pfault_init() == 0) {
+		hotcpu_notifier(pfault_cpu_notify, 0);
 		return 0;
+	}
 
 	/* Tough luck, no pfault. */
 	pfault_disable = 1;