aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David S. Miller <davem@sunset.davemloft.net> 2006-03-03 00:50:47 -0500
committer: David S. Miller <davem@sunset.davemloft.net> 2006-03-20 04:14:17 -0500
commit: 3cab0c3e8636d5005041aa52224f796c3a4ef872 (patch)
tree: 582c92940f46cb0ecf8fafd4fde1cfd346172366
parent: bcc28ee0bf390df0d81cc9dafe980faef6b2771a (diff)
[SPARC64]: More SUN4V cpu mondo bug fixing.
This cpu mondo sending interface isn't all that easy to use correctly... We were clearing out the wrong bits from the "mask" after getting something other than HV_EOK from the hypervisor (the mask was being indexed by cpu_list[] position rather than by cpu number). It turns out that the same cpu_list[] array can simply be resent to the hypervisor, with the 0xffff "done" entries still in there, and it will do the right thing. So don't update or try to rebuild the cpu_list[] array to condense it. This requires the "forward_progress" check to be done slightly differently, but this new scheme is less bug-prone than what we were doing before. Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/sparc64/kernel/smp.c 40
1 files changed, 24 insertions, 16 deletions
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 6bc7fd47e443..c4548a88953c 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -563,7 +563,7 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
563 u64 *mondo; 563 u64 *mondo;
564 cpumask_t error_mask; 564 cpumask_t error_mask;
565 unsigned long flags, status; 565 unsigned long flags, status;
566 int cnt, retries, this_cpu, i; 566 int cnt, retries, this_cpu, prev_sent, i;
567 567
568 /* We have to do this whole thing with interrupts fully disabled. 568 /* We have to do this whole thing with interrupts fully disabled.
569 * Otherwise if we send an xcall from interrupt context it will 569 * Otherwise if we send an xcall from interrupt context it will
@@ -595,8 +595,9 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
595 595
596 cpus_clear(error_mask); 596 cpus_clear(error_mask);
597 retries = 0; 597 retries = 0;
598 prev_sent = 0;
598 do { 599 do {
599 int forward_progress; 600 int forward_progress, n_sent;
600 601
601 status = sun4v_cpu_mondo_send(cnt, 602 status = sun4v_cpu_mondo_send(cnt,
602 tb->cpu_list_pa, 603 tb->cpu_list_pa,
@@ -606,18 +607,23 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
606 if (likely(status == HV_EOK)) 607 if (likely(status == HV_EOK))
607 break; 608 break;
608 609
609 /* First, clear out all the cpus in the mask that were 610 /* First, see if we made any forward progress.
610 * successfully sent to. The hypervisor indicates this 611 *
611 * by setting the cpu list entry of such cpus to 0xffff. 612 * The hypervisor indicates successful sends by setting
613 * cpu list entries to the value 0xffff.
612 */ 614 */
613 forward_progress = 0; 615 n_sent = 0;
614 for (i = 0; i < cnt; i++) { 616 for (i = 0; i < cnt; i++) {
615 if (cpu_list[i] == 0xffff) { 617 if (likely(cpu_list[i] == 0xffff))
616 cpu_clear(i, mask); 618 n_sent++;
617 forward_progress = 1;
618 }
619 } 619 }
620 620
621 forward_progress = 0;
622 if (n_sent > prev_sent)
623 forward_progress = 1;
624
625 prev_sent = n_sent;
626
621 /* If we get a HV_ECPUERROR, then one or more of the cpus 627 /* If we get a HV_ECPUERROR, then one or more of the cpus
622 * in the list are in error state. Use the cpu_state() 628 * in the list are in error state. Use the cpu_state()
623 * hypervisor call to find out which cpus are in error state. 629 * hypervisor call to find out which cpus are in error state.
@@ -634,18 +640,20 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
634 err = sun4v_cpu_state(cpu); 640 err = sun4v_cpu_state(cpu);
635 if (err >= 0 && 641 if (err >= 0 &&
636 err == HV_CPU_STATE_ERROR) { 642 err == HV_CPU_STATE_ERROR) {
637 cpu_clear(cpu, mask); 643 cpu_list[i] = 0xffff;
638 cpu_set(cpu, error_mask); 644 cpu_set(cpu, error_mask);
639 } 645 }
640 } 646 }
641 } else if (unlikely(status != HV_EWOULDBLOCK)) 647 } else if (unlikely(status != HV_EWOULDBLOCK))
642 goto fatal_mondo_error; 648 goto fatal_mondo_error;
643 649
644 /* Rebuild the cpu_list[] array and try again. */ 650 /* Don't bother rewriting the CPU list, just leave the
645 cnt = 0; 651 * 0xffff and non-0xffff entries in there and the
646 for_each_cpu_mask(i, mask) 652 * hypervisor will do the right thing.
647 cpu_list[cnt++] = i; 653 *
648 654 * Only advance timeout state if we didn't make any
655 * forward progress.
656 */
649 if (unlikely(!forward_progress)) { 657 if (unlikely(!forward_progress)) {
650 if (unlikely(++retries > 10000)) 658 if (unlikely(++retries > 10000))
651 goto fatal_mondo_timeout; 659 goto fatal_mondo_timeout;