aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2006-02-28 18:10:26 -0500
committer David S. Miller <davem@sunset.davemloft.net> 2006-03-20 04:14:09 -0500
commit b830ab665ad96c6b20d51a89b35cbc09ab5a2c29 (patch)
tree 57c2c75b3e069f9f244259ae02f6f2fe3de68612
parent aac0aadf09b98ba36eab0bb02a560ebcb82ac39f (diff)
[SPARC64]: Fix bugs in SUN4V cpu mondo dispatch.
There were several bugs in the SUN4V cpu mondo dispatch code. In fact, if we ever got an EWOULDBLOCK or other error from the hypervisor call, we'd potentially send a cpu mondo multiple times to the same cpu and, even worse, we could loop until the timeout, resending the same mondo over and over to such cpus. So let's bulletproof this thing as follows: 1) Implement cpu_mondo_send() and cpu_state() hypervisor calls in arch/sparc64/kernel/entry.S, and add prototypes to asm/hypervisor.h. 2) Don't build and update the cpulist using inline functions; this was causing the cpu mask to not get updated in the caller. 3) Disable interrupts during the entire mondo send, otherwise our cpu list and/or mondo block could get overwritten if we take an interrupt and do a cpu mondo send on the current cpu. 4) Check for all possible error return types from the cpu_mondo_send() hypervisor call. In particular: HV_EOK) Our work is done, all cpus have received the mondo. HV_ECPUERROR) One or more of the cpus in the cpu list we passed to the hypervisor are in error state. Use cpu_state() calls over the entries in the cpu list to see which ones. Record them in "error_mask" and report this after we are done sending the mondo to cpus which are not in error state. HV_EWOULDBLOCK) We need to keep trying. Any other error we consider fatal; we report the event and exit immediately. 5) We only time out if forward progress is not made. Forward progress is defined as having at least one cpu get the mondo successfully in a given cpu_mondo_send() call. Otherwise we bump a counter and delay a little. If the counter hits a limit, we signal an error and report the event. Also, fix the smp_call_function_mask() error handling, which reported the number of cpus incorrectly. Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/sparc64/kernel/entry.S 28
-rw-r--r-- arch/sparc64/kernel/smp.c 180
-rw-r--r-- include/asm-sparc64/hypervisor.h 10
3 files changed, 161 insertions, 57 deletions
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index 9f3048e64e84..6d0b3ed77a02 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -1795,3 +1795,31 @@ sun4v_cpu_yield:
1795 ta HV_FAST_TRAP 1795 ta HV_FAST_TRAP
1796 retl 1796 retl
1797 nop 1797 nop
1798
1799 /* %o0: num cpus in cpu list
1800 * %o1: cpu list paddr
1801 * %o2: mondo block paddr
1802 *
1803 * returns %o0: status
1804 */
1805 .globl sun4v_cpu_mondo_send
1806sun4v_cpu_mondo_send:
1807 mov HV_FAST_CPU_MONDO_SEND, %o5
1808 ta HV_FAST_TRAP
1809 retl
1810 nop
1811
1812 /* %o0: CPU ID
1813 *
1814 * returns %o0: -status if status non-zero, else
1815 * %o0: cpu state as HV_CPU_STATE_*
1816 */
1817 .globl sun4v_cpu_state
1818sun4v_cpu_state:
1819 mov HV_FAST_CPU_STATE, %o5
1820 ta HV_FAST_TRAP
1821 brnz,pn %o0, 1f
1822 sub %g0, %o0, %o0
1823 mov %o1, %o0
18241: retl
1825 nop
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index eb7c0f855ba7..6bc7fd47e443 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -556,77 +556,144 @@ retry:
556} 556}
557 557
558/* Multi-cpu list version. */ 558/* Multi-cpu list version. */
559static int init_cpu_list(u16 *list, cpumask_t mask)
560{
561 int i, cnt;
562
563 cnt = 0;
564 for_each_cpu_mask(i, mask)
565 list[cnt++] = i;
566
567 return cnt;
568}
569
570static int update_cpu_list(u16 *list, int orig_cnt, cpumask_t mask)
571{
572 int i;
573
574 for (i = 0; i < orig_cnt; i++) {
575 if (list[i] == 0xffff)
576 cpu_clear(i, mask);
577 }
578
579 return init_cpu_list(list, mask);
580}
581
582static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) 559static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
583{ 560{
584 int this_cpu = get_cpu(); 561 struct trap_per_cpu *tb;
585 struct trap_per_cpu *tb = &trap_block[this_cpu]; 562 u16 *cpu_list;
586 u64 *mondo = __va(tb->cpu_mondo_block_pa); 563 u64 *mondo;
587 u16 *cpu_list = __va(tb->cpu_list_pa); 564 cpumask_t error_mask;
588 int cnt, retries; 565 unsigned long flags, status;
566 int cnt, retries, this_cpu, i;
567
568 /* We have to do this whole thing with interrupts fully disabled.
569 * Otherwise if we send an xcall from interrupt context it will
570 * corrupt both our mondo block and cpu list state.
571 *
572 * One consequence of this is that we cannot use timeout mechanisms
573 * that depend upon interrupts being delivered locally. So, for
574 * example, we cannot sample jiffies and expect it to advance.
575 *
576 * Fortunately, udelay() uses %stick/%tick so we can use that.
577 */
578 local_irq_save(flags);
579
580 this_cpu = smp_processor_id();
581 tb = &trap_block[this_cpu];
589 582
583 mondo = __va(tb->cpu_mondo_block_pa);
590 mondo[0] = data0; 584 mondo[0] = data0;
591 mondo[1] = data1; 585 mondo[1] = data1;
592 mondo[2] = data2; 586 mondo[2] = data2;
593 wmb(); 587 wmb();
594 588
589 cpu_list = __va(tb->cpu_list_pa);
590
591 /* Setup the initial cpu list. */
592 cnt = 0;
593 for_each_cpu_mask(i, mask)
594 cpu_list[cnt++] = i;
595
596 cpus_clear(error_mask);
595 retries = 0; 597 retries = 0;
596 cnt = init_cpu_list(cpu_list, mask);
597 do { 598 do {
598 register unsigned long func __asm__("%o5"); 599 int forward_progress;
599 register unsigned long arg0 __asm__("%o0"); 600
600 register unsigned long arg1 __asm__("%o1"); 601 status = sun4v_cpu_mondo_send(cnt,
601 register unsigned long arg2 __asm__("%o2"); 602 tb->cpu_list_pa,
602 603 tb->cpu_mondo_block_pa);
603 func = HV_FAST_CPU_MONDO_SEND;
604 arg0 = cnt;
605 arg1 = tb->cpu_list_pa;
606 arg2 = tb->cpu_mondo_block_pa;
607
608 __asm__ __volatile__("ta %8"
609 : "=&r" (func), "=&r" (arg0),
610 "=&r" (arg1), "=&r" (arg2)
611 : "0" (func), "1" (arg0),
612 "2" (arg1), "3" (arg2),
613 "i" (HV_FAST_TRAP)
614 : "memory");
615 if (likely(arg0 == HV_EOK))
616 break;
617 604
618 if (unlikely(++retries > 100)) { 605 /* HV_EOK means all cpus received the xcall, we're done. */
619 printk("CPU[%d]: sun4v mondo error %lu\n", 606 if (likely(status == HV_EOK))
620 this_cpu, arg0);
621 break; 607 break;
608
609 /* First, clear out all the cpus in the mask that were
610 * successfully sent to. The hypervisor indicates this
611 * by setting the cpu list entry of such cpus to 0xffff.
612 */
613 forward_progress = 0;
614 for (i = 0; i < cnt; i++) {
615 if (cpu_list[i] == 0xffff) {
616 cpu_clear(i, mask);
617 forward_progress = 1;
618 }
622 } 619 }
623 620
624 cnt = update_cpu_list(cpu_list, cnt, mask); 621 /* If we get a HV_ECPUERROR, then one or more of the cpus
622 * in the list are in error state. Use the cpu_state()
623 * hypervisor call to find out which cpus are in error state.
624 */
625 if (unlikely(status == HV_ECPUERROR)) {
626 for (i = 0; i < cnt; i++) {
627 long err;
628 u16 cpu;
629
630 cpu = cpu_list[i];
631 if (cpu == 0xffff)
632 continue;
633
634 err = sun4v_cpu_state(cpu);
635 if (err >= 0 &&
636 err == HV_CPU_STATE_ERROR) {
637 cpu_clear(cpu, mask);
638 cpu_set(cpu, error_mask);
639 }
640 }
641 } else if (unlikely(status != HV_EWOULDBLOCK))
642 goto fatal_mondo_error;
643
644 /* Rebuild the cpu_list[] array and try again. */
645 cnt = 0;
646 for_each_cpu_mask(i, mask)
647 cpu_list[cnt++] = i;
625 648
626 udelay(2 * cnt); 649 if (unlikely(!forward_progress)) {
650 if (unlikely(++retries > 10000))
651 goto fatal_mondo_timeout;
652
653 /* Delay a little bit to let other cpus catch up
654 * on their cpu mondo queue work.
655 */
656 udelay(2 * cnt);
657 }
627 } while (1); 658 } while (1);
628 659
629 put_cpu(); 660 local_irq_restore(flags);
661
662 if (unlikely(!cpus_empty(error_mask)))
663 goto fatal_mondo_cpu_error;
664
665 return;
666
667fatal_mondo_cpu_error:
668 printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
669 "were in error state\n",
670 this_cpu);
671 printk(KERN_CRIT "CPU[%d]: Error mask [ ", this_cpu);
672 for_each_cpu_mask(i, error_mask)
673 printk("%d ", i);
674 printk("]\n");
675 return;
676
677fatal_mondo_timeout:
678 local_irq_restore(flags);
679 printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
680 " progress after %d retries.\n",
681 this_cpu, retries);
682 goto dump_cpu_list_and_out;
683
684fatal_mondo_error:
685 local_irq_restore(flags);
686 printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
687 this_cpu, status);
688 printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
689 "mondo_block_pa(%lx)\n",
690 this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
691
692dump_cpu_list_and_out:
693 printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
694 for (i = 0; i < cnt; i++)
695 printk("%u ", cpu_list[i]);
696 printk("]\n");
630} 697}
631 698
632/* Send cross call to all processors mentioned in MASK 699/* Send cross call to all processors mentioned in MASK
@@ -723,9 +790,8 @@ static int smp_call_function_mask(void (*func)(void *info), void *info,
723 790
724out_timeout: 791out_timeout:
725 spin_unlock(&call_lock); 792 spin_unlock(&call_lock);
726 printk("XCALL: Remote cpus not responding, ncpus=%ld finished=%ld\n", 793 printk("XCALL: Remote cpus not responding, ncpus=%d finished=%d\n",
727 (long) num_online_cpus() - 1L, 794 cpus, atomic_read(&data.finished));
728 (long) atomic_read(&data.finished));
729 return 0; 795 return 0;
730} 796}
731 797
diff --git a/include/asm-sparc64/hypervisor.h b/include/asm-sparc64/hypervisor.h
index 726e2ea03ce3..612bf319753f 100644
--- a/include/asm-sparc64/hypervisor.h
+++ b/include/asm-sparc64/hypervisor.h
@@ -342,6 +342,8 @@ extern unsigned long sun4v_cpu_qconf(unsigned long type,
342 * ENOCPU Invalid cpu in CPU list 342 * ENOCPU Invalid cpu in CPU list
343 * EWOULDBLOCK Some or all of the listed CPUs did not receive 343 * EWOULDBLOCK Some or all of the listed CPUs did not receive
344 * the mondo 344 * the mondo
345 * ECPUERROR One or more of the listed CPUs are in error
346 * state, use HV_FAST_CPU_STATE to see which ones
345 * EINVAL CPU list includes caller's CPU ID 347 * EINVAL CPU list includes caller's CPU ID
346 * 348 *
347 * Send a mondo interrupt to the CPUs in the given CPU list with the 349 * Send a mondo interrupt to the CPUs in the given CPU list with the
@@ -355,6 +357,10 @@ extern unsigned long sun4v_cpu_qconf(unsigned long type,
355 */ 357 */
356#define HV_FAST_CPU_MONDO_SEND 0x42 358#define HV_FAST_CPU_MONDO_SEND 0x42
357 359
360#ifndef __ASSEMBLY__
361extern unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count, unsigned long cpu_list_pa, unsigned long mondo_block_pa);
362#endif
363
358/* cpu_myid() 364/* cpu_myid()
359 * TRAP: HV_FAST_TRAP 365 * TRAP: HV_FAST_TRAP
360 * FUNCTION: HV_FAST_CPU_MYID 366 * FUNCTION: HV_FAST_CPU_MYID
@@ -382,6 +388,10 @@ extern unsigned long sun4v_cpu_qconf(unsigned long type,
382#define HV_CPU_STATE_RUNNING 0x02 388#define HV_CPU_STATE_RUNNING 0x02
383#define HV_CPU_STATE_ERROR 0x03 389#define HV_CPU_STATE_ERROR 0x03
384 390
391#ifndef __ASSEMBLY__
392extern long sun4v_cpu_state(unsigned long cpuid);
393#endif
394
385/* cpu_set_rtba() 395/* cpu_set_rtba()
386 * TRAP: HV_FAST_TRAP 396 * TRAP: HV_FAST_TRAP
387 * FUNCTION: HV_FAST_CPU_SET_RTBA 397 * FUNCTION: HV_FAST_CPU_SET_RTBA