Diffstat (limited to 'arch/sparc')
-rw-r--r--  arch/sparc/include/asm/trap_block.h |   1
-rw-r--r--  arch/sparc/kernel/pci_sun4v.c       |  12
-rw-r--r--  arch/sparc/kernel/smp_64.c          | 185
-rw-r--r--  arch/sparc/kernel/sun4v_ivec.S      |  15
-rw-r--r--  arch/sparc/kernel/traps_64.c        |   1
5 files changed, 139 insertions, 75 deletions
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h
index ec9c04de3664..ff05992dae7a 100644
--- a/arch/sparc/include/asm/trap_block.h
+++ b/arch/sparc/include/asm/trap_block.h
@@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS];
 void init_cur_cpu_trap(struct thread_info *);
 void setup_tba(void);
 extern int ncpus_probed;
+extern u64 cpu_mondo_counter[NR_CPUS];
 
 unsigned long real_hard_smp_processor_id(void);
 
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 24f21c726dfa..f10e2f712394 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -673,12 +673,14 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 static int dma_4v_supported(struct device *dev, u64 device_mask)
 {
 	struct iommu *iommu = dev->archdata.iommu;
-	u64 dma_addr_mask;
+	u64 dma_addr_mask = iommu->dma_addr_mask;
 
-	if (device_mask > DMA_BIT_MASK(32) && iommu->atu)
-		dma_addr_mask = iommu->atu->dma_addr_mask;
-	else
-		dma_addr_mask = iommu->dma_addr_mask;
+	if (device_mask > DMA_BIT_MASK(32)) {
+		if (iommu->atu)
+			dma_addr_mask = iommu->atu->dma_addr_mask;
+		else
+			return 0;
+	}
 
 	if ((device_mask & dma_addr_mask) == dma_addr_mask)
 		return 1;
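
Before this hunk, a device asking for more than 32 bits of DMA on an IOMMU without an ATU fell through to the 32-bit mask and could still be reported as supported; now that case returns 0. A minimal userspace C sketch of the corrected decision, assuming hypothetical struct stubs that carry only the fields the patch touches:

/* Sketch only: stub types standing in for the kernel's iommu/atu. */
#include <stdint.h>
#include <stdio.h>

#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

struct atu   { uint64_t dma_addr_mask; };
struct iommu { uint64_t dma_addr_mask; struct atu *atu; };

static int dma_4v_supported_sketch(struct iommu *iommu, uint64_t device_mask)
{
	uint64_t dma_addr_mask = iommu->dma_addr_mask;

	if (device_mask > DMA_BIT_MASK(32)) {
		if (iommu->atu)
			dma_addr_mask = iommu->atu->dma_addr_mask;
		else
			return 0;	/* >32-bit request, no ATU: unsupported */
	}

	return (device_mask & dma_addr_mask) == dma_addr_mask;
}

int main(void)
{
	struct iommu no_atu = { DMA_BIT_MASK(32), NULL };

	/* Prints 0: a 64-bit mask without an ATU is now rejected,
	 * where the old fall-through accepted it.
	 */
	printf("%d\n", dma_4v_supported_sketch(&no_atu, DMA_BIT_MASK(64)));
	return 0;
}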
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index fdf31040a7dc..3218bc43302e 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -622,22 +622,48 @@ retry:
 	}
 }
 
-/* Multi-cpu list version.  */
+#define CPU_MONDO_COUNTER(cpuid)	(cpu_mondo_counter[cpuid])
+#define MONDO_USEC_WAIT_MIN		2
+#define MONDO_USEC_WAIT_MAX		100
+#define MONDO_RETRY_LIMIT		500000
+
+/* Multi-cpu list version.
+ *
+ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
+ * Sometimes not all cpus receive the mondo, requiring us to re-send
+ * it until all cpus have received it, or until cpus are truly stuck
+ * unable to receive mondo and we time out.
+ * Occasionally a target cpu strand is borrowed briefly by the
+ * hypervisor to perform guest service, such as PCIe error handling.
+ * Given that service time, an overall wait of 1 second per cpu is
+ * reasonable. Two in-between mondo check wait times are defined:
+ * 2 usec for single-cpu quick turnaround, up to 100 usec for large
+ * cpu counts. Delivering to many cpus can take longer, so we extend
+ * the retry count as long as target cpus are making forward progress.
+ */
 static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int retries, this_cpu, prev_sent, i, saw_cpu_error;
+	int this_cpu, tot_cpus, prev_sent, i, rem;
+	int usec_wait, retries, tot_retries;
+	u16 first_cpu = 0xffff;
+	unsigned long xc_rcvd = 0;
 	unsigned long status;
+	int ecpuerror_id = 0;
+	int enocpu_id = 0;
 	u16 *cpu_list;
+	u16 cpu;
 
 	this_cpu = smp_processor_id();
-
 	cpu_list = __va(tb->cpu_list_pa);
-
-	saw_cpu_error = 0;
-	retries = 0;
+	usec_wait = cnt * MONDO_USEC_WAIT_MIN;
+	if (usec_wait > MONDO_USEC_WAIT_MAX)
+		usec_wait = MONDO_USEC_WAIT_MAX;
+	retries = tot_retries = 0;
+	tot_cpus = cnt;
 	prev_sent = 0;
+
 	do {
-		int forward_progress, n_sent;
+		int n_sent, mondo_delivered, target_cpu_busy;
 
 		status = sun4v_cpu_mondo_send(cnt,
 					      tb->cpu_list_pa,
@@ -645,94 +671,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 
 		/* HV_EOK means all cpus received the xcall, we're done.  */
 		if (likely(status == HV_EOK))
-			break;
+			goto xcall_done;
+
+		/* Anything other than these non-fatal errors means panic. */
+		if (unlikely((status != HV_EWOULDBLOCK) &&
+			     (status != HV_ECPUERROR) &&
+			     (status != HV_ENOCPU)))
+			goto fatal_errors;
 
 		/* First, see if we made any forward progress.
 		 *
+		 * Go through the cpu_list, count the target cpus that have
+		 * received our mondo (n_sent), and those that did not (rem).
+		 * Re-pack cpu_list with the cpus that remain to be retried at
+		 * the front - this simplifies tracking the truly stalled cpus.
+		 *
 		 * The hypervisor indicates successful sends by setting
 		 * cpu list entries to the value 0xffff.
+		 *
+		 * EWOULDBLOCK means some target cpus did not receive the
+		 * mondo and retrying usually helps.
+		 *
+		 * ECPUERROR means at least one target cpu is in error state,
+		 * and it's usually safe to skip the faulty cpu and retry.
+		 *
+		 * ENOCPU means one of the target cpus doesn't belong to the
+		 * domain, perhaps offlined, which is unexpected but not
+		 * fatal, and it's okay to skip the offlined cpu.
 		 */
+		rem = 0;
 		n_sent = 0;
 		for (i = 0; i < cnt; i++) {
-			if (likely(cpu_list[i] == 0xffff))
+			cpu = cpu_list[i];
+			if (likely(cpu == 0xffff)) {
 				n_sent++;
+			} else if ((status == HV_ECPUERROR) &&
+				   (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
+				ecpuerror_id = cpu + 1;
+			} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
+				enocpu_id = cpu + 1;
+			} else {
+				cpu_list[rem++] = cpu;
+			}
 		}
 
-		forward_progress = 0;
-		if (n_sent > prev_sent)
-			forward_progress = 1;
+		/* No cpus remain; we're done. */
+		if (rem == 0)
+			break;
 
-		prev_sent = n_sent;
+		/* Otherwise, update the cpu count for retry. */
+		cnt = rem;
 
-		/* If we get a HV_ECPUERROR, then one or more of the cpus
-		 * in the list are in error state.  Use the cpu_state()
-		 * hypervisor call to find out which cpus are in error state.
+		/* Record the overall number of mondos received by the
+		 * first of the remaining cpus.
 		 */
-		if (unlikely(status == HV_ECPUERROR)) {
-			for (i = 0; i < cnt; i++) {
-				long err;
-				u16 cpu;
+		if (first_cpu != cpu_list[0]) {
+			first_cpu = cpu_list[0];
+			xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
+		}
 
-				cpu = cpu_list[i];
-				if (cpu == 0xffff)
-					continue;
+		/* Was any mondo delivered successfully? */
+		mondo_delivered = (n_sent > prev_sent);
+		prev_sent = n_sent;
 
-				err = sun4v_cpu_state(cpu);
-				if (err == HV_CPU_STATE_ERROR) {
-					saw_cpu_error = (cpu + 1);
-					cpu_list[i] = 0xffff;
-				}
-			}
-		} else if (unlikely(status != HV_EWOULDBLOCK))
-			goto fatal_mondo_error;
+		/* or, was any target cpu busy processing other mondos? */
+		target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
+		xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
 
-		/* Don't bother rewriting the CPU list, just leave the
-		 * 0xffff and non-0xffff entries in there and the
-		 * hypervisor will do the right thing.
-		 *
-		 * Only advance timeout state if we didn't make any
-		 * forward progress.
+		/* Retry count is for no progress. If we're making progress,
+		 * reset the retry count.
 		 */
-		if (unlikely(!forward_progress)) {
-			if (unlikely(++retries > 10000))
-				goto fatal_mondo_timeout;
-
-			/* Delay a little bit to let other cpus catch up
-			 * on their cpu mondo queue work.
-			 */
-			udelay(2 * cnt);
+		if (likely(mondo_delivered || target_cpu_busy)) {
+			tot_retries += retries;
+			retries = 0;
+		} else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
+			goto fatal_mondo_timeout;
 		}
-	} while (1);
 
-	if (unlikely(saw_cpu_error))
-		goto fatal_mondo_cpu_error;
+		/* Delay a little bit to let other cpus catch up on
+		 * their cpu mondo queue work.
+		 */
+		if (!mondo_delivered)
+			udelay(usec_wait);
 
-	return;
+		retries++;
+	} while (1);
 
-fatal_mondo_cpu_error:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
-	       "(including %d) were in error state\n",
-	       this_cpu, saw_cpu_error - 1);
+xcall_done:
+	if (unlikely(ecpuerror_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
+			this_cpu, ecpuerror_id - 1);
+	} else if (unlikely(enocpu_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
+			this_cpu, enocpu_id - 1);
+	}
 	return;
 
+fatal_errors:
+	/* Fatal errors include bad alignment, etc. */
+	pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
+		this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+	panic("Unexpected SUN4V mondo error %lu\n", status);
+
 fatal_mondo_timeout:
-	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
-	       " progress after %d retries.\n",
-	       this_cpu, retries);
-	goto dump_cpu_list_and_out;
-
-fatal_mondo_error:
-	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
-	       this_cpu, status);
-	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
-	       "mondo_block_pa(%lx)\n",
-	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
-
-dump_cpu_list_and_out:
-	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
-	for (i = 0; i < cnt; i++)
-		printk("%u ", cpu_list[i]);
-	printk("]\n");
+	/* Some cpus were non-responsive to the cpu mondo. */
+	pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
+		this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
+	panic("SUN4V mondo timeout panic\n");
 }
 
 static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
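
The retry policy above reduces to two rules: the per-round wait scales with the target cpu count (2 usec each, capped at 100 usec), and a round only counts against the 500000-round limit when neither a new delivery nor receiver-side progress happened. A self-contained C sketch of just those rules, reusing the patch's constants; the helper names and main() harness are illustrative, not kernel code:

#include <stdint.h>

#define MONDO_USEC_WAIT_MIN	2
#define MONDO_USEC_WAIT_MAX	100
#define MONDO_RETRY_LIMIT	500000

/* Wait between sends scales with the remaining cpu count, capped so
 * large cpu lists don't stall the sender for too long per round.
 */
static int mondo_usec_wait(int cnt)
{
	int usec_wait = cnt * MONDO_USEC_WAIT_MIN;

	return usec_wait > MONDO_USEC_WAIT_MAX ? MONDO_USEC_WAIT_MAX : usec_wait;
}

/* A round made forward progress if a mondo landed (n_sent grew) or the
 * first pending cpu drained other mondos meanwhile (its per-cpu counter
 * advanced). Only rounds with neither accumulate toward
 * MONDO_RETRY_LIMIT; any progress resets the running retry count.
 */
static int round_made_progress(int n_sent, int prev_sent,
			       uint64_t xc_rcvd, uint64_t counter_now)
{
	return (n_sent > prev_sent) || (counter_now > xc_rcvd);
}

int main(void)
{
	/* 1 cpu -> 2 usec; 500 cpus -> capped at 100 usec; a busy but
	 * receiving cpu (counter 7 -> 9) still counts as progress.
	 */
	return !(mondo_usec_wait(1) == 2 && mondo_usec_wait(500) == 100 &&
		 round_made_progress(3, 3, 7, 9));
}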
diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S
index 559bc5e9c199..34631995859a 100644
--- a/arch/sparc/kernel/sun4v_ivec.S
+++ b/arch/sparc/kernel/sun4v_ivec.S
@@ -26,6 +26,21 @@ sun4v_cpu_mondo:
 	ldxa	[%g0] ASI_SCRATCHPAD, %g4
 	sub	%g4, TRAP_PER_CPU_FAULT_INFO, %g4
 
+	/* Get smp_processor_id() into %g3 */
+	sethi	%hi(trap_block), %g5
+	or	%g5, %lo(trap_block), %g5
+	sub	%g4, %g5, %g3
+	srlx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
+
+	/* Increment cpu_mondo_counter[smp_processor_id()] */
+	sethi	%hi(cpu_mondo_counter), %g5
+	or	%g5, %lo(cpu_mondo_counter), %g5
+	sllx	%g3, 3, %g3
+	add	%g5, %g3, %g5
+	ldx	[%g5], %g3
+	add	%g3, 1, %g3
+	stx	%g3, [%g5]
+
 	/* Get CPU mondo queue base phys address into %g7.  */
 	ldx	[%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
 
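
In C terms, the added assembly recovers the cpu id from the trap_per_cpu pointer held in %g4 (the srlx divides the offset into trap_block[] by the block size) and bumps that cpu's 8-byte counter (the sllx by 3 scales the index). A rough equivalent sketch; NR_CPUS and the block-size shift below are placeholder values, not the kernel's:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS			16	/* placeholder, not the kernel's */
#define TRAP_BLOCK_SZ_SHIFT	7	/* placeholder log2 of block size */

struct trap_per_cpu { char pad[1 << TRAP_BLOCK_SZ_SHIFT]; };

static struct trap_per_cpu trap_block[NR_CPUS];
static uint64_t cpu_mondo_counter[NR_CPUS];

/* tb plays the role of %g4: pointer subtraction does the srlx's job of
 * turning the byte offset into a cpu id, which then indexes the
 * counter array.
 */
static void count_cpu_mondo(struct trap_per_cpu *tb)
{
	unsigned long cpu = (unsigned long)(tb - trap_block);

	cpu_mondo_counter[cpu]++;	/* the ldx / add 1 / stx sequence */
}

int main(void)
{
	count_cpu_mondo(&trap_block[3]);
	printf("cpu3 mondos: %llu\n",
	       (unsigned long long)cpu_mondo_counter[3]);
	return 0;
}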
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 196ee5eb4d48..ad31af1dd726 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs)
 	}
 }
 
+u64 cpu_mondo_counter[NR_CPUS] = {0};
 struct trap_per_cpu trap_block[NR_CPUS];
 EXPORT_SYMBOL(trap_block);
 