author      Cliff Wickman <cpw@sgi.com>     2010-06-02 17:22:02 -0400
committer   Ingo Molnar <mingo@elte.hu>     2010-06-08 15:13:48 -0400
commit      450a007eebaf430426ea8f89bbc3f287949905b2
tree        bb44fa25f4855f82e84a788bbdb8cda5a05a7a4f /arch
parent      7fba1bcd4844a4a8619a03bf51cabc92aea365a8
x86, UV: BAU broadcast to the local hub
Make the Broadcast Assist Unit driver use the BAU for TLB
shootdowns of cpus on the local uvhub.

It was previously thought that an IPI might be faster for the
cpus on the local hub.  But the IPI operation would have to
follow the completion of the BAU broadcast anyway, so we now
broadcast to the local uvhub in all cases except when the
current cpu is the only local cpu in the mask.

This simplifies uv_flush_send_and_wait(): it now returns either
all shootdowns complete, or none.

Adjust the statistics to account for shootdowns on the local
uvhub.
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: gregkh@suse.de
LKML-Reference: <E1OJvNy-0004aq-G7@eag09.americas.sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
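
[Illustrative aside -- not part of the patch.] The new contract is all-or-none:
the caller either treats every targeted cpu as flushed, or falls back to IPIs
for the entire original mask. A minimal standalone model of that decision,
assuming a made-up 8-cpu/2-hub topology (cpu_to_hub() and flush_model() are
hypothetical names, not driver code):

        /* model of the all-or-none flush decision -- illustrative only */
        #include <stdio.h>

        #define NCPUS 8

        /* hypothetical topology: cpus 0-3 on hub 0, cpus 4-7 on hub 1 */
        static int cpu_to_hub(int cpu) { return cpu / 4; }

        static const char *flush_model(unsigned mask, int sender, int bau_ok)
        {
                int cpu, locals = 0, remotes = 0;

                mask &= ~(1u << sender);  /* never shoot down the sender */
                for (cpu = 0; cpu < NCPUS; cpu++) {
                        if (!(mask & (1u << cpu)))
                                continue;
                        if (cpu_to_hub(cpu) == cpu_to_hub(sender))
                                locals++;   /* local uvhub: now broadcast too */
                        else
                                remotes++;
                }
                if (locals + remotes == 0)
                        return "NULL (nothing left to flush)";
                /* all-or-none: success covers the whole mask, failure none */
                return bau_ok ? "NULL (broadcast flushed everything)"
                              : "original cpumask (caller falls back to IPIs)";
        }

        int main(void)
        {
                /* sender is cpu 1; targets 0 and 2 are local, 5 is remote */
                printf("success:   %s\n", flush_model(0x27, 1, 1));
                printf("give-up:   %s\n", flush_model(0x27, 1, 0));
                printf("self-only: %s\n", flush_model(0x02, 1, 1));
                return 0;
        }

The "self-only" case is the one exception kept by the patch: when the sender
was the only cpu in the mask, nothing is broadcast and NULL is returned.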
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h |   5
-rw-r--r--  arch/x86/kernel/tlb_uv.c         | 138
2 files changed, 58 insertions, 85 deletions
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index c19b870ea58a..7f6ea611cb71 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -346,6 +346,11 @@ struct ptc_stats {
         unsigned long s_time; /* time spent in sending side */
         unsigned long s_retriesok; /* successful retries */
         unsigned long s_ntargcpu; /* total number of cpu's targeted */
+        unsigned long s_ntargself; /* times the sending cpu was targeted */
+        unsigned long s_ntarglocals; /* targets of cpus on the local blade */
+        unsigned long s_ntargremotes; /* targets of cpus on remote blades */
+        unsigned long s_ntarglocaluvhub; /* targets of the local hub */
+        unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */
         unsigned long s_ntarguvhub; /* total number of uvhubs targeted */
         unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/
         unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */
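
[Editorial summary of the accounting, derived from the tlb_uv.c hunks below --
not code from the patch.] The five new counters split the two existing totals
along local/remote lines; per request:

        /* cpu counts */
        s_ntargself      += (sender was in the caller's mask ? 1 : 0);
        s_ntarglocals    += locals;            /* cpus on the sender's uvhub */
        s_ntargremotes   += remotes;           /* cpus on other uvhubs */
        s_ntargcpu       += locals + remotes;  /* == locals + remotes */

        /* uvhub counts */
        s_ntarglocaluvhub  += (locals ? 1 : 0);
        s_ntargremoteuvhub += (locals ? hubs - 1 : hubs);
        s_ntarguvhub       += hubs;            /* == local + remote uvhubs */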
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 4cb14dbd7fa3..a1615058fad3 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -400,10 +400,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
         unsigned long mmr_offset, int right_shift, int this_cpu,
         struct bau_control *bcp, struct bau_control *smaster, long try)
 {
-        int relaxes = 0;
         unsigned long descriptor_status;
-        unsigned long mmr;
-        unsigned long mask;
         cycles_t ttime;
         struct ptc_stats *stat = bcp->statp;
         struct bau_control *hmaster;
@@ -524,25 +521,19 @@ disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
  * The flush_mask contains the cpus the broadcast is to be sent to, plus
  * cpus that are on the local uvhub.
  *
- * Returns NULL if all flushing represented in the mask was done. The mask
- * is zeroed.
- * Returns @flush_mask if some remote flushing remains to be done. The
- * mask will have some bits still set, representing any cpus on the local
- * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed.
+ * Returns 0 if all flushing represented in the mask was done.
+ * Returns 1 if it gives up entirely and the original cpu mask is to be
+ * returned to the kernel.
  */
-const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
-                                             struct cpumask *flush_mask,
-                                             struct bau_control *bcp)
+int uv_flush_send_and_wait(struct bau_desc *bau_desc,
+                           struct cpumask *flush_mask, struct bau_control *bcp)
 {
         int right_shift;
-        int uvhub;
-        int bit;
         int completion_status = 0;
         int seq_number = 0;
         long try = 0;
         int cpu = bcp->uvhub_cpu;
         int this_cpu = bcp->cpu;
-        int this_uvhub = bcp->uvhub;
         unsigned long mmr_offset;
         unsigned long index;
         cycles_t time1;
@@ -552,10 +543,6 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
         struct bau_control *smaster = bcp->socket_master;
         struct bau_control *hmaster = bcp->uvhub_master;

-        /*
-         * Spin here while there are hmaster->max_bau_concurrent or more active
-         * descriptors. This is the per-uvhub 'throttle'.
-         */
         if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
                         &hmaster->active_descriptor_count,
                         hmaster->max_bau_concurrent)) {
@@ -591,9 +578,7 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
         index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
                 bcp->uvhub_cpu;
         bcp->send_message = get_cycles();
-
         uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
-
         try++;
         completion_status = uv_wait_completion(bau_desc, mmr_offset,
                         right_shift, this_cpu, bcp, smaster, try);
@@ -652,16 +637,9 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
             (hmaster->max_bau_concurrent <
              hmaster->max_bau_concurrent_constant))
                 hmaster->max_bau_concurrent++;
-
-        /*
-         * hold any cpu not timing out here; no other cpu currently held by
-         * the 'throttle' should enter the activation code
-         */
         while (hmaster->uvhub_quiesce)
                 cpu_relax();
         atomic_dec(&hmaster->active_descriptor_count);
-
-        /* guard against cycles wrap */
         if (time2 > time1) {
                 elapsed = time2 - time1;
                 stat->s_time += elapsed;
@@ -674,32 +652,14 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
                         }
                 }
         } else
-                stat->s_requestor--; /* don't count this one */
+                stat->s_requestor--;
         if (completion_status == FLUSH_COMPLETE && try > 1)
                 stat->s_retriesok++;
         else if (completion_status == FLUSH_GIVEUP) {
-                /*
-                 * Cause the caller to do an IPI-style TLB shootdown on
-                 * the target cpu's, all of which are still in the mask.
-                 */
                 stat->s_giveup++;
-                return flush_mask;
+                return 1;
         }
-
-        /*
-         * Success, so clear the remote cpu's from the mask so we don't
-         * use the IPI method of shootdown on them.
-         */
-        for_each_cpu(bit, flush_mask) {
-                uvhub = uv_cpu_to_blade_id(bit);
-                if (uvhub == this_uvhub)
-                        continue;
-                cpumask_clear_cpu(bit, flush_mask);
-        }
-        if (!cpumask_empty(flush_mask))
-                return flush_mask;
-
-        return NULL;
+        return 0;
 }

 /**
@@ -731,10 +691,11 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
         struct mm_struct *mm,
         unsigned long va, unsigned int cpu)
 {
-        int remotes;
         int tcpu;
         int uvhub;
         int locals = 0;
+        int remotes = 0;
+        int hubs = 0;
         struct bau_desc *bau_desc;
         struct cpumask *flush_mask;
         struct ptc_stats *stat;
@@ -768,54 +729,52 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,

         /*
          * Each sending cpu has a per-cpu mask which it fills from the caller's
-         * cpu mask. Only remote cpus are converted to uvhubs and copied.
+         * cpu mask. All cpus are converted to uvhubs and copied to the
+         * activation descriptor.
          */
         flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
-        /*
-         * copy cpumask to flush_mask, removing current cpu
-         * (current cpu should already have been flushed by the caller and
-         * should never be returned if we return flush_mask)
-         */
+        /* don't actually do a shootdown of the local cpu */
         cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
         if (cpu_isset(cpu, *cpumask))
-                locals++;  /* current cpu was targeted */
+                stat->s_ntargself++;

         bau_desc = bcp->descriptor_base;
         bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;

         bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
-        remotes = 0;
+
+        /* cpu statistics */
         for_each_cpu(tcpu, flush_mask) {
                 uvhub = uv_cpu_to_blade_id(tcpu);
-                if (uvhub == bcp->uvhub) {
-                        locals++;
-                        continue;
-                }
                 bau_uvhub_set(uvhub, &bau_desc->distribution);
-                remotes++;
-        }
-        if (remotes == 0) {
-                /*
-                 * No off_hub flushing; return status for local hub.
-                 * Return the caller's mask if all were local (the current
-                 * cpu may be in that mask).
-                 */
-                if (locals)
-                        return cpumask;
+                if (uvhub == bcp->uvhub)
+                        locals++;
                 else
-                        return NULL;
-        }
+                        remotes++;
+        }
+        if ((locals + remotes) == 0)
+                return NULL;
         stat->s_requestor++;
-        stat->s_ntargcpu += remotes;
-        remotes = bau_uvhub_weight(&bau_desc->distribution);
-        stat->s_ntarguvhub += remotes;
-        if (remotes >= 16)
+        stat->s_ntargcpu += remotes + locals;
+        stat->s_ntargremotes += remotes;
+        stat->s_ntarglocals += locals;
+
+
+        /* uvhub statistics */
+        hubs = bau_uvhub_weight(&bau_desc->distribution);
+        if (locals) {
+                stat->s_ntarglocaluvhub++;
+                stat->s_ntargremoteuvhub += (hubs - 1);
+        } else
+                stat->s_ntargremoteuvhub += hubs;
+        stat->s_ntarguvhub += hubs;
+        if (hubs >= 16)
                 stat->s_ntarguvhub16++;
-        else if (remotes >= 8)
+        else if (hubs >= 8)
                 stat->s_ntarguvhub8++;
-        else if (remotes >= 4)
+        else if (hubs >= 4)
                 stat->s_ntarguvhub4++;
-        else if (remotes >= 2)
+        else if (hubs >= 2)
                 stat->s_ntarguvhub2++;
         else
                 stat->s_ntarguvhub1++;
@@ -824,10 +783,13 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
         bau_desc->payload.sending_cpu = cpu;

         /*
-         * uv_flush_send_and_wait returns null if all cpu's were messaged, or
-         * the adjusted flush_mask if any cpu's were not messaged.
+         * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
+         * or 1 if it gave up and the original cpumask should be returned.
          */
-        return uv_flush_send_and_wait(bau_desc, flush_mask, bcp);
+        if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
+                return NULL;
+        else
+                return cpumask;
 }

 /*
@@ -976,9 +938,11 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)

         if (!cpu) {
                 seq_printf(file,
-                "# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 ");
+                "# cpu sent stime self locals remotes ncpus localhub ");
+                seq_printf(file,
+                "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
                 seq_printf(file,
-                "numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto ");
+                "numuvhubs4 numuvhubs2 numuvhubs1 dto ");
                 seq_printf(file,
                 "retries rok resetp resett giveup sto bz throt ");
                 seq_printf(file,
@@ -994,10 +958,14 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
                 seq_printf(file,
                         "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
                         cpu, stat->s_requestor, cycles_2_us(stat->s_time),
-                        stat->s_ntarguvhub, stat->s_ntarguvhub16,
+                        stat->s_ntargself, stat->s_ntarglocals,
+                        stat->s_ntargremotes, stat->s_ntargcpu,
+                        stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
+                        stat->s_ntarguvhub, stat->s_ntarguvhub16);
+                seq_printf(file, "%ld %ld %ld %ld %ld ",
                         stat->s_ntarguvhub8, stat->s_ntarguvhub4,
                         stat->s_ntarguvhub2, stat->s_ntarguvhub1,
-                        stat->s_ntargcpu, stat->s_dtimeout);
+                        stat->s_dtimeout);
                 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
                         stat->s_retry_messages, stat->s_retriesok,
                         stat->s_resets_plug, stat->s_resets_timeout,
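
[Context note, quoted from memory of the kernel around this release and lightly
abridged -- verify against arch/x86/mm/tlb.c of the era.] The caller's side of
the 0/1 contract: a NULL return from uv_flush_tlb_others() means the BAU
covered every target, while a non-NULL return hands the whole original mask to
the IPI fallback path:

        void native_flush_tlb_others(const struct cpumask *cpumask,
                                     struct mm_struct *mm, unsigned long va)
        {
                if (is_uv_system()) {
                        unsigned int cpu;

                        cpu = get_cpu();
                        /* NULL: all flushed by BAU; non-NULL: IPI fallback */
                        cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
                        if (cpumask)
                                flush_tlb_others_ipi(cpumask, mm, va);
                        put_cpu();
                        return;
                }
                flush_tlb_others_ipi(cpumask, mm, va);
        }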