aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Cliff Wickman <cpw@sgi.com> 2010-06-02 17:22:02 -0400
committer: Ingo Molnar <mingo@elte.hu> 2010-06-08 15:13:48 -0400
commit: 450a007eebaf430426ea8f89bbc3f287949905b2 (patch)
tree: bb44fa25f4855f82e84a788bbdb8cda5a05a7a4f
parent: 7fba1bcd4844a4a8619a03bf51cabc92aea365a8 (diff)
x86, UV: BAU broadcast to the local hub
Make the Broadcast Assist Unit (BAU) driver use the BAU for TLB shootdowns of cpus on the local uvhub. It was previously thought that an IPI might be faster for the cpus on the local hub, but the IPI operation would have to follow the completion of the BAU broadcast anyway. So we now broadcast to the local uvhub in all cases except when the current cpu was the only local cpu in the mask. This simplifies uv_flush_send_and_wait(): it now returns with either all shootdowns complete or none. Adjust the statistics to account for shootdowns on the local uvhub. Signed-off-by: Cliff Wickman <cpw@sgi.com> Cc: gregkh@suse.de LKML-Reference: <E1OJvNy-0004aq-G7@eag09.americas.sgi.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- arch/x86/include/asm/uv/uv_bau.h 5
-rw-r--r-- arch/x86/kernel/tlb_uv.c 138
2 files changed, 58 insertions, 85 deletions
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index c19b870ea58a..7f6ea611cb71 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -346,6 +346,11 @@ struct ptc_stats {
346 unsigned long s_time; /* time spent in sending side */ 346 unsigned long s_time; /* time spent in sending side */
347 unsigned long s_retriesok; /* successful retries */ 347 unsigned long s_retriesok; /* successful retries */
348 unsigned long s_ntargcpu; /* total number of cpu's targeted */ 348 unsigned long s_ntargcpu; /* total number of cpu's targeted */
349 unsigned long s_ntargself; /* times the sending cpu was targeted */
350 unsigned long s_ntarglocals; /* targets of cpus on the local blade */
351 unsigned long s_ntargremotes; /* targets of cpus on remote blades */
352 unsigned long s_ntarglocaluvhub; /* targets of the local hub */
353 unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */
349 unsigned long s_ntarguvhub; /* total number of uvhubs targeted */ 354 unsigned long s_ntarguvhub; /* total number of uvhubs targeted */
350 unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/ 355 unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/
351 unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */ 356 unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 4cb14dbd7fa3..a1615058fad3 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -400,10 +400,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
400 unsigned long mmr_offset, int right_shift, int this_cpu, 400 unsigned long mmr_offset, int right_shift, int this_cpu,
401 struct bau_control *bcp, struct bau_control *smaster, long try) 401 struct bau_control *bcp, struct bau_control *smaster, long try)
402{ 402{
403 int relaxes = 0;
404 unsigned long descriptor_status; 403 unsigned long descriptor_status;
405 unsigned long mmr;
406 unsigned long mask;
407 cycles_t ttime; 404 cycles_t ttime;
408 struct ptc_stats *stat = bcp->statp; 405 struct ptc_stats *stat = bcp->statp;
409 struct bau_control *hmaster; 406 struct bau_control *hmaster;
@@ -524,25 +521,19 @@ disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
524 * The flush_mask contains the cpus the broadcast is to be sent to, plus 521 * The flush_mask contains the cpus the broadcast is to be sent to, plus
525 * cpus that are on the local uvhub. 522 * cpus that are on the local uvhub.
526 * 523 *
527 * Returns NULL if all flushing represented in the mask was done. The mask 524 * Returns 0 if all flushing represented in the mask was done.
528 * is zeroed. 525 * Returns 1 if it gives up entirely and the original cpu mask is to be
529 * Returns @flush_mask if some remote flushing remains to be done. The 526 * returned to the kernel.
530 * mask will have some bits still set, representing any cpus on the local
531 * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed.
532 */ 527 */
533const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, 528int uv_flush_send_and_wait(struct bau_desc *bau_desc,
534 struct cpumask *flush_mask, 529 struct cpumask *flush_mask, struct bau_control *bcp)
535 struct bau_control *bcp)
536{ 530{
537 int right_shift; 531 int right_shift;
538 int uvhub;
539 int bit;
540 int completion_status = 0; 532 int completion_status = 0;
541 int seq_number = 0; 533 int seq_number = 0;
542 long try = 0; 534 long try = 0;
543 int cpu = bcp->uvhub_cpu; 535 int cpu = bcp->uvhub_cpu;
544 int this_cpu = bcp->cpu; 536 int this_cpu = bcp->cpu;
545 int this_uvhub = bcp->uvhub;
546 unsigned long mmr_offset; 537 unsigned long mmr_offset;
547 unsigned long index; 538 unsigned long index;
548 cycles_t time1; 539 cycles_t time1;
@@ -552,10 +543,6 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
552 struct bau_control *smaster = bcp->socket_master; 543 struct bau_control *smaster = bcp->socket_master;
553 struct bau_control *hmaster = bcp->uvhub_master; 544 struct bau_control *hmaster = bcp->uvhub_master;
554 545
555 /*
556 * Spin here while there are hmaster->max_bau_concurrent or more active
557 * descriptors. This is the per-uvhub 'throttle'.
558 */
559 if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, 546 if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
560 &hmaster->active_descriptor_count, 547 &hmaster->active_descriptor_count,
561 hmaster->max_bau_concurrent)) { 548 hmaster->max_bau_concurrent)) {
@@ -591,9 +578,7 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
591 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | 578 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
592 bcp->uvhub_cpu; 579 bcp->uvhub_cpu;
593 bcp->send_message = get_cycles(); 580 bcp->send_message = get_cycles();
594
595 uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); 581 uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
596
597 try++; 582 try++;
598 completion_status = uv_wait_completion(bau_desc, mmr_offset, 583 completion_status = uv_wait_completion(bau_desc, mmr_offset,
599 right_shift, this_cpu, bcp, smaster, try); 584 right_shift, this_cpu, bcp, smaster, try);
@@ -652,16 +637,9 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
652 (hmaster->max_bau_concurrent < 637 (hmaster->max_bau_concurrent <
653 hmaster->max_bau_concurrent_constant)) 638 hmaster->max_bau_concurrent_constant))
654 hmaster->max_bau_concurrent++; 639 hmaster->max_bau_concurrent++;
655
656 /*
657 * hold any cpu not timing out here; no other cpu currently held by
658 * the 'throttle' should enter the activation code
659 */
660 while (hmaster->uvhub_quiesce) 640 while (hmaster->uvhub_quiesce)
661 cpu_relax(); 641 cpu_relax();
662 atomic_dec(&hmaster->active_descriptor_count); 642 atomic_dec(&hmaster->active_descriptor_count);
663
664 /* guard against cycles wrap */
665 if (time2 > time1) { 643 if (time2 > time1) {
666 elapsed = time2 - time1; 644 elapsed = time2 - time1;
667 stat->s_time += elapsed; 645 stat->s_time += elapsed;
@@ -674,32 +652,14 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
674 } 652 }
675 } 653 }
676 } else 654 } else
677 stat->s_requestor--; /* don't count this one */ 655 stat->s_requestor--;
678 if (completion_status == FLUSH_COMPLETE && try > 1) 656 if (completion_status == FLUSH_COMPLETE && try > 1)
679 stat->s_retriesok++; 657 stat->s_retriesok++;
680 else if (completion_status == FLUSH_GIVEUP) { 658 else if (completion_status == FLUSH_GIVEUP) {
681 /*
682 * Cause the caller to do an IPI-style TLB shootdown on
683 * the target cpu's, all of which are still in the mask.
684 */
685 stat->s_giveup++; 659 stat->s_giveup++;
686 return flush_mask; 660 return 1;
687 } 661 }
688 662 return 0;
689 /*
690 * Success, so clear the remote cpu's from the mask so we don't
691 * use the IPI method of shootdown on them.
692 */
693 for_each_cpu(bit, flush_mask) {
694 uvhub = uv_cpu_to_blade_id(bit);
695 if (uvhub == this_uvhub)
696 continue;
697 cpumask_clear_cpu(bit, flush_mask);
698 }
699 if (!cpumask_empty(flush_mask))
700 return flush_mask;
701
702 return NULL;
703} 663}
704 664
705/** 665/**
@@ -731,10 +691,11 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
731 struct mm_struct *mm, 691 struct mm_struct *mm,
732 unsigned long va, unsigned int cpu) 692 unsigned long va, unsigned int cpu)
733{ 693{
734 int remotes;
735 int tcpu; 694 int tcpu;
736 int uvhub; 695 int uvhub;
737 int locals = 0; 696 int locals = 0;
697 int remotes = 0;
698 int hubs = 0;
738 struct bau_desc *bau_desc; 699 struct bau_desc *bau_desc;
739 struct cpumask *flush_mask; 700 struct cpumask *flush_mask;
740 struct ptc_stats *stat; 701 struct ptc_stats *stat;
@@ -768,54 +729,52 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
768 729
769 /* 730 /*
770 * Each sending cpu has a per-cpu mask which it fills from the caller's 731 * Each sending cpu has a per-cpu mask which it fills from the caller's
771 * cpu mask. Only remote cpus are converted to uvhubs and copied. 732 * cpu mask. All cpus are converted to uvhubs and copied to the
733 * activation descriptor.
772 */ 734 */
773 flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); 735 flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
774 /* 736 /* don't actually do a shootdown of the local cpu */
775 * copy cpumask to flush_mask, removing current cpu
776 * (current cpu should already have been flushed by the caller and
777 * should never be returned if we return flush_mask)
778 */
779 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); 737 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
780 if (cpu_isset(cpu, *cpumask)) 738 if (cpu_isset(cpu, *cpumask))
781 locals++; /* current cpu was targeted */ 739 stat->s_ntargself++;
782 740
783 bau_desc = bcp->descriptor_base; 741 bau_desc = bcp->descriptor_base;
784 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; 742 bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
785 743
786 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 744 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
787 remotes = 0; 745
746 /* cpu statistics */
788 for_each_cpu(tcpu, flush_mask) { 747 for_each_cpu(tcpu, flush_mask) {
789 uvhub = uv_cpu_to_blade_id(tcpu); 748 uvhub = uv_cpu_to_blade_id(tcpu);
790 if (uvhub == bcp->uvhub) {
791 locals++;
792 continue;
793 }
794 bau_uvhub_set(uvhub, &bau_desc->distribution); 749 bau_uvhub_set(uvhub, &bau_desc->distribution);
795 remotes++; 750 if (uvhub == bcp->uvhub)
796 } 751 locals++;
797 if (remotes == 0) {
798 /*
799 * No off_hub flushing; return status for local hub.
800 * Return the caller's mask if all were local (the current
801 * cpu may be in that mask).
802 */
803 if (locals)
804 return cpumask;
805 else 752 else
806 return NULL; 753 remotes++;
807 } 754 }
755 if ((locals + remotes) == 0)
756 return NULL;
808 stat->s_requestor++; 757 stat->s_requestor++;
809 stat->s_ntargcpu += remotes; 758 stat->s_ntargcpu += remotes + locals;
759 stat->s_ntargremotes += remotes;
760 stat->s_ntarglocals += locals;
810 remotes = bau_uvhub_weight(&bau_desc->distribution); 761 remotes = bau_uvhub_weight(&bau_desc->distribution);
811 stat->s_ntarguvhub += remotes; 762
812 if (remotes >= 16) 763 /* uvhub statistics */
764 hubs = bau_uvhub_weight(&bau_desc->distribution);
765 if (locals) {
766 stat->s_ntarglocaluvhub++;
767 stat->s_ntargremoteuvhub += (hubs - 1);
768 } else
769 stat->s_ntargremoteuvhub += hubs;
770 stat->s_ntarguvhub += hubs;
771 if (hubs >= 16)
813 stat->s_ntarguvhub16++; 772 stat->s_ntarguvhub16++;
814 else if (remotes >= 8) 773 else if (hubs >= 8)
815 stat->s_ntarguvhub8++; 774 stat->s_ntarguvhub8++;
816 else if (remotes >= 4) 775 else if (hubs >= 4)
817 stat->s_ntarguvhub4++; 776 stat->s_ntarguvhub4++;
818 else if (remotes >= 2) 777 else if (hubs >= 2)
819 stat->s_ntarguvhub2++; 778 stat->s_ntarguvhub2++;
820 else 779 else
821 stat->s_ntarguvhub1++; 780 stat->s_ntarguvhub1++;
@@ -824,10 +783,13 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
824 bau_desc->payload.sending_cpu = cpu; 783 bau_desc->payload.sending_cpu = cpu;
825 784
826 /* 785 /*
827 * uv_flush_send_and_wait returns null if all cpu's were messaged, or 786 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
828 * the adjusted flush_mask if any cpu's were not messaged. 787 * or 1 if it gave up and the original cpumask should be returned.
829 */ 788 */
830 return uv_flush_send_and_wait(bau_desc, flush_mask, bcp); 789 if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
790 return NULL;
791 else
792 return cpumask;
831} 793}
832 794
833/* 795/*
@@ -976,9 +938,11 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
976 938
977 if (!cpu) { 939 if (!cpu) {
978 seq_printf(file, 940 seq_printf(file,
979 "# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 "); 941 "# cpu sent stime self locals remotes ncpus localhub ");
942 seq_printf(file,
943 "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
980 seq_printf(file, 944 seq_printf(file,
981 "numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto "); 945 "numuvhubs4 numuvhubs2 numuvhubs1 dto ");
982 seq_printf(file, 946 seq_printf(file,
983 "retries rok resetp resett giveup sto bz throt "); 947 "retries rok resetp resett giveup sto bz throt ");
984 seq_printf(file, 948 seq_printf(file,
@@ -994,10 +958,14 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
994 seq_printf(file, 958 seq_printf(file,
995 "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", 959 "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
996 cpu, stat->s_requestor, cycles_2_us(stat->s_time), 960 cpu, stat->s_requestor, cycles_2_us(stat->s_time),
997 stat->s_ntarguvhub, stat->s_ntarguvhub16, 961 stat->s_ntargself, stat->s_ntarglocals,
962 stat->s_ntargremotes, stat->s_ntargcpu,
963 stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
964 stat->s_ntarguvhub, stat->s_ntarguvhub16);
965 seq_printf(file, "%ld %ld %ld %ld %ld ",
998 stat->s_ntarguvhub8, stat->s_ntarguvhub4, 966 stat->s_ntarguvhub8, stat->s_ntarguvhub4,
999 stat->s_ntarguvhub2, stat->s_ntarguvhub1, 967 stat->s_ntarguvhub2, stat->s_ntarguvhub1,
1000 stat->s_ntargcpu, stat->s_dtimeout); 968 stat->s_dtimeout);
1001 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", 969 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
1002 stat->s_retry_messages, stat->s_retriesok, 970 stat->s_retry_messages, stat->s_retriesok,
1003 stat->s_resets_plug, stat->s_resets_timeout, 971 stat->s_resets_plug, stat->s_resets_timeout,