author      Cliff Wickman <cpw@sgi.com>     2010-06-02 17:22:02 -0400
committer   Ingo Molnar <mingo@elte.hu>     2010-06-08 15:13:48 -0400
commit      450a007eebaf430426ea8f89bbc3f287949905b2
tree        bb44fa25f4855f82e84a788bbdb8cda5a05a7a4f /arch
parent      7fba1bcd4844a4a8619a03bf51cabc92aea365a8
x86, UV: BAU broadcast to the local hub
Make the Broadcast Assist Unit driver use the BAU for TLB
shootdowns of cpus on the local uvhub.

It was previously thought that an IPI might be faster for the
cpus on the local hub.  But the IPI operation would have to
follow the completion of the BAU broadcast anyway, so we now
broadcast to the local uvhub in all cases except when the
current cpu is the only local cpu in the mask.

This simplifies uv_flush_send_and_wait(): it now returns either
all shootdowns complete, or none.

Adjust the statistics to account for shootdowns on the local
uvhub.
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: gregkh@suse.de
LKML-Reference: <E1OJvNy-0004aq-G7@eag09.americas.sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
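
[Illustrative aside -- not part of the patch.] The new contract is all-or-none:
the caller either treats every targeted cpu as flushed, or falls back to IPIs
for the entire original mask. A minimal standalone model of that decision,
assuming a made-up 8-cpu/2-hub topology (cpu_to_hub() and flush_model() are
hypothetical names, not driver code):

        /* model of the all-or-none flush decision -- illustrative only */
        #include <stdio.h>

        #define NCPUS 8

        /* hypothetical topology: cpus 0-3 on hub 0, cpus 4-7 on hub 1 */
        static int cpu_to_hub(int cpu) { return cpu / 4; }

        static const char *flush_model(unsigned mask, int sender, int bau_ok)
        {
                int cpu, locals = 0, remotes = 0;

                mask &= ~(1u << sender);  /* never shoot down the sender */
                for (cpu = 0; cpu < NCPUS; cpu++) {
                        if (!(mask & (1u << cpu)))
                                continue;
                        if (cpu_to_hub(cpu) == cpu_to_hub(sender))
                                locals++;   /* local uvhub: now broadcast too */
                        else
                                remotes++;
                }
                if (locals + remotes == 0)
                        return "NULL (nothing left to flush)";
                /* all-or-none: success covers the whole mask, failure none */
                return bau_ok ? "NULL (broadcast flushed everything)"
                              : "original cpumask (caller falls back to IPIs)";
        }

        int main(void)
        {
                /* sender is cpu 1; targets 0 and 2 are local, 5 is remote */
                printf("success:   %s\n", flush_model(0x27, 1, 1));
                printf("give-up:   %s\n", flush_model(0x27, 1, 0));
                printf("self-only: %s\n", flush_model(0x02, 1, 1));
                return 0;
        }

The "self-only" case is the one exception kept by the patch: when the sender
was the only cpu in the mask, nothing is broadcast and NULL is returned.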
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h |   5
-rw-r--r--  arch/x86/kernel/tlb_uv.c         | 138
2 files changed, 58 insertions, 85 deletions
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index c19b870ea58a..7f6ea611cb71 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -346,6 +346,11 @@ struct ptc_stats {
         unsigned long s_time; /* time spent in sending side */
         unsigned long s_retriesok; /* successful retries */
         unsigned long s_ntargcpu; /* total number of cpu's targeted */
+        unsigned long s_ntargself; /* times the sending cpu was targeted */
+        unsigned long s_ntarglocals; /* targets of cpus on the local blade */
+        unsigned long s_ntargremotes; /* targets of cpus on remote blades */
+        unsigned long s_ntarglocaluvhub; /* targets of the local hub */
+        unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */
         unsigned long s_ntarguvhub; /* total number of uvhubs targeted */
         unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/
         unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */
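
[Editorial summary of the accounting, derived from the tlb_uv.c hunks below --
not code from the patch.] The five new counters split the two existing totals
along local/remote lines; per request:

        /* cpu counts */
        s_ntargself      += (sender was in the caller's mask ? 1 : 0);
        s_ntarglocals    += locals;            /* cpus on the sender's uvhub */
        s_ntargremotes   += remotes;           /* cpus on other uvhubs */
        s_ntargcpu       += locals + remotes;  /* == locals + remotes */

        /* uvhub counts */
        s_ntarglocaluvhub  += (locals ? 1 : 0);
        s_ntargremoteuvhub += (locals ? hubs - 1 : hubs);
        s_ntarguvhub       += hubs;            /* == local + remote uvhubs */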
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 4cb14dbd7fa3..a1615058fad3 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -400,10 +400,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
         unsigned long mmr_offset, int right_shift, int this_cpu,
         struct bau_control *bcp, struct bau_control *smaster, long try)
 {
-        int relaxes = 0;
         unsigned long descriptor_status;
-        unsigned long mmr;
-        unsigned long mask;
         cycles_t ttime;
         struct ptc_stats *stat = bcp->statp;
         struct bau_control *hmaster;
@@ -524,25 +521,19 @@ disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
  * The flush_mask contains the cpus the broadcast is to be sent to, plus
  * cpus that are on the local uvhub.
  *
- * Returns NULL if all flushing represented in the mask was done. The mask
- * is zeroed.
- * Returns @flush_mask if some remote flushing remains to be done. The
- * mask will have some bits still set, representing any cpus on the local
- * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed.
+ * Returns 0 if all flushing represented in the mask was done.
+ * Returns 1 if it gives up entirely and the original cpu mask is to be
+ * returned to the kernel.
  */
-const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
-                                             struct cpumask *flush_mask,
-                                             struct bau_control *bcp)
+int uv_flush_send_and_wait(struct bau_desc *bau_desc,
+                           struct cpumask *flush_mask, struct bau_control *bcp)
 {
         int right_shift;
-        int uvhub;
-        int bit;
         int completion_status = 0;
         int seq_number = 0;
         long try = 0;
         int cpu = bcp->uvhub_cpu;
         int this_cpu = bcp->cpu;
-        int this_uvhub = bcp->uvhub;
         unsigned long mmr_offset;
         unsigned long index;
         cycles_t time1;
@@ -552,10 +543,6 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
         struct bau_control *smaster = bcp->socket_master;
         struct bau_control *hmaster = bcp->uvhub_master;

-        /*
-         * Spin here while there are hmaster->max_bau_concurrent or more active
-         * descriptors. This is the per-uvhub 'throttle'.
-         */
         if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
                         &hmaster->active_descriptor_count,
                         hmaster->max_bau_concurrent)) {
@@ -591,9 +578,7 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
         index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
                 bcp->uvhub_cpu;
         bcp->send_message = get_cycles();
-
         uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
-
         try++;
         completion_status = uv_wait_completion(bau_desc, mmr_offset,
                         right_shift, this_cpu, bcp, smaster, try);
@@ -652,16 +637,9 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
             (hmaster->max_bau_concurrent <
              hmaster->max_bau_concurrent_constant))
                 hmaster->max_bau_concurrent++;
-
-        /*
-         * hold any cpu not timing out here; no other cpu currently held by
-         * the 'throttle' should enter the activation code
-         */
         while (hmaster->uvhub_quiesce)
                 cpu_relax();
         atomic_dec(&hmaster->active_descriptor_count);
-
-        /* guard against cycles wrap */
         if (time2 > time1) {
                 elapsed = time2 - time1;
                 stat->s_time += elapsed;
@@ -674,32 +652,14 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
                         }
                 }
         } else
-                stat->s_requestor--; /* don't count this one */
+                stat->s_requestor--;
         if (completion_status == FLUSH_COMPLETE && try > 1)
                 stat->s_retriesok++;
         else if (completion_status == FLUSH_GIVEUP) {
-                /*
-                 * Cause the caller to do an IPI-style TLB shootdown on
-                 * the target cpu's, all of which are still in the mask.
-                 */
                 stat->s_giveup++;
-                return flush_mask;
+                return 1;
         }
-
-        /*
-         * Success, so clear the remote cpu's from the mask so we don't
-         * use the IPI method of shootdown on them.
-         */
-        for_each_cpu(bit, flush_mask) {
-                uvhub = uv_cpu_to_blade_id(bit);
-                if (uvhub == this_uvhub)
-                        continue;
-                cpumask_clear_cpu(bit, flush_mask);
-        }
-        if (!cpumask_empty(flush_mask))
-                return flush_mask;
-
-        return NULL;
+        return 0;
 }

 /**
@@ -731,10 +691,11 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
         struct mm_struct *mm,
         unsigned long va, unsigned int cpu)
 {
-        int remotes;
         int tcpu;
         int uvhub;
         int locals = 0;
+        int remotes = 0;
+        int hubs = 0;
         struct bau_desc *bau_desc;
         struct cpumask *flush_mask;
         struct ptc_stats *stat;
@@ -768,54 +729,52 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,

         /*
          * Each sending cpu has a per-cpu mask which it fills from the caller's
-         * cpu mask. Only remote cpus are converted to uvhubs and copied.
+         * cpu mask. All cpus are converted to uvhubs and copied to the
+         * activation descriptor.
          */
         flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
-        /*
-         * copy cpumask to flush_mask, removing current cpu
-         * (current cpu should already have been flushed by the caller and
-         * should never be returned if we return flush_mask)
-         */
+        /* don't actually do a shootdown of the local cpu */
         cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
         if (cpu_isset(cpu, *cpumask))
-                locals++;  /* current cpu was targeted */
+                stat->s_ntargself++;

         bau_desc = bcp->descriptor_base;
         bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;

         bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
-        remotes = 0;
+
+        /* cpu statistics */
         for_each_cpu(tcpu, flush_mask) {
                 uvhub = uv_cpu_to_blade_id(tcpu);
-                if (uvhub == bcp->uvhub) {
-                        locals++;
-                        continue;
-                }
                 bau_uvhub_set(uvhub, &bau_desc->distribution);
-                remotes++;
-        }
-        if (remotes == 0) {
-                /*
-                 * No off_hub flushing; return status for local hub.
-                 * Return the caller's mask if all were local (the current
-                 * cpu may be in that mask).
-                 */
-                if (locals)
-                        return cpumask;
+                if (uvhub == bcp->uvhub)
+                        locals++;
                 else
-                        return NULL;
-        }
+                        remotes++;
+        }
+        if ((locals + remotes) == 0)
+                return NULL;
         stat->s_requestor++;
-        stat->s_ntargcpu += remotes;
-        remotes = bau_uvhub_weight(&bau_desc->distribution);
-        stat->s_ntarguvhub += remotes;
-        if (remotes >= 16)
+        stat->s_ntargcpu += remotes + locals;
+        stat->s_ntargremotes += remotes;
+        stat->s_ntarglocals += locals;
+
+
+        /* uvhub statistics */
+        hubs = bau_uvhub_weight(&bau_desc->distribution);
+        if (locals) {
+                stat->s_ntarglocaluvhub++;
+                stat->s_ntargremoteuvhub += (hubs - 1);
+        } else
+                stat->s_ntargremoteuvhub += hubs;
+        stat->s_ntarguvhub += hubs;
+        if (hubs >= 16)
                 stat->s_ntarguvhub16++;
-        else if (remotes >= 8)
+        else if (hubs >= 8)
                 stat->s_ntarguvhub8++;
-        else if (remotes >= 4)
+        else if (hubs >= 4)
                 stat->s_ntarguvhub4++;
-        else if (remotes >= 2)
+        else if (hubs >= 2)
                 stat->s_ntarguvhub2++;
         else
                 stat->s_ntarguvhub1++;
@@ -824,10 +783,13 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
         bau_desc->payload.sending_cpu = cpu;

         /*
-         * uv_flush_send_and_wait returns null if all cpu's were messaged, or
-         * the adjusted flush_mask if any cpu's were not messaged.
+         * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
+         * or 1 if it gave up and the original cpumask should be returned.
          */
-        return uv_flush_send_and_wait(bau_desc, flush_mask, bcp);
+        if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
+                return NULL;
+        else
+                return cpumask;
 }

 /*
@@ -976,9 +938,11 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)

         if (!cpu) {
                 seq_printf(file,
-                "# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 ");
+                "# cpu sent stime self locals remotes ncpus localhub ");
+                seq_printf(file,
+                "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
                 seq_printf(file,
-                "numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto ");
+                "numuvhubs4 numuvhubs2 numuvhubs1 dto ");
                 seq_printf(file,
                 "retries rok resetp resett giveup sto bz throt ");
                 seq_printf(file,
@@ -994,10 +958,14 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
                 seq_printf(file,
                         "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
                         cpu, stat->s_requestor, cycles_2_us(stat->s_time),
-                        stat->s_ntarguvhub, stat->s_ntarguvhub16,
+                        stat->s_ntargself, stat->s_ntarglocals,
+                        stat->s_ntargremotes, stat->s_ntargcpu,
+                        stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
+                        stat->s_ntarguvhub, stat->s_ntarguvhub16);
+                seq_printf(file, "%ld %ld %ld %ld %ld ",
                         stat->s_ntarguvhub8, stat->s_ntarguvhub4,
                         stat->s_ntarguvhub2, stat->s_ntarguvhub1,
-                        stat->s_ntargcpu, stat->s_dtimeout);
+                        stat->s_dtimeout);
                 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
                         stat->s_retry_messages, stat->s_retriesok,
                         stat->s_resets_plug, stat->s_resets_timeout,
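
[Context note, quoted from memory of the kernel around this release and lightly
abridged -- verify against arch/x86/mm/tlb.c of the era.] The caller's side of
the 0/1 contract: a NULL return from uv_flush_tlb_others() means the BAU
covered every target, while a non-NULL return hands the whole original mask to
the IPI fallback path:

        void native_flush_tlb_others(const struct cpumask *cpumask,
                                     struct mm_struct *mm, unsigned long va)
        {
                if (is_uv_system()) {
                        unsigned int cpu;

                        cpu = get_cpu();
                        /* NULL: all flushed by BAU; non-NULL: IPI fallback */
                        cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
                        if (cpumask)
                                flush_tlb_others_ipi(cpumask, mm, va);
                        put_cpu();
                        return;
                }
                flush_tlb_others_ipi(cpumask, mm, va);
        }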