-rw-r--r--	arch/x86/include/asm/uv/uv_bau.h	  5
-rw-r--r--	arch/x86/kernel/tlb_uv.c	138
2 files changed, 58 insertions, 85 deletions
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index c19b870ea58a..7f6ea611cb71 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -346,6 +346,11 @@ struct ptc_stats {
 	unsigned long s_time; /* time spent in sending side */
 	unsigned long s_retriesok; /* successful retries */
 	unsigned long s_ntargcpu; /* total number of cpu's targeted */
+	unsigned long s_ntargself; /* times the sending cpu was targeted */
+	unsigned long s_ntarglocals; /* targets of cpus on the local blade */
+	unsigned long s_ntargremotes; /* targets of cpus on remote blades */
+	unsigned long s_ntarglocaluvhub; /* targets of the local hub */
+	unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */
 	unsigned long s_ntarguvhub; /* total number of uvhubs targeted */
 	unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/
 	unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */
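The five new counters split what s_ntargcpu and s_ntarguvhub used to carry in aggregate: targeted cpus are broken out into the sender itself, cpus on the sender's own uvhub, and cpus on remote uvhubs, and targeted uvhubs into the local hub versus remote hubs. A minimal sketch of the intended bookkeeping, mirroring the updates made in uv_flush_tlb_others() below (the helper and its standalone form are illustrative, not part of the patch):

/* Illustrative only: how the new ptc_stats counters relate for one request.
 * 'self' is nonzero when the sending cpu was in the caller's mask; 'locals'
 * and 'remotes' count the other targeted cpus on the local and remote
 * uvhubs; 'hubs' is the number of distinct uvhubs in the distribution map.
 */
static void account_targets(struct ptc_stats *stat, int self, int locals,
			    int remotes, int hubs)
{
	if (self)
		stat->s_ntargself++;		/* sender targeted itself */
	stat->s_ntargcpu += locals + remotes;	/* all other targeted cpus */
	stat->s_ntarglocals += locals;		/* cpus on the sending blade */
	stat->s_ntargremotes += remotes;	/* cpus on other blades */

	stat->s_ntarguvhub += hubs;		/* every targeted hub */
	if (locals) {				/* one of them is the local hub */
		stat->s_ntarglocaluvhub++;
		stat->s_ntargremoteuvhub += hubs - 1;
	} else {
		stat->s_ntargremoteuvhub += hubs;
	}
}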
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 4cb14dbd7fa3..a1615058fad3 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -400,10 +400,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
 	unsigned long mmr_offset, int right_shift, int this_cpu,
 	struct bau_control *bcp, struct bau_control *smaster, long try)
 {
-	int relaxes = 0;
 	unsigned long descriptor_status;
-	unsigned long mmr;
-	unsigned long mask;
 	cycles_t ttime;
 	struct ptc_stats *stat = bcp->statp;
 	struct bau_control *hmaster;
@@ -524,25 +521,19 @@ disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
  * The flush_mask contains the cpus the broadcast is to be sent to, plus
  * cpus that are on the local uvhub.
  *
- * Returns NULL if all flushing represented in the mask was done. The mask
- * is zeroed.
- * Returns @flush_mask if some remote flushing remains to be done. The
- * mask will have some bits still set, representing any cpus on the local
- * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed.
+ * Returns 0 if all flushing represented in the mask was done.
+ * Returns 1 if it gives up entirely and the original cpu mask is to be
+ * returned to the kernel.
  */
-const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
-					     struct cpumask *flush_mask,
-					     struct bau_control *bcp)
+int uv_flush_send_and_wait(struct bau_desc *bau_desc,
+			   struct cpumask *flush_mask, struct bau_control *bcp)
 {
 	int right_shift;
-	int uvhub;
-	int bit;
 	int completion_status = 0;
 	int seq_number = 0;
 	long try = 0;
 	int cpu = bcp->uvhub_cpu;
 	int this_cpu = bcp->cpu;
-	int this_uvhub = bcp->uvhub;
 	unsigned long mmr_offset;
 	unsigned long index;
 	cycles_t time1;
@@ -552,10 +543,6 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
 	struct bau_control *smaster = bcp->socket_master;
 	struct bau_control *hmaster = bcp->uvhub_master;
 
-	/*
-	 * Spin here while there are hmaster->max_bau_concurrent or more active
-	 * descriptors. This is the per-uvhub 'throttle'.
-	 */
 	if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
 			&hmaster->active_descriptor_count,
 			hmaster->max_bau_concurrent)) {
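The comment removed above described the only subtle part of this sequence: atomic_inc_unless_ge() acts as a per-uvhub throttle, admitting a sender only while fewer than max_bau_concurrent descriptors are active on the hub; a refused caller waits and retries. A hedged sketch of that pattern (the standalone helper and its name are illustrative, not the kernel's actual implementation):

/* Illustrative throttle, not the kernel helper itself: admit a sender only
 * while fewer than 'limit' descriptors are active on this uvhub.
 */
static int try_enter_throttle(spinlock_t *lock, atomic_t *active, int limit)
{
	int admitted = 0;

	spin_lock(lock);
	if (atomic_read(active) < limit) {	/* room for one more descriptor */
		atomic_inc(active);
		admitted = 1;
	}
	spin_unlock(lock);
	return admitted;
}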
@@ -591,9 +578,7 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
 		index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
 			bcp->uvhub_cpu;
 		bcp->send_message = get_cycles();
-
 		uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
-
 		try++;
 		completion_status = uv_wait_completion(bau_desc, mmr_offset,
 			right_shift, this_cpu, bcp, smaster, try);
@@ -652,16 +637,9 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
 		    (hmaster->max_bau_concurrent <
 		     hmaster->max_bau_concurrent_constant))
 			hmaster->max_bau_concurrent++;
-
-	/*
-	 * hold any cpu not timing out here; no other cpu currently held by
-	 * the 'throttle' should enter the activation code
-	 */
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 	atomic_dec(&hmaster->active_descriptor_count);
-
-	/* guard against cycles wrap */
 	if (time2 > time1) {
 		elapsed = time2 - time1;
 		stat->s_time += elapsed;
@@ -674,32 +652,14 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
 			}
 		}
 	} else
-		stat->s_requestor--; /* don't count this one */
+		stat->s_requestor--;
 	if (completion_status == FLUSH_COMPLETE && try > 1)
 		stat->s_retriesok++;
 	else if (completion_status == FLUSH_GIVEUP) {
-		/*
-		 * Cause the caller to do an IPI-style TLB shootdown on
-		 * the target cpu's, all of which are still in the mask.
-		 */
 		stat->s_giveup++;
-		return flush_mask;
+		return 1;
 	}
-
-	/*
-	 * Success, so clear the remote cpu's from the mask so we don't
-	 * use the IPI method of shootdown on them.
-	 */
-	for_each_cpu(bit, flush_mask) {
-		uvhub = uv_cpu_to_blade_id(bit);
-		if (uvhub == this_uvhub)
-			continue;
-		cpumask_clear_cpu(bit, flush_mask);
-	}
-	if (!cpumask_empty(flush_mask))
-		return flush_mask;
-
-	return NULL;
+	return 0;
 }
 
 /**
@@ -731,10 +691,11 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 			    struct mm_struct *mm,
 			    unsigned long va, unsigned int cpu)
 {
-	int remotes;
 	int tcpu;
 	int uvhub;
 	int locals = 0;
+	int remotes = 0;
+	int hubs = 0;
 	struct bau_desc *bau_desc;
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
@@ -768,54 +729,52 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 
 	/*
 	 * Each sending cpu has a per-cpu mask which it fills from the caller's
-	 * cpu mask. Only remote cpus are converted to uvhubs and copied.
+	 * cpu mask. All cpus are converted to uvhubs and copied to the
+	 * activation descriptor.
 	 */
 	flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
-	/*
-	 * copy cpumask to flush_mask, removing current cpu
-	 * (current cpu should already have been flushed by the caller and
-	 * should never be returned if we return flush_mask)
-	 */
+	/* don't actually do a shootdown of the local cpu */
 	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
 	if (cpu_isset(cpu, *cpumask))
-		locals++;  /* current cpu was targeted */
+		stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
 	bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
 
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
-	remotes = 0;
+
+	/* cpu statistics */
 	for_each_cpu(tcpu, flush_mask) {
 		uvhub = uv_cpu_to_blade_id(tcpu);
-		if (uvhub == bcp->uvhub) {
-			locals++;
-			continue;
-		}
 		bau_uvhub_set(uvhub, &bau_desc->distribution);
-		remotes++;
-	}
-	if (remotes == 0) {
-		/*
-		 * No off_hub flushing; return status for local hub.
-		 * Return the caller's mask if all were local (the current
-		 * cpu may be in that mask).
-		 */
-		if (locals)
-			return cpumask;
+		if (uvhub == bcp->uvhub)
+			locals++;
 		else
-			return NULL;
+			remotes++;
 	}
+	if ((locals + remotes) == 0)
+		return NULL;
 	stat->s_requestor++;
-	stat->s_ntargcpu += remotes;
+	stat->s_ntargcpu += remotes + locals;
+	stat->s_ntargremotes += remotes;
+	stat->s_ntarglocals += locals;
 	remotes = bau_uvhub_weight(&bau_desc->distribution);
-	stat->s_ntarguvhub += remotes;
-	if (remotes >= 16)
+
+	/* uvhub statistics */
+	hubs = bau_uvhub_weight(&bau_desc->distribution);
+	if (locals) {
+		stat->s_ntarglocaluvhub++;
+		stat->s_ntargremoteuvhub += (hubs - 1);
+	} else
+		stat->s_ntargremoteuvhub += hubs;
+	stat->s_ntarguvhub += hubs;
+	if (hubs >= 16)
 		stat->s_ntarguvhub16++;
-	else if (remotes >= 8)
+	else if (hubs >= 8)
 		stat->s_ntarguvhub8++;
-	else if (remotes >= 4)
+	else if (hubs >= 4)
 		stat->s_ntarguvhub4++;
-	else if (remotes >= 2)
+	else if (hubs >= 2)
 		stat->s_ntarguvhub2++;
 	else
 		stat->s_ntarguvhub1++;
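To make the new accounting concrete, here is a worked example (illustrative numbers only) for one request whose mask holds the sender, two other cpus on the sender's blade, and four cpus spread over two remote blades:

/*
 * Worked example (illustrative): sender + 2 cpus on its own blade
 * + 4 cpus on two remote blades are in the caller's mask.
 *
 *   s_ntargself        += 1     sender was in the mask
 *   locals = 2, remotes = 4     counted over flush_mask (sender excluded)
 *   s_ntargcpu         += 6     remotes + locals
 *   s_ntarglocals      += 2
 *   s_ntargremotes     += 4
 *   hubs = 3                    weight of the distribution map
 *   s_ntarglocaluvhub  += 1     locals != 0, so one hub is the local one
 *   s_ntargremoteuvhub += 2     hubs - 1
 *   s_ntarguvhub       += 3     and s_ntarguvhub2++, since 2 <= hubs < 4
 */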
@@ -824,10 +783,13 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	bau_desc->payload.sending_cpu = cpu;
 
 	/*
-	 * uv_flush_send_and_wait returns null if all cpu's were messaged, or
-	 * the adjusted flush_mask if any cpu's were not messaged.
+	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
+	 * or 1 if it gave up and the original cpumask should be returned.
 	 */
-	return uv_flush_send_and_wait(bau_desc, flush_mask, bcp);
+	if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
+		return NULL;
+	else
+		return cpumask;
 }
 
 /*
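The NULL-versus-cpumask return only matters to the generic shootdown path that calls uv_flush_tlb_others(). A hedged sketch of that consumer, roughly as the x86 code of this era has it in arch/x86/mm/tlb.c (not part of this patch; flush_tlb_others_ipi() is the ordinary IPI-based fallback):

void native_flush_tlb_others(const struct cpumask *cpumask,
			     struct mm_struct *mm, unsigned long va)
{
	if (is_uv_system()) {
		unsigned int cpu = get_cpu();

		/* NULL means the BAU broadcast covered everything;
		 * a non-NULL mask falls back to IPI-based shootdown. */
		cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
		if (cpumask)
			flush_tlb_others_ipi(cpumask, mm, va);
		put_cpu();
		return;
	}
	flush_tlb_others_ipi(cpumask, mm, va);
}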
@@ -976,9 +938,11 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
 
 	if (!cpu) {
 		seq_printf(file,
-		"# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 ");
+		"# cpu sent stime self locals remotes ncpus localhub ");
+		seq_printf(file,
+		"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
 		seq_printf(file,
-		"numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto ");
+		"numuvhubs4 numuvhubs2 numuvhubs1 dto ");
 		seq_printf(file,
 		"retries rok resetp resett giveup sto bz throt ");
 		seq_printf(file,
@@ -994,10 +958,14 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
 		seq_printf(file,
 			"cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
 			cpu, stat->s_requestor, cycles_2_us(stat->s_time),
-			stat->s_ntarguvhub, stat->s_ntarguvhub16,
+			stat->s_ntargself, stat->s_ntarglocals,
+			stat->s_ntargremotes, stat->s_ntargcpu,
+			stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
+			stat->s_ntarguvhub, stat->s_ntarguvhub16);
+		seq_printf(file, "%ld %ld %ld %ld %ld ",
 			stat->s_ntarguvhub8, stat->s_ntarguvhub4,
 			stat->s_ntarguvhub2, stat->s_ntarguvhub1,
-			stat->s_ntargcpu, stat->s_dtimeout);
+			stat->s_dtimeout);
 		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
 			stat->s_retry_messages, stat->s_retriesok,
 			stat->s_resets_plug, stat->s_resets_timeout,
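The header strings and the per-cpu seq_printf calls have to stay in lockstep; for reference, the sending-side columns after this change map to the counters as follows (derived only from the strings and arguments above):

/*
 * sent       s_requestor           stime      cycles_2_us(s_time)
 * self       s_ntargself           locals     s_ntarglocals
 * remotes    s_ntargremotes        ncpus      s_ntargcpu
 * localhub   s_ntarglocaluvhub     remotehub  s_ntargremoteuvhub
 * numuvhubs  s_ntarguvhub          numuvhubs16..numuvhubs1  s_ntarguvhub16..1
 * dto        s_dtimeout            retries    s_retry_messages
 * rok        s_retriesok           resetp     s_resets_plug
 * resett     s_resets_timeout
 */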