author     Cliff Wickman <cpw@sgi.com>       2012-06-22 09:14:59 -0400
committer  Ingo Molnar <mingo@kernel.org>    2012-06-25 08:45:05 -0400
commit     8b6e511e51f7e540c8e71022318ee4cc9a4567a7 (patch)
tree       f87dc032f316f908b83c9033b56aea99f1180157
parent     26ef85770c765bb8b6b6922f8a413872dd8e3979 (diff)
x86/uv: Work around UV2 BAU hangs
On SGI's UV2 the BAU (Broadcast Assist Unit) driver can hang
under a heavy load. To cure this:
- Disable the UV2 extended status mode (see UV2_EXT_SHFT), as
this mode changes BAU behavior in more ways than just delivering
an extra bit of status. Revert status to just two meaningful bits,
like UV1.
- Use no IPI-style resets on UV2. Just give up the request for
whatever reason it failed and let it be accomplished with
the legacy IPI method.
- Use no alternate sending descriptor (the former UV2 workaround
bcp->using_desc and handle_uv2_busy() stuff). Just disable the
use of the BAU for a period of time in favor of the legacy IPI
method when the h/w bug leaves a descriptor busy.
-- new tunable: giveup_limit determines the threshold at which a hub is
considered so plugged that it should do all requests with the legacy IPI
method for a period of time (see the condensed sketch after the diffstat below)
-- generalize disable_for_congestion() (renamed disable_for_period()) for
use whenever a hub should avoid using the BAU for a period of time
Also:
- Fix find_another_by_swack(), which is part of the UV2 bug workaround
- Correct and clarify the statistics (new stats s_overipilimit, s_giveuplimit,
s_enters, s_ipifordisabled, s_plugged, s_congested)
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120622131459.GC31884@sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h |  28
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c    | 387
2 files changed, 200 insertions, 215 deletions
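To make the giveup_limit behavior described above easier to follow before reading the diff, here is a condensed, stand-alone model of the accounting the patch adds to record_send_stats(), disable_for_period() and check_enable(). It is a sketch only: the hub_model struct, the plain-integer "cycle" clock, record_giveup() and main() are illustration scaffolding rather than kernel code, and the real functions additionally take the per-hub disable_lock and walk every cpu on the hub. Only the field names and the threshold/period arithmetic mirror the patch.

/*
 * Stand-alone model of the giveup-limit policy (compiles as plain C).
 * Field names mirror the new bau_control members; everything else is
 * illustrative scaffolding, not kernel code.
 */
#include <stdio.h>
#include <stdbool.h>

struct hub_model {
        bool baudisabled;               /* hub is using legacy IPIs only */
        long period_giveups;            /* giveups seen in the current window */
        long giveup_limit;              /* tunable: threshold for disabling */
        unsigned long period_end;       /* end of the current counting window */
        unsigned long disabled_period;  /* window / disable length, in "cycles" */
        unsigned long set_bau_on_time;  /* when the BAU may be re-enabled */
};

/* disable_for_period(): stop using the BAU until the period expires */
static void disable_for_period(struct hub_model *hub, unsigned long now)
{
        if (!hub->baudisabled) {
                hub->baudisabled = true;
                hub->set_bau_on_time = now + hub->disabled_period;
        }
}

/* called on each FLUSH_GIVEUP, as record_send_stats() now does */
static void record_giveup(struct hub_model *hub, unsigned long now)
{
        if (now > hub->period_end)      /* window expired: restart the count */
                hub->period_giveups = 0;
        hub->period_giveups++;
        if (hub->period_giveups == 1)   /* first giveup opens a new window */
                hub->period_end = now + hub->disabled_period;
        if (hub->period_giveups > hub->giveup_limit)
                disable_for_period(hub, now);   /* too plugged: fall back to IPIs */
}

/* check_enable(): re-enable the BAU once the disabled period has expired */
static bool check_enable(struct hub_model *hub, unsigned long now)
{
        if (hub->baudisabled && now >= hub->set_bau_on_time) {
                hub->baudisabled = false;
                hub->period_giveups = 0;
                return true;
        }
        return false;
}

int main(void)
{
        struct hub_model hub = { .giveup_limit = 100, .disabled_period = 1000 };
        unsigned long now;

        /* 150 giveups inside one window: the hub disables itself at the 101st */
        for (now = 0; now < 150; now++)
                record_giveup(&hub, now);
        printf("baudisabled after burst: %d\n", hub.baudisabled);

        /* once the period has passed, check_enable() turns the BAU back on */
        printf("re-enabled at t=1200: %d\n", check_enable(&hub, 1200));
        return 0;
}

With the default GIVEUP_LIMIT of 100 and DISABLED_PERIOD of 10 seconds, a hub whose broadcasts keep ending in FLUSH_GIVEUP within one window stops using the BAU until check_enable() observes that the period has expired.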
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 847c00b721b2..a06983cdc125 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -140,6 +140,9 @@
 #define IPI_RESET_LIMIT 1
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD 5
+/* after this # of giveups (fall back to kernel IPI's) disable the use of
+   the BAU for a period of time */
+#define GIVEUP_LIMIT 100

 #define UV_LB_SUBNODEID 0x10

@@ -166,7 +169,6 @@
 #define FLUSH_RETRY_TIMEOUT 2
 #define FLUSH_GIVEUP 3
 #define FLUSH_COMPLETE 4
-#define FLUSH_RETRY_BUSYBUG 5

 /*
  * tuning the action when the numalink network is extremely delayed
@@ -175,7 +177,7 @@
                                 microseconds */
 #define CONGESTED_REPS 10    /* long delays averaged over
                                 this many broadcasts */
-#define CONGESTED_PERIOD 30  /* time for the bau to be
+#define DISABLED_PERIOD 10   /* time for the bau to be
                                 disabled, in seconds */
 /* see msg_type: */
 #define MSG_NOOP 0
@@ -520,7 +522,12 @@ struct ptc_stats {
         unsigned long s_uv2_wars;       /* uv2 workaround, perm. busy */
         unsigned long s_uv2_wars_hw;    /* uv2 workaround, hiwater */
         unsigned long s_uv2_war_waits;  /* uv2 workaround, long waits */
-        unsigned long s_enters;         /* entries to the driver */
+        unsigned long s_overipilimit;   /* over the ipi reset limit */
+        unsigned long s_giveuplimit;    /* disables, over giveup limit*/
+        unsigned long s_enters;         /* entries to the driver */
+        unsigned long s_ipifordisabled; /* fall back to IPI; disabled */
+        unsigned long s_plugged;        /* plugged by h/w bug*/
+        unsigned long s_congested;      /* giveup on long wait */
         /* destination statistics */
         unsigned long d_alltlb;         /* times all tlb's on this
                                            cpu were flushed */
@@ -588,8 +595,7 @@ struct bau_control {
         int ipi_attempts;
         int conseccompletes;
         short nobau;
-        int baudisabled;
-        int set_bau_off;
+        short baudisabled;
         short cpu;
         short osnode;
         short uvhub_cpu;
@@ -598,14 +604,16 @@ struct bau_control {
         short cpus_in_socket;
         short cpus_in_uvhub;
         short partition_base_pnode;
-        short using_desc; /* an index, like uvhub_cpu */
-        unsigned int inuse_map;
+        short busy;       /* all were busy (war) */
         unsigned short message_number;
         unsigned short uvhub_quiesce;
         short socket_acknowledge_count[DEST_Q_SIZE];
         cycles_t send_message;
+        cycles_t period_end;
+        cycles_t period_time;
         spinlock_t uvhub_lock;
         spinlock_t queue_lock;
+        spinlock_t disable_lock;
         /* tunables */
         int max_concurr;
         int max_concurr_const;
@@ -616,9 +624,9 @@ struct bau_control {
         int complete_threshold;
         int cong_response_us;
         int cong_reps;
-        int cong_period;
-        unsigned long clocks_per_100_usec;
-        cycles_t period_time;
+        cycles_t disabled_period;
+        int period_giveups;
+        int giveup_limit;
         long period_requests;
         struct hub_and_pnode *thp;
 };
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 1492170cbb5a..71b5d5a07d7b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1,7 +1,7 @@
 /*
  * SGI UltraViolet TLB flush routines.
  *
- * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI.
+ * (c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.
  *
  * This code is released under the GNU General Public License version 2 or
  * later.
@@ -39,8 +39,6 @@ static int timeout_base_ns[] = {
 static int timeout_us;
 static int nobau;
 static int nobau_perm;
-static int baudisabled;
-static spinlock_t disable_lock;
 static cycles_t congested_cycles;

 /* tunables: */
@@ -48,12 +46,13 @@ static int max_concurr = MAX_BAU_CONCURRENT;
 static int max_concurr_const = MAX_BAU_CONCURRENT;
 static int plugged_delay = PLUGGED_DELAY;
 static int plugsb4reset = PLUGSB4RESET;
+static int giveup_limit = GIVEUP_LIMIT;
 static int timeoutsb4reset = TIMEOUTSB4RESET;
 static int ipi_reset_limit = IPI_RESET_LIMIT;
 static int complete_threshold = COMPLETE_THRESHOLD;
 static int congested_respns_us = CONGESTED_RESPONSE_US;
 static int congested_reps = CONGESTED_REPS;
-static int congested_period = CONGESTED_PERIOD;
+static int disabled_period = DISABLED_PERIOD;

 static struct tunables tunables[] = {
         {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
@@ -64,7 +63,8 @@ static struct tunables tunables[] = {
         {&complete_threshold, COMPLETE_THRESHOLD},
         {&congested_respns_us, CONGESTED_RESPONSE_US},
         {&congested_reps, CONGESTED_REPS},
-        {&congested_period, CONGESTED_PERIOD}
+        {&disabled_period, DISABLED_PERIOD},
+        {&giveup_limit, GIVEUP_LIMIT}
 };

 static struct dentry *tunables_dir;
@@ -313,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
                  * Both sockets dump their completed count total into
                  * the message's count.
                  */
-                smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
+                *sp = 0;
                 asp = (struct atomic_short *)&msg->acknowledge_count;
                 msg_ack_count = atom_asr(socket_ack_count, asp);

@@ -526,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
 }

 /*
- * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * But not currently used.
  */
 static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
 {
         unsigned long descriptor_status;
-        unsigned long descriptor_status2;

-        descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
-        descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
-        descriptor_status = (descriptor_status << 1) | descriptor_status2;
+        descriptor_status =
+                ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
         return descriptor_status;
 }

@@ -566,87 +565,11 @@
  */
 int handle_uv2_busy(struct bau_control *bcp)
 {
-        int busy_one = bcp->using_desc;
-        int normal = bcp->uvhub_cpu;
-        int selected = -1;
-        int i;
-        unsigned long descriptor_status;
-        unsigned long status;
-        int mmr_offset;
-        struct bau_desc *bau_desc_old;
-        struct bau_desc *bau_desc_new;
-        struct bau_control *hmaster = bcp->uvhub_master;
         struct ptc_stats *stat = bcp->statp;
-        cycles_t ttm;

         stat->s_uv2_wars++;
-        spin_lock(&hmaster->uvhub_lock);
-        /* try for the original first */
-        if (busy_one != normal) {
-                if (!normal_busy(bcp))
-                        selected = normal;
-        }
-        if (selected < 0) {
-                /* can't use the normal, select an alternate */
-                mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-                descriptor_status = read_lmmr(mmr_offset);
-
-                /* scan available descriptors 32-63 */
-                for (i = 0; i < UV_CPUS_PER_AS; i++) {
-                        if ((hmaster->inuse_map & (1 << i)) == 0) {
-                                status = ((descriptor_status >>
-                                                (i * UV_ACT_STATUS_SIZE)) &
-                                                UV_ACT_STATUS_MASK) << 1;
-                                if (status != UV2H_DESC_BUSY) {
-                                        selected = i + UV_CPUS_PER_AS;
-                                        break;
-                                }
-                        }
-                }
-        }
-
-        if (busy_one != normal)
-                /* mark the busy alternate as not in-use */
-                hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
-
-        if (selected >= 0) {
-                /* switch to the selected descriptor */
-                if (selected != normal) {
-                        /* set the selected alternate as in-use */
-                        hmaster->inuse_map |=
-                                        (1 << (selected - UV_CPUS_PER_AS));
-                        if (selected > stat->s_uv2_wars_hw)
-                                stat->s_uv2_wars_hw = selected;
-                }
-                bau_desc_old = bcp->descriptor_base;
-                bau_desc_old += (ITEMS_PER_DESC * busy_one);
-                bcp->using_desc = selected;
-                bau_desc_new = bcp->descriptor_base;
-                bau_desc_new += (ITEMS_PER_DESC * selected);
-                *bau_desc_new = *bau_desc_old;
-        } else {
-                /*
-                 * All are busy. Wait for the normal one for this cpu to
-                 * free up.
-                 */
-                stat->s_uv2_war_waits++;
-                spin_unlock(&hmaster->uvhub_lock);
-                ttm = get_cycles();
-                do {
-                        cpu_relax();
-                } while (normal_busy(bcp));
-                spin_lock(&hmaster->uvhub_lock);
-                /* switch to the original descriptor */
-                bcp->using_desc = normal;
-                bau_desc_old = bcp->descriptor_base;
-                bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc);
-                bcp->using_desc = (ITEMS_PER_DESC * normal);
-                bau_desc_new = bcp->descriptor_base;
-                bau_desc_new += (ITEMS_PER_DESC * normal);
-                *bau_desc_new = *bau_desc_old; /* copy the entire descriptor */
-        }
-        spin_unlock(&hmaster->uvhub_lock);
-        return FLUSH_RETRY_BUSYBUG;
+        bcp->busy = 1;
+        return FLUSH_GIVEUP;
 }

 static int uv2_wait_completion(struct bau_desc *bau_desc,
@@ -655,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 {
         unsigned long descriptor_stat;
         cycles_t ttm;
-        int desc = bcp->using_desc;
+        int desc = bcp->uvhub_cpu;
         long busy_reps = 0;
         struct ptc_stats *stat = bcp->statp;

@@ -663,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,

         /* spin on the status MMR, waiting for it to go idle */
         while (descriptor_stat != UV2H_DESC_IDLE) {
-                /*
-                 * Our software ack messages may be blocked because
-                 * there are no swack resources available. As long
-                 * as none of them has timed out hardware will NACK
-                 * our message and its state will stay IDLE.
-                 */
-                if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
-                    (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
+                if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) {
+                        /*
+                         * A h/w bug on the destination side may
+                         * have prevented the message being marked
+                         * pending, thus it doesn't get replied to
+                         * and gets continually nacked until it times
+                         * out with a SOURCE_TIMEOUT.
+                         */
                         stat->s_stimeout++;
                         return FLUSH_GIVEUP;
-                } else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) {
-                        stat->s_strongnacks++;
-                        bcp->conseccompletes = 0;
-                        return FLUSH_GIVEUP;
                 } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
+                        ttm = get_cycles();
+
+                        /*
+                         * Our retries may be blocked by all destination
+                         * swack resources being consumed, and a timeout
+                         * pending. In that case hardware returns the
+                         * ERROR that looks like a destination timeout.
+                         * Without using the extended status we have to
+                         * deduce from the short time that this was a
+                         * strong nack.
+                         */
+                        if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
+                                bcp->conseccompletes = 0;
+                                stat->s_plugged++;
+                                /* FLUSH_RETRY_PLUGGED causes hang on boot */
+                                return FLUSH_GIVEUP;
+                        }
                         stat->s_dtimeout++;
                         bcp->conseccompletes = 0;
-                        return FLUSH_RETRY_TIMEOUT;
+                        /* FLUSH_RETRY_TIMEOUT causes hang on boot */
+                        return FLUSH_GIVEUP;
                 } else {
                         busy_reps++;
                         if (busy_reps > 1000000) {
@@ -688,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
                                 busy_reps = 0;
                                 ttm = get_cycles();
                                 if ((ttm - bcp->send_message) >
-                                                (bcp->clocks_per_100_usec)) {
+                                                bcp->timeout_interval)
                                         return handle_uv2_busy(bcp);
-                                }
                         }
                         /*
                          * descriptor_stat is still BUSY
@@ -714,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc,
 {
         int right_shift;
         unsigned long mmr_offset;
-        int desc = bcp->using_desc;
+        int desc = bcp->uvhub_cpu;

         if (desc < UV_CPUS_PER_AS) {
                 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
@@ -793,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc,
 }

 /*
- * Completions are taking a very long time due to a congested numalink
- * network.
+ * Stop all cpus on a uvhub from using the BAU for a period of time.
+ * This is reversed by check_enable.
  */
-static void disable_for_congestion(struct bau_control *bcp,
-                                struct ptc_stats *stat)
+static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
 {
-        /* let only one cpu do this disabling */
-        spin_lock(&disable_lock);
-
-        if (!baudisabled && bcp->period_requests &&
-            ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
-                int tcpu;
-                struct bau_control *tbcp;
-                /* it becomes this cpu's job to turn on the use of the
-                   BAU again */
-                baudisabled = 1;
-                bcp->set_bau_off = 1;
-                bcp->set_bau_on_time = get_cycles();
-                bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period);
+        int tcpu;
+        struct bau_control *tbcp;
+        struct bau_control *hmaster;
+        cycles_t tm1;
+
+        hmaster = bcp->uvhub_master;
+        spin_lock(&hmaster->disable_lock);
+        if (!bcp->baudisabled) {
                 stat->s_bau_disabled++;
+                tm1 = get_cycles();
                 for_each_present_cpu(tcpu) {
                         tbcp = &per_cpu(bau_control, tcpu);
-                        tbcp->baudisabled = 1;
+                        if (tbcp->uvhub_master == hmaster) {
+                                tbcp->baudisabled = 1;
+                                tbcp->set_bau_on_time =
+                                        tm1 + bcp->disabled_period;
+                        }
                 }
         }
-
-        spin_unlock(&disable_lock);
+        spin_unlock(&hmaster->disable_lock);
 }

 static void count_max_concurr(int stat, struct bau_control *bcp,
@@ -850,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
                         bcp->period_requests++;
                         bcp->period_time += elapsed;
                         if ((elapsed > congested_cycles) &&
-                            (bcp->period_requests > bcp->cong_reps))
-                                disable_for_congestion(bcp, stat);
+                            (bcp->period_requests > bcp->cong_reps) &&
+                            ((bcp->period_time / bcp->period_requests) >
+                                                        congested_cycles)) {
+                                stat->s_congested++;
+                                disable_for_period(bcp, stat);
+                        }
                 }
         } else
                 stat->s_requestor--;

         if (completion_status == FLUSH_COMPLETE && try > 1)
                 stat->s_retriesok++;
-        else if (completion_status == FLUSH_GIVEUP)
+        else if (completion_status == FLUSH_GIVEUP) {
                 stat->s_giveup++;
+                if (get_cycles() > bcp->period_end)
+                        bcp->period_giveups = 0;
+                bcp->period_giveups++;
+                if (bcp->period_giveups == 1)
+                        bcp->period_end = get_cycles() + bcp->disabled_period;
+                if (bcp->period_giveups > bcp->giveup_limit) {
+                        disable_for_period(bcp, stat);
+                        stat->s_giveuplimit++;
+                }
+        }
 }

 /*
@@ -903,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
+int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
+                                struct bau_desc *bau_desc)
 {
         int seq_number = 0;
         int completion_stat = 0;
@@ -916,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
         struct bau_control *hmaster = bcp->uvhub_master;
         struct uv1_bau_msg_header *uv1_hdr = NULL;
         struct uv2_bau_msg_header *uv2_hdr = NULL;
-        struct bau_desc *bau_desc;

-        if (bcp->uvhub_version == 1)
+        if (bcp->uvhub_version == 1) {
+                uv1 = 1;
                 uv1_throttle(hmaster, stat);
+        }

         while (hmaster->uvhub_quiesce)
                 cpu_relax();

         time1 = get_cycles();
+        if (uv1)
+                uv1_hdr = &bau_desc->header.uv1_hdr;
+        else
+                uv2_hdr = &bau_desc->header.uv2_hdr;
+
         do {
-                bau_desc = bcp->descriptor_base;
-                bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
-                if (bcp->uvhub_version == 1) {
-                        uv1 = 1;
-                        uv1_hdr = &bau_desc->header.uv1_hdr;
-                } else
-                        uv2_hdr = &bau_desc->header.uv2_hdr;
-                if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
+                if (try == 0) {
                         if (uv1)
                                 uv1_hdr->msg_type = MSG_REGULAR;
                         else
@@ -951,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
                         uv1_hdr->sequence = seq_number;
                 else
                         uv2_hdr->sequence = seq_number;
-                index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
+                index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
                 bcp->send_message = get_cycles();

                 write_mmr_activation(index);

                 try++;
                 completion_stat = wait_completion(bau_desc, bcp, try);
-                /* UV2: wait_completion() may change the bcp->using_desc */

                 handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);

                 if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
                         bcp->ipi_attempts = 0;
+                        stat->s_overipilimit++;
                         completion_stat = FLUSH_GIVEUP;
                         break;
                 }
                 cpu_relax();
         } while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
-                 (completion_stat == FLUSH_RETRY_BUSYBUG) ||
                  (completion_stat == FLUSH_RETRY_TIMEOUT));

         time2 = get_cycles();
@@ -990,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 }

 /*
- * The BAU is disabled. When the disabled time period has expired, the cpu
- * that disabled it must re-enable it.
- * Return 0 if it is re-enabled for all cpus.
+ * The BAU is disabled for this uvhub. When the disabled time period has
+ * expired re-enable it.
+ * Return 0 if it is re-enabled for all cpus on this uvhub.
  */
 static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
 {
         int tcpu;
         struct bau_control *tbcp;
+        struct bau_control *hmaster;

-        if (bcp->set_bau_off) {
-                if (get_cycles() >= bcp->set_bau_on_time) {
-                        stat->s_bau_reenabled++;
-                        baudisabled = 0;
-                        for_each_present_cpu(tcpu) {
-                                tbcp = &per_cpu(bau_control, tcpu);
+        hmaster = bcp->uvhub_master;
+        spin_lock(&hmaster->disable_lock);
+        if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
+                stat->s_bau_reenabled++;
+                for_each_present_cpu(tcpu) {
+                        tbcp = &per_cpu(bau_control, tcpu);
+                        if (tbcp->uvhub_master == hmaster) {
                                 tbcp->baudisabled = 0;
                                 tbcp->period_requests = 0;
                                 tbcp->period_time = 0;
+                                tbcp->period_giveups = 0;
                         }
-                        return 0;
                 }
+                spin_unlock(&hmaster->disable_lock);
+                return 0;
         }
+        spin_unlock(&hmaster->disable_lock);
         return -1;
 }

@@ -1113,6 +1065,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
         struct cpumask *flush_mask;
         struct ptc_stats *stat;
         struct bau_control *bcp;
+        unsigned long descriptor_status;
+        unsigned long status;

         bcp = &per_cpu(bau_control, cpu);
         stat = bcp->statp;
@@ -1121,10 +1075,22 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
         if (bcp->nobau)
                 return cpumask;

+        if (bcp->busy) {
+                descriptor_status =
+                        read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0);
+                status = ((descriptor_status >> (bcp->uvhub_cpu *
+                        UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1;
+                if (status == UV2H_DESC_BUSY)
+                        return cpumask;
+                bcp->busy = 0;
+        }
+
         /* bau was disabled due to slow response */
         if (bcp->baudisabled) {
-                if (check_enable(bcp, stat))
+                if (check_enable(bcp, stat)) {
+                        stat->s_ipifordisabled++;
                         return cpumask;
+                }
         }

         /*
@@ -1140,7 +1106,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
                 stat->s_ntargself++;

         bau_desc = bcp->descriptor_base;
-        bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+        bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
         bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
         if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
                 return NULL;
@@ -1153,25 +1119,27 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
          * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
          * or 1 if it gave up and the original cpumask should be returned.
          */
-        if (!uv_flush_send_and_wait(flush_mask, bcp))
+        if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
                 return NULL;
         else
                 return cpumask;
 }

 /*
- * Search the message queue for any 'other' message with the same software
- * acknowledge resource bit vector.
+ * Search the message queue for any 'other' unprocessed message with the
+ * same software acknowledge resource bit vector as the 'msg' message.
  */
 struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-                        struct bau_control *bcp, unsigned char swack_vec)
+                        struct bau_control *bcp)
 {
         struct bau_pq_entry *msg_next = msg + 1;
+        unsigned char swack_vec = msg->swack_vec;

         if (msg_next > bcp->queue_last)
                 msg_next = bcp->queue_first;
-        while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
-                if (msg_next->swack_vec == swack_vec)
+        while (msg_next != msg) {
+                if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) &&
+                    (msg_next->swack_vec == swack_vec))
                         return msg_next;
                 msg_next++;
                 if (msg_next > bcp->queue_last)
@@ -1200,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
                  * This message was assigned a swack resource, but no
                  * reserved acknowlegment is pending.
                  * The bug has prevented this message from setting the MMR.
-                 * And no other message has used the same sw_ack resource.
-                 * Do the requested shootdown but do not reply to the msg.
-                 * (the 0 means make no acknowledge)
                  */
-                bau_process_message(mdp, bcp, 0);
-                return;
-        }
-
-        /*
-         * Some message has set the MMR 'pending' bit; it might have been
-         * another message. Look for that message.
-         */
-        other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
-        if (other_msg) {
-                /* There is another. Do not ack the current one. */
-                bau_process_message(mdp, bcp, 0);
                 /*
-                 * Let the natural processing of that message acknowledge
-                 * it. Don't get the processing of sw_ack's out of order.
+                 * Some message has set the MMR 'pending' bit; it might have
+                 * been another message. Look for that message.
                  */
-                return;
+                other_msg = find_another_by_swack(msg, bcp);
+                if (other_msg) {
+                        /*
+                         * There is another. Process this one but do not
+                         * ack it.
+                         */
+                        bau_process_message(mdp, bcp, 0);
+                        /*
+                         * Let the natural processing of that other message
+                         * acknowledge it. Don't get the processing of sw_ack's
+                         * out of order.
+                         */
+                        return;
+                }
         }

         /*
-         * There is no other message using this sw_ack, so it is safe to
-         * acknowledge it.
+         * Either the MMR shows this one pending a reply or there is no
+         * other message using this sw_ack, so it is safe to acknowledge it.
          */
         bau_process_message(mdp, bcp, 1);

@@ -1330,7 +1296,8 @@ static void __init enable_timeouts(void)
                  */
                 mmr_image |= (1L << SOFTACK_MSHIFT);
                 if (is_uv2_hub()) {
-                        mmr_image |= (1L << UV2_EXT_SHFT);
+                        /* hw bug workaround; do not use extended status */
+                        mmr_image &= ~(1L << UV2_EXT_SHFT);
                 }
                 write_mmr_misc_control(pnode, mmr_image);
         }
@@ -1379,24 +1346,26 @@ static int ptc_seq_show(struct seq_file *file, void *data)
         cpu = *(loff_t *)data;
         if (!cpu) {
                 seq_printf(file,
                         "# cpu bauoff sent stime self locals remotes ncpus localhub ");
                 seq_printf(file,
                         "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
                 seq_printf(file,
-                        "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
+                        "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
+                seq_printf(file,
+                        "rok resetp resett giveup sto bz throt disable ");
                 seq_printf(file,
-                        "resetp resett giveup sto bz throt enters swack recv rtime ");
+                        "enable wars warshw warwaits enters ipidis plugged ");
                 seq_printf(file,
-                        "all one mult none retry canc nocan reset rcan ");
+                        "ipiover glim cong swack recv rtime all one mult ");
                 seq_printf(file,
-                        "disable enable wars warshw warwaits\n");
+                        "none retry canc nocan reset rcan\n");
         }
         if (cpu < num_possible_cpus() && cpu_online(cpu)) {
                 bcp = &per_cpu(bau_control, cpu);
                 stat = bcp->statp;
                 /* source side statistics */
                 seq_printf(file,
                         "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
                         cpu, bcp->nobau, stat->s_requestor,
                         cycles_2_us(stat->s_time),
                         stat->s_ntargself, stat->s_ntarglocals,
@@ -1407,25 +1376,28 @@ static int ptc_seq_show(struct seq_file *file, void *data)
                         stat->s_ntarguvhub8, stat->s_ntarguvhub4,
                         stat->s_ntarguvhub2, stat->s_ntarguvhub1,
                         stat->s_dtimeout, stat->s_strongnacks);
-                seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+                seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
                         stat->s_retry_messages, stat->s_retriesok,
                         stat->s_resets_plug, stat->s_resets_timeout,
                         stat->s_giveup, stat->s_stimeout,
-                        stat->s_busy, stat->s_throttles, stat->s_enters);
+                        stat->s_busy, stat->s_throttles);
+                seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+                        stat->s_bau_disabled, stat->s_bau_reenabled,
+                        stat->s_uv2_wars, stat->s_uv2_wars_hw,
+                        stat->s_uv2_war_waits, stat->s_enters,
+                        stat->s_ipifordisabled, stat->s_plugged,
+                        stat->s_overipilimit, stat->s_giveuplimit,
+                        stat->s_congested);

                 /* destination side statistics */
                 seq_printf(file,
-                        "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+                        "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
                         read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
                         stat->d_requestee, cycles_2_us(stat->d_time),
                         stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
                         stat->d_nomsg, stat->d_retries, stat->d_canceled,
                         stat->d_nocanceled, stat->d_resets,
                         stat->d_rcanceled);
-                seq_printf(file, "%ld %ld %ld %ld %ld\n",
-                        stat->s_bau_disabled, stat->s_bau_reenabled,
-                        stat->s_uv2_wars, stat->s_uv2_wars_hw,
-                        stat->s_uv2_war_waits);
         }
         return 0;
 }
@@ -1439,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf,
         char *buf;
         int ret;

-        buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
-                "max_concur plugged_delay plugsb4reset",
-                "timeoutsb4reset ipi_reset_limit complete_threshold",
-                "congested_response_us congested_reps congested_period",
+        buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n",
+                "max_concur plugged_delay plugsb4reset timeoutsb4reset",
+                "ipi_reset_limit complete_threshold congested_response_us",
+                "congested_reps disabled_period giveup_limit",
                 max_concurr, plugged_delay, plugsb4reset,
                 timeoutsb4reset, ipi_reset_limit, complete_threshold,
-                congested_respns_us, congested_reps, congested_period);
+                congested_respns_us, congested_reps, disabled_period,
+                giveup_limit);

         if (!buf)
                 return -ENOMEM;
@@ -1616,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user,
                 bcp->complete_threshold = complete_threshold;
                 bcp->cong_response_us = congested_respns_us;
                 bcp->cong_reps = congested_reps;
-                bcp->cong_period = congested_period;
+                bcp->disabled_period = sec_2_cycles(disabled_period);
+                bcp->giveup_limit = giveup_limit;
         }
         return count;
 }
@@ -1745,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
                  * fairness chaining multilevel count replied_to
                  */
         } else {
+                /*
+                 * BIOS uses legacy mode, but UV2 hardware always
+                 * uses native mode for selective broadcasts.
+                 */
                 uv2_hdr = &bd2->header.uv2_hdr;
                 uv2_hdr->swack_flag = 1;
                 uv2_hdr->base_dest_nasid =
@@ -1896,10 +1874,11 @@ static void __init init_per_cpu_tunables(void)
                 bcp->complete_threshold = complete_threshold;
                 bcp->cong_response_us = congested_respns_us;
                 bcp->cong_reps = congested_reps;
-                bcp->cong_period = congested_period;
-                bcp->clocks_per_100_usec = usec_2_cycles(100);
+                bcp->disabled_period = sec_2_cycles(disabled_period);
+                bcp->giveup_limit = giveup_limit;
                 spin_lock_init(&bcp->queue_lock);
                 spin_lock_init(&bcp->uvhub_lock);
+                spin_lock_init(&bcp->disable_lock);
         }
 }

@@ -2020,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
                 }
                 bcp->uvhub_master = *hmasterp;
                 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
-                bcp->using_desc = bcp->uvhub_cpu;
                 if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
                         printk(KERN_EMERG "%d cpus per uvhub invalid\n",
                                 bcp->uvhub_cpu);
@@ -2123,7 +2101,6 @@ static int __init uv_bau_init(void)
         }

         nuvhubs = uv_num_possible_blades();
-        spin_lock_init(&disable_lock);
         congested_cycles = usec_2_cycles(congested_respns_us);

         uv_base_pnode = 0x7fffffff;