-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h |  28
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c    | 453
 2 files changed, 257 insertions(+), 224 deletions(-)
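
A condensed sketch of the timing heuristic this patch adds to uv2_wait_completion(): with the UV2 extended status bit disabled (see the enable_timeouts() hunk), a destination strong nack can no longer be read from ACTIVATION_STATUS_2 and has to be deduced from how quickly the "timeout" came back. The helper name below is hypothetical; cycles_2_us(), timeout_us, get_cycles() and the bcp/stat fields are the driver's own.

/*
 * Hypothetical helper condensing the DEST_TIMEOUT handling added to
 * uv2_wait_completion(): a "timeout" reported well before the real
 * destination timeout period is treated as a strong nack (the plugged
 * case); either way the send is given up rather than retried, since
 * the retry paths were found to hang at boot.
 */
static int uv2_classify_dest_timeout(struct bau_control *bcp,
				     struct ptc_stats *stat)
{
	cycles_t now = get_cycles();

	if (cycles_2_us(now - bcp->send_message) < timeout_us) {
		bcp->conseccompletes = 0;
		stat->s_plugged++;		/* deduced strong nack */
		return FLUSH_GIVEUP;
	}
	stat->s_dtimeout++;
	bcp->conseccompletes = 0;
	return FLUSH_GIVEUP;
}
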
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 6149b476d9df..a06983cdc125 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -140,6 +140,9 @@
 #define IPI_RESET_LIMIT			1
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD		5
+/* after this # of giveups (fall back to kernel IPI's) disable the use of
+   the BAU for a period of time */
+#define GIVEUP_LIMIT			100
 
 #define UV_LB_SUBNODEID			0x10
 
@@ -166,7 +169,6 @@
 #define FLUSH_RETRY_TIMEOUT		2
 #define FLUSH_GIVEUP			3
 #define FLUSH_COMPLETE			4
-#define FLUSH_RETRY_BUSYBUG		5
 
 /*
  * tuning the action when the numalink network is extremely delayed
@@ -175,7 +177,7 @@
 					   microseconds */
 #define CONGESTED_REPS			10	/* long delays averaged over
 					   this many broadcasts */
-#define CONGESTED_PERIOD		30	/* time for the bau to be
+#define DISABLED_PERIOD			10	/* time for the bau to be
 					   disabled, in seconds */
 /* see msg_type: */
 #define MSG_NOOP			0
@@ -520,6 +522,12 @@ struct ptc_stats {
 	unsigned long	s_uv2_wars;		/* uv2 workaround, perm. busy */
 	unsigned long	s_uv2_wars_hw;		/* uv2 workaround, hiwater */
 	unsigned long	s_uv2_war_waits;	/* uv2 workaround, long waits */
+	unsigned long	s_overipilimit;		/* over the ipi reset limit */
+	unsigned long	s_giveuplimit;		/* disables, over giveup limit */
+	unsigned long	s_enters;		/* entries to the driver */
+	unsigned long	s_ipifordisabled;	/* fall back to IPI; disabled */
+	unsigned long	s_plugged;		/* plugged by h/w bug */
+	unsigned long	s_congested;		/* giveup on long wait */
 	/* destination statistics */
 	unsigned long	d_alltlb;		/* times all tlb's on this
 						   cpu were flushed */
@@ -586,8 +594,8 @@ struct bau_control {
 	int			timeout_tries;
 	int			ipi_attempts;
 	int			conseccompletes;
-	int			baudisabled;
-	int			set_bau_off;
+	short			nobau;
+	short			baudisabled;
 	short			cpu;
 	short			osnode;
 	short			uvhub_cpu;
@@ -596,14 +604,16 @@ struct bau_control {
 	short			cpus_in_socket;
 	short			cpus_in_uvhub;
 	short			partition_base_pnode;
-	short			using_desc;	/* an index, like uvhub_cpu */
-	unsigned int		inuse_map;
+	short			busy;		/* all were busy (war) */
 	unsigned short		message_number;
 	unsigned short		uvhub_quiesce;
 	short			socket_acknowledge_count[DEST_Q_SIZE];
 	cycles_t		send_message;
+	cycles_t		period_end;
+	cycles_t		period_time;
 	spinlock_t		uvhub_lock;
 	spinlock_t		queue_lock;
+	spinlock_t		disable_lock;
 	/* tunables */
 	int			max_concurr;
 	int			max_concurr_const;
@@ -614,9 +624,9 @@ struct bau_control {
 	int			complete_threshold;
 	int			cong_response_us;
 	int			cong_reps;
-	int			cong_period;
-	unsigned long		clocks_per_100_usec;
-	cycles_t		period_time;
+	cycles_t		disabled_period;
+	int			period_giveups;
+	int			giveup_limit;
 	long			period_requests;
 	struct hub_and_pnode	*thp;
 };
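
The new fields above (period_end, period_giveups, disabled_period, giveup_limit, busy, disable_lock) drive a per-uvhub back-off. A condensed sketch of how the sending side is meant to use them, pulled together from the record_send_stats() and disable_for_period() changes in tlb_uv.c below; the helper name is hypothetical, and the real code additionally holds hmaster->disable_lock and marks every cpu on the hub.

/*
 * Sketch: count FLUSH_GIVEUPs inside a rolling window of disabled_period
 * cycles; once the count passes giveup_limit, the whole uvhub falls back
 * to kernel IPIs until check_enable() re-arms the BAU.
 */
static void account_giveup(struct bau_control *bcp, struct ptc_stats *stat)
{
	if (get_cycles() > bcp->period_end)	/* window expired */
		bcp->period_giveups = 0;
	bcp->period_giveups++;
	if (bcp->period_giveups == 1)		/* first giveup opens a window */
		bcp->period_end = get_cycles() + bcp->disabled_period;
	if (bcp->period_giveups > bcp->giveup_limit) {
		disable_for_period(bcp, stat);	/* sets baudisabled hub-wide */
		stat->s_giveuplimit++;
	}
}
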
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 59880afa851f..71b5d5a07d7b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1,7 +1,7 @@
 /*
  *	SGI UltraViolet TLB flush routines.
  *
- *	(c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI.
+ *	(c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.
  *
  *	This code is released under the GNU General Public License version 2 or
  *	later.
@@ -38,8 +38,7 @@ static int timeout_base_ns[] = {
 
 static int timeout_us;
 static int nobau;
-static int baudisabled;
-static spinlock_t disable_lock;
+static int nobau_perm;
 static cycles_t congested_cycles;
 
 /* tunables: */
@@ -47,12 +46,13 @@ static int max_concurr = MAX_BAU_CONCURRENT;
 static int max_concurr_const = MAX_BAU_CONCURRENT;
 static int plugged_delay = PLUGGED_DELAY;
 static int plugsb4reset = PLUGSB4RESET;
+static int giveup_limit = GIVEUP_LIMIT;
 static int timeoutsb4reset = TIMEOUTSB4RESET;
 static int ipi_reset_limit = IPI_RESET_LIMIT;
 static int complete_threshold = COMPLETE_THRESHOLD;
 static int congested_respns_us = CONGESTED_RESPONSE_US;
 static int congested_reps = CONGESTED_REPS;
-static int congested_period = CONGESTED_PERIOD;
+static int disabled_period = DISABLED_PERIOD;
 
 static struct tunables tunables[] = {
 	{&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
@@ -63,7 +63,8 @@ static struct tunables tunables[] = {
 	{&complete_threshold, COMPLETE_THRESHOLD},
 	{&congested_respns_us, CONGESTED_RESPONSE_US},
 	{&congested_reps, CONGESTED_REPS},
-	{&congested_period, CONGESTED_PERIOD}
+	{&disabled_period, DISABLED_PERIOD},
+	{&giveup_limit, GIVEUP_LIMIT}
 };
 
 static struct dentry *tunables_dir;
@@ -120,6 +121,40 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
 static DEFINE_PER_CPU(struct bau_control, bau_control);
 static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
 
+static void
+set_bau_on(void)
+{
+	int cpu;
+	struct bau_control *bcp;
+
+	if (nobau_perm) {
+		pr_info("BAU not initialized; cannot be turned on\n");
+		return;
+	}
+	nobau = 0;
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->nobau = 0;
+	}
+	pr_info("BAU turned on\n");
+	return;
+}
+
+static void
+set_bau_off(void)
+{
+	int cpu;
+	struct bau_control *bcp;
+
+	nobau = 1;
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->nobau = 1;
+	}
+	pr_info("BAU turned off\n");
+	return;
+}
+
 /*
  * Determine the first node on a uvhub. 'Nodes' are used for kernel
  * memory allocation.
@@ -278,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
 		 * Both sockets dump their completed count total into
 		 * the message's count.
 		 */
-		smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
+		*sp = 0;
 		asp = (struct atomic_short *)&msg->acknowledge_count;
 		msg_ack_count = atom_asr(socket_ack_count, asp);
 
@@ -491,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
 }
 
 /*
- * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * But not currently used.
  */
 static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
 {
 	unsigned long descriptor_status;
-	unsigned long descriptor_status2;
 
-	descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
-	descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
-	descriptor_status = (descriptor_status << 1) | descriptor_status2;
+	descriptor_status =
+		((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
 	return descriptor_status;
 }
 
@@ -531,87 +565,11 @@ int normal_busy(struct bau_control *bcp)
  */
 int handle_uv2_busy(struct bau_control *bcp)
 {
-	int busy_one = bcp->using_desc;
-	int normal = bcp->uvhub_cpu;
-	int selected = -1;
-	int i;
-	unsigned long descriptor_status;
-	unsigned long status;
-	int mmr_offset;
-	struct bau_desc *bau_desc_old;
-	struct bau_desc *bau_desc_new;
-	struct bau_control *hmaster = bcp->uvhub_master;
 	struct ptc_stats *stat = bcp->statp;
-	cycles_t ttm;
 
 	stat->s_uv2_wars++;
-	spin_lock(&hmaster->uvhub_lock);
-	/* try for the original first */
-	if (busy_one != normal) {
-		if (!normal_busy(bcp))
-			selected = normal;
-	}
-	if (selected < 0) {
-		/* can't use the normal, select an alternate */
-		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-		descriptor_status = read_lmmr(mmr_offset);
-
-		/* scan available descriptors 32-63 */
-		for (i = 0; i < UV_CPUS_PER_AS; i++) {
-			if ((hmaster->inuse_map & (1 << i)) == 0) {
-				status = ((descriptor_status >>
-						(i * UV_ACT_STATUS_SIZE)) &
-						UV_ACT_STATUS_MASK) << 1;
-				if (status != UV2H_DESC_BUSY) {
-					selected = i + UV_CPUS_PER_AS;
-					break;
-				}
-			}
-		}
-	}
-
-	if (busy_one != normal)
-		/* mark the busy alternate as not in-use */
-		hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
-
-	if (selected >= 0) {
-		/* switch to the selected descriptor */
-		if (selected != normal) {
-			/* set the selected alternate as in-use */
-			hmaster->inuse_map |=
-					(1 << (selected - UV_CPUS_PER_AS));
-			if (selected > stat->s_uv2_wars_hw)
-				stat->s_uv2_wars_hw = selected;
-		}
-		bau_desc_old = bcp->descriptor_base;
-		bau_desc_old += (ITEMS_PER_DESC * busy_one);
-		bcp->using_desc = selected;
-		bau_desc_new = bcp->descriptor_base;
-		bau_desc_new += (ITEMS_PER_DESC * selected);
-		*bau_desc_new = *bau_desc_old;
-	} else {
-		/*
-		 * All are busy. Wait for the normal one for this cpu to
-		 * free up.
-		 */
-		stat->s_uv2_war_waits++;
-		spin_unlock(&hmaster->uvhub_lock);
-		ttm = get_cycles();
-		do {
-			cpu_relax();
-		} while (normal_busy(bcp));
-		spin_lock(&hmaster->uvhub_lock);
-		/* switch to the original descriptor */
-		bcp->using_desc = normal;
-		bau_desc_old = bcp->descriptor_base;
-		bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc);
-		bcp->using_desc = (ITEMS_PER_DESC * normal);
-		bau_desc_new = bcp->descriptor_base;
-		bau_desc_new += (ITEMS_PER_DESC * normal);
-		*bau_desc_new = *bau_desc_old; /* copy the entire descriptor */
-	}
-	spin_unlock(&hmaster->uvhub_lock);
-	return FLUSH_RETRY_BUSYBUG;
+	bcp->busy = 1;
+	return FLUSH_GIVEUP;
 }
 
 static int uv2_wait_completion(struct bau_desc *bau_desc,
@@ -620,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 {
 	unsigned long descriptor_stat;
 	cycles_t ttm;
-	int desc = bcp->using_desc;
+	int desc = bcp->uvhub_cpu;
 	long busy_reps = 0;
 	struct ptc_stats *stat = bcp->statp;
 
@@ -628,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 
 	/* spin on the status MMR, waiting for it to go idle */
 	while (descriptor_stat != UV2H_DESC_IDLE) {
-		/*
-		 * Our software ack messages may be blocked because
-		 * there are no swack resources available. As long
-		 * as none of them has timed out hardware will NACK
-		 * our message and its state will stay IDLE.
-		 */
-		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
-		    (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
+		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) {
+			/*
+			 * A h/w bug on the destination side may
+			 * have prevented the message being marked
+			 * pending, thus it doesn't get replied to
+			 * and gets continually nacked until it times
+			 * out with a SOURCE_TIMEOUT.
+			 */
 			stat->s_stimeout++;
 			return FLUSH_GIVEUP;
-		} else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) {
-			stat->s_strongnacks++;
-			bcp->conseccompletes = 0;
-			return FLUSH_GIVEUP;
 		} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
+			ttm = get_cycles();
+
+			/*
+			 * Our retries may be blocked by all destination
+			 * swack resources being consumed, and a timeout
+			 * pending. In that case hardware returns the
+			 * ERROR that looks like a destination timeout.
+			 * Without using the extended status we have to
+			 * deduce from the short time that this was a
+			 * strong nack.
+			 */
+			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
+				bcp->conseccompletes = 0;
+				stat->s_plugged++;
+				/* FLUSH_RETRY_PLUGGED causes hang on boot */
+				return FLUSH_GIVEUP;
+			}
 			stat->s_dtimeout++;
 			bcp->conseccompletes = 0;
-			return FLUSH_RETRY_TIMEOUT;
+			/* FLUSH_RETRY_TIMEOUT causes hang on boot */
+			return FLUSH_GIVEUP;
 		} else {
 			busy_reps++;
 			if (busy_reps > 1000000) {
@@ -653,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 				busy_reps = 0;
 				ttm = get_cycles();
 				if ((ttm - bcp->send_message) >
-					(bcp->clocks_per_100_usec)) {
+						bcp->timeout_interval)
 					return handle_uv2_busy(bcp);
-				}
 			}
 			/*
 			 * descriptor_stat is still BUSY
@@ -679,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc,
 {
 	int right_shift;
 	unsigned long mmr_offset;
-	int desc = bcp->using_desc;
+	int desc = bcp->uvhub_cpu;
 
 	if (desc < UV_CPUS_PER_AS) {
 		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
@@ -758,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc,
 }
 
 /*
- * Completions are taking a very long time due to a congested numalink
- * network.
+ * Stop all cpus on a uvhub from using the BAU for a period of time.
+ * This is reversed by check_enable.
  */
-static void disable_for_congestion(struct bau_control *bcp,
-				   struct ptc_stats *stat)
+static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
 {
-	/* let only one cpu do this disabling */
-	spin_lock(&disable_lock);
-
-	if (!baudisabled && bcp->period_requests &&
-	    ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
-		int tcpu;
-		struct bau_control *tbcp;
-		/* it becomes this cpu's job to turn on the use of the
-		   BAU again */
-		baudisabled = 1;
-		bcp->set_bau_off = 1;
-		bcp->set_bau_on_time = get_cycles();
-		bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period);
+	int tcpu;
+	struct bau_control *tbcp;
+	struct bau_control *hmaster;
+	cycles_t tm1;
+
+	hmaster = bcp->uvhub_master;
+	spin_lock(&hmaster->disable_lock);
+	if (!bcp->baudisabled) {
 		stat->s_bau_disabled++;
+		tm1 = get_cycles();
 		for_each_present_cpu(tcpu) {
 			tbcp = &per_cpu(bau_control, tcpu);
-			tbcp->baudisabled = 1;
+			if (tbcp->uvhub_master == hmaster) {
+				tbcp->baudisabled = 1;
+				tbcp->set_bau_on_time =
+					tm1 + bcp->disabled_period;
+			}
 		}
 	}
-
-	spin_unlock(&disable_lock);
+	spin_unlock(&hmaster->disable_lock);
 }
 
 static void count_max_concurr(int stat, struct bau_control *bcp,
@@ -815,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
 		bcp->period_requests++;
 		bcp->period_time += elapsed;
 		if ((elapsed > congested_cycles) &&
-		    (bcp->period_requests > bcp->cong_reps))
-			disable_for_congestion(bcp, stat);
+		    (bcp->period_requests > bcp->cong_reps) &&
+		    ((bcp->period_time / bcp->period_requests) >
+							congested_cycles)) {
+			stat->s_congested++;
+			disable_for_period(bcp, stat);
+		}
 		}
 	} else
 		stat->s_requestor--;
 
 	if (completion_status == FLUSH_COMPLETE && try > 1)
 		stat->s_retriesok++;
-	else if (completion_status == FLUSH_GIVEUP)
+	else if (completion_status == FLUSH_GIVEUP) {
 		stat->s_giveup++;
+		if (get_cycles() > bcp->period_end)
+			bcp->period_giveups = 0;
+		bcp->period_giveups++;
+		if (bcp->period_giveups == 1)
+			bcp->period_end = get_cycles() + bcp->disabled_period;
+		if (bcp->period_giveups > bcp->giveup_limit) {
+			disable_for_period(bcp, stat);
+			stat->s_giveuplimit++;
+		}
+	}
 }
 
 /*
@@ -868,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
+int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
+				struct bau_desc *bau_desc)
 {
 	int seq_number = 0;
 	int completion_stat = 0;
@@ -881,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 	struct bau_control *hmaster = bcp->uvhub_master;
 	struct uv1_bau_msg_header *uv1_hdr = NULL;
 	struct uv2_bau_msg_header *uv2_hdr = NULL;
-	struct bau_desc *bau_desc;
 
-	if (bcp->uvhub_version == 1)
+	if (bcp->uvhub_version == 1) {
+		uv1 = 1;
 		uv1_throttle(hmaster, stat);
+	}
 
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 
 	time1 = get_cycles();
+	if (uv1)
+		uv1_hdr = &bau_desc->header.uv1_hdr;
+	else
+		uv2_hdr = &bau_desc->header.uv2_hdr;
+
 	do {
-		bau_desc = bcp->descriptor_base;
-		bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
-		if (bcp->uvhub_version == 1) {
-			uv1 = 1;
-			uv1_hdr = &bau_desc->header.uv1_hdr;
-		} else
-			uv2_hdr = &bau_desc->header.uv2_hdr;
-		if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
+		if (try == 0) {
 			if (uv1)
 				uv1_hdr->msg_type = MSG_REGULAR;
 			else
@@ -916,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 			uv1_hdr->sequence = seq_number;
 		else
 			uv2_hdr->sequence = seq_number;
-		index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
+		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
 		bcp->send_message = get_cycles();
 
 		write_mmr_activation(index);
 
 		try++;
 		completion_stat = wait_completion(bau_desc, bcp, try);
-		/* UV2: wait_completion() may change the bcp->using_desc */
 
 		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
 
 		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
 			bcp->ipi_attempts = 0;
+			stat->s_overipilimit++;
 			completion_stat = FLUSH_GIVEUP;
 			break;
 		}
 		cpu_relax();
 	} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
-		 (completion_stat == FLUSH_RETRY_BUSYBUG) ||
 		 (completion_stat == FLUSH_RETRY_TIMEOUT));
 
 	time2 = get_cycles();
@@ -955,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 }
 
 /*
- * The BAU is disabled. When the disabled time period has expired, the cpu
- * that disabled it must re-enable it.
- * Return 0 if it is re-enabled for all cpus.
+ * The BAU is disabled for this uvhub. When the disabled time period has
+ * expired re-enable it.
+ * Return 0 if it is re-enabled for all cpus on this uvhub.
  */
 static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
 {
 	int tcpu;
 	struct bau_control *tbcp;
+	struct bau_control *hmaster;
 
-	if (bcp->set_bau_off) {
-		if (get_cycles() >= bcp->set_bau_on_time) {
-			stat->s_bau_reenabled++;
-			baudisabled = 0;
-			for_each_present_cpu(tcpu) {
-				tbcp = &per_cpu(bau_control, tcpu);
+	hmaster = bcp->uvhub_master;
+	spin_lock(&hmaster->disable_lock);
+	if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
+		stat->s_bau_reenabled++;
+		for_each_present_cpu(tcpu) {
+			tbcp = &per_cpu(bau_control, tcpu);
+			if (tbcp->uvhub_master == hmaster) {
 				tbcp->baudisabled = 0;
 				tbcp->period_requests = 0;
 				tbcp->period_time = 0;
+				tbcp->period_giveups = 0;
 			}
-			return 0;
 		}
+		spin_unlock(&hmaster->disable_lock);
+		return 0;
 	}
+	spin_unlock(&hmaster->disable_lock);
 	return -1;
 }
 
@@ -1078,18 +1065,32 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
-
-	/* kernel was booted 'nobau' */
-	if (nobau)
-		return cpumask;
+	unsigned long descriptor_status;
+	unsigned long status;
 
 	bcp = &per_cpu(bau_control, cpu);
 	stat = bcp->statp;
+	stat->s_enters++;
+
+	if (bcp->nobau)
+		return cpumask;
+
+	if (bcp->busy) {
+		descriptor_status =
+			read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0);
+		status = ((descriptor_status >> (bcp->uvhub_cpu *
+				UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1;
+		if (status == UV2H_DESC_BUSY)
+			return cpumask;
+		bcp->busy = 0;
+	}
 
 	/* bau was disabled due to slow response */
 	if (bcp->baudisabled) {
-		if (check_enable(bcp, stat))
+		if (check_enable(bcp, stat)) {
+			stat->s_ipifordisabled++;
 			return cpumask;
+		}
 	}
 
 	/*
@@ -1105,7 +1106,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
-	bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+	bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
 		return NULL;
@@ -1118,25 +1119,27 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
 	 * or 1 if it gave up and the original cpumask should be returned.
 	 */
-	if (!uv_flush_send_and_wait(flush_mask, bcp))
+	if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
 		return NULL;
 	else
 		return cpumask;
 }
 
 /*
- * Search the message queue for any 'other' message with the same software
- * acknowledge resource bit vector.
+ * Search the message queue for any 'other' unprocessed message with the
+ * same software acknowledge resource bit vector as the 'msg' message.
  */
 struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-			struct bau_control *bcp, unsigned char swack_vec)
+						struct bau_control *bcp)
 {
 	struct bau_pq_entry *msg_next = msg + 1;
+	unsigned char swack_vec = msg->swack_vec;
 
 	if (msg_next > bcp->queue_last)
 		msg_next = bcp->queue_first;
-	while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
-		if (msg_next->swack_vec == swack_vec)
+	while (msg_next != msg) {
+		if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) &&
+				(msg_next->swack_vec == swack_vec))
 			return msg_next;
 		msg_next++;
 		if (msg_next > bcp->queue_last)
@@ -1165,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
 	 * This message was assigned a swack resource, but no
 	 * reserved acknowlegment is pending.
 	 * The bug has prevented this message from setting the MMR.
-	 * And no other message has used the same sw_ack resource.
-	 * Do the requested shootdown but do not reply to the msg.
-	 * (the 0 means make no acknowledge)
 	 */
-	bau_process_message(mdp, bcp, 0);
-	return;
-	}
-
-	/*
-	 * Some message has set the MMR 'pending' bit; it might have been
-	 * another message. Look for that message.
-	 */
-	other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
-	if (other_msg) {
-		/* There is another. Do not ack the current one. */
-		bau_process_message(mdp, bcp, 0);
-		/*
-		 * Let the natural processing of that message acknowledge
-		 * it. Don't get the processing of sw_ack's out of order.
-		 */
-		return;
+		/*
+		 * Some message has set the MMR 'pending' bit; it might have
+		 * been another message. Look for that message.
+		 */
+		other_msg = find_another_by_swack(msg, bcp);
+		if (other_msg) {
+			/*
+			 * There is another. Process this one but do not
+			 * ack it.
+			 */
+			bau_process_message(mdp, bcp, 0);
+			/*
+			 * Let the natural processing of that other message
+			 * acknowledge it. Don't get the processing of sw_ack's
+			 * out of order.
+			 */
+			return;
+		}
 	}
 
 	/*
-	 * There is no other message using this sw_ack, so it is safe to
-	 * acknowledge it.
+	 * Either the MMR shows this one pending a reply or there is no
+	 * other message using this sw_ack, so it is safe to acknowledge it.
 	 */
 	bau_process_message(mdp, bcp, 1);
 
@@ -1295,7 +1296,8 @@ static void __init enable_timeouts(void)
 		 */
 		mmr_image |= (1L << SOFTACK_MSHIFT);
 		if (is_uv2_hub()) {
-			mmr_image |= (1L << UV2_EXT_SHFT);
+			/* hw bug workaround; do not use extended status */
+			mmr_image &= ~(1L << UV2_EXT_SHFT);
 		}
 		write_mmr_misc_control(pnode, mmr_image);
 	}
@@ -1338,29 +1340,34 @@ static inline unsigned long long usec_2_cycles(unsigned long microsec)
 static int ptc_seq_show(struct seq_file *file, void *data)
 {
 	struct ptc_stats *stat;
+	struct bau_control *bcp;
 	int cpu;
 
 	cpu = *(loff_t *)data;
 	if (!cpu) {
 		seq_printf(file,
-			"# cpu sent stime self locals remotes ncpus localhub ");
+			"# cpu bauoff sent stime self locals remotes ncpus localhub ");
 		seq_printf(file,
 			"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
 		seq_printf(file,
-			"numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
+			"numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
+		seq_printf(file,
+			"rok resetp resett giveup sto bz throt disable ");
 		seq_printf(file,
-			"resetp resett giveup sto bz throt swack recv rtime ");
+			"enable wars warshw warwaits enters ipidis plugged ");
 		seq_printf(file,
-			"all one mult none retry canc nocan reset rcan ");
+			"ipiover glim cong swack recv rtime all one mult ");
 		seq_printf(file,
-			"disable enable wars warshw warwaits\n");
+			"none retry canc nocan reset rcan\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
-		stat = &per_cpu(ptcstats, cpu);
+		bcp = &per_cpu(bau_control, cpu);
+		stat = bcp->statp;
 		/* source side statistics */
 		seq_printf(file,
-			"cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
-			cpu, stat->s_requestor, cycles_2_us(stat->s_time),
+			"cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			cpu, bcp->nobau, stat->s_requestor,
+			cycles_2_us(stat->s_time),
 			stat->s_ntargself, stat->s_ntarglocals,
 			stat->s_ntargremotes, stat->s_ntargcpu,
 			stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
@@ -1374,20 +1381,23 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 			stat->s_resets_plug, stat->s_resets_timeout,
 			stat->s_giveup, stat->s_stimeout,
 			stat->s_busy, stat->s_throttles);
+		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			stat->s_bau_disabled, stat->s_bau_reenabled,
+			stat->s_uv2_wars, stat->s_uv2_wars_hw,
+			stat->s_uv2_war_waits, stat->s_enters,
+			stat->s_ipifordisabled, stat->s_plugged,
+			stat->s_overipilimit, stat->s_giveuplimit,
+			stat->s_congested);
 
 		/* destination side statistics */
 		seq_printf(file,
-			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
 			read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
 			stat->d_requestee, cycles_2_us(stat->d_time),
 			stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
 			stat->d_nomsg, stat->d_retries, stat->d_canceled,
 			stat->d_nocanceled, stat->d_resets,
 			stat->d_rcanceled);
-		seq_printf(file, "%ld %ld %ld %ld %ld\n",
-			stat->s_bau_disabled, stat->s_bau_reenabled,
-			stat->s_uv2_wars, stat->s_uv2_wars_hw,
-			stat->s_uv2_war_waits);
 	}
 	return 0;
 }
@@ -1401,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf,
 	char *buf;
 	int ret;
 
-	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
-		"max_concur plugged_delay plugsb4reset",
-		"timeoutsb4reset ipi_reset_limit complete_threshold",
-		"congested_response_us congested_reps congested_period",
+	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n",
+		"max_concur plugged_delay plugsb4reset timeoutsb4reset",
+		"ipi_reset_limit complete_threshold congested_response_us",
+		"congested_reps disabled_period giveup_limit",
 		max_concurr, plugged_delay, plugsb4reset,
 		timeoutsb4reset, ipi_reset_limit, complete_threshold,
-		congested_respns_us, congested_reps, congested_period);
+		congested_respns_us, congested_reps, disabled_period,
+		giveup_limit);
 
 	if (!buf)
 		return -ENOMEM;
@@ -1438,6 +1449,14 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
 		return -EFAULT;
 	optstr[count - 1] = '\0';
 
+	if (!strcmp(optstr, "on")) {
+		set_bau_on();
+		return count;
+	} else if (!strcmp(optstr, "off")) {
+		set_bau_off();
+		return count;
+	}
+
 	if (strict_strtol(optstr, 10, &input_arg) < 0) {
 		printk(KERN_DEBUG "%s is invalid\n", optstr);
 		return -EINVAL;
@@ -1570,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user,
 		bcp->complete_threshold = complete_threshold;
 		bcp->cong_response_us = congested_respns_us;
 		bcp->cong_reps = congested_reps;
-		bcp->cong_period = congested_period;
+		bcp->disabled_period = sec_2_cycles(disabled_period);
+		bcp->giveup_limit = giveup_limit;
 	}
 	return count;
 }
@@ -1699,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
 		 * fairness chaining multilevel count replied_to
 		 */
 	} else {
+		/*
+		 * BIOS uses legacy mode, but UV2 hardware always
+		 * uses native mode for selective broadcasts.
+		 */
 		uv2_hdr = &bd2->header.uv2_hdr;
 		uv2_hdr->swack_flag = 1;
 		uv2_hdr->base_dest_nasid =
@@ -1811,8 +1835,8 @@ static int calculate_destination_timeout(void)
 		index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
 		mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
 		mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
-		base = timeout_base_ns[index];
-		ts_ns = base * mult1 * mult2;
+		ts_ns = timeout_base_ns[index];
+		ts_ns *= (mult1 * mult2);
 		ret = ts_ns / 1000;
 	} else {
 		/* 4 bits  0/1 for 10/80us base, 3 bits of multiplier */
@@ -1836,6 +1860,8 @@ static void __init init_per_cpu_tunables(void)
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
 		bcp->baudisabled = 0;
+		if (nobau)
+			bcp->nobau = 1;
 		bcp->statp = &per_cpu(ptcstats, cpu);
 		/* time interval to catch a hardware stay-busy bug */
 		bcp->timeout_interval = usec_2_cycles(2*timeout_us);
@@ -1848,10 +1874,11 @@ static void __init init_per_cpu_tunables(void)
 		bcp->complete_threshold = complete_threshold;
 		bcp->cong_response_us = congested_respns_us;
 		bcp->cong_reps = congested_reps;
-		bcp->cong_period = congested_period;
-		bcp->clocks_per_100_usec = usec_2_cycles(100);
+		bcp->disabled_period = sec_2_cycles(disabled_period);
+		bcp->giveup_limit = giveup_limit;
 		spin_lock_init(&bcp->queue_lock);
 		spin_lock_init(&bcp->uvhub_lock);
+		spin_lock_init(&bcp->disable_lock);
 	}
 }
 
@@ -1972,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
 		}
 		bcp->uvhub_master = *hmasterp;
 		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
-		bcp->using_desc = bcp->uvhub_cpu;
 		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
 			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
 				bcp->uvhub_cpu);
@@ -2069,16 +2095,12 @@ static int __init uv_bau_init(void)
 	if (!is_uv_system())
 		return 0;
 
-	if (nobau)
-		return 0;
-
 	for_each_possible_cpu(cur_cpu) {
 		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
 		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
 	}
 
 	nuvhubs = uv_num_possible_blades();
-	spin_lock_init(&disable_lock);
 	congested_cycles = usec_2_cycles(congested_respns_us);
 
 	uv_base_pnode = 0x7fffffff;
@@ -2091,7 +2113,8 @@ static int __init uv_bau_init(void)
 	enable_timeouts();
 
 	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
-		nobau = 1;
+		set_bau_off();
+		nobau_perm = 1;
 		return 0;
 	}
 
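
Besides the 'nobau' boot option, the patch gives a runtime switch: ptc_proc_write() now accepts the strings "on" and "off" and calls set_bau_on()/set_bau_off(). A minimal user-space sketch follows; the proc path is an assumption (it is created elsewhere in tlb_uv.c via UV_PTC_BASENAME, not in this patch).

/* Minimal user-space sketch: toggle the BAU at runtime by writing "on"
 * or "off" to the ptc proc file.  Path assumed; see UV_PTC_BASENAME in
 * tlb_uv.c for the authoritative location. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *cmd = (argc > 1) ? argv[1] : "off";
	int fd = open("/proc/sgi_uv/ptc_statistics", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)	/* handled by ptc_proc_write() */
		perror("write");
	close(fd);
	return 0;
}

Writing "off" makes every cpu's bcp->nobau nonzero, so uv_flush_tlb_others() returns the cpumask immediately and plain kernel IPIs are used; "on" is refused with a console message if nobau_perm was set because BAU initialization failed.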