author		Cliff Wickman <cpw@sgi.com>	2012-01-16 16:19:47 -0500
committer	Ingo Molnar <mingo@elte.hu>	2012-01-17 03:09:54 -0500
commit		c5d35d399e685acccc85a675e8765c26b2a9813a (patch)
tree		5b66e875217ccc2e106162a089efddd5fec40c21
parent		d059f9fa84a30e04279c6ff615e9e2cf3b260191 (diff)
x86/UV2: Work around BAU bug
This patch implements a workaround for a UV2 hardware bug.

The bug is a non-atomic update of a memory-mapped register. When hardware
message delivery and software message acknowledge occur simultaneously the
pending message acknowledge for the arriving message may be lost. This
causes the sender's message status to stay busy.

Part of the workaround is to not acknowledge a completed message until it
is verified that no other message is actually using the resource that is
mistakenly recorded in the completed message.

Part of the workaround is to test for long elapsed time in such a busy
condition, then handle it by using a spare sending descriptor. The
stay-busy condition is eventually timed out by hardware, and then the
original sending descriptor can be re-used. Most of that logic change is
in keeping track of the current descriptor and the state of the spares.

The occurrences of the workaround are added to the BAU statistics.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120116211947.GC5767@sgi.com
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
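The descriptor-switching half of the workaround (handle_uv2_busy() in the patch
below) reduces to: reuse the cpu's normal descriptor if it has gone idle,
otherwise claim the first spare descriptor (slots 32-63) that is neither busy
in hardware nor already marked in the hub's in-use map, and wait if nothing is
free. The following is only a minimal user-space sketch of that selection step;
select_descriptor(), desc_busy(), STATUS_BITS and DESC_BUSY are illustrative
stand-ins for the real MMR layout, not the kernel code.

#include <stdint.h>

/* Illustrative constants; the real values live in uv_bau.h. */
#define SPARE_BASE	32	/* descriptors 32-63 are the spares */
#define NUM_SPARES	32
#define STATUS_BITS	2	/* assumed width of one per-descriptor status field */
#define DESC_BUSY	1	/* assumed "busy" encoding of that field */

/* Test one status field in a 64-bit activation-status word. */
static int desc_busy(uint64_t status_word, int slot)
{
	return ((status_word >> (slot * STATUS_BITS)) &
		((1u << STATUS_BITS) - 1)) == DESC_BUSY;
}

/*
 * Pick a sending descriptor for this cpu.  status0/status1 stand in for
 * UVH_LB_BAU_SB_ACTIVATION_STATUS_0/_1 (descriptors 0-31 and 32-63):
 *  - reuse the normal descriptor (the hub-relative cpu number) if idle,
 *  - otherwise claim the first spare that is idle and not in *inuse_map,
 *  - return -1 if everything is busy, so the caller waits and retries.
 */
static int select_descriptor(uint64_t status0, uint64_t status1,
			     int normal, uint32_t *inuse_map)
{
	int i;

	if (!desc_busy(status0, normal))
		return normal;

	for (i = 0; i < NUM_SPARES; i++) {
		if (*inuse_map & (1u << i))
			continue;		/* spare already claimed */
		if (desc_busy(status1, i))
			continue;		/* spare still busy in hardware */
		*inuse_map |= 1u << i;		/* claim the spare */
		return SPARE_BASE + i;
	}
	return -1;
}

In the patch itself this decision is made under hmaster->uvhub_lock, and the
stuck descriptor becomes reusable once hardware eventually times out the
busy condition.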
-rw-r--r--	arch/x86/include/asm/uv/uv_bau.h	13
-rw-r--r--	arch/x86/platform/uv/tlb_uv.c		274
2 files changed, 254 insertions(+), 33 deletions(-)
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 4a46b27ee9a0..1b82f7e87393 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -167,6 +167,7 @@
 #define FLUSH_RETRY_TIMEOUT		2
 #define FLUSH_GIVEUP			3
 #define FLUSH_COMPLETE			4
+#define FLUSH_RETRY_BUSYBUG		5
 
 /*
  * tuning the action when the numalink network is extremely delayed
@@ -463,7 +464,6 @@ struct bau_pq_entry {
 struct msg_desc {
 	struct bau_pq_entry	*msg;
 	int			msg_slot;
-	int			swack_slot;
 	struct bau_pq_entry	*queue_first;
 	struct bau_pq_entry	*queue_last;
 };
@@ -517,6 +517,9 @@ struct ptc_stats {
 	unsigned long	s_retry_messages;	/* retry broadcasts */
 	unsigned long	s_bau_reenabled;	/* for bau enable/disable */
 	unsigned long	s_bau_disabled;		/* for bau enable/disable */
+	unsigned long	s_uv2_wars;		/* uv2 workaround, perm. busy */
+	unsigned long	s_uv2_wars_hw;		/* uv2 workaround, hiwater */
+	unsigned long	s_uv2_war_waits;	/* uv2 workaround, long waits */
 	/* destination statistics */
 	unsigned long	d_alltlb;		/* times all tlb's on this
 						   cpu were flushed */
@@ -593,6 +596,8 @@ struct bau_control {
 	short			cpus_in_socket;
 	short			cpus_in_uvhub;
 	short			partition_base_pnode;
+	short			using_desc;	/* an index, like uvhub_cpu */
+	unsigned int		inuse_map;
 	unsigned short		message_number;
 	unsigned short		uvhub_quiesce;
 	short			socket_acknowledge_count[DEST_Q_SIZE];
@@ -610,6 +615,7 @@ struct bau_control {
 	int			cong_response_us;
 	int			cong_reps;
 	int			cong_period;
+	unsigned long		clocks_per_100_usec;
 	cycles_t		period_time;
 	long			period_requests;
 	struct hub_and_pnode	*thp;
@@ -670,6 +676,11 @@ static inline void write_mmr_sw_ack(unsigned long mr)
 	uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
 }
 
+static inline void write_gmmr_sw_ack(int pnode, unsigned long mr)
+{
+	write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
+}
+
 static inline unsigned long read_mmr_sw_ack(void)
 {
 	return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index c425ff1a9cc3..9010ca715c03 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -157,13 +157,14 @@ static int __init uvhub_to_first_apicid(int uvhub)
  * clear of the Timeout bit (as well) will free the resource. No reply will
  * be sent (the hardware will only do one reply per message).
  */
-static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp)
+static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
+						int do_acknowledge)
 {
 	unsigned long dw;
 	struct bau_pq_entry *msg;
 
 	msg = mdp->msg;
-	if (!msg->canceled) {
+	if (!msg->canceled && do_acknowledge) {
 		dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
 		write_mmr_sw_ack(dw);
 	}
@@ -212,8 +213,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
 		if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
 			unsigned long mr;
 			/*
-			 * is the resource timed out?
-			 * make everyone ignore the cancelled message.
+			 * Is the resource timed out?
+			 * Make everyone ignore the cancelled message.
 			 */
 			msg2->canceled = 1;
 			stat->d_canceled++;
@@ -231,8 +232,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
  * Do all the things a cpu should do for a TLB shootdown message.
  * Other cpu's may come here at the same time for this message.
  */
-static void bau_process_message(struct msg_desc *mdp,
-				struct bau_control *bcp)
+static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
+				int do_acknowledge)
 {
 	short socket_ack_count = 0;
 	short *sp;
@@ -284,8 +285,9 @@ static void bau_process_message(struct msg_desc *mdp,
 		if (msg_ack_count == bcp->cpus_in_uvhub) {
 			/*
 			 * All cpus in uvhub saw it; reply
+			 * (unless we are in the UV2 workaround)
 			 */
-			reply_to_message(mdp, bcp);
+			reply_to_message(mdp, bcp, do_acknowledge);
 		}
 	}
 
@@ -491,27 +493,138 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
 /*
  * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
  */
-static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu)
+static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
 {
 	unsigned long descriptor_status;
 	unsigned long descriptor_status2;
 
 	descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
-	descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL;
+	descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
 	descriptor_status = (descriptor_status << 1) | descriptor_status2;
 	return descriptor_status;
 }
 
+/*
+ * Return whether the status of the descriptor that is normally used for this
+ * cpu (the one indexed by its hub-relative cpu number) is busy.
+ * The status of the original 32 descriptors is always reflected in the 64
+ * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
+ * The bit provided by the activation_status_2 register is irrelevant to
+ * the status if it is only being tested for busy or not busy.
+ */
+int normal_busy(struct bau_control *bcp)
+{
+	int cpu = bcp->uvhub_cpu;
+	int mmr_offset;
+	int right_shift;
+
+	mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+	right_shift = cpu * UV_ACT_STATUS_SIZE;
+	return (((((read_lmmr(mmr_offset) >> right_shift) &
+				UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
+}
+
+/*
+ * Entered when a bau descriptor has gone into a permanent busy wait because
+ * of a hardware bug.
+ * Workaround the bug.
+ */
+int handle_uv2_busy(struct bau_control *bcp)
+{
+	int busy_one = bcp->using_desc;
+	int normal = bcp->uvhub_cpu;
+	int selected = -1;
+	int i;
+	unsigned long descriptor_status;
+	unsigned long status;
+	int mmr_offset;
+	struct bau_desc *bau_desc_old;
+	struct bau_desc *bau_desc_new;
+	struct bau_control *hmaster = bcp->uvhub_master;
+	struct ptc_stats *stat = bcp->statp;
+	cycles_t ttm;
+
+	stat->s_uv2_wars++;
+	spin_lock(&hmaster->uvhub_lock);
+	/* try for the original first */
+	if (busy_one != normal) {
+		if (!normal_busy(bcp))
+			selected = normal;
+	}
+	if (selected < 0) {
+		/* can't use the normal, select an alternate */
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+		descriptor_status = read_lmmr(mmr_offset);
+
+		/* scan available descriptors 32-63 */
+		for (i = 0; i < UV_CPUS_PER_AS; i++) {
+			if ((hmaster->inuse_map & (1 << i)) == 0) {
+				status = ((descriptor_status >>
+						(i * UV_ACT_STATUS_SIZE)) &
+						UV_ACT_STATUS_MASK) << 1;
+				if (status != UV2H_DESC_BUSY) {
+					selected = i + UV_CPUS_PER_AS;
+					break;
+				}
+			}
+		}
+	}
+
+	if (busy_one != normal)
+		/* mark the busy alternate as not in-use */
+		hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
+
+	if (selected >= 0) {
+		/* switch to the selected descriptor */
+		if (selected != normal) {
+			/* set the selected alternate as in-use */
+			hmaster->inuse_map |=
+					(1 << (selected - UV_CPUS_PER_AS));
+			if (selected > stat->s_uv2_wars_hw)
+				stat->s_uv2_wars_hw = selected;
+		}
+		bau_desc_old = bcp->descriptor_base;
+		bau_desc_old += (ITEMS_PER_DESC * busy_one);
+		bcp->using_desc = selected;
+		bau_desc_new = bcp->descriptor_base;
+		bau_desc_new += (ITEMS_PER_DESC * selected);
+		*bau_desc_new = *bau_desc_old;
+	} else {
+		/*
+		 * All are busy. Wait for the normal one for this cpu to
+		 * free up.
+		 */
+		stat->s_uv2_war_waits++;
+		spin_unlock(&hmaster->uvhub_lock);
+		ttm = get_cycles();
+		do {
+			cpu_relax();
+		} while (normal_busy(bcp));
+		spin_lock(&hmaster->uvhub_lock);
+		/* switch to the original descriptor */
+		bcp->using_desc = normal;
+		bau_desc_old = bcp->descriptor_base;
+		bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc);
+		bcp->using_desc = (ITEMS_PER_DESC * normal);
+		bau_desc_new = bcp->descriptor_base;
+		bau_desc_new += (ITEMS_PER_DESC * normal);
+		*bau_desc_new = *bau_desc_old;	/* copy the entire descriptor */
+	}
+	spin_unlock(&hmaster->uvhub_lock);
+	return FLUSH_RETRY_BUSYBUG;
+}
+
 static int uv2_wait_completion(struct bau_desc *bau_desc,
 				unsigned long mmr_offset, int right_shift,
 				struct bau_control *bcp, long try)
 {
 	unsigned long descriptor_stat;
 	cycles_t ttm;
-	int cpu = bcp->uvhub_cpu;
+	int desc = bcp->using_desc;
+	long busy_reps = 0;
 	struct ptc_stats *stat = bcp->statp;
 
-	descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
+	descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
 
 	/* spin on the status MMR, waiting for it to go idle */
 	while (descriptor_stat != UV2H_DESC_IDLE) {
@@ -542,12 +655,23 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 			bcp->conseccompletes = 0;
 			return FLUSH_RETRY_TIMEOUT;
 		} else {
+			busy_reps++;
+			if (busy_reps > 1000000) {
+				/* not to hammer on the clock */
+				busy_reps = 0;
+				ttm = get_cycles();
+				if ((ttm - bcp->send_message) >
+					(bcp->clocks_per_100_usec)) {
+					return handle_uv2_busy(bcp);
+				}
+			}
 			/*
 			 * descriptor_stat is still BUSY
 			 */
 			cpu_relax();
 		}
-		descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
+		descriptor_stat = uv2_read_status(mmr_offset, right_shift,
+								desc);
 	}
 	bcp->conseccompletes++;
 	return FLUSH_COMPLETE;
@@ -563,14 +687,14 @@ static int wait_completion(struct bau_desc *bau_desc,
 {
 	int right_shift;
 	unsigned long mmr_offset;
-	int cpu = bcp->uvhub_cpu;
+	int desc = bcp->using_desc;
 
-	if (cpu < UV_CPUS_PER_AS) {
+	if (desc < UV_CPUS_PER_AS) {
 		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
-		right_shift = cpu * UV_ACT_STATUS_SIZE;
+		right_shift = desc * UV_ACT_STATUS_SIZE;
 	} else {
 		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-		right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
+		right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
 	}
 
 	if (bcp->uvhub_version == 1)
@@ -752,8 +876,7 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct bau_desc *bau_desc,
-			struct cpumask *flush_mask, struct bau_control *bcp)
+int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 {
 	int seq_number = 0;
 	int completion_stat = 0;
@@ -766,20 +889,24 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
 	struct bau_control *hmaster = bcp->uvhub_master;
 	struct uv1_bau_msg_header *uv1_hdr = NULL;
 	struct uv2_bau_msg_header *uv2_hdr = NULL;
+	struct bau_desc *bau_desc;
 
-	if (bcp->uvhub_version == 1) {
-		uv1 = 1;
+	if (bcp->uvhub_version == 1)
 		uv1_throttle(hmaster, stat);
-		uv1_hdr = &bau_desc->header.uv1_hdr;
-	} else
-		uv2_hdr = &bau_desc->header.uv2_hdr;
 
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 
 	time1 = get_cycles();
 	do {
-		if (try == 0) {
+		bau_desc = bcp->descriptor_base;
+		bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+		if (bcp->uvhub_version == 1) {
+			uv1 = 1;
+			uv1_hdr = &bau_desc->header.uv1_hdr;
+		} else
+			uv2_hdr = &bau_desc->header.uv2_hdr;
+		if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
 			if (uv1)
 				uv1_hdr->msg_type = MSG_REGULAR;
 			else
@@ -797,13 +924,14 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
 			uv1_hdr->sequence = seq_number;
 		else
 			uv2_hdr->sequence = seq_number;
-		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
+		index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
 		bcp->send_message = get_cycles();
 
 		write_mmr_activation(index);
 
 		try++;
 		completion_stat = wait_completion(bau_desc, bcp, try);
+		/* UV2: wait_completion() may change the bcp->using_desc */
 
 		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
 
@@ -814,6 +942,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
 		}
 		cpu_relax();
 	} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
+		 (completion_stat == FLUSH_RETRY_BUSYBUG) ||
 		 (completion_stat == FLUSH_RETRY_TIMEOUT));
 
 	time2 = get_cycles();
@@ -828,6 +957,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
 	record_send_stats(time1, time2, bcp, stat, completion_stat, try);
 
 	if (completion_stat == FLUSH_GIVEUP)
+		/* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
 		return 1;
 	return 0;
 }
@@ -983,7 +1113,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
-	bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
+	bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
 		return NULL;
@@ -996,13 +1126,86 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
 	 * or 1 if it gave up and the original cpumask should be returned.
 	 */
-	if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
+	if (!uv_flush_send_and_wait(flush_mask, bcp))
 		return NULL;
 	else
 		return cpumask;
 }
 
 /*
+ * Search the message queue for any 'other' message with the same software
+ * acknowledge resource bit vector.
+ */
+struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
+			struct bau_control *bcp, unsigned char swack_vec)
+{
+	struct bau_pq_entry *msg_next = msg + 1;
+
+	if (msg_next > bcp->queue_last)
+		msg_next = bcp->queue_first;
+	while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
+		if (msg_next->swack_vec == swack_vec)
+			return msg_next;
+		msg_next++;
+		if (msg_next > bcp->queue_last)
+			msg_next = bcp->queue_first;
+	}
+	return NULL;
+}
+
+/*
+ * UV2 needs to work around a bug in which an arriving message has not
+ * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
+ * Such a message must be ignored.
+ */
+void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
+{
+	unsigned long mmr_image;
+	unsigned char swack_vec;
+	struct bau_pq_entry *msg = mdp->msg;
+	struct bau_pq_entry *other_msg;
+
+	mmr_image = read_mmr_sw_ack();
+	swack_vec = msg->swack_vec;
+
+	if ((swack_vec & mmr_image) == 0) {
+		/*
+		 * This message was assigned a swack resource, but no
+		 * reserved acknowlegment is pending.
+		 * The bug has prevented this message from setting the MMR.
+		 * And no other message has used the same sw_ack resource.
+		 * Do the requested shootdown but do not reply to the msg.
+		 * (the 0 means make no acknowledge)
+		 */
+		bau_process_message(mdp, bcp, 0);
+		return;
+	}
+
+	/*
+	 * Some message has set the MMR 'pending' bit; it might have been
+	 * another message. Look for that message.
+	 */
+	other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
+	if (other_msg) {
+		/* There is another. Do not ack the current one. */
+		bau_process_message(mdp, bcp, 0);
+		/*
+		 * Let the natural processing of that message acknowledge
+		 * it. Don't get the processing of sw_ack's out of order.
+		 */
+		return;
+	}
+
+	/*
+	 * There is no other message using this sw_ack, so it is safe to
+	 * acknowledge it.
+	 */
+	bau_process_message(mdp, bcp, 1);
+
+	return;
+}
+
+/*
  * The BAU message interrupt comes here. (registered by set_intr_gate)
  * See entry_64.S
  *
@@ -1038,9 +1241,11 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 		count++;
 
 		msgdesc.msg_slot = msg - msgdesc.queue_first;
-		msgdesc.swack_slot = ffs(msg->swack_vec) - 1;
 		msgdesc.msg = msg;
-		bau_process_message(&msgdesc, bcp);
+		if (bcp->uvhub_version == 2)
+			process_uv2_message(&msgdesc, bcp);
+		else
+			bau_process_message(&msgdesc, bcp, 1);
 
 		msg++;
 		if (msg > msgdesc.queue_last)
@@ -1158,7 +1363,7 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 		seq_printf(file,
 			"all one mult none retry canc nocan reset rcan ");
 		seq_printf(file,
-			"disable enable\n");
+			"disable enable wars warshw warwaits\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
 		stat = &per_cpu(ptcstats, cpu);
@@ -1189,8 +1394,10 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 			   stat->d_nomsg, stat->d_retries, stat->d_canceled,
 			   stat->d_nocanceled, stat->d_resets,
 			   stat->d_rcanceled);
-		seq_printf(file, "%ld %ld\n",
-			stat->s_bau_disabled, stat->s_bau_reenabled);
+		seq_printf(file, "%ld %ld %ld %ld %ld\n",
+			stat->s_bau_disabled, stat->s_bau_reenabled,
+			stat->s_uv2_wars, stat->s_uv2_wars_hw,
+			stat->s_uv2_war_waits);
 	}
 	return 0;
 }
@@ -1564,6 +1771,7 @@ static void pq_init(int node, int pnode)
 	write_mmr_payload_first(pnode, pn_first);
 	write_mmr_payload_tail(pnode, first);
 	write_mmr_payload_last(pnode, last);
+	write_gmmr_sw_ack(pnode, 0xffffUL);
 
 	/* in effect, all msg_type's are set to MSG_NOOP */
 	memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
@@ -1651,6 +1859,7 @@ static void __init init_per_cpu_tunables(void)
 		bcp->cong_response_us = congested_respns_us;
 		bcp->cong_reps = congested_reps;
 		bcp->cong_period = congested_period;
+		bcp->clocks_per_100_usec = usec_2_cycles(100);
 	}
 }
 
@@ -1771,6 +1980,7 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
 		}
 		bcp->uvhub_master = *hmasterp;
 		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+		bcp->using_desc = bcp->uvhub_cpu;
 		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
 			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
 				bcp->uvhub_cpu);