author		Cliff Wickman <cpw@sgi.com>	2010-06-02 17:22:02 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-06-08 15:13:45 -0400
commit		50fb55acc5bbe5ee29d0a65262f4ec286b14d156 (patch)
tree		2099e2a96ae882b206e405976bf01725b7c4167e
parent		e8e5e8a8048006a12d7777a93baebd6e39496101 (diff)
x86, UV: Disable BAU on network congestion
The numalink network can become so congested that TLB shootdown using
the Broadcast Assist Unit becomes slower than using IPIs.  In that
case, disable use of the BAU for a period of time.  The period is
tunable; when it expires, use of the BAU is re-enabled.  A count of
these actions is added to the statistics file.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: gregkh@suse.de
LKML-Reference: <E1OJvNy-0004a4-0a@eag09.americas.sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	arch/x86/include/asm/uv/uv_bau.h	4
-rw-r--r--	arch/x86/kernel/tlb_uv.c		76
2 files changed, 77 insertions(+), 3 deletions(-)
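Before the diff itself, here is a minimal userspace C sketch of the
disable/re-enable state machine this patch implements, reduced to a
single sender.  Only the decision logic (average completion time per
period against a congestion threshold, plus a timed re-enable) mirrors
the patch; the harness, the constants, and the helper names
account_request() and bau_usable() are hypothetical, and the kernel
version additionally serializes the transition with a spinlock and
propagates the flag to every cpu's bau_control.

/*
 * Sketch only: userspace model of the congestion disable/re-enable
 * logic.  Names mirror the patch where possible; the harness is
 * hypothetical.  Build with: cc -std=c99 -o bau_sketch bau_sketch.c
 */
#include <stdio.h>

typedef unsigned long cycles_t;

static cycles_t congested_cycles = 2500;	/* threshold: cycles per request */
static long congested_reps = 2;			/* slow requests before disabling */
static cycles_t congested_period = 10000;	/* how long to stay disabled */

static int baudisabled;			/* nonzero: fall back to IPIs */
static cycles_t set_bau_on_time;	/* when to re-enable the BAU */
static cycles_t period_time;		/* cycles accumulated this period */
static long period_requests;		/* broadcasts counted this period */

/* account one completed broadcast; disable the BAU if the average is bad */
static void account_request(cycles_t now, cycles_t elapsed)
{
	period_requests++;
	period_time += elapsed;
	if (elapsed > congested_cycles && period_requests > congested_reps &&
	    period_time / period_requests > congested_cycles) {
		baudisabled = 1;
		set_bau_on_time = now + congested_period;
	}
}

/* top of the flush path: re-enable once the period expires */
static int bau_usable(cycles_t now)
{
	if (baudisabled && now >= set_bau_on_time) {
		baudisabled = 0;	/* period expired: try the BAU again */
		period_requests = 0;
		period_time = 0;
	}
	return !baudisabled;
}

int main(void)
{
	/* every broadcast "takes" 5000 cycles, well over the threshold */
	for (cycles_t now = 1000; now <= 16000; now += 1000) {
		if (bau_usable(now))
			account_request(now, 5000);
		printf("t=%5lu bau=%s\n", now, baudisabled ? "off" : "on");
	}
	return 0;
}

Running the sketch shows the BAU switched off after congested_reps
consecutive slow requests, switched back on when the period expires,
and disabled again if the congestion persists.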
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index e5543c1a80ca..9b3e750ef2d8 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -34,6 +34,7 @@
  */
 
 #define UV_ITEMS_PER_DESCRIPTOR		8
+/* the 'throttle' to prevent the hardware stay-busy bug */
 #define MAX_BAU_CONCURRENT		3
 #define UV_CPUS_PER_ACT_STATUS		32
 #define UV_ACT_STATUS_MASK		0x3
@@ -338,6 +339,7 @@ struct bau_control {
 	int timeout_tries;
 	int ipi_attempts;
 	int conseccompletes;
+	int baudisabled;
 	int set_bau_off;
 	short cpu;
 	short uvhub_cpu;
@@ -389,6 +391,8 @@ struct ptc_stats {
 	unsigned long s_busy; /* status stayed busy past s/w timer */
 	unsigned long s_throttles; /* waits in throttle */
 	unsigned long s_retry_messages; /* retry broadcasts */
+	unsigned long s_bau_reenabled; /* for bau enable/disable */
+	unsigned long s_bau_disabled; /* for bau enable/disable */
 	/* destination statistics */
 	unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */
 	unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index c8661779c51e..dc6a68312758 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -44,6 +44,9 @@ static int timeout_base_ns[] = {
 };
 static int timeout_us;
 static int nobau;
+static int baudisabled;
+static spinlock_t disable_lock;
+static cycles_t congested_cycles;
 
 /* tunables: */
 static int max_bau_concurrent = MAX_BAU_CONCURRENT;
@@ -519,6 +522,35 @@ static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
 	return 1;
 }
 
+/*
+ * Completions are taking a very long time due to a congested numalink
+ * network.
+ */
+static void
+disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
+{
+	int tcpu;
+	struct bau_control *tbcp;
+
+	/* let only one cpu do this disabling */
+	spin_lock(&disable_lock);
+	if (!baudisabled && bcp->period_requests &&
+	    ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
+		/* it becomes this cpu's job to turn on the use of the
+		   BAU again */
+		baudisabled = 1;
+		bcp->set_bau_off = 1;
+		bcp->set_bau_on_time = get_cycles() +
+			sec_2_cycles(bcp->congested_period);
+		stat->s_bau_disabled++;
+		for_each_present_cpu(tcpu) {
+			tbcp = &per_cpu(bau_control, tcpu);
+			tbcp->baudisabled = 1;
+		}
+	}
+	spin_unlock(&disable_lock);
+}
+
 /**
  * uv_flush_send_and_wait
  *
@@ -681,6 +713,14 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
 	if (time2 > time1) {
 		elapsed = time2 - time1;
 		stat->s_time += elapsed;
+		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
+			bcp->period_requests++;
+			bcp->period_time += elapsed;
+			if ((elapsed > congested_cycles) &&
+			    (bcp->period_requests > bcp->congested_reps)) {
+				disable_for_congestion(bcp, stat);
+			}
+		}
 	} else
 		stat->s_requestor--; /* don't count this one */
 	if (completion_status == FLUSH_COMPLETE && try > 1)
@@ -747,12 +787,32 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
+	struct bau_control *tbcp;
 
 	/* kernel was booted 'nobau' */
 	if (nobau)
 		return cpumask;
 
 	bcp = &per_cpu(bau_control, cpu);
+	stat = &per_cpu(ptcstats, cpu);
+
+	/* bau was disabled due to slow response */
+	if (bcp->baudisabled) {
+		/* the cpu that disabled it must re-enable it */
+		if (bcp->set_bau_off) {
+			if (get_cycles() >= bcp->set_bau_on_time) {
+				stat->s_bau_reenabled++;
+				baudisabled = 0;
+				for_each_present_cpu(tcpu) {
+					tbcp = &per_cpu(bau_control, tcpu);
+					tbcp->baudisabled = 0;
+					tbcp->period_requests = 0;
+					tbcp->period_time = 0;
+				}
+			}
+		}
+		return cpumask;
+	}
 
 	/*
 	 * Each sending cpu has a per-cpu mask which it fills from the caller's
@@ -793,7 +853,6 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	else
 		return NULL;
 	}
-	stat = &per_cpu(ptcstats, cpu);
 	stat->s_requestor++;
 	stat->s_ntargcpu += remotes;
 	remotes = bau_uvhub_weight(&bau_desc->distribution);
@@ -973,7 +1032,9 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
 		seq_printf(file,
 			"sw_ack recv rtime all ");
 		seq_printf(file,
-			"one mult none retry canc nocan reset rcan\n");
+			"one mult none retry canc nocan reset rcan ");
+		seq_printf(file,
+			"disable enable\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
 		stat = &per_cpu(ptcstats, cpu);
@@ -993,7 +1054,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
 
 		/* destination side statistics */
 		seq_printf(file,
-			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
+			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
 			uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
 				UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
 			stat->d_requestee, cycles_2_us(stat->d_time),
@@ -1001,6 +1062,8 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
 			stat->d_nomsg, stat->d_retries, stat->d_canceled,
 			stat->d_nocanceled, stat->d_resets,
 			stat->d_rcanceled);
+		seq_printf(file, "%ld %ld\n",
+			stat->s_bau_disabled, stat->s_bau_reenabled);
 	}
 
 	return 0;
@@ -1112,6 +1175,10 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
1112 "reset: number of ipi-style reset requests processed\n"); 1175 "reset: number of ipi-style reset requests processed\n");
1113 printk(KERN_DEBUG 1176 printk(KERN_DEBUG
1114 "rcan: number messages canceled by reset requests\n"); 1177 "rcan: number messages canceled by reset requests\n");
1178 printk(KERN_DEBUG
1179 "disable: number times use of the BAU was disabled\n");
1180 printk(KERN_DEBUG
1181 "enable: number times use of the BAU was re-enabled\n");
1115 } else if (input_arg == -1) { 1182 } else if (input_arg == -1) {
1116 for_each_present_cpu(cpu) { 1183 for_each_present_cpu(cpu) {
1117 stat = &per_cpu(ptcstats, cpu); 1184 stat = &per_cpu(ptcstats, cpu);
@@ -1568,6 +1635,7 @@ static void uv_init_per_cpu(int nuvhubs)
 	kfree(uvhub_descs);
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
+		bcp->baudisabled = 0;
 		/* time interval to catch a hardware stay-busy bug */
 		bcp->timeout_interval = microsec_2_cycles(2*timeout_us);
 		bcp->max_bau_concurrent = max_bau_concurrent;
@@ -1609,6 +1677,8 @@ static int __init uv_bau_init(void)
 	uv_nshift = uv_hub_info->m_val;
 	uv_mmask = (1UL << uv_hub_info->m_val) - 1;
 	nuvhubs = uv_num_possible_blades();
+	spin_lock_init(&disable_lock);
+	congested_cycles = microsec_2_cycles(congested_response_us);
 
 	uv_init_per_cpu(nuvhubs);
 