about summary refs log tree commit diff stats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorMike Marciniszyn <mike.marciniszyn@qlogic.com>2011-07-21 09:21:16 -0400
committerRoland Dreier <roland@purestorage.com>2011-07-22 14:56:05 -0400
commite67306a38063d75f61d405527ff8bf1c8e92eb84 (patch)
treed40991481f25dea940bd549524a10679cbbe5699 /drivers/infiniband
parente800bd032c2623b10ef38a4d7d646e3e3c7bb3ad (diff)
IB/qib: Defer HCA error events to tasklet
With ib_qib options: options ib_qib krcvqs=1 pcie_caps=0x51 rcvhdrcnt=4096 singleport=1 ibmtu=4 a run of ib_write_bw -a yields the following: ------------------------------------------------------------------ #bytes #iterations BW peak[MB/sec] BW average[MB/sec] 1048576 5000 2910.64 229.80 ------------------------------------------------------------------ The top cpu use in a profile is: CPU: Intel Architectural Perfmon, speed 2400.15 MHz (estimated) Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit mask of 0x00 (No unit mask) count 1002300 Counted LLC_MISSES events (Last level cache demand requests from this core that missed the LLC) with a unit mask of 0x41 (No unit mask) count 10000 samples % samples % app name symbol name 15237 29.2642 964 17.1195 ib_qib.ko qib_7322intr 12320 23.6618 1040 18.4692 ib_qib.ko handle_7322_errors 4106 7.8860 0 0 vmlinux vsnprintf Analysis of the stats, profile, the code, and the annotated profile indicate: - All of the overflow interrupts (one per packet overflow) are serviced on CPU0 with no mitigation on the frequency. - All of the receive interrupts are being serviced by CPU0. (That is the way truescale.cmds statically allocates the kctx IRQs to CPU) - The code is spending all of its time servicing QIB_I_C_ERROR RcvEgrFullErr interrupts on CPU0, starving the packet receive processing. - The decode_err routine is very inefficient, using a printf variant to format a "%s" and continues to loop when the errs mask has been cleared. - Both qib_7322intr and handle_7322_errors read pci registers, which is very inefficient. The fix does the following: - Adds a tasklet to service QIB_I_C_ERROR - Replaces the very inefficient scnprintf() with a memcpy(). A field is added to qib_hwerror_msgs to save the sizeof("string") at compile time so that a strlen is not needed during err_decode(). - The most frequent errors (Overflows) are serviced first to exit the loop as early as possible. 
- The loop now exits as soon as the errs mask is clear rather than fruitlessly looping through the msp array. With this fix the performance changes to: ------------------------------------------------------------------ #bytes #iterations BW peak[MB/sec] BW average[MB/sec] 1048576 5000 2990.64 2941.35 ------------------------------------------------------------------ During testing of the error handling overflow patch, it was determined that some CPUs were slower when servicing both overflow and receive interrupts on CPU0 with different MSI interrupt vectors. This patch adds an option (krcvq01_no_msi) to not use a dedicated MSI interrupt for kctx's < 2 and to service them on the default interrupt. For some CPUs, the interrupt enter/exit is more costly than the additional PCI read in the default handler. Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/qib/qib.h3
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c71
2 files changed, 53 insertions, 21 deletions
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 769a1d9da4b7..c9624ea87209 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1012,6 +1012,8 @@ struct qib_devdata {
1012 u8 psxmitwait_supported; 1012 u8 psxmitwait_supported;
1013 /* cycle length of PS* counters in HW (in picoseconds) */ 1013 /* cycle length of PS* counters in HW (in picoseconds) */
1014 u16 psxmitwait_check_rate; 1014 u16 psxmitwait_check_rate;
1015 /* high volume overflow errors defered to tasklet */
1016 struct tasklet_struct error_tasklet;
1015}; 1017};
1016 1018
1017/* hol_state values */ 1019/* hol_state values */
@@ -1433,6 +1435,7 @@ extern struct mutex qib_mutex;
1433struct qib_hwerror_msgs { 1435struct qib_hwerror_msgs {
1434 u64 mask; 1436 u64 mask;
1435 const char *msg; 1437 const char *msg;
1438 size_t sz;
1436}; 1439};
1437 1440
1438#define QLOGIC_IB_HWE_MSG(a, b) { .mask = a, .msg = b } 1441#define QLOGIC_IB_HWE_MSG(a, b) { .mask = a, .msg = b }
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 821226cf6002..5ea9ece23b33 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -114,6 +114,10 @@ static ushort qib_singleport;
114module_param_named(singleport, qib_singleport, ushort, S_IRUGO); 114module_param_named(singleport, qib_singleport, ushort, S_IRUGO);
115MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space"); 115MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space");
116 116
117static ushort qib_krcvq01_no_msi;
118module_param_named(krcvq01_no_msi, qib_krcvq01_no_msi, ushort, S_IRUGO);
119MODULE_PARM_DESC(krcvq01_no_msi, "No MSI for kctx < 2");
120
117/* 121/*
118 * Receive header queue sizes 122 * Receive header queue sizes
119 */ 123 */
@@ -1106,9 +1110,9 @@ static inline u32 read_7322_creg32_port(const struct qib_pportdata *ppd,
1106#define AUTONEG_TRIES 3 /* sequential retries to negotiate DDR */ 1110#define AUTONEG_TRIES 3 /* sequential retries to negotiate DDR */
1107 1111
1108#define HWE_AUTO(fldname) { .mask = SYM_MASK(HwErrMask, fldname##Mask), \ 1112#define HWE_AUTO(fldname) { .mask = SYM_MASK(HwErrMask, fldname##Mask), \
1109 .msg = #fldname } 1113 .msg = #fldname , .sz = sizeof(#fldname) }
1110#define HWE_AUTO_P(fldname, port) { .mask = SYM_MASK(HwErrMask, \ 1114#define HWE_AUTO_P(fldname, port) { .mask = SYM_MASK(HwErrMask, \
1111 fldname##Mask##_##port), .msg = #fldname } 1115 fldname##Mask##_##port), .msg = #fldname , .sz = sizeof(#fldname) }
1112static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = { 1116static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = {
1113 HWE_AUTO_P(IBSerdesPClkNotDetect, 1), 1117 HWE_AUTO_P(IBSerdesPClkNotDetect, 1),
1114 HWE_AUTO_P(IBSerdesPClkNotDetect, 0), 1118 HWE_AUTO_P(IBSerdesPClkNotDetect, 0),
@@ -1126,14 +1130,16 @@ static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = {
1126 HWE_AUTO_P(IBCBusFromSPCParityErr, 0), 1130 HWE_AUTO_P(IBCBusFromSPCParityErr, 0),
1127 HWE_AUTO(statusValidNoEop), 1131 HWE_AUTO(statusValidNoEop),
1128 HWE_AUTO(LATriggered), 1132 HWE_AUTO(LATriggered),
1129 { .mask = 0 } 1133 { .mask = 0, .sz = 0 }
1130}; 1134};
1131 1135
1132#define E_AUTO(fldname) { .mask = SYM_MASK(ErrMask, fldname##Mask), \ 1136#define E_AUTO(fldname) { .mask = SYM_MASK(ErrMask, fldname##Mask), \
1133 .msg = #fldname } 1137 .msg = #fldname, .sz = sizeof(#fldname) }
1134#define E_P_AUTO(fldname) { .mask = SYM_MASK(ErrMask_0, fldname##Mask), \ 1138#define E_P_AUTO(fldname) { .mask = SYM_MASK(ErrMask_0, fldname##Mask), \
1135 .msg = #fldname } 1139 .msg = #fldname, .sz = sizeof(#fldname) }
1136static const struct qib_hwerror_msgs qib_7322error_msgs[] = { 1140static const struct qib_hwerror_msgs qib_7322error_msgs[] = {
1141 E_AUTO(RcvEgrFullErr),
1142 E_AUTO(RcvHdrFullErr),
1137 E_AUTO(ResetNegated), 1143 E_AUTO(ResetNegated),
1138 E_AUTO(HardwareErr), 1144 E_AUTO(HardwareErr),
1139 E_AUTO(InvalidAddrErr), 1145 E_AUTO(InvalidAddrErr),
@@ -1146,9 +1152,7 @@ static const struct qib_hwerror_msgs qib_7322error_msgs[] = {
1146 E_AUTO(SendSpecialTriggerErr), 1152 E_AUTO(SendSpecialTriggerErr),
1147 E_AUTO(SDmaWrongPortErr), 1153 E_AUTO(SDmaWrongPortErr),
1148 E_AUTO(SDmaBufMaskDuplicateErr), 1154 E_AUTO(SDmaBufMaskDuplicateErr),
1149 E_AUTO(RcvHdrFullErr), 1155 { .mask = 0, .sz = 0 }
1150 E_AUTO(RcvEgrFullErr),
1151 { .mask = 0 }
1152}; 1156};
1153 1157
1154static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { 1158static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
@@ -1158,7 +1162,8 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
1158 /* 1162 /*
1159 * SDmaHaltErr is not really an error, make it clearer; 1163 * SDmaHaltErr is not really an error, make it clearer;
1160 */ 1164 */
1161 {.mask = SYM_MASK(ErrMask_0, SDmaHaltErrMask), .msg = "SDmaHalted"}, 1165 {.mask = SYM_MASK(ErrMask_0, SDmaHaltErrMask), .msg = "SDmaHalted",
1166 .sz = 11},
1162 E_P_AUTO(SDmaDescAddrMisalignErr), 1167 E_P_AUTO(SDmaDescAddrMisalignErr),
1163 E_P_AUTO(SDmaUnexpDataErr), 1168 E_P_AUTO(SDmaUnexpDataErr),
1164 E_P_AUTO(SDmaMissingDwErr), 1169 E_P_AUTO(SDmaMissingDwErr),
@@ -1194,7 +1199,7 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
1194 E_P_AUTO(RcvICRCErr), 1199 E_P_AUTO(RcvICRCErr),
1195 E_P_AUTO(RcvVCRCErr), 1200 E_P_AUTO(RcvVCRCErr),
1196 E_P_AUTO(RcvFormatErr), 1201 E_P_AUTO(RcvFormatErr),
1197 { .mask = 0 } 1202 { .mask = 0, .sz = 0 }
1198}; 1203};
1199 1204
1200/* 1205/*
@@ -1202,17 +1207,17 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
1202 * context 1207 * context
1203 */ 1208 */
1204#define INTR_AUTO(fldname) { .mask = SYM_MASK(IntMask, fldname##Mask), \ 1209#define INTR_AUTO(fldname) { .mask = SYM_MASK(IntMask, fldname##Mask), \
1205 .msg = #fldname } 1210 .msg = #fldname, .sz = sizeof(#fldname) }
1206/* Below generates "auto-message" for interrupts specific to a port */ 1211/* Below generates "auto-message" for interrupts specific to a port */
1207#define INTR_AUTO_P(fldname) { .mask = MASK_ACROSS(\ 1212#define INTR_AUTO_P(fldname) { .mask = MASK_ACROSS(\
1208 SYM_LSB(IntMask, fldname##Mask##_0), \ 1213 SYM_LSB(IntMask, fldname##Mask##_0), \
1209 SYM_LSB(IntMask, fldname##Mask##_1)), \ 1214 SYM_LSB(IntMask, fldname##Mask##_1)), \
1210 .msg = #fldname "_P" } 1215 .msg = #fldname "_P", .sz = sizeof(#fldname "_P") }
1211/* For some reason, the SerDesTrimDone bits are reversed */ 1216/* For some reason, the SerDesTrimDone bits are reversed */
1212#define INTR_AUTO_PI(fldname) { .mask = MASK_ACROSS(\ 1217#define INTR_AUTO_PI(fldname) { .mask = MASK_ACROSS(\
1213 SYM_LSB(IntMask, fldname##Mask##_1), \ 1218 SYM_LSB(IntMask, fldname##Mask##_1), \
1214 SYM_LSB(IntMask, fldname##Mask##_0)), \ 1219 SYM_LSB(IntMask, fldname##Mask##_0)), \
1215 .msg = #fldname "_P" } 1220 .msg = #fldname "_P", .sz = sizeof(#fldname "_P") }
1216/* 1221/*
1217 * Below generates "auto-message" for interrupts specific to a context, 1222 * Below generates "auto-message" for interrupts specific to a context,
1218 * with ctxt-number appended 1223 * with ctxt-number appended
@@ -1220,7 +1225,7 @@ static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
1220#define INTR_AUTO_C(fldname) { .mask = MASK_ACROSS(\ 1225#define INTR_AUTO_C(fldname) { .mask = MASK_ACROSS(\
1221 SYM_LSB(IntMask, fldname##0IntMask), \ 1226 SYM_LSB(IntMask, fldname##0IntMask), \
1222 SYM_LSB(IntMask, fldname##17IntMask)), \ 1227 SYM_LSB(IntMask, fldname##17IntMask)), \
1223 .msg = #fldname "_C"} 1228 .msg = #fldname "_C", .sz = sizeof(#fldname "_C") }
1224 1229
1225static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { 1230static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = {
1226 INTR_AUTO_P(SDmaInt), 1231 INTR_AUTO_P(SDmaInt),
@@ -1234,11 +1239,12 @@ static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = {
1234 INTR_AUTO_P(SendDoneInt), 1239 INTR_AUTO_P(SendDoneInt),
1235 INTR_AUTO(SendBufAvailInt), 1240 INTR_AUTO(SendBufAvailInt),
1236 INTR_AUTO_C(RcvAvail), 1241 INTR_AUTO_C(RcvAvail),
1237 { .mask = 0 } 1242 { .mask = 0, .sz = 0 }
1238}; 1243};
1239 1244
1240#define TXSYMPTOM_AUTO_P(fldname) \ 1245#define TXSYMPTOM_AUTO_P(fldname) \
1241 { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), .msg = #fldname } 1246 { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \
1247 .msg = #fldname, .sz = sizeof(#fldname) }
1242static const struct qib_hwerror_msgs hdrchk_msgs[] = { 1248static const struct qib_hwerror_msgs hdrchk_msgs[] = {
1243 TXSYMPTOM_AUTO_P(NonKeyPacket), 1249 TXSYMPTOM_AUTO_P(NonKeyPacket),
1244 TXSYMPTOM_AUTO_P(GRHFail), 1250 TXSYMPTOM_AUTO_P(GRHFail),
@@ -1247,7 +1253,7 @@ static const struct qib_hwerror_msgs hdrchk_msgs[] = {
1247 TXSYMPTOM_AUTO_P(SLIDFail), 1253 TXSYMPTOM_AUTO_P(SLIDFail),
1248 TXSYMPTOM_AUTO_P(RawIPV6), 1254 TXSYMPTOM_AUTO_P(RawIPV6),
1249 TXSYMPTOM_AUTO_P(PacketTooSmall), 1255 TXSYMPTOM_AUTO_P(PacketTooSmall),
1250 { .mask = 0 } 1256 { .mask = 0, .sz = 0 }
1251}; 1257};
1252 1258
1253#define IBA7322_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */ 1259#define IBA7322_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */
@@ -1292,7 +1298,7 @@ static void err_decode(char *msg, size_t len, u64 errs,
1292 u64 these, lmask; 1298 u64 these, lmask;
1293 int took, multi, n = 0; 1299 int took, multi, n = 0;
1294 1300
1295 while (msp && msp->mask) { 1301 while (errs && msp && msp->mask) {
1296 multi = (msp->mask & (msp->mask - 1)); 1302 multi = (msp->mask & (msp->mask - 1));
1297 while (errs & msp->mask) { 1303 while (errs & msp->mask) {
1298 these = (errs & msp->mask); 1304 these = (errs & msp->mask);
@@ -1303,9 +1309,14 @@ static void err_decode(char *msg, size_t len, u64 errs,
1303 *msg++ = ','; 1309 *msg++ = ',';
1304 len--; 1310 len--;
1305 } 1311 }
1306 took = scnprintf(msg, len, "%s", msp->msg); 1312 BUG_ON(!msp->sz);
1313 /* msp->sz counts the nul */
1314 took = min_t(size_t, msp->sz - (size_t)1, len);
1315 memcpy(msg, msp->msg, took);
1307 len -= took; 1316 len -= took;
1308 msg += took; 1317 msg += took;
1318 if (len)
1319 *msg = '\0';
1309 } 1320 }
1310 errs &= ~lmask; 1321 errs &= ~lmask;
1311 if (len && multi) { 1322 if (len && multi) {
@@ -1643,6 +1654,14 @@ done:
1643 return; 1654 return;
1644} 1655}
1645 1656
1657static void qib_error_tasklet(unsigned long data)
1658{
1659 struct qib_devdata *dd = (struct qib_devdata *)data;
1660
1661 handle_7322_errors(dd);
1662 qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
1663}
1664
1646static void reenable_chase(unsigned long opaque) 1665static void reenable_chase(unsigned long opaque)
1647{ 1666{
1648 struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; 1667 struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
@@ -2724,8 +2743,10 @@ static noinline void unlikely_7322_intr(struct qib_devdata *dd, u64 istat)
2724 unknown_7322_ibits(dd, istat); 2743 unknown_7322_ibits(dd, istat);
2725 if (istat & QIB_I_GPIO) 2744 if (istat & QIB_I_GPIO)
2726 unknown_7322_gpio_intr(dd); 2745 unknown_7322_gpio_intr(dd);
2727 if (istat & QIB_I_C_ERROR) 2746 if (istat & QIB_I_C_ERROR) {
2728 handle_7322_errors(dd); 2747 qib_write_kreg(dd, kr_errmask, 0ULL);
2748 tasklet_schedule(&dd->error_tasklet);
2749 }
2729 if (istat & INT_MASK_P(Err, 0) && dd->rcd[0]) 2750 if (istat & INT_MASK_P(Err, 0) && dd->rcd[0])
2730 handle_7322_p_errors(dd->rcd[0]->ppd); 2751 handle_7322_p_errors(dd->rcd[0]->ppd);
2731 if (istat & INT_MASK_P(Err, 1) && dd->rcd[1]) 2752 if (istat & INT_MASK_P(Err, 1) && dd->rcd[1])
@@ -3124,6 +3145,8 @@ try_intx:
3124 arg = dd->rcd[ctxt]; 3145 arg = dd->rcd[ctxt];
3125 if (!arg) 3146 if (!arg)
3126 continue; 3147 continue;
3148 if (qib_krcvq01_no_msi && ctxt < 2)
3149 continue;
3127 lsb = QIB_I_RCVAVAIL_LSB + ctxt; 3150 lsb = QIB_I_RCVAVAIL_LSB + ctxt;
3128 handler = qib_7322pintr; 3151 handler = qib_7322pintr;
3129 name = QIB_DRV_NAME " (kctx)"; 3152 name = QIB_DRV_NAME " (kctx)";
@@ -3158,6 +3181,8 @@ try_intx:
3158 for (i = 0; i < ARRAY_SIZE(redirect); i++) 3181 for (i = 0; i < ARRAY_SIZE(redirect); i++)
3159 qib_write_kreg(dd, kr_intredirect + i, redirect[i]); 3182 qib_write_kreg(dd, kr_intredirect + i, redirect[i]);
3160 dd->cspec->main_int_mask = mask; 3183 dd->cspec->main_int_mask = mask;
3184 tasklet_init(&dd->error_tasklet, qib_error_tasklet,
3185 (unsigned long)dd);
3161bail:; 3186bail:;
3162} 3187}
3163 3188
@@ -6787,6 +6812,10 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
6787 (i >= ARRAY_SIZE(irq_table) && 6812 (i >= ARRAY_SIZE(irq_table) &&
6788 dd->rcd[i - ARRAY_SIZE(irq_table)])) 6813 dd->rcd[i - ARRAY_SIZE(irq_table)]))
6789 actual_cnt++; 6814 actual_cnt++;
6815 /* reduce by ctxt's < 2 */
6816 if (qib_krcvq01_no_msi)
6817 actual_cnt -= dd->num_pports;
6818
6790 tabsize = actual_cnt; 6819 tabsize = actual_cnt;
6791 dd->cspec->msix_entries = kmalloc(tabsize * 6820 dd->cspec->msix_entries = kmalloc(tabsize *
6792 sizeof(struct msix_entry), GFP_KERNEL); 6821 sizeof(struct msix_entry), GFP_KERNEL);