aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorMike Marciniszyn <mike.marciniszyn@qlogic.com>2012-02-25 20:45:49 -0500
committerRoland Dreier <roland@purestorage.com>2012-02-25 20:45:49 -0500
commita778f3fddc6fc2ed4c065f6e160d517a5959f949 (patch)
treeac024b855f69f9f512458b8a0e8e4a4cf7b9f1c7 /drivers/infiniband
parent6b21d18ed50c7d145220b0724ea7f2613abf0f95 (diff)
IB/qib: Add logic for affinity hint
Call irq_set_affinity_hint() to give userspace programs such as irqbalance the information they need to distribute qib interrupts appropriately. The logic assigns all non-receive interrupts to the first CPU local to the HCA. Receive interrupts are assigned round-robin starting with the second CPU local to the HCA, wrapping back to the second CPU when the local CPUs are exhausted. This patch also refines the name registered for MSI-X interrupts to include the unit number, so that user-level scripts can determine the device associated with each IRQ when there are multiple HCAs with potentially different sets of local CPUs. Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/qib/qib.h10
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c107
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c21
3 files changed, 104 insertions, 34 deletions
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index b881bdc401f5..6b811e3e8bd1 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -427,6 +427,14 @@ struct qib_verbs_txreq {
427/* how often we check for packet activity for "power on hours" (in seconds) */ 427/* how often we check for packet activity for "power on hours" (in seconds) */
428#define ACTIVITY_TIMER 5 428#define ACTIVITY_TIMER 5
429 429
430#define MAX_NAME_SIZE 64
431struct qib_msix_entry {
432 struct msix_entry msix;
433 void *arg;
434 char name[MAX_NAME_SIZE];
435 cpumask_var_t mask;
436};
437
430/* Below is an opaque struct. Each chip (device) can maintain 438/* Below is an opaque struct. Each chip (device) can maintain
431 * private data needed for its operation, but not germane to the 439 * private data needed for its operation, but not germane to the
432 * rest of the driver. For convenience, we define another that 440 * rest of the driver. For convenience, we define another that
@@ -1355,7 +1363,7 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *);
1355int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, 1363int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *,
1356 const struct pci_device_id *); 1364 const struct pci_device_id *);
1357void qib_pcie_ddcleanup(struct qib_devdata *); 1365void qib_pcie_ddcleanup(struct qib_devdata *);
1358int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct msix_entry *); 1366int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *);
1359int qib_reinit_intr(struct qib_devdata *); 1367int qib_reinit_intr(struct qib_devdata *);
1360void qib_enable_intx(struct pci_dev *); 1368void qib_enable_intx(struct pci_dev *);
1361void qib_nomsi(struct qib_devdata *); 1369void qib_nomsi(struct qib_devdata *);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 41e92089e41b..060b96064469 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -541,8 +541,7 @@ struct qib_chip_specific {
541 u32 lastbuf_for_pio; 541 u32 lastbuf_for_pio;
542 u32 stay_in_freeze; 542 u32 stay_in_freeze;
543 u32 recovery_ports_initted; 543 u32 recovery_ports_initted;
544 struct msix_entry *msix_entries; 544 struct qib_msix_entry *msix_entries;
545 void **msix_arg;
546 unsigned long *sendchkenable; 545 unsigned long *sendchkenable;
547 unsigned long *sendgrhchk; 546 unsigned long *sendgrhchk;
548 unsigned long *sendibchk; 547 unsigned long *sendibchk;
@@ -639,24 +638,24 @@ static struct {
639 int lsb; 638 int lsb;
640 int port; /* 0 if not port-specific, else port # */ 639 int port; /* 0 if not port-specific, else port # */
641} irq_table[] = { 640} irq_table[] = {
642 { QIB_DRV_NAME, qib_7322intr, -1, 0 }, 641 { "", qib_7322intr, -1, 0 },
643 { QIB_DRV_NAME " (buf avail)", qib_7322bufavail, 642 { " (buf avail)", qib_7322bufavail,
644 SYM_LSB(IntStatus, SendBufAvail), 0 }, 643 SYM_LSB(IntStatus, SendBufAvail), 0 },
645 { QIB_DRV_NAME " (sdma 0)", sdma_intr, 644 { " (sdma 0)", sdma_intr,
646 SYM_LSB(IntStatus, SDmaInt_0), 1 }, 645 SYM_LSB(IntStatus, SDmaInt_0), 1 },
647 { QIB_DRV_NAME " (sdma 1)", sdma_intr, 646 { " (sdma 1)", sdma_intr,
648 SYM_LSB(IntStatus, SDmaInt_1), 2 }, 647 SYM_LSB(IntStatus, SDmaInt_1), 2 },
649 { QIB_DRV_NAME " (sdmaI 0)", sdma_idle_intr, 648 { " (sdmaI 0)", sdma_idle_intr,
650 SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, 649 SYM_LSB(IntStatus, SDmaIdleInt_0), 1 },
651 { QIB_DRV_NAME " (sdmaI 1)", sdma_idle_intr, 650 { " (sdmaI 1)", sdma_idle_intr,
652 SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, 651 SYM_LSB(IntStatus, SDmaIdleInt_1), 2 },
653 { QIB_DRV_NAME " (sdmaP 0)", sdma_progress_intr, 652 { " (sdmaP 0)", sdma_progress_intr,
654 SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, 653 SYM_LSB(IntStatus, SDmaProgressInt_0), 1 },
655 { QIB_DRV_NAME " (sdmaP 1)", sdma_progress_intr, 654 { " (sdmaP 1)", sdma_progress_intr,
656 SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, 655 SYM_LSB(IntStatus, SDmaProgressInt_1), 2 },
657 { QIB_DRV_NAME " (sdmaC 0)", sdma_cleanup_intr, 656 { " (sdmaC 0)", sdma_cleanup_intr,
658 SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, 657 SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 },
659 { QIB_DRV_NAME " (sdmaC 1)", sdma_cleanup_intr, 658 { " (sdmaC 1)", sdma_cleanup_intr,
660 SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, 659 SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 },
661}; 660};
662 661
@@ -2567,9 +2566,13 @@ static void qib_7322_nomsix(struct qib_devdata *dd)
2567 int i; 2566 int i;
2568 2567
2569 dd->cspec->num_msix_entries = 0; 2568 dd->cspec->num_msix_entries = 0;
2570 for (i = 0; i < n; i++) 2569 for (i = 0; i < n; i++) {
2571 free_irq(dd->cspec->msix_entries[i].vector, 2570 irq_set_affinity_hint(
2572 dd->cspec->msix_arg[i]); 2571 dd->cspec->msix_entries[i].msix.vector, NULL);
2572 free_cpumask_var(dd->cspec->msix_entries[i].mask);
2573 free_irq(dd->cspec->msix_entries[i].msix.vector,
2574 dd->cspec->msix_entries[i].arg);
2575 }
2573 qib_nomsix(dd); 2576 qib_nomsix(dd);
2574 } 2577 }
2575 /* make sure no MSIx interrupts are left pending */ 2578 /* make sure no MSIx interrupts are left pending */
@@ -2597,7 +2600,6 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
2597 kfree(dd->cspec->sendgrhchk); 2600 kfree(dd->cspec->sendgrhchk);
2598 kfree(dd->cspec->sendibchk); 2601 kfree(dd->cspec->sendibchk);
2599 kfree(dd->cspec->msix_entries); 2602 kfree(dd->cspec->msix_entries);
2600 kfree(dd->cspec->msix_arg);
2601 for (i = 0; i < dd->num_pports; i++) { 2603 for (i = 0; i < dd->num_pports; i++) {
2602 unsigned long flags; 2604 unsigned long flags;
2603 u32 mask = QSFP_GPIO_MOD_PRS_N | 2605 u32 mask = QSFP_GPIO_MOD_PRS_N |
@@ -3070,6 +3072,8 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
3070 int ret, i, msixnum; 3072 int ret, i, msixnum;
3071 u64 redirect[6]; 3073 u64 redirect[6];
3072 u64 mask; 3074 u64 mask;
3075 const struct cpumask *local_mask;
3076 int firstcpu, secondcpu = 0, currrcvcpu = 0;
3073 3077
3074 if (!dd->num_pports) 3078 if (!dd->num_pports)
3075 return; 3079 return;
@@ -3118,13 +3122,28 @@ try_intx:
3118 memset(redirect, 0, sizeof redirect); 3122 memset(redirect, 0, sizeof redirect);
3119 mask = ~0ULL; 3123 mask = ~0ULL;
3120 msixnum = 0; 3124 msixnum = 0;
3125 local_mask = cpumask_of_pcibus(dd->pcidev->bus);
3126 firstcpu = cpumask_first(local_mask);
3127 if (firstcpu >= nr_cpu_ids ||
3128 cpumask_weight(local_mask) == num_online_cpus()) {
3129 local_mask = topology_core_cpumask(0);
3130 firstcpu = cpumask_first(local_mask);
3131 }
3132 if (firstcpu < nr_cpu_ids) {
3133 secondcpu = cpumask_next(firstcpu, local_mask);
3134 if (secondcpu >= nr_cpu_ids)
3135 secondcpu = firstcpu;
3136 currrcvcpu = secondcpu;
3137 }
3121 for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) { 3138 for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) {
3122 irq_handler_t handler; 3139 irq_handler_t handler;
3123 const char *name;
3124 void *arg; 3140 void *arg;
3125 u64 val; 3141 u64 val;
3126 int lsb, reg, sh; 3142 int lsb, reg, sh;
3127 3143
3144 dd->cspec->msix_entries[msixnum].
3145 name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1]
3146 = '\0';
3128 if (i < ARRAY_SIZE(irq_table)) { 3147 if (i < ARRAY_SIZE(irq_table)) {
3129 if (irq_table[i].port) { 3148 if (irq_table[i].port) {
3130 /* skip if for a non-configured port */ 3149 /* skip if for a non-configured port */
@@ -3135,7 +3154,11 @@ try_intx:
3135 arg = dd; 3154 arg = dd;
3136 lsb = irq_table[i].lsb; 3155 lsb = irq_table[i].lsb;
3137 handler = irq_table[i].handler; 3156 handler = irq_table[i].handler;
3138 name = irq_table[i].name; 3157 snprintf(dd->cspec->msix_entries[msixnum].name,
3158 sizeof(dd->cspec->msix_entries[msixnum].name)
3159 - 1,
3160 QIB_DRV_NAME "%d%s", dd->unit,
3161 irq_table[i].name);
3139 } else { 3162 } else {
3140 unsigned ctxt; 3163 unsigned ctxt;
3141 3164
@@ -3148,23 +3171,28 @@ try_intx:
3148 continue; 3171 continue;
3149 lsb = QIB_I_RCVAVAIL_LSB + ctxt; 3172 lsb = QIB_I_RCVAVAIL_LSB + ctxt;
3150 handler = qib_7322pintr; 3173 handler = qib_7322pintr;
3151 name = QIB_DRV_NAME " (kctx)"; 3174 snprintf(dd->cspec->msix_entries[msixnum].name,
3175 sizeof(dd->cspec->msix_entries[msixnum].name)
3176 - 1,
3177 QIB_DRV_NAME "%d (kctx)", dd->unit);
3152 } 3178 }
3153 ret = request_irq(dd->cspec->msix_entries[msixnum].vector, 3179 ret = request_irq(
3154 handler, 0, name, arg); 3180 dd->cspec->msix_entries[msixnum].msix.vector,
3181 handler, 0, dd->cspec->msix_entries[msixnum].name,
3182 arg);
3155 if (ret) { 3183 if (ret) {
3156 /* 3184 /*
3157 * Shouldn't happen since the enable said we could 3185 * Shouldn't happen since the enable said we could
3158 * have as many as we are trying to setup here. 3186 * have as many as we are trying to setup here.
3159 */ 3187 */
3160 qib_dev_err(dd, "Couldn't setup MSIx " 3188 qib_dev_err(dd, "Couldn't setup MSIx "
3161 "interrupt (vec=%d, irq=%d): %d\n", msixnum, 3189 "interrupt (vec=%d, irq=%d): %d\n", msixnum,
3162 dd->cspec->msix_entries[msixnum].vector, 3190 dd->cspec->msix_entries[msixnum].msix.vector,
3163 ret); 3191 ret);
3164 qib_7322_nomsix(dd); 3192 qib_7322_nomsix(dd);
3165 goto try_intx; 3193 goto try_intx;
3166 } 3194 }
3167 dd->cspec->msix_arg[msixnum] = arg; 3195 dd->cspec->msix_entries[msixnum].arg = arg;
3168 if (lsb >= 0) { 3196 if (lsb >= 0) {
3169 reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; 3197 reg = lsb / IBA7322_REDIRECT_VEC_PER_REG;
3170 sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * 3198 sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) *
@@ -3174,6 +3202,25 @@ try_intx:
3174 } 3202 }
3175 val = qib_read_kreg64(dd, 2 * msixnum + 1 + 3203 val = qib_read_kreg64(dd, 2 * msixnum + 1 +
3176 (QIB_7322_MsixTable_OFFS / sizeof(u64))); 3204 (QIB_7322_MsixTable_OFFS / sizeof(u64)));
3205 if (firstcpu < nr_cpu_ids &&
3206 zalloc_cpumask_var(
3207 &dd->cspec->msix_entries[msixnum].mask,
3208 GFP_KERNEL)) {
3209 if (handler == qib_7322pintr) {
3210 cpumask_set_cpu(currrcvcpu,
3211 dd->cspec->msix_entries[msixnum].mask);
3212 currrcvcpu = cpumask_next(currrcvcpu,
3213 local_mask);
3214 if (currrcvcpu >= nr_cpu_ids)
3215 currrcvcpu = secondcpu;
3216 } else {
3217 cpumask_set_cpu(firstcpu,
3218 dd->cspec->msix_entries[msixnum].mask);
3219 }
3220 irq_set_affinity_hint(
3221 dd->cspec->msix_entries[msixnum].msix.vector,
3222 dd->cspec->msix_entries[msixnum].mask);
3223 }
3177 msixnum++; 3224 msixnum++;
3178 } 3225 }
3179 /* Initialize the vector mapping */ 3226 /* Initialize the vector mapping */
@@ -3365,7 +3412,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
3365 if (msix_entries) { 3412 if (msix_entries) {
3366 /* restore the MSIx vector address and data if saved above */ 3413 /* restore the MSIx vector address and data if saved above */
3367 for (i = 0; i < msix_entries; i++) { 3414 for (i = 0; i < msix_entries; i++) {
3368 dd->cspec->msix_entries[i].entry = i; 3415 dd->cspec->msix_entries[i].msix.entry = i;
3369 if (!msix_vecsave || !msix_vecsave[2 * i]) 3416 if (!msix_vecsave || !msix_vecsave[2 * i])
3370 continue; 3417 continue;
3371 qib_write_kreg(dd, 2 * i + 3418 qib_write_kreg(dd, 2 * i +
@@ -6865,15 +6912,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
6865 6912
6866 tabsize = actual_cnt; 6913 tabsize = actual_cnt;
6867 dd->cspec->msix_entries = kmalloc(tabsize * 6914 dd->cspec->msix_entries = kmalloc(tabsize *
6868 sizeof(struct msix_entry), GFP_KERNEL); 6915 sizeof(struct qib_msix_entry), GFP_KERNEL);
6869 dd->cspec->msix_arg = kmalloc(tabsize * 6916 if (!dd->cspec->msix_entries) {
6870 sizeof(void *), GFP_KERNEL);
6871 if (!dd->cspec->msix_entries || !dd->cspec->msix_arg) {
6872 qib_dev_err(dd, "No memory for MSIx table\n"); 6917 qib_dev_err(dd, "No memory for MSIx table\n");
6873 tabsize = 0; 6918 tabsize = 0;
6874 } 6919 }
6875 for (i = 0; i < tabsize; i++) 6920 for (i = 0; i < tabsize; i++)
6876 dd->cspec->msix_entries[i].entry = i; 6921 dd->cspec->msix_entries[i].msix.entry = i;
6877 6922
6878 if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) 6923 if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries))
6879 qib_dev_err(dd, "Failed to setup PCIe or interrupts; " 6924 qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 0fde788e1100..790646ef5106 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -194,11 +194,24 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd)
194} 194}
195 195
196static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, 196static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
197 struct msix_entry *msix_entry) 197 struct qib_msix_entry *qib_msix_entry)
198{ 198{
199 int ret; 199 int ret;
200 u32 tabsize = 0; 200 u32 tabsize = 0;
201 u16 msix_flags; 201 u16 msix_flags;
202 struct msix_entry *msix_entry;
203 int i;
204
205 /* We can't pass the qib_msix_entry array to pci_enable_msix(),
206 * so use a temporary msix_entry array and copy the allocated
207 * entries back to the qib_msix_entry array. */
208 msix_entry = kmalloc(*msixcnt * sizeof(*msix_entry), GFP_KERNEL);
209 if (!msix_entry) {
210 ret = -ENOMEM;
211 goto do_intx;
212 }
213 for (i = 0; i < *msixcnt; i++)
214 msix_entry[i] = qib_msix_entry[i].msix;
202 215
203 pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags); 216 pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags);
204 tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE); 217 tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE);
@@ -209,11 +222,15 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
209 tabsize = ret; 222 tabsize = ret;
210 ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); 223 ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
211 } 224 }
225do_intx:
212 if (ret) { 226 if (ret) {
213 qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, " 227 qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, "
214 "falling back to INTx\n", tabsize, ret); 228 "falling back to INTx\n", tabsize, ret);
215 tabsize = 0; 229 tabsize = 0;
216 } 230 }
231 for (i = 0; i < tabsize; i++)
232 qib_msix_entry[i].msix = msix_entry[i];
233 kfree(msix_entry);
217 *msixcnt = tabsize; 234 *msixcnt = tabsize;
218 235
219 if (ret) 236 if (ret)
@@ -251,7 +268,7 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos)
251} 268}
252 269
253int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, 270int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
254 struct msix_entry *entry) 271 struct qib_msix_entry *entry)
255{ 272{
256 u16 linkstat, speed; 273 u16 linkstat, speed;
257 int pos = 0, pose, ret = 1; 274 int pos = 0, pose, ret = 1;