diff options
author | Tadeusz Struk <tadeusz.struk@intel.com> | 2017-04-28 13:40:02 -0400 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2017-04-28 13:56:15 -0400 |
commit | 22546b741af8355cd2e16739b6af4a8f17081839 (patch) | |
tree | 019358144eb006a79f69fe6f965b8c6a7b5a09ab | |
parent | b6eac931b9bb2bce4db7032c35b41e5e34ec22a5 (diff) |
IB/hfi1: Fix softlockup issue
Soft lockups can occur because the mad processing on different CPUs acquire
the spin lock dc8051_lock:
[534552.835870] [<ffffffffa026f993>] ? read_dev_port_cntr.isra.37+0x23/0x160 [hfi1]
[534552.835880] [<ffffffffa02775af>] read_dev_cntr+0x4f/0x60 [hfi1]
[534552.835893] [<ffffffffa028d7cd>] pma_get_opa_portstatus+0x64d/0x8c0 [hfi1]
[534552.835904] [<ffffffffa0290e7d>] hfi1_process_mad+0x48d/0x18c0 [hfi1]
[534552.835908] [<ffffffff811dc1f1>] ? __slab_free+0x81/0x2f0
[534552.835936] [<ffffffffa024c34e>] ? ib_mad_recv_done+0x21e/0xa30 [ib_core]
[534552.835939] [<ffffffff811dd153>] ? __kmalloc+0x1f3/0x240
[534552.835947] [<ffffffffa024c3fb>] ib_mad_recv_done+0x2cb/0xa30 [ib_core]
[534552.835955] [<ffffffffa0237c85>] __ib_process_cq+0x55/0xd0 [ib_core]
[534552.835962] [<ffffffffa0237d70>] ib_cq_poll_work+0x20/0x60 [ib_core]
[534552.835964] [<ffffffff810a7f3b>] process_one_work+0x17b/0x470
[534552.835966] [<ffffffff810a8d76>] worker_thread+0x126/0x410
[534552.835969] [<ffffffff810a8c50>] ? rescuer_thread+0x460/0x460
[534552.835971] [<ffffffff810b052f>] kthread+0xcf/0xe0
[534552.835974] [<ffffffff810b0460>] ? kthread_create_on_node+0x140/0x140
[534552.835977] [<ffffffff81696418>] ret_from_fork+0x58/0x90
[534552.835980] [<ffffffff810b0460>] ? kthread_create_on_node+0x140/0x140
This issue is made worse when the 8051 is busy and the reads take longer.
Fix by using a non-spinning lock procure.
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Mike Marciszyn <mike.marciniszyn@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r-- | drivers/infiniband/hw/hfi1/chip.c | 86 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/hfi.h | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/init.c | 2 |
3 files changed, 57 insertions, 38 deletions
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e520929ac501..07aa76a74f64 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c | |||
@@ -6410,18 +6410,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort) | |||
6410 | * | 6410 | * |
6411 | * The expectation is that the caller of this routine would have taken | 6411 | * The expectation is that the caller of this routine would have taken |
6412 | * care of properly transitioning the link into the correct state. | 6412 | * care of properly transitioning the link into the correct state. |
6413 | * NOTE: the caller needs to acquire the dd->dc8051_lock lock | ||
6414 | * before calling this function. | ||
6413 | */ | 6415 | */ |
6414 | static void dc_shutdown(struct hfi1_devdata *dd) | 6416 | static void _dc_shutdown(struct hfi1_devdata *dd) |
6415 | { | 6417 | { |
6416 | unsigned long flags; | 6418 | lockdep_assert_held(&dd->dc8051_lock); |
6417 | 6419 | ||
6418 | spin_lock_irqsave(&dd->dc8051_lock, flags); | 6420 | if (dd->dc_shutdown) |
6419 | if (dd->dc_shutdown) { | ||
6420 | spin_unlock_irqrestore(&dd->dc8051_lock, flags); | ||
6421 | return; | 6421 | return; |
6422 | } | 6422 | |
6423 | dd->dc_shutdown = 1; | 6423 | dd->dc_shutdown = 1; |
6424 | spin_unlock_irqrestore(&dd->dc8051_lock, flags); | ||
6425 | /* Shutdown the LCB */ | 6424 | /* Shutdown the LCB */ |
6426 | lcb_shutdown(dd, 1); | 6425 | lcb_shutdown(dd, 1); |
6427 | /* | 6426 | /* |
@@ -6432,35 +6431,45 @@ static void dc_shutdown(struct hfi1_devdata *dd) | |||
6432 | write_csr(dd, DC_DC8051_CFG_RST, 0x1); | 6431 | write_csr(dd, DC_DC8051_CFG_RST, 0x1); |
6433 | } | 6432 | } |
6434 | 6433 | ||
6434 | static void dc_shutdown(struct hfi1_devdata *dd) | ||
6435 | { | ||
6436 | mutex_lock(&dd->dc8051_lock); | ||
6437 | _dc_shutdown(dd); | ||
6438 | mutex_unlock(&dd->dc8051_lock); | ||
6439 | } | ||
6440 | |||
6435 | /* | 6441 | /* |
6436 | * Calling this after the DC has been brought out of reset should not | 6442 | * Calling this after the DC has been brought out of reset should not |
6437 | * do any damage. | 6443 | * do any damage. |
6444 | * NOTE: the caller needs to acquire the dd->dc8051_lock lock | ||
6445 | * before calling this function. | ||
6438 | */ | 6446 | */ |
6439 | static void dc_start(struct hfi1_devdata *dd) | 6447 | static void _dc_start(struct hfi1_devdata *dd) |
6440 | { | 6448 | { |
6441 | unsigned long flags; | 6449 | lockdep_assert_held(&dd->dc8051_lock); |
6442 | int ret; | ||
6443 | 6450 | ||
6444 | spin_lock_irqsave(&dd->dc8051_lock, flags); | ||
6445 | if (!dd->dc_shutdown) | 6451 | if (!dd->dc_shutdown) |
6446 | goto done; | 6452 | return; |
6447 | spin_unlock_irqrestore(&dd->dc8051_lock, flags); | 6453 | |
6448 | /* Take the 8051 out of reset */ | 6454 | /* Take the 8051 out of reset */ |
6449 | write_csr(dd, DC_DC8051_CFG_RST, 0ull); | 6455 | write_csr(dd, DC_DC8051_CFG_RST, 0ull); |
6450 | /* Wait until 8051 is ready */ | 6456 | /* Wait until 8051 is ready */ |
6451 | ret = wait_fm_ready(dd, TIMEOUT_8051_START); | 6457 | if (wait_fm_ready(dd, TIMEOUT_8051_START)) |
6452 | if (ret) { | ||
6453 | dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", | 6458 | dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", |
6454 | __func__); | 6459 | __func__); |
6455 | } | 6460 | |
6456 | /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ | 6461 | /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ |
6457 | write_csr(dd, DCC_CFG_RESET, 0x10); | 6462 | write_csr(dd, DCC_CFG_RESET, 0x10); |
6458 | /* lcb_shutdown() with abort=1 does not restore these */ | 6463 | /* lcb_shutdown() with abort=1 does not restore these */ |
6459 | write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en); | 6464 | write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en); |
6460 | spin_lock_irqsave(&dd->dc8051_lock, flags); | ||
6461 | dd->dc_shutdown = 0; | 6465 | dd->dc_shutdown = 0; |
6462 | done: | 6466 | } |
6463 | spin_unlock_irqrestore(&dd->dc8051_lock, flags); | 6467 | |
6468 | static void dc_start(struct hfi1_devdata *dd) | ||
6469 | { | ||
6470 | mutex_lock(&dd->dc8051_lock); | ||
6471 | _dc_start(dd); | ||
6472 | mutex_unlock(&dd->dc8051_lock); | ||
6464 | } | 6473 | } |
6465 | 6474 | ||
6466 | /* | 6475 | /* |
@@ -8513,16 +8522,11 @@ static int do_8051_command( | |||
8513 | { | 8522 | { |
8514 | u64 reg, completed; | 8523 | u64 reg, completed; |
8515 | int return_code; | 8524 | int return_code; |
8516 | unsigned long flags; | ||
8517 | unsigned long timeout; | 8525 | unsigned long timeout; |
8518 | 8526 | ||
8519 | hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); | 8527 | hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); |
8520 | 8528 | ||
8521 | /* | 8529 | mutex_lock(&dd->dc8051_lock); |
8522 | * Alternative to holding the lock for a long time: | ||
8523 | * - keep busy wait - have other users bounce off | ||
8524 | */ | ||
8525 | spin_lock_irqsave(&dd->dc8051_lock, flags); | ||
8526 | 8530 | ||
8527 | /* We can't send any commands to the 8051 if it's in reset */ | 8531 | /* We can't send any commands to the 8051 if it's in reset */ |
8528 | if (dd->dc_shutdown) { | 8532 | if (dd->dc_shutdown) { |
@@ -8548,10 +8552,8 @@ static int do_8051_command( | |||
8548 | return_code = -ENXIO; | 8552 | return_code = -ENXIO; |
8549 | goto fail; | 8553 | goto fail; |
8550 | } | 8554 | } |
8551 | spin_unlock_irqrestore(&dd->dc8051_lock, flags); | 8555 | _dc_shutdown(dd); |
8552 | dc_shutdown(dd); | 8556 | _dc_start(dd); |
8553 | dc_start(dd); | ||
8554 | spin_lock_irqsave(&dd->dc8051_lock, flags); | ||
8555 | } | 8557 | } |
8556 | 8558 | ||
8557 | /* | 8559 | /* |
@@ -8632,8 +8634,7 @@ static int do_8051_command( | |||
8632 | write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); | 8634 | write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); |
8633 | 8635 | ||
8634 | fail: | 8636 | fail: |
8635 | spin_unlock_irqrestore(&dd->dc8051_lock, flags); | 8637 | mutex_unlock(&dd->dc8051_lock); |
8636 | |||
8637 | return return_code; | 8638 | return return_code; |
8638 | } | 8639 | } |
8639 | 8640 | ||
@@ -12007,6 +12008,10 @@ static void free_cntrs(struct hfi1_devdata *dd) | |||
12007 | dd->scntrs = NULL; | 12008 | dd->scntrs = NULL; |
12008 | kfree(dd->cntrnames); | 12009 | kfree(dd->cntrnames); |
12009 | dd->cntrnames = NULL; | 12010 | dd->cntrnames = NULL; |
12011 | if (dd->update_cntr_wq) { | ||
12012 | destroy_workqueue(dd->update_cntr_wq); | ||
12013 | dd->update_cntr_wq = NULL; | ||
12014 | } | ||
12010 | } | 12015 | } |
12011 | 12016 | ||
12012 | static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, | 12017 | static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, |
@@ -12162,7 +12167,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data) | |||
12162 | return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data); | 12167 | return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data); |
12163 | } | 12168 | } |
12164 | 12169 | ||
12165 | static void update_synth_timer(unsigned long opaque) | 12170 | static void do_update_synth_timer(struct work_struct *work) |
12166 | { | 12171 | { |
12167 | u64 cur_tx; | 12172 | u64 cur_tx; |
12168 | u64 cur_rx; | 12173 | u64 cur_rx; |
@@ -12171,8 +12176,8 @@ static void update_synth_timer(unsigned long opaque) | |||
12171 | int i, j, vl; | 12176 | int i, j, vl; |
12172 | struct hfi1_pportdata *ppd; | 12177 | struct hfi1_pportdata *ppd; |
12173 | struct cntr_entry *entry; | 12178 | struct cntr_entry *entry; |
12174 | 12179 | struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata, | |
12175 | struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; | 12180 | update_cntr_work); |
12176 | 12181 | ||
12177 | /* | 12182 | /* |
12178 | * Rather than keep beating on the CSRs pick a minimal set that we can | 12183 | * Rather than keep beating on the CSRs pick a minimal set that we can |
@@ -12255,7 +12260,13 @@ static void update_synth_timer(unsigned long opaque) | |||
12255 | } else { | 12260 | } else { |
12256 | hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit); | 12261 | hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit); |
12257 | } | 12262 | } |
12263 | } | ||
12258 | 12264 | ||
12265 | static void update_synth_timer(unsigned long opaque) | ||
12266 | { | ||
12267 | struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; | ||
12268 | |||
12269 | queue_work(dd->update_cntr_wq, &dd->update_cntr_work); | ||
12259 | mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); | 12270 | mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); |
12260 | } | 12271 | } |
12261 | 12272 | ||
@@ -12491,6 +12502,13 @@ static int init_cntrs(struct hfi1_devdata *dd) | |||
12491 | if (init_cpu_counters(dd)) | 12502 | if (init_cpu_counters(dd)) |
12492 | goto bail; | 12503 | goto bail; |
12493 | 12504 | ||
12505 | dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d", | ||
12506 | WQ_MEM_RECLAIM, dd->unit); | ||
12507 | if (!dd->update_cntr_wq) | ||
12508 | goto bail; | ||
12509 | |||
12510 | INIT_WORK(&dd->update_cntr_work, do_update_synth_timer); | ||
12511 | |||
12494 | mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); | 12512 | mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); |
12495 | return 0; | 12513 | return 0; |
12496 | bail: | 12514 | bail: |
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 550116fd9d48..d253ea2521a0 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h | |||
@@ -484,7 +484,7 @@ struct rvt_sge_state; | |||
484 | #define HFI1_PART_ENFORCE_OUT 0x2 | 484 | #define HFI1_PART_ENFORCE_OUT 0x2 |
485 | 485 | ||
486 | /* how often we check for synthetic counter wrap around */ | 486 | /* how often we check for synthetic counter wrap around */ |
487 | #define SYNTH_CNT_TIME 2 | 487 | #define SYNTH_CNT_TIME 3 |
488 | 488 | ||
489 | /* Counter flags */ | 489 | /* Counter flags */ |
490 | #define CNTR_NORMAL 0x0 /* Normal counters, just read register */ | 490 | #define CNTR_NORMAL 0x0 /* Normal counters, just read register */ |
@@ -962,8 +962,9 @@ struct hfi1_devdata { | |||
962 | spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ | 962 | spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ |
963 | /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */ | 963 | /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */ |
964 | spinlock_t uctxt_lock; /* rcd and user context changes */ | 964 | spinlock_t uctxt_lock; /* rcd and user context changes */ |
965 | /* exclusive access to 8051 */ | 965 | struct mutex dc8051_lock; /* exclusive access to 8051 */ |
966 | spinlock_t dc8051_lock; | 966 | struct workqueue_struct *update_cntr_wq; |
967 | struct work_struct update_cntr_work; | ||
967 | /* exclusive access to 8051 memory */ | 968 | /* exclusive access to 8051 memory */ |
968 | spinlock_t dc8051_memlock; | 969 | spinlock_t dc8051_memlock; |
969 | int dc8051_timed_out; /* remember if the 8051 timed out */ | 970 | int dc8051_timed_out; /* remember if the 8051 timed out */ |
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index b4c7e04f4578..21dca7ac059c 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c | |||
@@ -1081,11 +1081,11 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) | |||
1081 | spin_lock_init(&dd->uctxt_lock); | 1081 | spin_lock_init(&dd->uctxt_lock); |
1082 | spin_lock_init(&dd->hfi1_diag_trans_lock); | 1082 | spin_lock_init(&dd->hfi1_diag_trans_lock); |
1083 | spin_lock_init(&dd->sc_init_lock); | 1083 | spin_lock_init(&dd->sc_init_lock); |
1084 | spin_lock_init(&dd->dc8051_lock); | ||
1085 | spin_lock_init(&dd->dc8051_memlock); | 1084 | spin_lock_init(&dd->dc8051_memlock); |
1086 | seqlock_init(&dd->sc2vl_lock); | 1085 | seqlock_init(&dd->sc2vl_lock); |
1087 | spin_lock_init(&dd->sde_map_lock); | 1086 | spin_lock_init(&dd->sde_map_lock); |
1088 | spin_lock_init(&dd->pio_map_lock); | 1087 | spin_lock_init(&dd->pio_map_lock); |
1088 | mutex_init(&dd->dc8051_lock); | ||
1089 | init_waitqueue_head(&dd->event_queue); | 1089 | init_waitqueue_head(&dd->event_queue); |
1090 | 1090 | ||
1091 | dd->int_counter = alloc_percpu(u64); | 1091 | dd->int_counter = alloc_percpu(u64); |