author    Sagi Grimberg <sagig@mellanox.com>    2014-02-23 07:19:12 -0500
committer Roland Dreier <roland@purestorage.com>    2014-03-07 14:40:04 -0500
commit    d5436ba01075ef4629015f7a00914d64ffd795d6
tree      1eb91ca4b7454c86c01fa794f93250203c37023c /drivers/infiniband/hw
parent    e6631814fb3ac454fbbf47ea343c2b9508e4e1ba
IB/mlx5: Collect signature error completion
This commit handles the signature error CQE generated by the HW (if one occurred). The underlying mlx5 driver will handle signature error completions and mark the relevant memory region as dirty.

Once the consumer gets the completion for the transaction, it must check for signature errors on the signature memory region using the new lightweight verb ib_check_mr_status(). If the user does not check for signature errors (i.e. does not call ib_check_mr_status() with check mask IB_MR_CHECK_SIG_STATUS), the memory region cannot be used for another signature operation (a REG_SIG_MR work request will fail).

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
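For context, a minimal consumer-side sketch of the check-before-reuse protocol the message describes. This is not part of the patch: example_transaction_done() is a hypothetical callback, while ib_check_mr_status(), IB_MR_CHECK_SIG_STATUS and struct ib_mr_status are the interfaces this series introduces.

#include <rdma/ib_verbs.h>

/* Hypothetical completion callback for a transaction that used a
 * signature MR: the MR must be checked before the next REG_SIG_MR. */
static void example_transaction_done(struct ib_mr *sig_mr)
{
	struct ib_mr_status mr_status;
	int ret;

	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
	if (ret) {
		pr_err("ib_check_mr_status failed: %d\n", ret);
		return;
	}

	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS)
		pr_err("signature error %d at offset %llu on key 0x%x (expected 0x%x, actual 0x%x)\n",
		       mr_status.sig_err.err_type,
		       (unsigned long long)mr_status.sig_err.sig_err_offset,
		       mr_status.sig_err.key,
		       mr_status.sig_err.expected,
		       mr_status.sig_err.actual);

	/* Checked either way: the MR may be used in a new REG_SIG_MR WR. */
}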
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c       62
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c      1
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h   7
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c       46
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c        8
5 files changed, 122 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index b1705ce6eb88..62bb6b49dc1d 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -366,6 +366,38 @@ static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
 	mlx5_buf_free(&dev->mdev, &buf->buf);
 }
 
+static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
+			     struct ib_sig_err *item)
+{
+	u16 syndrome = be16_to_cpu(cqe->syndrome);
+
+#define GUARD_ERR   (1 << 13)
+#define APPTAG_ERR  (1 << 12)
+#define REFTAG_ERR  (1 << 11)
+
+	if (syndrome & GUARD_ERR) {
+		item->err_type = IB_SIG_BAD_GUARD;
+		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
+		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
+	} else
+	if (syndrome & REFTAG_ERR) {
+		item->err_type = IB_SIG_BAD_REFTAG;
+		item->expected = be32_to_cpu(cqe->expected_reftag);
+		item->actual = be32_to_cpu(cqe->actual_reftag);
+	} else
+	if (syndrome & APPTAG_ERR) {
+		item->err_type = IB_SIG_BAD_APPTAG;
+		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
+		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
+	} else {
+		pr_err("Got signature completion error with bad syndrome %04x\n",
+		       syndrome);
+	}
+
+	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
+	item->key = be32_to_cpu(cqe->mkey);
+}
+
 static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 			 struct mlx5_ib_qp **cur_qp,
 			 struct ib_wc *wc)
@@ -375,6 +407,9 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 	struct mlx5_cqe64 *cqe64;
 	struct mlx5_core_qp *mqp;
 	struct mlx5_ib_wq *wq;
+	struct mlx5_sig_err_cqe *sig_err_cqe;
+	struct mlx5_core_mr *mmr;
+	struct mlx5_ib_mr *mr;
 	uint8_t opcode;
 	uint32_t qpn;
 	u16 wqe_ctr;
@@ -475,6 +510,33 @@ repoll:
 			}
 		}
 		break;
+	case MLX5_CQE_SIG_ERR:
+		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+
+		read_lock(&dev->mdev.priv.mr_table.lock);
+		mmr = __mlx5_mr_lookup(&dev->mdev,
+				       mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+		if (unlikely(!mmr)) {
+			read_unlock(&dev->mdev.priv.mr_table.lock);
+			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
+				     cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
+			return -EINVAL;
+		}
+
+		mr = to_mibmr(mmr);
+		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
+		mr->sig->sig_err_exists = true;
+		mr->sig->sigerr_count++;
+
+		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
+			     cq->mcq.cqn, mr->sig->err_item.key,
+			     mr->sig->err_item.err_type,
+			     mr->sig->err_item.sig_err_offset,
+			     mr->sig->err_item.expected,
+			     mr->sig->err_item.actual);
+
+		read_unlock(&dev->mdev.priv.mr_table.lock);
+		goto repoll;
 	}
 
 	return 0;
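As an illustration (not part of the patch), a hypothetical helper naming the error classes that get_sig_err_item() decodes from the CQE syndrome bits; the field widths mirror the extraction above: guard tag in the upper 16 bits of the transmitted signature, application tag in the lower 16 bits, reference tag a full 32 bits.

/* Illustrative only: name the error class decoded by get_sig_err_item().
 * Assumes <rdma/ib_verbs.h> for enum ib_sig_err_type. */
static const char *example_sig_err_name(enum ib_sig_err_type err_type)
{
	switch (err_type) {
	case IB_SIG_BAD_GUARD:
		return "guard tag mismatch";       /* trans_sig bits 31:16 */
	case IB_SIG_BAD_REFTAG:
		return "reference tag mismatch";   /* full 32-bit reftag */
	case IB_SIG_BAD_APPTAG:
		return "application tag mismatch"; /* trans_sig bits 15:0 */
	default:
		return "unknown signature error";
	}
}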
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 7260a299c6db..ba3ecec7fa65 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1431,6 +1431,7 @@ static int init_one(struct pci_dev *pdev,
 	dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
 	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
 	dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
+	dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
 
 	if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
 		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e438f08899ae..50541586e0a6 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -400,6 +400,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
 	return container_of(mqp, struct mlx5_ib_qp, mqp);
 }
 
+static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
+{
+	return container_of(mmr, struct mlx5_ib_mr, mmr);
+}
+
 static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
 {
 	return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -537,6 +542,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+			    struct ib_mr_status *mr_status);
 
 static inline void init_query_mad(struct ib_smp *mad)
 {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 032445c47608..81392b26d078 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1038,6 +1038,11 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
 		access_mode = MLX5_ACCESS_MODE_KLM;
 		mr->sig->psv_memory.psv_idx = psv_index[0];
 		mr->sig->psv_wire.psv_idx = psv_index[1];
+
+		mr->sig->sig_status_checked = true;
+		mr->sig->sig_err_exists = false;
+		/* Next UMR, Arm SIGERR */
+		++mr->sig->sigerr_count;
 	}
 
 	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
@@ -1188,3 +1193,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
 	kfree(mfrpl->ibfrpl.page_list);
 	kfree(mfrpl);
 }
+
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+			    struct ib_mr_status *mr_status)
+{
+	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+	int ret = 0;
+
+	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
+		pr_err("Invalid status check mask\n");
+		ret = -EINVAL;
+		goto done;
+	}
+
+	mr_status->fail_status = 0;
+	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
+		if (!mmr->sig) {
+			ret = -EINVAL;
+			pr_err("signature status check requested on a non-signature enabled MR\n");
+			goto done;
+		}
+
+		mmr->sig->sig_status_checked = true;
+		if (!mmr->sig->sig_err_exists)
+			goto done;
+
+		if (ibmr->lkey == mmr->sig->err_item.key)
+			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
+			       sizeof(mr_status->sig_err));
+		else {
+			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
+			mr_status->sig_err.sig_err_offset = 0;
+			mr_status->sig_err.key = mmr->sig->err_item.key;
+		}
+
+		mmr->sig->sig_err_exists = false;
+		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
+	}
+
+done:
+	return ret;
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 67e79989b181..ae788d27b93f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1784,6 +1784,7 @@ static __be64 sig_mkey_mask(void)
 	result = MLX5_MKEY_MASK_LEN |
 		MLX5_MKEY_MASK_PAGE_SIZE |
 		MLX5_MKEY_MASK_START_ADDR |
+		MLX5_MKEY_MASK_EN_SIGERR |
 		MLX5_MKEY_MASK_EN_RINVAL |
 		MLX5_MKEY_MASK_KEY |
 		MLX5_MKEY_MASK_LR |
@@ -2219,13 +2220,14 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
 {
 	struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
 	u32 sig_key = sig_mr->rkey;
+	u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
 
 	memset(seg, 0, sizeof(*seg));
 
 	seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
 		     MLX5_ACCESS_MODE_KLM;
 	seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
-	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL |
+	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
 				    MLX5_MKEY_BSF_EN | pdn);
 	seg->len = cpu_to_be64(length);
 	seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
@@ -2255,7 +2257,8 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
 	if (unlikely(wr->num_sge != 1) ||
 	    unlikely(wr->wr.sig_handover.access_flags &
 		     IB_ACCESS_REMOTE_ATOMIC) ||
-	    unlikely(!sig_mr->sig) || unlikely(!qp->signature_en))
+	    unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
+	    unlikely(!sig_mr->sig->sig_status_checked))
 		return -EINVAL;
 
 	/* length of the protected region, data + protection */
@@ -2286,6 +2289,7 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
 	if (ret)
 		return ret;
 
+	sig_mr->sig->sig_status_checked = false;
 	return 0;
 }
 
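A note on the arming scheme above (illustration, not part of the patch): the CQ handler increments sigerr_count for every signature error CQE, and set_sig_mkey_segment() writes the count's low bit into bit 26 of flags_pd on the next REG_SIG_MR, which, per the "Next UMR, Arm SIGERR" comment in mr.c, re-arms signature error reporting for the mkey. A sketch of the parity computation, with a hypothetical helper name (the real code computes this inline):

/* Illustration only: parity bit written into the mkey context. */
static u32 example_sig_mkey_flags_pd(u32 pdn, u32 sigerr_count)
{
	u8 sigerr = sigerr_count & 1;	/* flips after every SIGERR CQE */

	return MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn;
}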