diff options
| -rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 31 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 67 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 45 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 145 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 26 | ||||
| -rw-r--r-- | include/linux/mlx5/driver.h | 2 |
6 files changed, 294 insertions, 22 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e6d775f2446d..a801baa79c8e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c | |||
| @@ -864,7 +864,7 @@ static ssize_t show_reg_pages(struct device *device, | |||
| 864 | struct mlx5_ib_dev *dev = | 864 | struct mlx5_ib_dev *dev = |
| 865 | container_of(device, struct mlx5_ib_dev, ib_dev.dev); | 865 | container_of(device, struct mlx5_ib_dev, ib_dev.dev); |
| 866 | 866 | ||
| 867 | return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages); | 867 | return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); |
| 868 | } | 868 | } |
| 869 | 869 | ||
| 870 | static ssize_t show_hca(struct device *device, struct device_attribute *attr, | 870 | static ssize_t show_hca(struct device *device, struct device_attribute *attr, |
| @@ -1389,16 +1389,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) | |||
| 1389 | goto err_eqs; | 1389 | goto err_eqs; |
| 1390 | 1390 | ||
| 1391 | mutex_init(&dev->cap_mask_mutex); | 1391 | mutex_init(&dev->cap_mask_mutex); |
| 1392 | spin_lock_init(&dev->mr_lock); | ||
| 1393 | 1392 | ||
| 1394 | err = create_dev_resources(&dev->devr); | 1393 | err = create_dev_resources(&dev->devr); |
| 1395 | if (err) | 1394 | if (err) |
| 1396 | goto err_eqs; | 1395 | goto err_eqs; |
| 1397 | 1396 | ||
| 1398 | err = ib_register_device(&dev->ib_dev, NULL); | 1397 | err = mlx5_ib_odp_init_one(dev); |
| 1399 | if (err) | 1398 | if (err) |
| 1400 | goto err_rsrc; | 1399 | goto err_rsrc; |
| 1401 | 1400 | ||
| 1401 | err = ib_register_device(&dev->ib_dev, NULL); | ||
| 1402 | if (err) | ||
| 1403 | goto err_odp; | ||
| 1404 | |||
| 1402 | err = create_umr_res(dev); | 1405 | err = create_umr_res(dev); |
| 1403 | if (err) | 1406 | if (err) |
| 1404 | goto err_dev; | 1407 | goto err_dev; |
| @@ -1420,6 +1423,9 @@ err_umrc: | |||
| 1420 | err_dev: | 1423 | err_dev: |
| 1421 | ib_unregister_device(&dev->ib_dev); | 1424 | ib_unregister_device(&dev->ib_dev); |
| 1422 | 1425 | ||
| 1426 | err_odp: | ||
| 1427 | mlx5_ib_odp_remove_one(dev); | ||
| 1428 | |||
| 1423 | err_rsrc: | 1429 | err_rsrc: |
| 1424 | destroy_dev_resources(&dev->devr); | 1430 | destroy_dev_resources(&dev->devr); |
| 1425 | 1431 | ||
| @@ -1435,8 +1441,10 @@ err_dealloc: | |||
| 1435 | static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) | 1441 | static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) |
| 1436 | { | 1442 | { |
| 1437 | struct mlx5_ib_dev *dev = context; | 1443 | struct mlx5_ib_dev *dev = context; |
| 1444 | |||
| 1438 | ib_unregister_device(&dev->ib_dev); | 1445 | ib_unregister_device(&dev->ib_dev); |
| 1439 | destroy_umrc_res(dev); | 1446 | destroy_umrc_res(dev); |
| 1447 | mlx5_ib_odp_remove_one(dev); | ||
| 1440 | destroy_dev_resources(&dev->devr); | 1448 | destroy_dev_resources(&dev->devr); |
| 1441 | free_comp_eqs(dev); | 1449 | free_comp_eqs(dev); |
| 1442 | ib_dealloc_device(&dev->ib_dev); | 1450 | ib_dealloc_device(&dev->ib_dev); |
| @@ -1450,15 +1458,30 @@ static struct mlx5_interface mlx5_ib_interface = { | |||
| 1450 | 1458 | ||
| 1451 | static int __init mlx5_ib_init(void) | 1459 | static int __init mlx5_ib_init(void) |
| 1452 | { | 1460 | { |
| 1461 | int err; | ||
| 1462 | |||
| 1453 | if (deprecated_prof_sel != 2) | 1463 | if (deprecated_prof_sel != 2) |
| 1454 | pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n"); | 1464 | pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n"); |
| 1455 | 1465 | ||
| 1456 | return mlx5_register_interface(&mlx5_ib_interface); | 1466 | err = mlx5_ib_odp_init(); |
| 1467 | if (err) | ||
| 1468 | return err; | ||
| 1469 | |||
| 1470 | err = mlx5_register_interface(&mlx5_ib_interface); | ||
| 1471 | if (err) | ||
| 1472 | goto clean_odp; | ||
| 1473 | |||
| 1474 | return err; | ||
| 1475 | |||
| 1476 | clean_odp: | ||
| 1477 | mlx5_ib_odp_cleanup(); | ||
| 1478 | return err; | ||
| 1457 | } | 1479 | } |
| 1458 | 1480 | ||
| 1459 | static void __exit mlx5_ib_cleanup(void) | 1481 | static void __exit mlx5_ib_cleanup(void) |
| 1460 | { | 1482 | { |
| 1461 | mlx5_unregister_interface(&mlx5_ib_interface); | 1483 | mlx5_unregister_interface(&mlx5_ib_interface); |
| 1484 | mlx5_ib_odp_cleanup(); | ||
| 1462 | } | 1485 | } |
| 1463 | 1486 | ||
| 1464 | module_init(mlx5_ib_init); | 1487 | module_init(mlx5_ib_init); |
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 6856e27bfb6a..c6ceec3e3d6a 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h | |||
| @@ -149,6 +149,29 @@ enum { | |||
| 149 | MLX5_QP_EMPTY | 149 | MLX5_QP_EMPTY |
| 150 | }; | 150 | }; |
| 151 | 151 | ||
| 152 | /* | ||
| 153 | * Connect-IB can trigger up to four concurrent pagefaults | ||
| 154 | * per-QP. | ||
| 155 | */ | ||
| 156 | enum mlx5_ib_pagefault_context { | ||
| 157 | MLX5_IB_PAGEFAULT_RESPONDER_READ, | ||
| 158 | MLX5_IB_PAGEFAULT_REQUESTOR_READ, | ||
| 159 | MLX5_IB_PAGEFAULT_RESPONDER_WRITE, | ||
| 160 | MLX5_IB_PAGEFAULT_REQUESTOR_WRITE, | ||
| 161 | MLX5_IB_PAGEFAULT_CONTEXTS | ||
| 162 | }; | ||
| 163 | |||
| 164 | static inline enum mlx5_ib_pagefault_context | ||
| 165 | mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault) | ||
| 166 | { | ||
| 167 | return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE); | ||
| 168 | } | ||
| 169 | |||
| 170 | struct mlx5_ib_pfault { | ||
| 171 | struct work_struct work; | ||
| 172 | struct mlx5_pagefault mpfault; | ||
| 173 | }; | ||
| 174 | |||
| 152 | struct mlx5_ib_qp { | 175 | struct mlx5_ib_qp { |
| 153 | struct ib_qp ibqp; | 176 | struct ib_qp ibqp; |
| 154 | struct mlx5_core_qp mqp; | 177 | struct mlx5_core_qp mqp; |
| @@ -194,6 +217,21 @@ struct mlx5_ib_qp { | |||
| 194 | 217 | ||
| 195 | /* Store signature errors */ | 218 | /* Store signature errors */ |
| 196 | bool signature_en; | 219 | bool signature_en; |
| 220 | |||
| 221 | #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING | ||
| 222 | /* | ||
| 223 | * A flag that is true for QP's that are in a state that doesn't | ||
| 224 | * allow page faults, and shouldn't schedule any more faults. | ||
| 225 | */ | ||
| 226 | int disable_page_faults; | ||
| 227 | /* | ||
| 228 | * The disable_page_faults_lock protects a QP's disable_page_faults | ||
| 229 | * field, allowing for a thread to atomically check whether the QP | ||
| 230 | * allows page faults, and if so schedule a page fault. | ||
| 231 | */ | ||
| 232 | spinlock_t disable_page_faults_lock; | ||
| 233 | struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS]; | ||
| 234 | #endif | ||
| 197 | }; | 235 | }; |
| 198 | 236 | ||
| 199 | struct mlx5_ib_cq_buf { | 237 | struct mlx5_ib_cq_buf { |
| @@ -392,13 +430,17 @@ struct mlx5_ib_dev { | |||
| 392 | struct umr_common umrc; | 430 | struct umr_common umrc; |
| 393 | /* sync used page count stats | 431 | /* sync used page count stats |
| 394 | */ | 432 | */ |
| 395 | spinlock_t mr_lock; | ||
| 396 | struct mlx5_ib_resources devr; | 433 | struct mlx5_ib_resources devr; |
| 397 | struct mlx5_mr_cache cache; | 434 | struct mlx5_mr_cache cache; |
| 398 | struct timer_list delay_timer; | 435 | struct timer_list delay_timer; |
| 399 | int fill_delay; | 436 | int fill_delay; |
| 400 | #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING | 437 | #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING |
| 401 | struct ib_odp_caps odp_caps; | 438 | struct ib_odp_caps odp_caps; |
| 439 | /* | ||
| 440 | * Sleepable RCU that prevents destruction of MRs while they are still | ||
| 441 | * being used by a page fault handler. | ||
| 442 | */ | ||
| 443 | struct srcu_struct mr_srcu; | ||
| 402 | #endif | 444 | #endif |
| 403 | }; | 445 | }; |
| 404 | 446 | ||
| @@ -575,12 +617,33 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, | |||
| 575 | struct ib_mr_status *mr_status); | 617 | struct ib_mr_status *mr_status); |
| 576 | 618 | ||
| 577 | #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING | 619 | #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING |
| 620 | extern struct workqueue_struct *mlx5_ib_page_fault_wq; | ||
| 621 | |||
| 578 | int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev); | 622 | int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev); |
| 579 | #else | 623 | void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, |
| 624 | struct mlx5_ib_pfault *pfault); | ||
| 625 | void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp); | ||
| 626 | int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); | ||
| 627 | void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev); | ||
| 628 | int __init mlx5_ib_odp_init(void); | ||
| 629 | void mlx5_ib_odp_cleanup(void); | ||
| 630 | void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp); | ||
| 631 | void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp); | ||
| 632 | |||
| 633 | #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ | ||
| 580 | static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) | 634 | static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) |
| 581 | { | 635 | { |
| 582 | return 0; | 636 | return 0; |
| 583 | } | 637 | } |
| 638 | |||
| 639 | static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {} | ||
| 640 | static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } | ||
| 641 | static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {} | ||
| 642 | static inline int mlx5_ib_odp_init(void) { return 0; } | ||
| 643 | static inline void mlx5_ib_odp_cleanup(void) {} | ||
| 644 | static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {} | ||
| 645 | static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {} | ||
| 646 | |||
| 584 | #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ | 647 | #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ |
| 585 | 648 | ||
| 586 | static inline void init_query_mad(struct ib_smp *mad) | 649 | static inline void init_query_mad(struct ib_smp *mad) |
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 38b06267798e..922ac85b7198 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c | |||
| @@ -52,6 +52,8 @@ static __be64 mlx5_ib_update_mtt_emergency_buffer[ | |||
| 52 | static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex); | 52 | static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex); |
| 53 | #endif | 53 | #endif |
| 54 | 54 | ||
| 55 | static int clean_mr(struct mlx5_ib_mr *mr); | ||
| 56 | |||
| 55 | static int order2idx(struct mlx5_ib_dev *dev, int order) | 57 | static int order2idx(struct mlx5_ib_dev *dev, int order) |
| 56 | { | 58 | { |
| 57 | struct mlx5_mr_cache *cache = &dev->cache; | 59 | struct mlx5_mr_cache *cache = &dev->cache; |
| @@ -1049,6 +1051,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, | |||
| 1049 | mlx5_ib_dbg(dev, "cache empty for order %d", order); | 1051 | mlx5_ib_dbg(dev, "cache empty for order %d", order); |
| 1050 | mr = NULL; | 1052 | mr = NULL; |
| 1051 | } | 1053 | } |
| 1054 | } else if (access_flags & IB_ACCESS_ON_DEMAND) { | ||
| 1055 | err = -EINVAL; | ||
| 1056 | pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); | ||
| 1057 | goto error; | ||
| 1052 | } | 1058 | } |
| 1053 | 1059 | ||
| 1054 | if (!mr) | 1060 | if (!mr) |
| @@ -1064,9 +1070,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, | |||
| 1064 | 1070 | ||
| 1065 | mr->umem = umem; | 1071 | mr->umem = umem; |
| 1066 | mr->npages = npages; | 1072 | mr->npages = npages; |
| 1067 | spin_lock(&dev->mr_lock); | 1073 | atomic_add(npages, &dev->mdev->priv.reg_pages); |
| 1068 | dev->mdev->priv.reg_pages += npages; | ||
| 1069 | spin_unlock(&dev->mr_lock); | ||
| 1070 | mr->ibmr.lkey = mr->mmr.key; | 1074 | mr->ibmr.lkey = mr->mmr.key; |
| 1071 | mr->ibmr.rkey = mr->mmr.key; | 1075 | mr->ibmr.rkey = mr->mmr.key; |
| 1072 | 1076 | ||
| @@ -1110,12 +1114,9 @@ error: | |||
| 1110 | return err; | 1114 | return err; |
| 1111 | } | 1115 | } |
| 1112 | 1116 | ||
| 1113 | int mlx5_ib_dereg_mr(struct ib_mr *ibmr) | 1117 | static int clean_mr(struct mlx5_ib_mr *mr) |
| 1114 | { | 1118 | { |
| 1115 | struct mlx5_ib_dev *dev = to_mdev(ibmr->device); | 1119 | struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); |
| 1116 | struct mlx5_ib_mr *mr = to_mmr(ibmr); | ||
| 1117 | struct ib_umem *umem = mr->umem; | ||
| 1118 | int npages = mr->npages; | ||
| 1119 | int umred = mr->umred; | 1120 | int umred = mr->umred; |
| 1120 | int err; | 1121 | int err; |
| 1121 | 1122 | ||
| @@ -1135,16 +1136,32 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) | |||
| 1135 | free_cached_mr(dev, mr); | 1136 | free_cached_mr(dev, mr); |
| 1136 | } | 1137 | } |
| 1137 | 1138 | ||
| 1139 | if (!umred) | ||
| 1140 | kfree(mr); | ||
| 1141 | |||
| 1142 | return 0; | ||
| 1143 | } | ||
| 1144 | |||
| 1145 | int mlx5_ib_dereg_mr(struct ib_mr *ibmr) | ||
| 1146 | { | ||
| 1147 | struct mlx5_ib_dev *dev = to_mdev(ibmr->device); | ||
| 1148 | struct mlx5_ib_mr *mr = to_mmr(ibmr); | ||
| 1149 | int npages = mr->npages; | ||
| 1150 | struct ib_umem *umem = mr->umem; | ||
| 1151 | |||
| 1152 | #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING | ||
| 1153 | if (umem) | ||
| 1154 | /* Wait for all running page-fault handlers to finish. */ | ||
| 1155 | synchronize_srcu(&dev->mr_srcu); | ||
| 1156 | #endif | ||
| 1157 | |||
| 1158 | clean_mr(mr); | ||
| 1159 | |||
| 1138 | if (umem) { | 1160 | if (umem) { |
| 1139 | ib_umem_release(umem); | 1161 | ib_umem_release(umem); |
| 1140 | spin_lock(&dev->mr_lock); | 1162 | atomic_sub(npages, &dev->mdev->priv.reg_pages); |
| 1141 | dev->mdev->priv.reg_pages -= npages; | ||
| 1142 | spin_unlock(&dev->mr_lock); | ||
| 1143 | } | 1163 | } |
| 1144 | 1164 | ||
| 1145 | if (!umred) | ||
| 1146 | kfree(mr); | ||
| 1147 | |||
| 1148 | return 0; | 1165 | return 0; |
| 1149 | } | 1166 | } |
| 1150 | 1167 | ||
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 66c39ee16aff..63bbdba396f1 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c | |||
| @@ -32,6 +32,8 @@ | |||
| 32 | 32 | ||
| 33 | #include "mlx5_ib.h" | 33 | #include "mlx5_ib.h" |
| 34 | 34 | ||
| 35 | struct workqueue_struct *mlx5_ib_page_fault_wq; | ||
| 36 | |||
| 35 | #define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \ | 37 | #define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \ |
| 36 | if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name) \ | 38 | if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name) \ |
| 37 | ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name; \ | 39 | ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name; \ |
| @@ -58,3 +60,146 @@ int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev) | |||
| 58 | out: | 60 | out: |
| 59 | return err; | 61 | return err; |
| 60 | } | 62 | } |
| 63 | |||
| 64 | static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev, | ||
| 65 | u32 key) | ||
| 66 | { | ||
| 67 | u32 base_key = mlx5_base_mkey(key); | ||
| 68 | struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key); | ||
| 69 | |||
| 70 | if (!mmr || mmr->key != key) | ||
| 71 | return NULL; | ||
| 72 | |||
| 73 | return container_of(mmr, struct mlx5_ib_mr, mmr); | ||
| 74 | } | ||
| 75 | |||
| 76 | static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, | ||
| 77 | struct mlx5_ib_pfault *pfault, | ||
| 78 | int error) { | ||
| 79 | struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device); | ||
| 80 | int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn, | ||
| 81 | pfault->mpfault.flags, | ||
| 82 | error); | ||
| 83 | if (ret) | ||
| 84 | pr_err("Failed to resolve the page fault on QP 0x%x\n", | ||
| 85 | qp->mqp.qpn); | ||
| 86 | } | ||
| 87 | |||
| 88 | void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, | ||
| 89 | struct mlx5_ib_pfault *pfault) | ||
| 90 | { | ||
| 91 | u8 event_subtype = pfault->mpfault.event_subtype; | ||
| 92 | |||
| 93 | switch (event_subtype) { | ||
| 94 | default: | ||
| 95 | pr_warn("Invalid page fault event subtype: 0x%x\n", | ||
| 96 | event_subtype); | ||
| 97 | mlx5_ib_page_fault_resume(qp, pfault, 1); | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | } | ||
| 101 | |||
| 102 | static void mlx5_ib_qp_pfault_action(struct work_struct *work) | ||
| 103 | { | ||
| 104 | struct mlx5_ib_pfault *pfault = container_of(work, | ||
| 105 | struct mlx5_ib_pfault, | ||
| 106 | work); | ||
| 107 | enum mlx5_ib_pagefault_context context = | ||
| 108 | mlx5_ib_get_pagefault_context(&pfault->mpfault); | ||
| 109 | struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp, | ||
| 110 | pagefaults[context]); | ||
| 111 | mlx5_ib_mr_pfault_handler(qp, pfault); | ||
| 112 | } | ||
| 113 | |||
| 114 | void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) | ||
| 115 | { | ||
| 116 | unsigned long flags; | ||
| 117 | |||
| 118 | spin_lock_irqsave(&qp->disable_page_faults_lock, flags); | ||
| 119 | qp->disable_page_faults = 1; | ||
| 120 | spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags); | ||
| 121 | |||
| 122 | /* | ||
| 123 | * Note that at this point, we are guaranteed that no more | ||
| 124 | * work queue elements will be posted to the work queue with | ||
| 125 | * the QP we are closing. | ||
| 126 | */ | ||
| 127 | flush_workqueue(mlx5_ib_page_fault_wq); | ||
| 128 | } | ||
| 129 | |||
| 130 | void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) | ||
| 131 | { | ||
| 132 | unsigned long flags; | ||
| 133 | |||
| 134 | spin_lock_irqsave(&qp->disable_page_faults_lock, flags); | ||
| 135 | qp->disable_page_faults = 0; | ||
| 136 | spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags); | ||
| 137 | } | ||
| 138 | |||
| 139 | static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp, | ||
| 140 | struct mlx5_pagefault *pfault) | ||
| 141 | { | ||
| 142 | /* | ||
| 143 | * Note that we will only get one fault event per QP per context | ||
| 144 | * (responder/initiator, read/write), until we resolve the page fault | ||
| 145 | * with the mlx5_ib_page_fault_resume command. Since this function is | ||
| 146 | * called from within the work element, there is no risk of missing | ||
| 147 | * events. | ||
| 148 | */ | ||
| 149 | struct mlx5_ib_qp *mibqp = to_mibqp(qp); | ||
| 150 | enum mlx5_ib_pagefault_context context = | ||
| 151 | mlx5_ib_get_pagefault_context(pfault); | ||
| 152 | struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context]; | ||
| 153 | |||
| 154 | qp_pfault->mpfault = *pfault; | ||
| 155 | |||
| 156 | /* No need to stop interrupts here since we are in an interrupt */ | ||
| 157 | spin_lock(&mibqp->disable_page_faults_lock); | ||
| 158 | if (!mibqp->disable_page_faults) | ||
| 159 | queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work); | ||
| 160 | spin_unlock(&mibqp->disable_page_faults_lock); | ||
| 161 | } | ||
| 162 | |||
| 163 | void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) | ||
| 164 | { | ||
| 165 | int i; | ||
| 166 | |||
| 167 | qp->disable_page_faults = 1; | ||
| 168 | spin_lock_init(&qp->disable_page_faults_lock); | ||
| 169 | |||
| 170 | qp->mqp.pfault_handler = mlx5_ib_pfault_handler; | ||
| 171 | |||
| 172 | for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i) | ||
| 173 | INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action); | ||
| 174 | } | ||
| 175 | |||
| 176 | int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) | ||
| 177 | { | ||
| 178 | int ret; | ||
| 179 | |||
| 180 | ret = init_srcu_struct(&ibdev->mr_srcu); | ||
| 181 | if (ret) | ||
| 182 | return ret; | ||
| 183 | |||
| 184 | return 0; | ||
| 185 | } | ||
| 186 | |||
| 187 | void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) | ||
| 188 | { | ||
| 189 | cleanup_srcu_struct(&ibdev->mr_srcu); | ||
| 190 | } | ||
| 191 | |||
| 192 | int __init mlx5_ib_odp_init(void) | ||
| 193 | { | ||
| 194 | mlx5_ib_page_fault_wq = | ||
| 195 | create_singlethread_workqueue("mlx5_ib_page_faults"); | ||
| 196 | if (!mlx5_ib_page_fault_wq) | ||
| 197 | return -ENOMEM; | ||
| 198 | |||
| 199 | return 0; | ||
| 200 | } | ||
| 201 | |||
| 202 | void mlx5_ib_odp_cleanup(void) | ||
| 203 | { | ||
| 204 | destroy_workqueue(mlx5_ib_page_fault_wq); | ||
| 205 | } | ||
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9783c3342dbf..be0cd358b080 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c | |||
| @@ -876,6 +876,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, | |||
| 876 | int inlen = sizeof(*in); | 876 | int inlen = sizeof(*in); |
| 877 | int err; | 877 | int err; |
| 878 | 878 | ||
| 879 | mlx5_ib_odp_create_qp(qp); | ||
| 880 | |||
| 879 | gen = &dev->mdev->caps.gen; | 881 | gen = &dev->mdev->caps.gen; |
| 880 | mutex_init(&qp->mutex); | 882 | mutex_init(&qp->mutex); |
| 881 | spin_lock_init(&qp->sq.lock); | 883 | spin_lock_init(&qp->sq.lock); |
| @@ -1160,11 +1162,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) | |||
| 1160 | in = kzalloc(sizeof(*in), GFP_KERNEL); | 1162 | in = kzalloc(sizeof(*in), GFP_KERNEL); |
| 1161 | if (!in) | 1163 | if (!in) |
| 1162 | return; | 1164 | return; |
| 1163 | if (qp->state != IB_QPS_RESET) | 1165 | if (qp->state != IB_QPS_RESET) { |
| 1166 | mlx5_ib_qp_disable_pagefaults(qp); | ||
| 1164 | if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state), | 1167 | if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state), |
| 1165 | MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp)) | 1168 | MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp)) |
| 1166 | mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", | 1169 | mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", |
| 1167 | qp->mqp.qpn); | 1170 | qp->mqp.qpn); |
| 1171 | } | ||
| 1168 | 1172 | ||
| 1169 | get_cqs(qp, &send_cq, &recv_cq); | 1173 | get_cqs(qp, &send_cq, &recv_cq); |
| 1170 | 1174 | ||
| @@ -1712,6 +1716,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, | |||
| 1712 | if (mlx5_st < 0) | 1716 | if (mlx5_st < 0) |
| 1713 | goto out; | 1717 | goto out; |
| 1714 | 1718 | ||
| 1719 | /* If moving to a reset or error state, we must disable page faults on | ||
| 1720 | * this QP and flush all current page faults. Otherwise a stale page | ||
| 1721 | * fault may attempt to work on this QP after it is reset and moved | ||
| 1722 | * again to RTS, and may cause the driver and the device to get out of | ||
| 1723 | * sync. */ | ||
| 1724 | if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && | ||
| 1725 | (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) | ||
| 1726 | mlx5_ib_qp_disable_pagefaults(qp); | ||
| 1727 | |||
| 1715 | optpar = ib_mask_to_mlx5_opt(attr_mask); | 1728 | optpar = ib_mask_to_mlx5_opt(attr_mask); |
| 1716 | optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; | 1729 | optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; |
| 1717 | in->optparam = cpu_to_be32(optpar); | 1730 | in->optparam = cpu_to_be32(optpar); |
| @@ -1721,6 +1734,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, | |||
| 1721 | if (err) | 1734 | if (err) |
| 1722 | goto out; | 1735 | goto out; |
| 1723 | 1736 | ||
| 1737 | if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) | ||
| 1738 | mlx5_ib_qp_enable_pagefaults(qp); | ||
| 1739 | |||
| 1724 | qp->state = new_state; | 1740 | qp->state = new_state; |
| 1725 | 1741 | ||
| 1726 | if (attr_mask & IB_QP_ACCESS_FLAGS) | 1742 | if (attr_mask & IB_QP_ACCESS_FLAGS) |
| @@ -3026,6 +3042,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr | |||
| 3026 | int mlx5_state; | 3042 | int mlx5_state; |
| 3027 | int err = 0; | 3043 | int err = 0; |
| 3028 | 3044 | ||
| 3045 | #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING | ||
| 3046 | /* | ||
| 3047 | * Wait for any outstanding page faults, in case the user frees memory | ||
| 3048 | * based upon this query's result. | ||
| 3049 | */ | ||
| 3050 | flush_workqueue(mlx5_ib_page_fault_wq); | ||
| 3051 | #endif | ||
| 3052 | |||
| 3029 | mutex_lock(&qp->mutex); | 3053 | mutex_lock(&qp->mutex); |
| 3030 | outb = kzalloc(sizeof(*outb), GFP_KERNEL); | 3054 | outb = kzalloc(sizeof(*outb), GFP_KERNEL); |
| 3031 | if (!outb) { | 3055 | if (!outb) { |
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7088dcd19214..166d9315fe4b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h | |||
| @@ -474,7 +474,7 @@ struct mlx5_priv { | |||
| 474 | struct workqueue_struct *pg_wq; | 474 | struct workqueue_struct *pg_wq; |
| 475 | struct rb_root page_root; | 475 | struct rb_root page_root; |
| 476 | int fw_pages; | 476 | int fw_pages; |
| 477 | int reg_pages; | 477 | atomic_t reg_pages; |
| 478 | struct list_head free_list; | 478 | struct list_head free_list; |
| 479 | 479 | ||
| 480 | struct mlx5_core_health health; | 480 | struct mlx5_core_health health; |
