 drivers/infiniband/hw/mlx5/main.c    |   2
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  20
 drivers/infiniband/hw/mlx5/mr.c      |  33
 drivers/infiniband/hw/mlx5/odp.c     | 505
 include/linux/mlx5/driver.h          |   2
 5 files changed, 513 insertions(+), 49 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index fe37da2be26f..eb8719ca500e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3583,6 +3583,8 @@ static int __init mlx5_ib_init(void)
 {
 	int err;
 
+	mlx5_ib_odp_init();
+
 	err = mlx5_register_interface(&mlx5_ib_interface);
 
 	return err;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index efc44de3c7d7..3cd064b5f0bf 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -202,6 +202,7 @@ struct mlx5_ib_flow_db {
 #define MLX5_IB_UPD_XLT_ADDR	      BIT(3)
 #define MLX5_IB_UPD_XLT_PD	      BIT(4)
 #define MLX5_IB_UPD_XLT_ACCESS	      BIT(5)
+#define MLX5_IB_UPD_XLT_INDIRECT      BIT(6)
 
 /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
  *
@@ -503,6 +504,10 @@ struct mlx5_ib_mr {
 	int			live;
 	void			*descs_alloc;
 	int			access_flags; /* Needed for rereg MR */
+
+	struct mlx5_ib_mr      *parent;
+	atomic_t		num_leaf_free;
+	wait_queue_head_t       q_leaf_free;
 };
 
 struct mlx5_ib_mw {
@@ -637,6 +642,7 @@ struct mlx5_ib_dev {
 	 * being used by a page fault handler.
 	 */
 	struct srcu_struct	mr_srcu;
+	u32			null_mkey;
 #endif
 	struct mlx5_ib_flow_db	flow_db;
 	/* protect resources needed as part of reset flow */
@@ -789,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 int mlx5_ib_dealloc_mw(struct ib_mw *mw);
 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 		       int page_shift, int flags);
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+					     int access_flags);
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
			  u64 length, u64 virt_addr, int access_flags,
			  struct ib_pd *pd, struct ib_udata *udata);
@@ -868,6 +877,9 @@ int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
			      unsigned long end);
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+			   size_t nentries, struct mlx5_ib_mr *mr, int flags);
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 {
@@ -875,9 +887,13 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 }
 
 static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
 static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
 static inline int mlx5_ib_odp_init(void) { return 0; }
 static inline void mlx5_ib_odp_cleanup(void) {}
+static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+					 size_t nentries, struct mlx5_ib_mr *mr,
+					 int flags) {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
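Note: the new prototypes above traffic in mlx5_klm descriptors rather than page-level MTTs. As a hedged orientation sketch (shapes abridged from the mlx5 headers, cf. include/linux/mlx5/qp.h; shown for reading the later hunks, not part of the patch), the difference is that a KLM/KSM entry points at another mkey, which is what makes the implicit MR a two-level translation:

/* Abridged descriptor shapes; an MTT entry translates a single page,
 * a KLM/KSM entry redirects to a child MR. */
#include <linux/types.h>

struct mlx5_mtt {
	__be64	ptag;		/* page DMA address + present bit */
};

struct mlx5_klm {
	__be32	bcount;		/* byte count covered by this entry */
	__be32	key;		/* lkey of the child MR (or null_mkey) */
	__be64	va;		/* starting VA within the child MR */
};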
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8f5b94d483e4..3c1f483d003f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -469,7 +469,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
 		spin_unlock_irq(&ent->lock);
 
 		err = add_keys(dev, entry, 1);
-		if (err)
+		if (err && err != -EAGAIN)
 			return ERR_PTR(err);
 
 		wait_for_completion(&ent->compl);
@@ -669,8 +669,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 		queue_work(cache->wq, &ent->work);
 
-		if (i > MAX_UMR_CACHE_ENTRY)
+		if (i > MAX_UMR_CACHE_ENTRY) {
+			mlx5_odp_init_mr_cache_entry(ent);
 			continue;
+		}
 
 		if (!use_umr(dev, ent->order))
 			continue;
@@ -935,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
 {
 	struct mlx5_ib_dev *dev = mr->dev;
 	struct ib_umem *umem = mr->umem;
+	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+		mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
+		return npages;
+	}
 
 	npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
 
@@ -968,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 	struct mlx5_umr_wr wr;
 	struct ib_sge sg;
 	int err = 0;
-	int desc_size = sizeof(struct mlx5_mtt);
+	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+			? sizeof(struct mlx5_klm)
+			: sizeof(struct mlx5_mtt);
 	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
 	const int page_mask = page_align - 1;
 	size_t pages_mapped = 0;
@@ -1186,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
 		    start, virt_addr, length, access_flags);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (!start && length == U64_MAX) {
+		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
+		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+			return ERR_PTR(-EINVAL);
+
+		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+		return &mr->ibmr;
+	}
+#endif
+
 	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
			   &page_shift, &ncont, &order);
 
@@ -1471,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 		/* Wait for all running page-fault handlers to finish. */
 		synchronize_srcu(&dev->mr_srcu);
 		/* Destroy all page mappings */
-		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
-					 ib_umem_end(umem));
+		if (umem->odp_data->page_list)
+			mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+						 ib_umem_end(umem));
+		else
+			mlx5_ib_free_implicit_mr(mr);
 		/*
 		 * We kill the umem before the MR for ODP,
 		 * so that there will not be any invalidations in
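The early return added to mlx5_ib_reg_user_mr() is the kernel side of implicit ODP registration: start == 0 with length == U64_MAX selects a whole-address-space MR. A hedged userspace sketch of the call that would take this path (assumes a libibverbs build with ODP support; reg_implicit_mr() is an illustrative helper name, not a library function):

#include <infiniband/verbs.h>
#include <stdint.h>

static struct ibv_mr *reg_implicit_mr(struct ibv_pd *pd)
{
	/* addr == NULL and length == SIZE_MAX arrive in the kernel as
	 * start == 0, length == U64_MAX; IB_ACCESS_ON_DEMAND must be
	 * set or mlx5_ib_reg_user_mr() returns -EINVAL. */
	return ibv_reg_mr(pd, NULL, SIZE_MAX,
			  IBV_ACCESS_ON_DEMAND | IBV_ACCESS_LOCAL_WRITE);
}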
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index e5bc267aca73..d7b12f0750e2 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -34,6 +34,7 @@
 #include <rdma/ib_umem_odp.h>
 
 #include "mlx5_ib.h"
+#include "cmd.h"
 
 #define MAX_PREFETCH_LEN (4*1024*1024U)
 
@@ -41,6 +42,140 @@
  * a pagefault. */
 #define MMU_NOTIFIER_TIMEOUT 1000
 
+#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
+#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
+#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
+#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
+#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
+
+#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
+
+static u64 mlx5_imr_ksm_entries;
+
+static int check_parent(struct ib_umem_odp *odp,
+			struct mlx5_ib_mr *parent)
+{
+	struct mlx5_ib_mr *mr = odp->private;
+
+	return mr && mr->parent == parent;
+}
+
+static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
+{
+	struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
+	struct ib_ucontext *ctx = odp->umem->context;
+	struct rb_node *rb;
+
+	down_read(&ctx->umem_rwsem);
+	while (1) {
+		rb = rb_next(&odp->interval_tree.rb);
+		if (!rb)
+			goto not_found;
+		odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+		if (check_parent(odp, parent))
+			goto end;
+	}
+not_found:
+	odp = NULL;
+end:
+	up_read(&ctx->umem_rwsem);
+	return odp;
+}
+
+static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
+				      u64 start, u64 length,
+				      struct mlx5_ib_mr *parent)
+{
+	struct ib_umem_odp *odp;
+	struct rb_node *rb;
+
+	down_read(&ctx->umem_rwsem);
+	odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
+	if (!odp)
+		goto end;
+
+	while (1) {
+		if (check_parent(odp, parent))
+			goto end;
+		rb = rb_next(&odp->interval_tree.rb);
+		if (!rb)
+			goto not_found;
+		odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+		if (ib_umem_start(odp->umem) > start + length)
+			goto not_found;
+	}
+not_found:
+	odp = NULL;
+end:
+	up_read(&ctx->umem_rwsem);
+	return odp;
+}
+
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+			   size_t nentries, struct mlx5_ib_mr *mr, int flags)
+{
+	struct ib_pd *pd = mr->ibmr.pd;
+	struct ib_ucontext *ctx = pd->uobject->context;
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem_odp *odp;
+	unsigned long va;
+	int i;
+
+	if (flags & MLX5_IB_UPD_XLT_ZAP) {
+		for (i = 0; i < nentries; i++, pklm++) {
+			pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+			pklm->key = cpu_to_be32(dev->null_mkey);
+			pklm->va = 0;
+		}
+		return;
+	}
+
+	odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
+			 nentries * MLX5_IMR_MTT_SIZE, mr);
+
+	for (i = 0; i < nentries; i++, pklm++) {
+		pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+		va = (offset + i) * MLX5_IMR_MTT_SIZE;
+		if (odp && odp->umem->address == va) {
+			struct mlx5_ib_mr *mtt = odp->private;
+
+			pklm->key = cpu_to_be32(mtt->ibmr.lkey);
+			odp = odp_next(odp);
+		} else {
+			pklm->key = cpu_to_be32(dev->null_mkey);
+		}
+		mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
+			    i, va, be32_to_cpu(pklm->key));
+	}
+}
+
+static void mr_leaf_free_action(struct work_struct *work)
+{
+	struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
+	int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
+	struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+
+	mr->parent = NULL;
+	synchronize_srcu(&mr->dev->mr_srcu);
+
+	if (!READ_ONCE(odp->dying)) {
+		mr->parent = imr;
+		if (atomic_dec_and_test(&imr->num_leaf_free))
+			wake_up(&imr->q_leaf_free);
+		return;
+	}
+
+	ib_umem_release(odp->umem);
+	if (imr->live)
+		mlx5_ib_update_xlt(imr, idx, 1, 0,
+				   MLX5_IB_UPD_XLT_INDIRECT |
+				   MLX5_IB_UPD_XLT_ATOMIC);
+	mlx5_mr_cache_free(mr->dev, mr);
+
+	if (atomic_dec_and_test(&imr->num_leaf_free))
+		wake_up(&imr->q_leaf_free);
+}
+
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 			      unsigned long end)
 {
@@ -111,6 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 	 */
 
 	ib_umem_odp_unmap_dma_pages(umem, start, end);
+
+	if (unlikely(!umem->npages && mr->parent &&
+		     !umem->odp_data->dying)) {
+		WRITE_ONCE(umem->odp_data->dying, 1);
+		atomic_inc(&mr->parent->num_leaf_free);
+		schedule_work(&umem->odp_data->work);
+	}
 }
 
 void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
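The macros at the top of this hunk fix the geometry: with 4 KB pages each leaf MR spans MLX5_IMR_MTT_SIZE = 1 GB (2^18 pages), and a faulting address splits into a KSM slot in the parent and an MTT slot within the leaf. A standalone sketch of that decomposition (assumes PAGE_SHIFT == 12, i.e. x86-64 with 4 KB pages; BIT_ULL() open-coded for userspace):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	   12
#define MLX5_IMR_MTT_BITS  (30 - PAGE_SHIFT)			/* 18 */
#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)	/* 30 */
#define MLX5_IMR_MTT_SIZE  (1ULL << MLX5_IMR_MTT_SHIFT)		/* 1 GB */
#define MLX5_IMR_MTT_MASK  (~(MLX5_IMR_MTT_SIZE - 1))

int main(void)
{
	uint64_t io_virt   = 0x7f3a12345000ULL;	/* example faulting VA */
	uint64_t leaf_base = io_virt & MLX5_IMR_MTT_MASK;	/* 1 GB aligned */
	uint64_t ksm_idx   = io_virt >> MLX5_IMR_MTT_SHIFT;	/* parent slot */
	uint64_t page_idx  = (io_virt - leaf_base) >> PAGE_SHIFT; /* leaf slot */

	printf("leaf base %#llx, KSM slot %llu, MTT slot %llu\n",
	       (unsigned long long)leaf_base,
	       (unsigned long long)ksm_idx,
	       (unsigned long long)page_idx);
	return 0;
}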
@@ -147,6 +289,11 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 	if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
 		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
 
+	if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
+	    MLX5_CAP_GEN(dev->mdev, null_mkey) &&
+	    MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+		caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
+
 	return;
 }
 
@@ -184,6 +331,197 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 			    wq_num);
 }
 
+static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
+					    struct ib_umem *umem,
+					    bool ksm, int access_flags)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_ib_mr *mr;
+	int err;
+
+	mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
+					    MLX5_IMR_MTT_CACHE_ENTRY);
+
+	if (IS_ERR(mr))
+		return mr;
+
+	mr->ibmr.pd = pd;
+
+	mr->dev = dev;
+	mr->access_flags = access_flags;
+	mr->mmkey.iova = 0;
+	mr->umem = umem;
+
+	if (ksm) {
+		err = mlx5_ib_update_xlt(mr, 0,
+					 mlx5_imr_ksm_entries,
+					 MLX5_KSM_PAGE_SHIFT,
+					 MLX5_IB_UPD_XLT_INDIRECT |
+					 MLX5_IB_UPD_XLT_ZAP |
+					 MLX5_IB_UPD_XLT_ENABLE);
+
+	} else {
+		err = mlx5_ib_update_xlt(mr, 0,
+					 MLX5_IMR_MTT_ENTRIES,
+					 PAGE_SHIFT,
+					 MLX5_IB_UPD_XLT_ZAP |
+					 MLX5_IB_UPD_XLT_ENABLE |
+					 MLX5_IB_UPD_XLT_ATOMIC);
+	}
+
+	if (err)
+		goto fail;
+
+	mr->ibmr.lkey = mr->mmkey.key;
+	mr->ibmr.rkey = mr->mmkey.key;
+
+	mr->live = 1;
+
+	mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
+		    mr->mmkey.key, dev->mdev, mr);
+
+	return mr;
+
+fail:
+	mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+	mlx5_mr_cache_free(dev, mr);
+
+	return ERR_PTR(err);
+}
+
+static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
+						u64 io_virt, size_t bcnt)
+{
+	struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
+	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
+	struct ib_umem_odp *odp, *result = NULL;
+	u64 addr = io_virt & MLX5_IMR_MTT_MASK;
+	int nentries = 0, start_idx = 0, ret;
+	struct mlx5_ib_mr *mtt;
+	struct ib_umem *umem;
+
+	mutex_lock(&mr->umem->odp_data->umem_mutex);
+	odp = odp_lookup(ctx, addr, 1, mr);
+
+	mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
+		    io_virt, bcnt, addr, odp);
+
+next_mr:
+	if (likely(odp)) {
+		if (nentries)
+			nentries++;
+	} else {
+		umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
+		if (IS_ERR(umem)) {
+			mutex_unlock(&mr->umem->odp_data->umem_mutex);
+			return ERR_CAST(umem);
+		}
+
+		mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+		if (IS_ERR(mtt)) {
+			mutex_unlock(&mr->umem->odp_data->umem_mutex);
+			ib_umem_release(umem);
+			return ERR_CAST(mtt);
+		}
+
+		odp = umem->odp_data;
+		odp->private = mtt;
+		mtt->umem = umem;
+		mtt->mmkey.iova = addr;
+		mtt->parent = mr;
+		INIT_WORK(&odp->work, mr_leaf_free_action);
+
+		if (!nentries)
+			start_idx = addr >> MLX5_IMR_MTT_SHIFT;
+		nentries++;
+	}
+
+	odp->dying = 0;
+
+	/* Return first odp if region not covered by single one */
+	if (likely(!result))
+		result = odp;
+
+	addr += MLX5_IMR_MTT_SIZE;
+	if (unlikely(addr < io_virt + bcnt)) {
+		odp = odp_next(odp);
+		if (odp && odp->umem->address != addr)
+			odp = NULL;
+		goto next_mr;
+	}
+
+	if (unlikely(nentries)) {
+		ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
+					 MLX5_IB_UPD_XLT_INDIRECT |
+					 MLX5_IB_UPD_XLT_ATOMIC);
+		if (ret) {
+			mlx5_ib_err(dev, "Failed to update PAS\n");
+			result = ERR_PTR(ret);
+		}
+	}
+
+	mutex_unlock(&mr->umem->odp_data->umem_mutex);
+	return result;
+}
+
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+					     int access_flags)
+{
+	struct ib_ucontext *ctx = pd->ibpd.uobject->context;
+	struct mlx5_ib_mr *imr;
+	struct ib_umem *umem;
+
+	umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
+	if (IS_ERR(umem))
+		return ERR_CAST(umem);
+
+	imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+	if (IS_ERR(imr)) {
+		ib_umem_release(umem);
+		return ERR_CAST(imr);
+	}
+
+	imr->umem = umem;
+	init_waitqueue_head(&imr->q_leaf_free);
+	atomic_set(&imr->num_leaf_free, 0);
+
+	return imr;
+}
+
+static int mr_leaf_free(struct ib_umem *umem, u64 start,
+			u64 end, void *cookie)
+{
+	struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+
+	if (mr->parent != imr)
+		return 0;
+
+	ib_umem_odp_unmap_dma_pages(umem,
+				    ib_umem_start(umem),
+				    ib_umem_end(umem));
+
+	if (umem->odp_data->dying)
+		return 0;
+
+	WRITE_ONCE(umem->odp_data->dying, 1);
+	atomic_inc(&imr->num_leaf_free);
+	schedule_work(&umem->odp_data->work);
+
+	return 0;
+}
+
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
+{
+	struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
+
+	down_read(&ctx->umem_rwsem);
+	rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
+				      mr_leaf_free, imr);
+	up_read(&ctx->umem_rwsem);
+
+	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
+}
+
 /*
  * Handle a single data segment in a page-fault WQE or RDMA region.
  *
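Taken together with the mlx5_ib_invalidate_range() change above, leaf lifetime is a two-phase handshake between the freeing context and the work item. A simplified trace (a reading aid under the patch's own names, not code from the patch):

/*
 *   mlx5_ib_free_implicit_mr(imr)        mr_leaf_free_action(work)
 *   -----------------------------        -------------------------
 *   for each leaf under imr:
 *     unmap leaf's DMA pages
 *     WRITE_ONCE(odp->dying, 1)
 *     atomic_inc(&imr->num_leaf_free)
 *     schedule_work(&odp->work)  ------>  mr->parent = NULL
 *                                         synchronize_srcu(&mr_srcu)
 *                                         release umem, zap parent's
 *                                           KLM slot, free MR to cache
 *   wait_event(imr->q_leaf_free, <------  atomic_dec_and_test() &&
 *     num_leaf_free == 0)                     wake_up(&q_leaf_free)
 */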
@@ -195,47 +533,43 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
  * -EFAULT when there's an error mapping the requested pages. The caller will
  * abort the page fault handling.
  */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 					 u32 key, u64 io_virt, size_t bcnt,
 					 u32 *bytes_committed,
 					 u32 *bytes_mapped)
 {
 	int srcu_key;
-	unsigned int current_seq;
+	unsigned int current_seq = 0;
 	u64 start_idx;
 	int npages = 0, ret = 0;
 	struct mlx5_ib_mr *mr;
 	u64 access_mask = ODP_READ_ALLOWED_BIT;
+	struct ib_umem_odp *odp;
+	int implicit = 0;
+	size_t size;
 
-	srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
-	mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
+	srcu_key = srcu_read_lock(&dev->mr_srcu);
+	mr = mlx5_ib_odp_find_mr_lkey(dev, key);
 	/*
 	 * If we didn't find the MR, it means the MR was closed while we were
 	 * handling the ODP event. In this case we return -EFAULT so that the
 	 * QP will be closed.
 	 */
 	if (!mr || !mr->ibmr.pd) {
-		pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
-		       key);
+		mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+			    key);
 		ret = -EFAULT;
 		goto srcu_unlock;
 	}
 	if (!mr->umem->odp_data) {
-		pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
-			 key);
+		mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+			    key);
 		if (bytes_mapped)
 			*bytes_mapped +=
 				(bcnt - *bytes_committed);
 		goto srcu_unlock;
 	}
 
-	current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
-	/*
-	 * Ensure the sequence number is valid for some time before we call
-	 * gup.
-	 */
-	smp_rmb();
-
 	/*
 	 * Avoid branches - this code will perform correctly
 	 * in all iterations (in iteration 2 and above,
@@ -244,63 +578,109 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
 	io_virt += *bytes_committed;
 	bcnt -= *bytes_committed;
 
+	if (!mr->umem->odp_data->page_list) {
+		odp = implicit_mr_get_data(mr, io_virt, bcnt);
+
+		if (IS_ERR(odp)) {
+			ret = PTR_ERR(odp);
+			goto srcu_unlock;
+		}
+		mr = odp->private;
+		implicit = 1;
+
+	} else {
+		odp = mr->umem->odp_data;
+	}
+
+next_mr:
+	current_seq = READ_ONCE(odp->notifiers_seq);
+	/*
+	 * Ensure the sequence number is valid for some time before we call
+	 * gup.
+	 */
+	smp_rmb();
+
+	size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
 	start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
 
 	if (mr->umem->writable)
 		access_mask |= ODP_WRITE_ALLOWED_BIT;
-	npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
-					   access_mask, current_seq);
-	if (npages < 0) {
-		ret = npages;
+
+	ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+					access_mask, current_seq);
+
+	if (ret < 0)
 		goto srcu_unlock;
-	}
 
-	if (npages > 0) {
-		mutex_lock(&mr->umem->odp_data->umem_mutex);
+	if (ret > 0) {
+		int np = ret;
+
+		mutex_lock(&odp->umem_mutex);
 		if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
 			/*
 			 * No need to check whether the MTTs really belong to
 			 * this MR, since ib_umem_odp_map_dma_pages already
 			 * checks this.
 			 */
-			ret = mlx5_ib_update_xlt(mr, start_idx, npages,
+			ret = mlx5_ib_update_xlt(mr, start_idx, np,
 						 PAGE_SHIFT,
 						 MLX5_IB_UPD_XLT_ATOMIC);
 		} else {
 			ret = -EAGAIN;
 		}
-		mutex_unlock(&mr->umem->odp_data->umem_mutex);
+		mutex_unlock(&odp->umem_mutex);
 		if (ret < 0) {
 			if (ret != -EAGAIN)
-				pr_err("Failed to update mkey page tables\n");
+				mlx5_ib_err(dev, "Failed to update mkey page tables\n");
 			goto srcu_unlock;
 		}
 
 		if (bytes_mapped) {
-			u32 new_mappings = npages * PAGE_SIZE -
+			u32 new_mappings = np * PAGE_SIZE -
 				(io_virt - round_down(io_virt, PAGE_SIZE));
-			*bytes_mapped += min_t(u32, new_mappings, bcnt);
+			*bytes_mapped += min_t(u32, new_mappings, size);
 		}
+
+		npages += np;
+	}
+
+	bcnt -= size;
+	if (unlikely(bcnt)) {
+		struct ib_umem_odp *next;
+
+		io_virt += size;
+		next = odp_next(odp);
+		if (unlikely(!next || next->umem->address != io_virt)) {
+			mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
+				    io_virt, next);
+			ret = -EAGAIN;
+			goto srcu_unlock_no_wait;
+		}
+		odp = next;
+		mr = odp->private;
+		goto next_mr;
 	}
 
 srcu_unlock:
 	if (ret == -EAGAIN) {
-		if (!mr->umem->odp_data->dying) {
-			struct ib_umem_odp *odp_data = mr->umem->odp_data;
+		if (implicit || !odp->dying) {
 			unsigned long timeout =
 				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
 
 			if (!wait_for_completion_timeout(
-					&odp_data->notifier_completion,
+					&odp->notifier_completion,
 					timeout)) {
-				pr_warn("timeout waiting for mmu notifier completion\n");
+				mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
+					     current_seq, odp->notifiers_seq);
 			}
 		} else {
 			/* The MR is being killed, kill the QP as well. */
 			ret = -EFAULT;
 		}
 	}
-	srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
+
+srcu_unlock_no_wait:
+	srcu_read_unlock(&dev->mr_srcu, srcu_key);
 	*bytes_committed = 0;
 	return ret ? ret : npages;
 }
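A data segment can now span several leaves; the next_mr loop clamps each pass to ib_umem_end() of the current leaf and then advances to the next one. A small userspace model of just the clamping arithmetic (assumed values; not driver code):

#include <stdint.h>
#include <stdio.h>

#define LEAF_SIZE (1ULL << 30)	/* MLX5_IMR_MTT_SIZE with 4K pages */

static void walk(uint64_t io_virt, uint64_t bcnt)
{
	while (bcnt) {
		/* end of the current 1 GB leaf, then clamp the request */
		uint64_t leaf_end = (io_virt | (LEAF_SIZE - 1)) + 1;
		uint64_t size = bcnt < leaf_end - io_virt ?
				bcnt : leaf_end - io_virt;

		printf("fault leaf %#llx for %#llx bytes\n",
		       (unsigned long long)(io_virt & ~(LEAF_SIZE - 1)),
		       (unsigned long long)size);
		io_virt += size;
		bcnt -= size;
	}
}

int main(void)
{
	walk(0x3ffff000ULL, 0x3000);	/* crosses a 1 GB boundary */
	return 0;
}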
@@ -618,8 +998,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 		goto resolve_page_fault;
 	} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
 		if (ret != -ENOENT)
-			mlx5_ib_err(dev, "Error getting user pages for page fault. Error: %d\n",
-				    ret);
+			mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. type: 0x%x\n",
+				    ret, pfault->wqe.wq_num, pfault->type);
 		goto resolve_page_fault;
 	}
 
@@ -627,7 +1007,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 resolve_page_fault:
 	mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
 	mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
-		    pfault->token, resume_with_error,
+		    pfault->wqe.wq_num, resume_with_error,
 		    pfault->type);
 	free_page((unsigned long)buffer);
 }
@@ -700,10 +1080,9 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
 		ret = pagefault_single_data_segment(dev, rkey, address,
 						    prefetch_len,
 						    &bytes_committed, NULL);
-		if (ret < 0) {
+		if (ret < 0 && ret != -EAGAIN) {
 			mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
-				     ret, pfault->token, address,
-				     prefetch_len);
+				     ret, pfault->token, address, prefetch_len);
 		}
 	}
 }
@@ -728,19 +1107,61 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
 	}
 }
 
-int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
+{
+	if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+		return;
+
+	switch (ent->order - 2) {
+	case MLX5_IMR_MTT_CACHE_ENTRY:
+		ent->page = PAGE_SHIFT;
+		ent->xlt = MLX5_IMR_MTT_ENTRIES *
+			   sizeof(struct mlx5_mtt) /
+			   MLX5_IB_UMR_OCTOWORD;
+		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+		ent->limit = 0;
+		break;
+
+	case MLX5_IMR_KSM_CACHE_ENTRY:
+		ent->page = MLX5_KSM_PAGE_SHIFT;
+		ent->xlt = mlx5_imr_ksm_entries *
+			   sizeof(struct mlx5_klm) /
+			   MLX5_IB_UMR_OCTOWORD;
+		ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
+		ent->limit = 0;
+		break;
+	}
+}
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
 {
 	int ret;
 
-	ret = init_srcu_struct(&ibdev->mr_srcu);
+	ret = init_srcu_struct(&dev->mr_srcu);
 	if (ret)
 		return ret;
 
+	if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
+		ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
+		if (ret) {
+			mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
+{
+	cleanup_srcu_struct(&dev->mr_srcu);
+}
+
+int mlx5_ib_odp_init(void)
 {
-	cleanup_srcu_struct(&ibdev->mr_srcu);
+	mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
+				       MLX5_IMR_MTT_BITS);
+
+	return 0;
 }
 
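A worked sizing example for the new mlx5_ib_odp_init() (under assumed x86-64 constants, PAGE_SHIFT of 12 and TASK_SIZE just under 2^47; other architectures shift the numbers): get_order(TASK_SIZE) is 47 - 12 = 35, so mlx5_imr_ksm_entries = BIT_ULL(35 - 18) = 131072. That is 128K top-level KSM slots of 1 GB each — enough for one implicit MR to cover the entire 128 TB user address space, which is why mlx5_ib_odp_init() must run from module init in main.c before any device sizes its MR cache.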
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2534b8a0fd7b..886ff2b00500 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1053,6 +1053,8 @@ enum {
 
 enum {
 	MAX_UMR_CACHE_ENTRY = 20,
+	MLX5_IMR_MTT_CACHE_ENTRY,
+	MLX5_IMR_KSM_CACHE_ENTRY,
 	MAX_MR_CACHE_ENTRIES
 };
 
