author     Haggai Eran <haggaie@mellanox.com>   2014-12-11 10:04:22 -0500
committer  Roland Dreier <roland@purestorage.com>   2014-12-15 21:19:02 -0500
commit     832a6b06ab5e13c228fc27e333ad360aa03ace6f (patch)
tree       f47db1bab24137255c54f055b4bddbddff1fb7e4
parent     cc149f751b75211df8c41fcd60bd0006e6143ed6 (diff)
IB/mlx5: Add mlx5_ib_update_mtt to update page tables after creation
The new function allows updating the page tables of a memory region after it was created. This can be used to handle page faults and page invalidations. Since mlx5_ib_update_mtt will need to work from within page invalidation, it must not block on memory allocation: it allocates its temporary MTT buffer atomically (GFP_ATOMIC) and falls back to a pre-allocated, mutex-protected emergency buffer when that allocation fails. In order to reuse code from mlx5_ib_populate_pas, the patch splits that function and adds the needed parameters.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
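For illustration, the non-blocking allocation strategy described above can be condensed as in the sketch below. This is not code from the patch: the helper name alloc_mtt_buf() and its out-parameters are hypothetical, while get_zeroed_page(GFP_ATOMIC), the static emergency buffer, and its mutex mirror what the mr.c hunk in this diff adds inline in mlx5_ib_update_mtt().

/* Sketch only: a hypothetical helper condensing the fallback logic that
 * mlx5_ib_update_mtt() performs inline in drivers/infiniband/hw/mlx5/mr.c. */
static __be64 mtt_emergency_buf[MLX5_UMR_MTT_MIN_CHUNK_SIZE / sizeof(__be64)]
	__aligned(MLX5_UMR_ALIGN);
static DEFINE_MUTEX(mtt_emergency_buf_mutex);

static __be64 *alloc_mtt_buf(int *size, int *use_emergency_buf)
{
	/* GFP_ATOMIC avoids recursing into page-reclaim code when the
	 * caller is running from an invalidation. */
	__be64 *pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);

	if (pas) {
		*use_emergency_buf = 0;
		return pas;		/* released later with free_page() */
	}

	/* Fall back to the pre-allocated buffer; the mutex serializes its
	 * users, so updates still make progress, only in smaller chunks. */
	mutex_lock(&mtt_emergency_buf_mutex);
	*size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
	*use_emergency_buf = 1;
	memset(mtt_emergency_buf, 0, *size);
	return mtt_emergency_buf;
}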
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c      |  19
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  |   5
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c       | 132
-rw-r--r--  include/linux/mlx5/device.h           |   1
4 files changed, 149 insertions(+), 8 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 5f7b30147180..b56e4c5593ee 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -140,12 +140,16 @@ static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
  * dev - mlx5_ib device
  * umem - umem to use to fill the pages
  * page_shift - determines the page size used in the resulting array
+ * offset - offset into the umem to start from,
+ *          only implemented for ODP umems
+ * num_pages - total number of pages to fill
  * pas - bus addresses array to fill
  * access_flags - access flags to set on all present pages.
 		  use enum mlx5_ib_mtt_access_flags for this.
  */
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			  int page_shift, __be64 *pas, int access_flags)
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			    int page_shift, size_t offset, size_t num_pages,
+			    __be64 *pas, int access_flags)
 {
 	unsigned long umem_page_shift = ilog2(umem->page_size);
 	int shift = page_shift - umem_page_shift;
@@ -160,13 +164,11 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	const bool odp = umem->odp_data != NULL;
 
 	if (odp) {
-		int num_pages = ib_umem_num_pages(umem);
-
 		WARN_ON(shift != 0);
 		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
 
 		for (i = 0; i < num_pages; ++i) {
-			dma_addr_t pa = umem->odp_data->dma_list[i];
+			dma_addr_t pa = umem->odp_data->dma_list[offset + i];
 
 			pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
 		}
@@ -194,6 +196,13 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	}
 }
 
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			  int page_shift, __be64 *pas, int access_flags)
+{
+	return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
+				      ib_umem_num_pages(umem), pas,
+				      access_flags);
+}
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 {
 	u64 page_size;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 83c1690e9dd0..6856e27bfb6a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -527,6 +527,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
+		       int npages, int zap);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
@@ -558,6 +560,9 @@ int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 			int *ncont, int *order);
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			    int page_shift, size_t offset, size_t num_pages,
+			    __be64 *pas, int access_flags);
 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 			  int page_shift, __be64 *pas, int access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 49fc3ca735a4..38b06267798e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -44,9 +44,13 @@ enum {
 	MAX_PENDING_REG_MR = 8,
 };
 
-enum {
-	MLX5_UMR_ALIGN = 2048
-};
+#define MLX5_UMR_ALIGN 2048
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static __be64 mlx5_ib_update_mtt_emergency_buffer[
+		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
+	__aligned(MLX5_UMR_ALIGN);
+static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
+#endif
 
 static int order2idx(struct mlx5_ib_dev *dev, int order)
 {
@@ -822,6 +826,128 @@ free_mr:
 	return mr;
 }
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
+		       int zap)
+{
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct device *ddev = dev->ib_dev.dma_device;
+	struct umr_common *umrc = &dev->umrc;
+	struct mlx5_ib_umr_context umr_context;
+	struct ib_umem *umem = mr->umem;
+	int size;
+	__be64 *pas;
+	dma_addr_t dma;
+	struct ib_send_wr wr, *bad;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+	struct ib_sge sg;
+	int err = 0;
+	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
+	const int page_index_mask = page_index_alignment - 1;
+	size_t pages_mapped = 0;
+	size_t pages_to_map = 0;
+	size_t pages_iter = 0;
+	int use_emergency_buf = 0;
+
+	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
+	 * so we need to align the offset and length accordingly */
+	if (start_page_index & page_index_mask) {
+		npages += start_page_index & page_index_mask;
+		start_page_index &= ~page_index_mask;
+	}
+
+	pages_to_map = ALIGN(npages, page_index_alignment);
+
+	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
+		return -EINVAL;
+
+	size = sizeof(u64) * pages_to_map;
+	size = min_t(int, PAGE_SIZE, size);
+	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
+	 * code, when we are called from an invalidation. The pas buffer must
+	 * be 2k-aligned for Connect-IB. */
+	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
+	if (!pas) {
+		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
+		pas = mlx5_ib_update_mtt_emergency_buffer;
+		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
+		use_emergency_buf = 1;
+		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+		memset(pas, 0, size);
+	}
+	pages_iter = size / sizeof(u64);
+	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, dma)) {
+		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+		err = -ENOMEM;
+		goto free_pas;
+	}
+
+	for (pages_mapped = 0;
+	     pages_mapped < pages_to_map && !err;
+	     pages_mapped += pages_iter, start_page_index += pages_iter) {
+		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+
+		npages = min_t(size_t,
+			       pages_iter,
+			       ib_umem_num_pages(umem) - start_page_index);
+
+		if (!zap) {
+			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
+					       start_page_index, npages, pas,
+					       MLX5_IB_MTT_PRESENT);
+			/* Clear padding after the pages brought from the
+			 * umem. */
+			memset(pas + npages, 0, size - npages * sizeof(u64));
+		}
+
+		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+
+		memset(&wr, 0, sizeof(wr));
+		wr.wr_id = (u64)(unsigned long)&umr_context;
+
+		sg.addr = dma;
+		sg.length = ALIGN(npages * sizeof(u64),
+				  MLX5_UMR_MTT_ALIGNMENT);
+		sg.lkey = dev->umrc.mr->lkey;
+
+		wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+				MLX5_IB_SEND_UMR_UPDATE_MTT;
+		wr.sg_list = &sg;
+		wr.num_sge = 1;
+		wr.opcode = MLX5_IB_WR_UMR;
+		umrwr->npages = sg.length / sizeof(u64);
+		umrwr->page_shift = PAGE_SHIFT;
+		umrwr->mkey = mr->mmr.key;
+		umrwr->target.offset = start_page_index;
+
+		mlx5_ib_init_umr_context(&umr_context);
+		down(&umrc->sem);
+		err = ib_post_send(umrc->qp, &wr, &bad);
+		if (err) {
+			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
+		} else {
+			wait_for_completion(&umr_context.done);
+			if (umr_context.status != IB_WC_SUCCESS) {
+				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
+					    umr_context.status);
+				err = -EFAULT;
+			}
+		}
+		up(&umrc->sem);
+	}
+	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+
+free_pas:
+	if (!use_emergency_buf)
+		free_page((unsigned long)pas);
+	else
+		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+
+	return err;
+}
+#endif
+
 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 				     u64 length, struct ib_umem *umem,
 				     int npages, int page_shift,
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 64512a7354cb..4e5bd813bb9a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -200,6 +200,7 @@ enum {
 
 #define MLX5_UMR_MTT_ALIGNMENT 0x40
 #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1)
+#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
 
 enum mlx5_event {
 	MLX5_EVENT_TYPE_COMP = 0x0,