| author | Haggai Eran <haggaie@mellanox.com> | 2014-12-11 10:04:22 -0500 |
|---|---|---|
| committer | Roland Dreier <roland@purestorage.com> | 2014-12-15 21:19:02 -0500 |
| commit | 832a6b06ab5e13c228fc27e333ad360aa03ace6f (patch) | |
| tree | f47db1bab24137255c54f055b4bddbddff1fb7e4 | |
| parent | cc149f751b75211df8c41fcd60bd0006e6143ed6 (diff) | |
IB/mlx5: Add mlx5_ib_update_mtt to update page tables after creation
The new function allows updating the page tables of a memory region after it
has been created. This can be used to handle page faults and page
invalidations.

Because mlx5_ib_update_mtt will need to run from within page-invalidation
context, it must not block on memory allocation. It therefore allocates its
scratch buffer atomically (GFP_ATOMIC) and falls back to a statically
allocated emergency buffer when that atomic allocation fails (a minimal
sketch of this pattern appears after the diffstat below).

In order to reuse code from mlx5_ib_populate_pas, the patch splits that
function and adds the needed offset and num_pages parameters.
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | drivers/infiniband/hw/mlx5/mem.c | 19 |
| -rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 |
| -rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 132 |
| -rw-r--r-- | include/linux/mlx5/device.h | 1 |

4 files changed, 149 insertions, 8 deletions
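The fallback the commit message describes is easiest to see in isolation. Below is a minimal sketch of the pattern only (try an atomic allocation first, fall back to a statically allocated, mutex-protected emergency buffer when it fails), not the driver code itself; EMERGENCY_BUF_SIZE, get_scratch_buf() and put_scratch_buf() are invented names for illustration.

```c
/* Sketch of "atomic allocation with a static emergency fallback".
 * Illustrative names only; not part of the patch. */
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/types.h>

#define EMERGENCY_BUF_SIZE 64	/* one minimal chunk, for illustration */

static u8 emergency_buf[EMERGENCY_BUF_SIZE];
static DEFINE_MUTEX(emergency_buf_mutex);

/* Return a zeroed scratch buffer without blocking in the allocator;
 * *used_emergency tells the caller how to release it later. */
static void *get_scratch_buf(size_t *size, bool *used_emergency)
{
	void *buf = (void *)get_zeroed_page(GFP_ATOMIC);

	if (buf) {
		*size = PAGE_SIZE;
		*used_emergency = false;
		return buf;
	}

	/* The atomic allocation failed, e.g. under the memory pressure that
	 * triggered the invalidation: serialize on the shared static buffer. */
	mutex_lock(&emergency_buf_mutex);
	memset(emergency_buf, 0, sizeof(emergency_buf));
	*size = sizeof(emergency_buf);
	*used_emergency = true;
	return emergency_buf;
}

static void put_scratch_buf(void *buf, bool used_emergency)
{
	if (used_emergency)
		mutex_unlock(&emergency_buf_mutex);
	else
		free_page((unsigned long)buf);
}
```

In the patch itself the fast path is a get_zeroed_page(GFP_ATOMIC) scratch page and the fallback is mlx5_ib_update_mtt_emergency_buffer, guarded by mlx5_ib_update_mtt_emergency_buffer_mutex (see the mr.c hunks below); the much smaller emergency buffer is what makes that path the "slower chunked mechanism" mentioned in the warning message.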
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 5f7b30147180..b56e4c5593ee 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -140,12 +140,16 @@ static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
  * dev - mlx5_ib device
  * umem - umem to use to fill the pages
  * page_shift - determines the page size used in the resulting array
+ * offset - offset into the umem to start from,
+ *          only implemented for ODP umems
+ * num_pages - total number of pages to fill
  * pas - bus addresses array to fill
  * access_flags - access flags to set on all present pages.
                   use enum mlx5_ib_mtt_access_flags for this.
  */
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-                          int page_shift, __be64 *pas, int access_flags)
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+                            int page_shift, size_t offset, size_t num_pages,
+                            __be64 *pas, int access_flags)
 {
         unsigned long umem_page_shift = ilog2(umem->page_size);
         int shift = page_shift - umem_page_shift;
@@ -160,13 +164,11 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
         const bool odp = umem->odp_data != NULL;
 
         if (odp) {
-                int num_pages = ib_umem_num_pages(umem);
-
                 WARN_ON(shift != 0);
                 WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
 
                 for (i = 0; i < num_pages; ++i) {
-                        dma_addr_t pa = umem->odp_data->dma_list[i];
+                        dma_addr_t pa = umem->odp_data->dma_list[offset + i];
 
                         pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
                 }
@@ -194,6 +196,13 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
         }
 }
 
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+                          int page_shift, __be64 *pas, int access_flags)
+{
+        return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
+                                      ib_umem_num_pages(umem), pas,
+                                      access_flags);
+}
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 {
         u64 page_size;
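To make the split concrete, here is a hedged usage sketch rather than code from the patch: existing callers keep the old mlx5_ib_populate_pas() entry point, while ODP code can now fill an arbitrary sub-range through __mlx5_ib_populate_pas(). The wrapper function fill_mtt_example() and the page range (8 pages starting at index 512) are made up for illustration.

```c
/* Illustrative caller only; assumes the driver's "mlx5_ib.h" is in scope
 * and that pas has room for the requested entries. */
static void fill_mtt_example(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			     __be64 *pas)
{
	/* Whole umem: unchanged interface for existing callers. */
	mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT, pas, MLX5_IB_MTT_PRESENT);

	/* Sub-range of an ODP umem: 8 pages starting at page index 512.
	 * This is the form mlx5_ib_update_mtt() needs when it refills
	 * MTTs chunk by chunk. */
	__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT, 512, 8, pas,
			       MLX5_IB_MTT_PRESENT);
}
```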
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 83c1690e9dd0..6856e27bfb6a 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -527,6 +527,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                   u64 virt_addr, int access_flags,
                                   struct ib_udata *udata);
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
+                       int npages, int zap);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
@@ -558,6 +560,9 @@ int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
                         int *ncont, int *order);
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+                            int page_shift, size_t offset, size_t num_pages,
+                            __be64 *pas, int access_flags);
 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
                           int page_shift, __be64 *pas, int access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
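The zap parameter in the new mlx5_ib_update_mtt() prototype selects between writing present MTT entries (zap == 0) and clearing them (zap != 0). The sketch below shows how the ODP page-fault and invalidation handlers added later in this series might drive it; handle_odp_fault(), handle_odp_invalidate() and their arguments are hypothetical — only mlx5_ib_update_mtt() comes from this patch.

```c
/* Hypothetical callers, for illustration only. */
static int handle_odp_fault(struct mlx5_ib_mr *mr, u64 first_page, int npages)
{
	/* zap == 0: map the faulted range by writing present MTT entries. */
	return mlx5_ib_update_mtt(mr, first_page, npages, 0);
}

static int handle_odp_invalidate(struct mlx5_ib_mr *mr, u64 first_page,
				 int npages)
{
	/* zap != 0: clear the entries so further HCA access faults again. */
	return mlx5_ib_update_mtt(mr, first_page, npages, 1);
}
```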
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 49fc3ca735a4..38b06267798e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -44,9 +44,13 @@ enum {
         MAX_PENDING_REG_MR = 8,
 };
 
-enum {
-        MLX5_UMR_ALIGN = 2048
-};
+#define MLX5_UMR_ALIGN 2048
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static __be64 mlx5_ib_update_mtt_emergency_buffer[
+                MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
+        __aligned(MLX5_UMR_ALIGN);
+static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
+#endif
 
 static int order2idx(struct mlx5_ib_dev *dev, int order)
 {
@@ -822,6 +826,128 @@ free_mr:
         return mr;
 }
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
+                       int zap)
+{
+        struct mlx5_ib_dev *dev = mr->dev;
+        struct device *ddev = dev->ib_dev.dma_device;
+        struct umr_common *umrc = &dev->umrc;
+        struct mlx5_ib_umr_context umr_context;
+        struct ib_umem *umem = mr->umem;
+        int size;
+        __be64 *pas;
+        dma_addr_t dma;
+        struct ib_send_wr wr, *bad;
+        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+        struct ib_sge sg;
+        int err = 0;
+        const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
+        const int page_index_mask = page_index_alignment - 1;
+        size_t pages_mapped = 0;
+        size_t pages_to_map = 0;
+        size_t pages_iter = 0;
+        int use_emergency_buf = 0;
+
+        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
+         * so we need to align the offset and length accordingly */
+        if (start_page_index & page_index_mask) {
+                npages += start_page_index & page_index_mask;
+                start_page_index &= ~page_index_mask;
+        }
+
+        pages_to_map = ALIGN(npages, page_index_alignment);
+
+        if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
+                return -EINVAL;
+
+        size = sizeof(u64) * pages_to_map;
+        size = min_t(int, PAGE_SIZE, size);
+        /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
+         * code, when we are called from an invalidation. The pas buffer must
+         * be 2k-aligned for Connect-IB. */
+        pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
+        if (!pas) {
+                mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
+                pas = mlx5_ib_update_mtt_emergency_buffer;
+                size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
+                use_emergency_buf = 1;
+                mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+                memset(pas, 0, size);
+        }
+        pages_iter = size / sizeof(u64);
+        dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+        if (dma_mapping_error(ddev, dma)) {
+                mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+                err = -ENOMEM;
+                goto free_pas;
+        }
+
+        for (pages_mapped = 0;
+             pages_mapped < pages_to_map && !err;
+             pages_mapped += pages_iter, start_page_index += pages_iter) {
+                dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+
+                npages = min_t(size_t,
+                               pages_iter,
+                               ib_umem_num_pages(umem) - start_page_index);
+
+                if (!zap) {
+                        __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
+                                               start_page_index, npages, pas,
+                                               MLX5_IB_MTT_PRESENT);
+                        /* Clear padding after the pages brought from the
+                         * umem. */
+                        memset(pas + npages, 0, size - npages * sizeof(u64));
+                }
+
+                dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+
+                memset(&wr, 0, sizeof(wr));
+                wr.wr_id = (u64)(unsigned long)&umr_context;
+
+                sg.addr = dma;
+                sg.length = ALIGN(npages * sizeof(u64),
+                                  MLX5_UMR_MTT_ALIGNMENT);
+                sg.lkey = dev->umrc.mr->lkey;
+
+                wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+                                MLX5_IB_SEND_UMR_UPDATE_MTT;
+                wr.sg_list = &sg;
+                wr.num_sge = 1;
+                wr.opcode = MLX5_IB_WR_UMR;
+                umrwr->npages = sg.length / sizeof(u64);
+                umrwr->page_shift = PAGE_SHIFT;
+                umrwr->mkey = mr->mmr.key;
+                umrwr->target.offset = start_page_index;
+
+                mlx5_ib_init_umr_context(&umr_context);
+                down(&umrc->sem);
+                err = ib_post_send(umrc->qp, &wr, &bad);
+                if (err) {
+                        mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
+                } else {
+                        wait_for_completion(&umr_context.done);
+                        if (umr_context.status != IB_WC_SUCCESS) {
+                                mlx5_ib_err(dev, "UMR completion failed, code %d\n",
+                                            umr_context.status);
+                                err = -EFAULT;
+                        }
+                }
+                up(&umrc->sem);
+        }
+        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+
+free_pas:
+        if (!use_emergency_buf)
+                free_page((unsigned long)pas);
+        else
+                mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+
+        return err;
+}
+#endif
+
 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                                      u64 length, struct ib_umem *umem,
                                      int npages, int page_shift,
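A quick back-of-the-envelope on the chunking loop in mlx5_ib_update_mtt() above: pages_iter is bounded by the scratch buffer, so the emergency path posts far more (and far smaller) UMR work requests. The standalone userspace calculation below assumes a 4 KiB PAGE_SIZE and the 0x40-byte MLX5_UMR_MTT_MIN_CHUNK_SIZE from the device.h hunk that follows.

```c
/* Standalone arithmetic check, not driver code. Assumes PAGE_SIZE == 4096. */
#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096;  /* assumed PAGE_SIZE */
	const unsigned long mtt_entry = 8;     /* sizeof(__be64) */
	const unsigned long min_chunk = 0x40;  /* MLX5_UMR_MTT_MIN_CHUNK_SIZE */

	/* Normal path: one zeroed page of scratch space per UMR post. */
	printf("pages_iter, page buffer:      %lu\n", page_size / mtt_entry);

	/* Emergency path: one minimal chunk per post, hence the
	 * "slower chunked mechanism" warning. */
	printf("pages_iter, emergency buffer: %lu\n", min_chunk / mtt_entry);
	return 0;
}
```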
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 64512a7354cb..4e5bd813bb9a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -200,6 +200,7 @@ enum {
 
 #define MLX5_UMR_MTT_ALIGNMENT 0x40
 #define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
+#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
 
 enum mlx5_event {
         MLX5_EVENT_TYPE_COMP            = 0x0,