-rw-r--r--   drivers/infiniband/hw/mlx5/mem.c      | 58
-rw-r--r--   drivers/infiniband/hw/mlx5/mlx5_ib.h  | 12
-rw-r--r--   drivers/infiniband/hw/mlx5/mr.c       | 33
-rw-r--r--   include/linux/mlx5/device.h           |  3
4 files changed, 88 insertions, 18 deletions
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index dae07eae9507..5f7b30147180 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -32,6 +32,7 @@
 
 #include <linux/module.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
 #include "mlx5_ib.h"
 
 /* @umem: umem object to scan
@@ -57,6 +58,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 	int entry;
 	unsigned long page_shift = ilog2(umem->page_size);
 
+	/* With ODP we must always match OS page size. */
+	if (umem->odp_data) {
+		*count = ib_umem_page_count(umem);
+		*shift = PAGE_SHIFT;
+		*ncont = *count;
+		if (order)
+			*order = ilog2(roundup_pow_of_two(*count));
+
+		return;
+	}
+
 	addr = addr >> page_shift;
 	tmp = (unsigned long)addr;
 	m = find_first_bit(&tmp, sizeof(tmp));
@@ -108,8 +120,32 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 		*count = i;
 }
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
+{
+	u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
+
+	if (umem_dma & ODP_READ_ALLOWED_BIT)
+		mtt_entry |= MLX5_IB_MTT_READ;
+	if (umem_dma & ODP_WRITE_ALLOWED_BIT)
+		mtt_entry |= MLX5_IB_MTT_WRITE;
+
+	return mtt_entry;
+}
+#endif
+
+/*
+ * Populate the given array with bus addresses from the umem.
+ *
+ * dev - mlx5_ib device
+ * umem - umem to use to fill the pages
+ * page_shift - determines the page size used in the resulting array
+ * pas - bus addresses array to fill
+ * access_flags - access flags to set on all present pages.
+		  use enum mlx5_ib_mtt_access_flags for this.
+ */
 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			  int page_shift, __be64 *pas, int umr)
+			  int page_shift, __be64 *pas, int access_flags)
 {
 	unsigned long umem_page_shift = ilog2(umem->page_size);
 	int shift = page_shift - umem_page_shift;
@@ -120,6 +156,23 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	int len;
 	struct scatterlist *sg;
 	int entry;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	const bool odp = umem->odp_data != NULL;
+
+	if (odp) {
+		int num_pages = ib_umem_num_pages(umem);
+
+		WARN_ON(shift != 0);
+		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
+
+		for (i = 0; i < num_pages; ++i) {
+			dma_addr_t pa = umem->odp_data->dma_list[i];
+
+			pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
+		}
+		return;
+	}
+#endif
 
 	i = 0;
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
@@ -128,8 +181,7 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 		for (k = 0; k < len; k++) {
 			if (!(i & mask)) {
 				cur = base + (k << umem_page_shift);
-				if (umr)
-					cur |= 3;
+				cur |= access_flags;
 
 				pas[i >> shift] = cpu_to_be64(cur);
 				mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
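Both paths in mlx5_ib_populate_pas() rely on the low bits of a page-aligned bus address being free for per-page flags: the regular loop now ORs access_flags into cur instead of the hard-coded 3, and the ODP path translates the read/write bits that ib_umem_odp keeps in each dma_list entry (ODP_READ_ALLOWED_BIT, ODP_WRITE_ALLOWED_BIT, masked by ODP_DMA_ADDR_MASK) into MTT flags. A minimal standalone sketch of that translation, using illustrative mask values rather than the kernel's definitions:

#include <stdint.h>
#include <stdio.h>

/* Illustrative values only; the kernel's ODP_* masks live in
 * rdma/ib_umem_odp.h and MLX5_IB_MTT_* in mlx5_ib.h. */
#define DEMO_READ_BIT   (1ull << 0)	/* stands in for ODP_READ_ALLOWED_BIT  */
#define DEMO_WRITE_BIT  (1ull << 1)	/* stands in for ODP_WRITE_ALLOWED_BIT */
#define DEMO_ADDR_MASK  (~0x3ull)	/* stands in for ODP_DMA_ADDR_MASK     */
#define DEMO_MTT_READ   (1ull << 0)	/* mirrors MLX5_IB_MTT_READ            */
#define DEMO_MTT_WRITE  (1ull << 1)	/* mirrors MLX5_IB_MTT_WRITE           */

/* Same shape as umem_dma_to_mtt(): strip the bookkeeping bits off the
 * page-aligned address and re-encode them as MTT access flags. */
static uint64_t demo_dma_to_mtt(uint64_t dma)
{
	uint64_t mtt = dma & DEMO_ADDR_MASK;

	if (dma & DEMO_READ_BIT)
		mtt |= DEMO_MTT_READ;
	if (dma & DEMO_WRITE_BIT)
		mtt |= DEMO_MTT_WRITE;
	return mtt;
}

int main(void)
{
	/* A 4 KiB-aligned page mapped read-only: low bits are free for flags. */
	uint64_t dma = 0xabc000ull | DEMO_READ_BIT;

	printf("mtt entry: 0x%llx\n", (unsigned long long)demo_dma_to_mtt(dma));
	return 0;
}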
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index cc50fce8cca7..83c1690e9dd0 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -268,6 +268,13 @@ struct mlx5_ib_xrcd {
 	u32			xrcdn;
 };
 
+enum mlx5_ib_mtt_access_flags {
+	MLX5_IB_MTT_READ  = (1 << 0),
+	MLX5_IB_MTT_WRITE = (1 << 1),
+};
+
+#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+
 struct mlx5_ib_mr {
 	struct ib_mr		ibmr;
 	struct mlx5_core_mr	mmr;
@@ -552,7 +559,7 @@ void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 			int *ncont, int *order);
 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			  int page_shift, __be64 *pas, int umr);
+			  int page_shift, __be64 *pas, int access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
@@ -588,4 +595,7 @@ static inline u8 convert_access(int acc)
 	       MLX5_PERM_LOCAL_READ;
 }
 
+#define MLX5_MAX_UMR_SHIFT 16
+#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+
 #endif /* MLX5_IB_H */
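The two new macros cap how many pages a single UMR work request may register; mr.c below switches use_umr() from the literal 17 to MLX5_MAX_UMR_SHIFT, so the accepted order drops from 17 to 16 and the limit becomes 2^16 pages. A quick sketch of what that means in bytes, assuming 4 KiB OS pages (the page size is an assumption for illustration):

#include <stdio.h>

#define MLX5_MAX_UMR_SHIFT 16			/* from mlx5_ib.h above  */
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
#define DEMO_PAGE_SHIFT    12			/* assume 4 KiB pages    */

int main(void)
{
	unsigned long long bytes =
		(unsigned long long)MLX5_MAX_UMR_PAGES << DEMO_PAGE_SHIFT;

	/* 2^16 pages of 4 KiB each: the largest region use_umr() accepts. */
	printf("max UMR region: %d pages = %llu MiB\n",
	       MLX5_MAX_UMR_PAGES, bytes >> 20);
	return 0;
}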
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 2de4f4448f8a..49fc3ca735a4 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -48,13 +48,6 @@ enum {
 	MLX5_UMR_ALIGN	= 2048
 };
 
-static __be64 *mr_align(__be64 *ptr, int align)
-{
-	unsigned long mask = align - 1;
-
-	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
-}
-
 static int order2idx(struct mlx5_ib_dev *dev, int order)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -669,7 +662,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
 
 static int use_umr(int order)
 {
-	return order <= 17;
+	return order <= MLX5_MAX_UMR_SHIFT;
 }
 
 static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
@@ -747,8 +740,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	struct ib_send_wr wr, *bad;
 	struct mlx5_ib_mr *mr;
 	struct ib_sge sg;
-	int size = sizeof(u64) * npages;
+	int size;
 	__be64 *mr_pas;
+	__be64 *pas;
 	dma_addr_t dma;
 	int err = 0;
 	int i;
@@ -768,17 +762,22 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	if (!mr)
 		return ERR_PTR(-EAGAIN);
 
+	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+	 * To avoid copying garbage after the pas array, we allocate
+	 * a little more. */
+	size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
 	mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
 	if (!mr_pas) {
 		err = -ENOMEM;
 		goto free_mr;
 	}
 
-	mlx5_ib_populate_pas(dev, umem, page_shift,
-			     mr_align(mr_pas, MLX5_UMR_ALIGN), 1);
+	pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
+	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
+	/* Clear padding after the actual pages. */
+	memset(pas + npages, 0, size - npages * sizeof(u64));
 
-	dma = dma_map_single(ddev, mr_align(mr_pas, MLX5_UMR_ALIGN), size,
-			     DMA_TO_DEVICE);
+	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
 	if (dma_mapping_error(ddev, dma)) {
 		err = -ENOMEM;
 		goto free_pas;
@@ -833,6 +832,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 	struct mlx5_ib_mr *mr;
 	int inlen;
 	int err;
+	bool pg_cap = !!(dev->mdev->caps.gen.flags &
+			 MLX5_DEV_CAP_FLAG_ON_DMND_PG);
 
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
@@ -844,8 +845,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 		err = -ENOMEM;
 		goto err_1;
 	}
-	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
+	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
+			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
 
+	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
+	 * in the page list submitted with the command. */
+	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
 	in->seg.flags = convert_access(access_flags) |
 		MLX5_ACCESS_MODE_MTT;
 	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
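reg_umr() above now over-allocates the pas buffer twice over: the byte count is rounded up to MLX5_UMR_MTT_ALIGNMENT (0x40) so the hardware copy, which works in 0x40-byte units, never transfers uninitialized bytes, and the kmalloc adds MLX5_UMR_ALIGN - 1 slack so the start can be bumped to a 2048-byte boundary with PTR_ALIGN before being DMA-mapped; the tail beyond the real entries is then zeroed. A userspace sketch of the same align-and-pad arithmetic, with ALIGN()/PTR_ALIGN() open-coded since those are kernel macros:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_MTT_ALIGNMENT 0x40		/* mirrors MLX5_UMR_MTT_ALIGNMENT */
#define DEMO_UMR_ALIGN     2048		/* mirrors MLX5_UMR_ALIGN         */

/* Open-coded equivalents of the kernel's ALIGN() and PTR_ALIGN(). */
#define DEMO_ALIGN(x, a)     (((uintptr_t)(x) + (a) - 1) & ~((uintptr_t)(a) - 1))
#define DEMO_PTR_ALIGN(p, a) ((void *)DEMO_ALIGN((p), (a)))

int main(void)
{
	int npages = 100;			/* arbitrary example value */
	size_t size = DEMO_ALIGN(sizeof(uint64_t) * npages,
				 DEMO_MTT_ALIGNMENT);
	void *raw = malloc(size + DEMO_UMR_ALIGN - 1);
	uint64_t *pas;

	if (!raw)
		return 1;

	pas = DEMO_PTR_ALIGN(raw, DEMO_UMR_ALIGN);
	/* ...fill pas[0..npages-1] as mlx5_ib_populate_pas() would... */
	memset(pas + npages, 0, size - npages * sizeof(uint64_t));

	printf("npages=%d -> mapped size=%zu, zeroed padding=%zu bytes\n",
	       npages, size, size - npages * sizeof(uint64_t));
	free(raw);
	return 0;
}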
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 70c28239e339..64512a7354cb 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -198,6 +198,9 @@ enum {
 	MLX5_UMR_INLINE		= (1 << 7),
 };
 
+#define MLX5_UMR_MTT_ALIGNMENT 0x40
+#define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
+
 enum mlx5_event {
 	MLX5_EVENT_TYPE_COMP	= 0x0,
 