author	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-23 14:27:49 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-23 14:27:49 -0500
commit	af17fe7a63db7e11d65f1296f0cbf156a89a2735 (patch)
tree	39b8c379a5a30e1468684832945eaab704f6d095
parent	f14cc3b13d8f3ceee862f8365d37ba214630126a (diff)
parent	cdbe33d0f82d68ff74f05502a4c26e65ec7e90bb (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull Mellanox rdma updates from Doug Ledford:
 "Mellanox specific updates for 4.11 merge window

  Because the Mellanox code required being based on a net-next tree, I
  kept it separate from the remainder of the RDMA stack submission that
  is based on 4.10-rc3.

  This branch contains:

   - Various mlx4 and mlx5 fixes and minor changes

   - Support for adding a tag match rule to flow specs

   - Support for cvlan offload operation for raw ethernet QPs

   - A change to the core IB code to recognize raw eth capabilities and
     enumerate them (touches non-Mellanox code)

   - Implicit On-Demand Paging memory registration support"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (40 commits)
  IB/mlx5: Fix configuration of port capabilities
  IB/mlx4: Take source GID by index from HW GID table
  IB/mlx5: Fix blue flame buffer size calculation
  IB/mlx4: Remove unused variable from function declaration
  IB: Query ports via the core instead of direct into the driver
  IB: Add protocol for USNIC
  IB/mlx4: Support raw packet protocol
  IB/mlx5: Support raw packet protocol
  IB/core: Add raw packet protocol
  IB/mlx5: Add implicit MR support
  IB/mlx5: Expose MR cache for mlx5_ib
  IB/mlx5: Add null_mkey access
  IB/umem: Indicate that process is being terminated
  IB/umem: Update on demand page (ODP) support
  IB/core: Add implicit MR flag
  IB/mlx5: Support creation of a WQ with scatter FCS offload
  IB/mlx5: Enable QP creation with cvlan offload
  IB/mlx5: Enable WQ creation and modification with cvlan offload
  IB/mlx5: Expose vlan offloads capabilities
  IB/uverbs: Enable QP creation with cvlan offload
  ...
-rw-r--r--	drivers/infiniband/core/umem.c	3
-rw-r--r--	drivers/infiniband/core/umem_odp.c	92
-rw-r--r--	drivers/infiniband/core/umem_rbtree.c	21
-rw-r--r--	drivers/infiniband/core/uverbs.h	1
-rw-r--r--	drivers/infiniband/core/uverbs_cmd.c	53
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch_provider.c	7
-rw-r--r--	drivers/infiniband/hw/cxgb4/provider.c	8
-rw-r--r--	drivers/infiniband/hw/hfi1/verbs.c	1
-rw-r--r--	drivers/infiniband/hw/hns/hns_roce_main.c	7
-rw-r--r--	drivers/infiniband/hw/i40iw/i40iw_verbs.c	8
-rw-r--r--	drivers/infiniband/hw/mlx4/alias_GUID.c	1
-rw-r--r--	drivers/infiniband/hw/mlx4/main.c	27
-rw-r--r--	drivers/infiniband/hw/mlx4/qp.c	56
-rw-r--r--	drivers/infiniband/hw/mlx4/sysfs.c	1
-rw-r--r--	drivers/infiniband/hw/mlx5/Makefile	2
-rw-r--r--	drivers/infiniband/hw/mlx5/cmd.c	48
-rw-r--r--	drivers/infiniband/hw/mlx5/cmd.h	40
-rw-r--r--	drivers/infiniband/hw/mlx5/mad.c	14
-rw-r--r--	drivers/infiniband/hw/mlx5/main.c	337
-rw-r--r--	drivers/infiniband/hw/mlx5/mlx5_ib.h	46
-rw-r--r--	drivers/infiniband/hw/mlx5/mr.c	128
-rw-r--r--	drivers/infiniband/hw/mlx5/odp.c	505
-rw-r--r--	drivers/infiniband/hw/mlx5/qp.c	91
-rw-r--r--	drivers/infiniband/hw/mlx5/srq.c	11
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_provider.c	9
-rw-r--r--	drivers/infiniband/hw/nes/nes_verbs.c	5
-rw-r--r--	drivers/infiniband/hw/ocrdma/ocrdma_main.c	9
-rw-r--r--	drivers/infiniband/hw/ocrdma/ocrdma_verbs.c	1
-rw-r--r--	drivers/infiniband/hw/qedr/verbs.c	9
-rw-r--r--	drivers/infiniband/hw/qib/qib_verbs.c	1
-rw-r--r--	drivers/infiniband/hw/usnic/usnic_ib_main.c	4
-rw-r--r--	drivers/infiniband/hw/usnic/usnic_ib_verbs.c	2
-rw-r--r--	drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c	5
-rw-r--r--	drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c	4
-rw-r--r--	drivers/infiniband/sw/rdmavt/vt.c	7
-rw-r--r--	drivers/infiniband/sw/rxe/rxe_verbs.c	6
-rw-r--r--	drivers/net/ethernet/mellanox/mlx5/core/fs_core.c	12
-rw-r--r--	include/linux/mlx5/driver.h	6
-rw-r--r--	include/linux/mlx5/mlx5_ifc.h	2
-rw-r--r--	include/rdma/ib_umem_odp.h	21
-rw-r--r--	include/rdma/ib_verbs.h	57
-rw-r--r--	include/uapi/rdma/ib_user_verbs.h	19
42 files changed, 1417 insertions, 270 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 4609b921f899..446b56a5260b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -99,9 +99,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
99 if (dmasync) 99 if (dmasync)
100 dma_attrs |= DMA_ATTR_WRITE_BARRIER; 100 dma_attrs |= DMA_ATTR_WRITE_BARRIER;
101 101
102 if (!size)
103 return ERR_PTR(-EINVAL);
104
105 /* 102 /*
106 * If the combination of the addr and size requested for this memory 103 * If the combination of the addr and size requested for this memory
107 * region causes an integer overflow, return error. 104 * region causes an integer overflow, return error.
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6b079a31dced..f2fc0431512d 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -239,6 +239,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
239 .invalidate_range_end = ib_umem_notifier_invalidate_range_end, 239 .invalidate_range_end = ib_umem_notifier_invalidate_range_end,
240}; 240};
241 241
242struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
243 unsigned long addr,
244 size_t size)
245{
246 struct ib_umem *umem;
247 struct ib_umem_odp *odp_data;
248 int pages = size >> PAGE_SHIFT;
249 int ret;
250
251 umem = kzalloc(sizeof(*umem), GFP_KERNEL);
252 if (!umem)
253 return ERR_PTR(-ENOMEM);
254
255 umem->context = context;
256 umem->length = size;
257 umem->address = addr;
258 umem->page_size = PAGE_SIZE;
259 umem->writable = 1;
260
261 odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
262 if (!odp_data) {
263 ret = -ENOMEM;
264 goto out_umem;
265 }
266 odp_data->umem = umem;
267
268 mutex_init(&odp_data->umem_mutex);
269 init_completion(&odp_data->notifier_completion);
270
271 odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
272 if (!odp_data->page_list) {
273 ret = -ENOMEM;
274 goto out_odp_data;
275 }
276
277 odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
278 if (!odp_data->dma_list) {
279 ret = -ENOMEM;
280 goto out_page_list;
281 }
282
283 down_write(&context->umem_rwsem);
284 context->odp_mrs_count++;
285 rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
286 if (likely(!atomic_read(&context->notifier_count)))
287 odp_data->mn_counters_active = true;
288 else
289 list_add(&odp_data->no_private_counters,
290 &context->no_private_counters);
291 up_write(&context->umem_rwsem);
292
293 umem->odp_data = odp_data;
294
295 return umem;
296
297out_page_list:
298 vfree(odp_data->page_list);
299out_odp_data:
300 kfree(odp_data);
301out_umem:
302 kfree(umem);
303 return ERR_PTR(ret);
304}
305EXPORT_SYMBOL(ib_alloc_odp_umem);
306
242int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) 307int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
243{ 308{
244 int ret_val; 309 int ret_val;
@@ -270,18 +335,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
270 335
271 init_completion(&umem->odp_data->notifier_completion); 336 init_completion(&umem->odp_data->notifier_completion);
272 337
273 umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) * 338 if (ib_umem_num_pages(umem)) {
339 umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
274 sizeof(*umem->odp_data->page_list)); 340 sizeof(*umem->odp_data->page_list));
275 if (!umem->odp_data->page_list) { 341 if (!umem->odp_data->page_list) {
276 ret_val = -ENOMEM; 342 ret_val = -ENOMEM;
277 goto out_odp_data; 343 goto out_odp_data;
278 } 344 }
279 345
280 umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) * 346 umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
281 sizeof(*umem->odp_data->dma_list)); 347 sizeof(*umem->odp_data->dma_list));
282 if (!umem->odp_data->dma_list) { 348 if (!umem->odp_data->dma_list) {
283 ret_val = -ENOMEM; 349 ret_val = -ENOMEM;
284 goto out_page_list; 350 goto out_page_list;
351 }
285 } 352 }
286 353
287 /* 354 /*
@@ -466,6 +533,7 @@ static int ib_umem_odp_map_dma_single_page(
466 } 533 }
467 umem->odp_data->dma_list[page_index] = dma_addr | access_mask; 534 umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
468 umem->odp_data->page_list[page_index] = page; 535 umem->odp_data->page_list[page_index] = page;
536 umem->npages++;
469 stored_page = 1; 537 stored_page = 1;
470 } else if (umem->odp_data->page_list[page_index] == page) { 538 } else if (umem->odp_data->page_list[page_index] == page) {
471 umem->odp_data->dma_list[page_index] |= access_mask; 539 umem->odp_data->dma_list[page_index] |= access_mask;
@@ -505,7 +573,8 @@ out:
505 * for failure. 573 * for failure.
506 * An -EAGAIN error code is returned when a concurrent mmu notifier prevents 574 * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
507 * the function from completing its task. 575 * the function from completing its task.
508 * 576 * An -ENOENT error code indicates that userspace process is being terminated
577 * and mm was already destroyed.
509 * @umem: the umem to map and pin 578 * @umem: the umem to map and pin
510 * @user_virt: the address from which we need to map. 579 * @user_virt: the address from which we need to map.
511 * @bcnt: the minimal number of bytes to pin and map. The mapping might be 580 * @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -553,7 +622,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
553 622
554 owning_mm = get_task_mm(owning_process); 623 owning_mm = get_task_mm(owning_process);
555 if (owning_mm == NULL) { 624 if (owning_mm == NULL) {
556 ret = -EINVAL; 625 ret = -ENOENT;
557 goto out_put_task; 626 goto out_put_task;
558 } 627 }
559 628
@@ -665,6 +734,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
665 put_page(page); 734 put_page(page);
666 umem->odp_data->page_list[idx] = NULL; 735 umem->odp_data->page_list[idx] = NULL;
667 umem->odp_data->dma_list[idx] = 0; 736 umem->odp_data->dma_list[idx] = 0;
737 umem->npages--;
668 } 738 }
669 } 739 }
670 mutex_unlock(&umem->odp_data->umem_mutex); 740 mutex_unlock(&umem->odp_data->umem_mutex);
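The ib_alloc_odp_umem() helper exported above is the building block the implicit-ODP work uses to create umems that are not backed by a user mmap. A minimal sketch of a caller follows, assuming the declaration is exposed through <rdma/ib_umem_odp.h> (as the diffstat suggests); the function name and the way the range is chosen are illustrative only, not part of this patch.

#include <linux/err.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem_odp.h>

/*
 * Illustrative caller: allocate an ODP umem covering [addr, addr + size).
 * ib_alloc_odp_umem() allocates the umem plus its ODP page/DMA arrays and
 * inserts it into the context's umem interval tree; it returns ERR_PTR()
 * on failure, so callers must check with IS_ERR().
 */
static struct ib_umem *example_alloc_odp_range(struct ib_ucontext *ctx,
					       unsigned long addr, size_t size)
{
	struct ib_umem *umem;

	umem = ib_alloc_odp_umem(ctx, addr, size);
	if (IS_ERR(umem))
		return umem;	/* typically -ENOMEM */

	/* Pages are brought in later via ib_umem_odp_map_dma_pages(). */
	return umem;
}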
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
index 727d788448f5..d176597b4d78 100644
--- a/drivers/infiniband/core/umem_rbtree.c
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
78 void *cookie) 78 void *cookie)
79{ 79{
80 int ret_val = 0; 80 int ret_val = 0;
81 struct umem_odp_node *node; 81 struct umem_odp_node *node, *next;
82 struct ib_umem_odp *umem; 82 struct ib_umem_odp *umem;
83 83
84 if (unlikely(start == last)) 84 if (unlikely(start == last))
85 return ret_val; 85 return ret_val;
86 86
87 for (node = rbt_ib_umem_iter_first(root, start, last - 1); node; 87 for (node = rbt_ib_umem_iter_first(root, start, last - 1);
88 node = rbt_ib_umem_iter_next(node, start, last - 1)) { 88 node; node = next) {
89 next = rbt_ib_umem_iter_next(node, start, last - 1);
89 umem = container_of(node, struct ib_umem_odp, interval_tree); 90 umem = container_of(node, struct ib_umem_odp, interval_tree);
90 ret_val = cb(umem->umem, start, last, cookie) || ret_val; 91 ret_val = cb(umem->umem, start, last, cookie) || ret_val;
91 } 92 }
92 93
93 return ret_val; 94 return ret_val;
94} 95}
96EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
97
98struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
99 u64 addr, u64 length)
100{
101 struct umem_odp_node *node;
102
103 node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
104 if (node)
105 return container_of(node, struct ib_umem_odp, interval_tree);
106 return NULL;
107
108}
109EXPORT_SYMBOL(rbt_ib_umem_lookup);
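rbt_ib_umem_lookup() gives drivers a way to find an ODP umem that intersects a virtual address range, e.g. while resolving a page fault. A sketch of a caller, under the assumption that the interval tree is protected by the context's umem_rwsem as in the insertion path earlier in this series; the wrapper name is made up.

#include <rdma/ib_verbs.h>
#include <rdma/ib_umem_odp.h>

/*
 * Illustrative lookup: return the first ODP umem overlapping
 * [addr, addr + length), or NULL when none intersects the range.
 */
static struct ib_umem_odp *example_find_odp(struct ib_ucontext *ctx,
					    u64 addr, u64 length)
{
	struct ib_umem_odp *odp;

	down_read(&ctx->umem_rwsem);
	odp = rbt_ib_umem_lookup(&ctx->umem_tree, addr, length);
	up_read(&ctx->umem_rwsem);

	return odp;
}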
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 455034ac994e..e1bedf0bac04 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec {
228 struct ib_uverbs_flow_spec_ipv4 ipv4; 228 struct ib_uverbs_flow_spec_ipv4 ipv4;
229 struct ib_uverbs_flow_spec_tcp_udp tcp_udp; 229 struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
230 struct ib_uverbs_flow_spec_ipv6 ipv6; 230 struct ib_uverbs_flow_spec_ipv6 ipv6;
231 struct ib_uverbs_flow_spec_action_tag flow_tag;
231 }; 232 };
232}; 233};
233 234
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 700782203483..b4b395a054ac 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1891,7 +1891,8 @@ static int create_qp(struct ib_uverbs_file *file,
1891 IB_QP_CREATE_CROSS_CHANNEL | 1891 IB_QP_CREATE_CROSS_CHANNEL |
1892 IB_QP_CREATE_MANAGED_SEND | 1892 IB_QP_CREATE_MANAGED_SEND |
1893 IB_QP_CREATE_MANAGED_RECV | 1893 IB_QP_CREATE_MANAGED_RECV |
1894 IB_QP_CREATE_SCATTER_FCS)) { 1894 IB_QP_CREATE_SCATTER_FCS |
1895 IB_QP_CREATE_CVLAN_STRIPPING)) {
1895 ret = -EINVAL; 1896 ret = -EINVAL;
1896 goto err_put; 1897 goto err_put;
1897 } 1898 }
@@ -3143,6 +3144,25 @@ out_put:
3143 return ret ? ret : in_len; 3144 return ret ? ret : in_len;
3144} 3145}
3145 3146
3147static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
3148 union ib_flow_spec *ib_spec)
3149{
3150 ib_spec->type = kern_spec->type;
3151 switch (ib_spec->type) {
3152 case IB_FLOW_SPEC_ACTION_TAG:
3153 if (kern_spec->flow_tag.size !=
3154 sizeof(struct ib_uverbs_flow_spec_action_tag))
3155 return -EINVAL;
3156
3157 ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
3158 ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
3159 break;
3160 default:
3161 return -EINVAL;
3162 }
3163 return 0;
3164}
3165
3146static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) 3166static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
3147{ 3167{
3148 /* Returns user space filter size, includes padding */ 3168 /* Returns user space filter size, includes padding */
@@ -3167,8 +3187,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
3167 return kern_filter_size; 3187 return kern_filter_size;
3168} 3188}
3169 3189
3170static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, 3190static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
3171 union ib_flow_spec *ib_spec) 3191 union ib_flow_spec *ib_spec)
3172{ 3192{
3173 ssize_t actual_filter_sz; 3193 ssize_t actual_filter_sz;
3174 ssize_t kern_filter_sz; 3194 ssize_t kern_filter_sz;
@@ -3263,6 +3283,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
3263 return 0; 3283 return 0;
3264} 3284}
3265 3285
3286static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
3287 union ib_flow_spec *ib_spec)
3288{
3289 if (kern_spec->reserved)
3290 return -EINVAL;
3291
3292 if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
3293 return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
3294 else
3295 return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
3296}
3297
3266int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, 3298int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
3267 struct ib_device *ib_dev, 3299 struct ib_device *ib_dev,
3268 struct ib_udata *ucore, 3300 struct ib_udata *ucore,
@@ -3325,6 +3357,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
3325 wq_init_attr.wq_context = file; 3357 wq_init_attr.wq_context = file;
3326 wq_init_attr.wq_type = cmd.wq_type; 3358 wq_init_attr.wq_type = cmd.wq_type;
3327 wq_init_attr.event_handler = ib_uverbs_wq_event_handler; 3359 wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
3360 if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
3361 sizeof(cmd.create_flags)))
3362 wq_init_attr.create_flags = cmd.create_flags;
3328 obj->uevent.events_reported = 0; 3363 obj->uevent.events_reported = 0;
3329 INIT_LIST_HEAD(&obj->uevent.event_list); 3364 INIT_LIST_HEAD(&obj->uevent.event_list);
3330 wq = pd->device->create_wq(pd, &wq_init_attr, uhw); 3365 wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@@ -3480,7 +3515,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
3480 if (!cmd.attr_mask) 3515 if (!cmd.attr_mask)
3481 return -EINVAL; 3516 return -EINVAL;
3482 3517
3483 if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) 3518 if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
3484 return -EINVAL; 3519 return -EINVAL;
3485 3520
3486 wq = idr_read_wq(cmd.wq_handle, file->ucontext); 3521 wq = idr_read_wq(cmd.wq_handle, file->ucontext);
@@ -3489,6 +3524,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
3489 3524
3490 wq_attr.curr_wq_state = cmd.curr_wq_state; 3525 wq_attr.curr_wq_state = cmd.curr_wq_state;
3491 wq_attr.wq_state = cmd.wq_state; 3526 wq_attr.wq_state = cmd.wq_state;
3527 if (cmd.attr_mask & IB_WQ_FLAGS) {
3528 wq_attr.flags = cmd.flags;
3529 wq_attr.flags_mask = cmd.flags_mask;
3530 }
3492 ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); 3531 ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
3493 put_wq_read(wq); 3532 put_wq_read(wq);
3494 return ret; 3533 return ret;
@@ -4323,6 +4362,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
4323 4362
4324 resp.max_wq_type_rq = attr.max_wq_type_rq; 4363 resp.max_wq_type_rq = attr.max_wq_type_rq;
4325 resp.response_length += sizeof(resp.max_wq_type_rq); 4364 resp.response_length += sizeof(resp.max_wq_type_rq);
4365
4366 if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
4367 goto end;
4368
4369 resp.raw_packet_caps = attr.raw_packet_caps;
4370 resp.response_length += sizeof(resp.raw_packet_caps);
4326end: 4371end:
4327 err = ib_copy_to_udata(ucore, &resp, resp.response_length); 4372 err = ib_copy_to_udata(ucore, &resp, resp.response_length);
4328 return err; 4373 return err;
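The action-tag spec parsed by kern_spec_to_ib_spec_action() above lands in the flow_tag member of union ib_flow_spec. Below is a short sketch of how an in-kernel user might fill that member before handing a flow to a driver; the helper itself is hypothetical, and the 24-bit limit mirrors the mlx5 check added later in this series.

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Illustrative only: build the new action-tag flow spec. */
static int example_fill_flow_tag(union ib_flow_spec *spec, u32 tag)
{
	if (tag >= BIT(24))	/* mlx5 rejects tags wider than 24 bits */
		return -EINVAL;

	spec->flow_tag.type = IB_FLOW_SPEC_ACTION_TAG;
	spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
	spec->flow_tag.tag_id = tag;

	return 0;
}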
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 6262dc035f3c..48649f93258a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1133,7 +1133,7 @@ static int iwch_query_port(struct ib_device *ibdev,
1133 dev = to_iwch_dev(ibdev); 1133 dev = to_iwch_dev(ibdev);
1134 netdev = dev->rdev.port_info.lldevs[port-1]; 1134 netdev = dev->rdev.port_info.lldevs[port-1];
1135 1135
1136 memset(props, 0, sizeof(struct ib_port_attr)); 1136 /* props being zeroed by the caller, avoid zeroing it here */
1137 props->max_mtu = IB_MTU_4096; 1137 props->max_mtu = IB_MTU_4096;
1138 props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); 1138 props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
1139 1139
@@ -1329,13 +1329,14 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
1329 struct ib_port_attr attr; 1329 struct ib_port_attr attr;
1330 int err; 1330 int err;
1331 1331
1332 err = iwch_query_port(ibdev, port_num, &attr); 1332 immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
1333
1334 err = ib_query_port(ibdev, port_num, &attr);
1333 if (err) 1335 if (err)
1334 return err; 1336 return err;
1335 1337
1336 immutable->pkey_tbl_len = attr.pkey_tbl_len; 1338 immutable->pkey_tbl_len = attr.pkey_tbl_len;
1337 immutable->gid_tbl_len = attr.gid_tbl_len; 1339 immutable->gid_tbl_len = attr.gid_tbl_len;
1338 immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
1339 1340
1340 return 0; 1341 return 0;
1341} 1342}
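The cxgb3 hunk above shows the pattern that repeats in the provider drivers below ("IB: Query ports via the core instead of direct into the driver"): the get_port_immutable callback now publishes core_cap_flags first, then calls ib_query_port() and copies the table sizes, since the core zeroes struct ib_port_attr and routes the query itself. A generic sketch of the reworked callback follows; the driver and the flag choice are illustrative.

#include <rdma/ib_verbs.h>

/* Illustrative get_port_immutable following the reworked pattern. */
static int example_port_immutable(struct ib_device *ibdev, u8 port_num,
				  struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	/* Set before ib_query_port() so the core can pick the right path. */
	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;

	return 0;
}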
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 3345e1c312f7..bdf7de571d83 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -370,8 +370,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
370 370
371 dev = to_c4iw_dev(ibdev); 371 dev = to_c4iw_dev(ibdev);
372 netdev = dev->rdev.lldi.ports[port-1]; 372 netdev = dev->rdev.lldi.ports[port-1];
373 373 /* props being zeroed by the caller, avoid zeroing it here */
374 memset(props, 0, sizeof(struct ib_port_attr));
375 props->max_mtu = IB_MTU_4096; 374 props->max_mtu = IB_MTU_4096;
376 props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); 375 props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
377 376
@@ -508,13 +507,14 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
508 struct ib_port_attr attr; 507 struct ib_port_attr attr;
509 int err; 508 int err;
510 509
511 err = c4iw_query_port(ibdev, port_num, &attr); 510 immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
511
512 err = ib_query_port(ibdev, port_num, &attr);
512 if (err) 513 if (err)
513 return err; 514 return err;
514 515
515 immutable->pkey_tbl_len = attr.pkey_tbl_len; 516 immutable->pkey_tbl_len = attr.pkey_tbl_len;
516 immutable->gid_tbl_len = attr.gid_tbl_len; 517 immutable->gid_tbl_len = attr.gid_tbl_len;
517 immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
518 518
519 return 0; 519 return 0;
520} 520}
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 5ba4c0dec348..33f00f0719c5 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1302,6 +1302,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
1302 struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; 1302 struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
1303 u16 lid = ppd->lid; 1303 u16 lid = ppd->lid;
1304 1304
1305 /* props being zeroed by the caller, avoid zeroing it here */
1305 props->lid = lid ? lid : 0; 1306 props->lid = lid ? lid : 0;
1306 props->lmc = ppd->lmc; 1307 props->lmc = ppd->lmc;
1307 /* OPA logical states match IB logical states */ 1308 /* OPA logical states match IB logical states */
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index cf14679664ca..6843409fba29 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -250,7 +250,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
250 assert(port_num > 0); 250 assert(port_num > 0);
251 port = port_num - 1; 251 port = port_num - 1;
252 252
253 memset(props, 0, sizeof(*props)); 253 /* props being zeroed by the caller, avoid zeroing it here */
254 254
255 props->max_mtu = hr_dev->caps.max_mtu; 255 props->max_mtu = hr_dev->caps.max_mtu;
256 props->gid_tbl_len = hr_dev->caps.gid_table_len[port]; 256 props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
@@ -401,14 +401,15 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
401 struct ib_port_attr attr; 401 struct ib_port_attr attr;
402 int ret; 402 int ret;
403 403
404 ret = hns_roce_query_port(ib_dev, port_num, &attr); 404 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
405
406 ret = ib_query_port(ib_dev, port_num, &attr);
405 if (ret) 407 if (ret)
406 return ret; 408 return ret;
407 409
408 immutable->pkey_tbl_len = attr.pkey_tbl_len; 410 immutable->pkey_tbl_len = attr.pkey_tbl_len;
409 immutable->gid_tbl_len = attr.gid_tbl_len; 411 immutable->gid_tbl_len = attr.gid_tbl_len;
410 412
411 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
412 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 413 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
413 414
414 return 0; 415 return 0;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4c000d60d5c6..5f695bf232a8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -97,8 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
97 struct i40iw_device *iwdev = to_iwdev(ibdev); 97 struct i40iw_device *iwdev = to_iwdev(ibdev);
98 struct net_device *netdev = iwdev->netdev; 98 struct net_device *netdev = iwdev->netdev;
99 99
100 memset(props, 0, sizeof(*props)); 100 /* props being zeroed by the caller, avoid zeroing it here */
101
102 props->max_mtu = IB_MTU_4096; 101 props->max_mtu = IB_MTU_4096;
103 props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); 102 props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
104 103
@@ -2497,14 +2496,15 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
2497 struct ib_port_attr attr; 2496 struct ib_port_attr attr;
2498 int err; 2497 int err;
2499 2498
2500 err = i40iw_query_port(ibdev, port_num, &attr); 2499 immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
2500
2501 err = ib_query_port(ibdev, port_num, &attr);
2501 2502
2502 if (err) 2503 if (err)
2503 return err; 2504 return err;
2504 2505
2505 immutable->pkey_tbl_len = attr.pkey_tbl_len; 2506 immutable->pkey_tbl_len = attr.pkey_tbl_len;
2506 immutable->gid_tbl_len = attr.gid_tbl_len; 2507 immutable->gid_tbl_len = attr.gid_tbl_len;
2507 immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
2508 2508
2509 return 0; 2509 return 0;
2510} 2510}
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 06020c54db20..ea24230ea0d4 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -499,6 +499,7 @@ static int set_guid_rec(struct ib_device *ibdev,
499 struct list_head *head = 499 struct list_head *head =
500 &dev->sriov.alias_guid.ports_guid[port - 1].cb_list; 500 &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
501 501
502 memset(&attr, 0, sizeof(attr));
502 err = __mlx4_ib_query_port(ibdev, port, &attr, 1); 503 err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
503 if (err) { 504 if (err) {
504 pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n", 505 pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 7031a8dd4d14..211cbbe9ccd1 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -678,7 +678,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
678} 678}
679 679
680static int eth_link_query_port(struct ib_device *ibdev, u8 port, 680static int eth_link_query_port(struct ib_device *ibdev, u8 port,
681 struct ib_port_attr *props, int netw_view) 681 struct ib_port_attr *props)
682{ 682{
683 683
684 struct mlx4_ib_dev *mdev = to_mdev(ibdev); 684 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -741,11 +741,11 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
741{ 741{
742 int err; 742 int err;
743 743
744 memset(props, 0, sizeof *props); 744 /* props being zeroed by the caller, avoid zeroing it here */
745 745
746 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? 746 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
747 ib_link_query_port(ibdev, port, props, netw_view) : 747 ib_link_query_port(ibdev, port, props, netw_view) :
748 eth_link_query_port(ibdev, port, props, netw_view); 748 eth_link_query_port(ibdev, port, props);
749 749
750 return err; 750 return err;
751} 751}
@@ -1014,7 +1014,7 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
1014 1014
1015 mutex_lock(&mdev->cap_mask_mutex); 1015 mutex_lock(&mdev->cap_mask_mutex);
1016 1016
1017 err = mlx4_ib_query_port(ibdev, port, &attr); 1017 err = ib_query_port(ibdev, port, &attr);
1018 if (err) 1018 if (err)
1019 goto out; 1019 goto out;
1020 1020
@@ -2537,24 +2537,27 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
2537 struct mlx4_ib_dev *mdev = to_mdev(ibdev); 2537 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
2538 int err; 2538 int err;
2539 2539
2540 err = mlx4_ib_query_port(ibdev, port_num, &attr);
2541 if (err)
2542 return err;
2543
2544 immutable->pkey_tbl_len = attr.pkey_tbl_len;
2545 immutable->gid_tbl_len = attr.gid_tbl_len;
2546
2547 if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) { 2540 if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
2548 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; 2541 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2542 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2549 } else { 2543 } else {
2550 if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) 2544 if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
2551 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; 2545 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
2552 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) 2546 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
2553 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | 2547 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
2554 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; 2548 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
2549 immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
2550 if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
2551 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
2552 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2555 } 2553 }
2556 2554
2557 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 2555 err = ib_query_port(ibdev, port_num, &attr);
2556 if (err)
2557 return err;
2558
2559 immutable->pkey_tbl_len = attr.pkey_tbl_len;
2560 immutable->gid_tbl_len = attr.gid_tbl_len;
2558 2561
2559 return 0; 2562 return 0;
2560} 2563}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 7d76f769233c..c34eebc7db65 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2420,11 +2420,31 @@ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
2420 return vl; 2420 return vl;
2421} 2421}
2422 2422
2423static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
2424 int index, union ib_gid *gid,
2425 enum ib_gid_type *gid_type)
2426{
2427 struct mlx4_ib_iboe *iboe = &ibdev->iboe;
2428 struct mlx4_port_gid_table *port_gid_table;
2429 unsigned long flags;
2430
2431 port_gid_table = &iboe->gids[port_num - 1];
2432 spin_lock_irqsave(&iboe->lock, flags);
2433 memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid));
2434 *gid_type = port_gid_table->gids[index].gid_type;
2435 spin_unlock_irqrestore(&iboe->lock, flags);
2436 if (!memcmp(gid, &zgid, sizeof(*gid)))
2437 return -ENOENT;
2438
2439 return 0;
2440}
2441
2423#define MLX4_ROCEV2_QP1_SPORT 0xC000 2442#define MLX4_ROCEV2_QP1_SPORT 0xC000
2424static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, 2443static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2425 void *wqe, unsigned *mlx_seg_len) 2444 void *wqe, unsigned *mlx_seg_len)
2426{ 2445{
2427 struct ib_device *ib_dev = sqp->qp.ibqp.device; 2446 struct ib_device *ib_dev = sqp->qp.ibqp.device;
2447 struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
2428 struct mlx4_wqe_mlx_seg *mlx = wqe; 2448 struct mlx4_wqe_mlx_seg *mlx = wqe;
2429 struct mlx4_wqe_ctrl_seg *ctrl = wqe; 2449 struct mlx4_wqe_ctrl_seg *ctrl = wqe;
2430 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; 2450 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
@@ -2450,8 +2470,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2450 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; 2470 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
2451 is_grh = mlx4_ib_ah_grh_present(ah); 2471 is_grh = mlx4_ib_ah_grh_present(ah);
2452 if (is_eth) { 2472 if (is_eth) {
2453 struct ib_gid_attr gid_attr; 2473 enum ib_gid_type gid_type;
2454
2455 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { 2474 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
2456 /* When multi-function is enabled, the ib_core gid 2475 /* When multi-function is enabled, the ib_core gid
2457 * indexes don't necessarily match the hw ones, so 2476 * indexes don't necessarily match the hw ones, so
@@ -2462,18 +2481,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2462 if (err) 2481 if (err)
2463 return err; 2482 return err;
2464 } else { 2483 } else {
2465 err = ib_get_cached_gid(ib_dev, 2484 err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
2466 be32_to_cpu(ah->av.ib.port_pd) >> 24, 2485 ah->av.ib.gid_index,
2467 ah->av.ib.gid_index, &sgid, 2486 &sgid, &gid_type);
2468 &gid_attr);
2469 if (!err) {
2470 if (gid_attr.ndev)
2471 dev_put(gid_attr.ndev);
2472 if (!memcmp(&sgid, &zgid, sizeof(sgid)))
2473 err = -ENOENT;
2474 }
2475 if (!err) { 2487 if (!err) {
2476 is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; 2488 is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
2477 if (is_udp) { 2489 if (is_udp) {
2478 if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) 2490 if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
2479 ip_version = 4; 2491 ip_version = 4;
@@ -2951,21 +2963,17 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2951 2963
2952 if (sqp->roce_v2_gsi) { 2964 if (sqp->roce_v2_gsi) {
2953 struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah); 2965 struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
2954 struct ib_gid_attr gid_attr; 2966 enum ib_gid_type gid_type;
2955 union ib_gid gid; 2967 union ib_gid gid;
2956 2968
2957 if (!ib_get_cached_gid(ibqp->device, 2969 if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
2958 be32_to_cpu(ah->av.ib.port_pd) >> 24, 2970 ah->av.ib.gid_index,
2959 ah->av.ib.gid_index, &gid, 2971 &gid, &gid_type))
2960 &gid_attr)) { 2972 qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
2961 if (gid_attr.ndev) 2973 to_mqp(sqp->roce_v2_gsi) : qp;
2962 dev_put(gid_attr.ndev); 2974 else
2963 qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
2964 to_mqp(sqp->roce_v2_gsi) : qp;
2965 } else {
2966 pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n", 2975 pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
2967 ah->av.ib.gid_index); 2976 ah->av.ib.gid_index);
2968 }
2969 } 2977 }
2970 } 2978 }
2971 2979
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 69fb5ba94d0f..0ba5ba7540c8 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -226,6 +226,7 @@ static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
226 int ret = 0 ; 226 int ret = 0 ;
227 struct ib_port_attr attr; 227 struct ib_port_attr attr;
228 228
229 memset(&attr, 0, sizeof(attr));
229 /* get the physical gid and pkey table sizes.*/ 230 /* get the physical gid and pkey table sizes.*/
230 ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1); 231 ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
231 if (ret) 232 if (ret)
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 7493a83acd28..90ad2adc752f 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o 1obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
2 2
3mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o 3mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
4mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o 4mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
new file mode 100644
index 000000000000..cdc2d3017da7
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -0,0 +1,48 @@
1/*
2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "cmd.h"
34
35int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
36{
37 u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
38 u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
39 int err;
40
41 MLX5_SET(query_special_contexts_in, in, opcode,
42 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
43 err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
44 if (!err)
45 *null_mkey = MLX5_GET(query_special_contexts_out, out,
46 null_mkey);
47 return err;
48}
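mlx5_cmd_null_mkey() is consumed by the implicit-ODP code added elsewhere in this series. A minimal sketch of a caller is shown below; the wrapper function is hypothetical and only illustrates the calling convention of the new command helper.

#include <linux/mlx5/driver.h>
#include "mlx5_ib.h"
#include "cmd.h"

/*
 * Illustrative caller: query the device's null mkey once, e.g. during ODP
 * setup. mlx5_cmd_null_mkey() issues QUERY_SPECIAL_CONTEXTS and returns the
 * key through *null_mkey; it returns 0 on success or a negative errno from
 * mlx5_cmd_exec().
 */
static int example_fetch_null_mkey(struct mlx5_ib_dev *dev, u32 *null_mkey)
{
	return mlx5_cmd_null_mkey(dev->mdev, null_mkey);
}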
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
new file mode 100644
index 000000000000..7ca8a7b6434d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -0,0 +1,40 @@
1/*
2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_IB_CMD_H
34#define MLX5_IB_CMD_H
35
36#include <linux/kernel.h>
37#include <linux/mlx5/driver.h>
38
39int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
40#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 39e58489dcc2..8dacb49eabd9 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -42,12 +42,24 @@ enum {
42 MLX5_IB_VENDOR_CLASS2 = 0xa 42 MLX5_IB_VENDOR_CLASS2 = 0xa
43}; 43};
44 44
45static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
46 struct ib_mad *in_mad)
47{
48 if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
49 in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
50 return true;
51 return dev->mdev->port_caps[port_num - 1].has_smi;
52}
53
45int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, 54int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
46 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, 55 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
47 const void *in_mad, void *response_mad) 56 const void *in_mad, void *response_mad)
48{ 57{
49 u8 op_modifier = 0; 58 u8 op_modifier = 0;
50 59
60 if (!can_do_mad_ifc(dev, port, (struct ib_mad *)in_mad))
61 return -EPERM;
62
51 /* Key check traps can't be generated unless we have in_wc to 63 /* Key check traps can't be generated unless we have in_wc to
52 * tell us where to send the trap. 64 * tell us where to send the trap.
53 */ 65 */
@@ -515,7 +527,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
515 if (!in_mad || !out_mad) 527 if (!in_mad || !out_mad)
516 goto out; 528 goto out;
517 529
518 memset(props, 0, sizeof(*props)); 530 /* props being zeroed by the caller, avoid zeroing it here */
519 531
520 init_query_mad(in_mad); 532 init_query_mad(in_mad);
521 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; 533 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9d8535385bb8..6a8498c052a5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -65,10 +65,6 @@ MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
65MODULE_LICENSE("Dual BSD/GPL"); 65MODULE_LICENSE("Dual BSD/GPL");
66MODULE_VERSION(DRIVER_VERSION); 66MODULE_VERSION(DRIVER_VERSION);
67 67
68static int deprecated_prof_sel = 2;
69module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
70MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
71
72static char mlx5_version[] = 68static char mlx5_version[] =
73 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" 69 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
74 DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; 70 DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
@@ -175,7 +171,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
175 enum ib_mtu ndev_ib_mtu; 171 enum ib_mtu ndev_ib_mtu;
176 u16 qkey_viol_cntr; 172 u16 qkey_viol_cntr;
177 173
178 memset(props, 0, sizeof(*props)); 174 /* props being zeroed by the caller, avoid zeroing it here */
179 175
180 props->port_cap_flags |= IB_PORT_CM_SUP; 176 props->port_cap_flags |= IB_PORT_CM_SUP;
181 props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; 177 props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
@@ -326,6 +322,27 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
326 return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); 322 return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
327} 323}
328 324
325int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
326 int index, enum ib_gid_type *gid_type)
327{
328 struct ib_gid_attr attr;
329 union ib_gid gid;
330 int ret;
331
332 ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
333 if (ret)
334 return ret;
335
336 if (!attr.ndev)
337 return -ENODEV;
338
339 dev_put(attr.ndev);
340
341 *gid_type = attr.gid_type;
342
343 return 0;
344}
345
329static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) 346static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
330{ 347{
331 if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) 348 if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@ -565,8 +582,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
565 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 582 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
566 583
567 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) { 584 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
568 if (MLX5_CAP_ETH(mdev, csum_cap)) 585 if (MLX5_CAP_ETH(mdev, csum_cap)) {
586 /* Legacy bit to support old userspace libraries */
569 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; 587 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
588 props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
589 }
590
591 if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
592 props->raw_packet_caps |=
593 IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
570 594
571 if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { 595 if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
572 max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); 596 max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
@@ -605,8 +629,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
605 } 629 }
606 630
607 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && 631 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
608 MLX5_CAP_ETH(dev->mdev, scatter_fcs)) 632 MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
633 /* Legacy bit to support old userspace libraries */
609 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; 634 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
635 props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
636 }
610 637
611 if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) 638 if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
612 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; 639 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@@ -831,7 +858,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
831 goto out; 858 goto out;
832 } 859 }
833 860
834 memset(props, 0, sizeof(*props)); 861 /* props being zeroed by the caller, avoid zeroing it here */
835 862
836 err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep); 863 err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
837 if (err) 864 if (err)
@@ -969,6 +996,31 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
969 return err; 996 return err;
970} 997}
971 998
999static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
1000 u32 value)
1001{
1002 struct mlx5_hca_vport_context ctx = {};
1003 int err;
1004
1005 err = mlx5_query_hca_vport_context(dev->mdev, 0,
1006 port_num, 0, &ctx);
1007 if (err)
1008 return err;
1009
1010 if (~ctx.cap_mask1_perm & mask) {
1011 mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
1012 mask, ctx.cap_mask1_perm);
1013 return -EINVAL;
1014 }
1015
1016 ctx.cap_mask1 = value;
1017 ctx.cap_mask1_perm = mask;
1018 err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
1019 port_num, 0, &ctx);
1020
1021 return err;
1022}
1023
972static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, 1024static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
973 struct ib_port_modify *props) 1025 struct ib_port_modify *props)
974{ 1026{
@@ -976,10 +1028,20 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
976 struct ib_port_attr attr; 1028 struct ib_port_attr attr;
977 u32 tmp; 1029 u32 tmp;
978 int err; 1030 int err;
1031 u32 change_mask;
1032 u32 value;
1033 bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
1034 IB_LINK_LAYER_INFINIBAND);
1035
1036 if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
1037 change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
1038 value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
1039 return set_port_caps_atomic(dev, port, change_mask, value);
1040 }
979 1041
980 mutex_lock(&dev->cap_mask_mutex); 1042 mutex_lock(&dev->cap_mask_mutex);
981 1043
982 err = mlx5_ib_query_port(ibdev, port, &attr); 1044 err = ib_query_port(ibdev, port, &attr);
983 if (err) 1045 if (err)
984 goto out; 1046 goto out;
985 1047
@@ -1661,6 +1723,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
1661#define LAST_IPV6_FIELD traffic_class 1723#define LAST_IPV6_FIELD traffic_class
1662#define LAST_TCP_UDP_FIELD src_port 1724#define LAST_TCP_UDP_FIELD src_port
1663#define LAST_TUNNEL_FIELD tunnel_id 1725#define LAST_TUNNEL_FIELD tunnel_id
1726#define LAST_FLOW_TAG_FIELD tag_id
1664 1727
1665/* Field is the last supported field */ 1728/* Field is the last supported field */
1666#define FIELDS_NOT_SUPPORTED(filter, field)\ 1729#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1671,7 +1734,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
1671 sizeof(filter.field)) 1734 sizeof(filter.field))
1672 1735
1673static int parse_flow_attr(u32 *match_c, u32 *match_v, 1736static int parse_flow_attr(u32 *match_c, u32 *match_v,
1674 const union ib_flow_spec *ib_spec) 1737 const union ib_flow_spec *ib_spec, u32 *tag_id)
1675{ 1738{
1676 void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, 1739 void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
1677 misc_parameters); 1740 misc_parameters);
@@ -1695,7 +1758,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
1695 switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { 1758 switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
1696 case IB_FLOW_SPEC_ETH: 1759 case IB_FLOW_SPEC_ETH:
1697 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) 1760 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
1698 return -ENOTSUPP; 1761 return -EOPNOTSUPP;
1699 1762
1700 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 1763 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1701 dmac_47_16), 1764 dmac_47_16),
@@ -1743,7 +1806,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
1743 break; 1806 break;
1744 case IB_FLOW_SPEC_IPV4: 1807 case IB_FLOW_SPEC_IPV4:
1745 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) 1808 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
1746 return -ENOTSUPP; 1809 return -EOPNOTSUPP;
1747 1810
1748 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 1811 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1749 ethertype, 0xffff); 1812 ethertype, 0xffff);
@@ -1775,7 +1838,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
1775 break; 1838 break;
1776 case IB_FLOW_SPEC_IPV6: 1839 case IB_FLOW_SPEC_IPV6:
1777 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) 1840 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
1778 return -ENOTSUPP; 1841 return -EOPNOTSUPP;
1779 1842
1780 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 1843 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1781 ethertype, 0xffff); 1844 ethertype, 0xffff);
@@ -1816,7 +1879,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
1816 case IB_FLOW_SPEC_TCP: 1879 case IB_FLOW_SPEC_TCP:
1817 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, 1880 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
1818 LAST_TCP_UDP_FIELD)) 1881 LAST_TCP_UDP_FIELD))
1819 return -ENOTSUPP; 1882 return -EOPNOTSUPP;
1820 1883
1821 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 1884 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
1822 0xff); 1885 0xff);
@@ -1836,7 +1899,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
1836 case IB_FLOW_SPEC_UDP: 1899 case IB_FLOW_SPEC_UDP:
1837 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, 1900 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
1838 LAST_TCP_UDP_FIELD)) 1901 LAST_TCP_UDP_FIELD))
1839 return -ENOTSUPP; 1902 return -EOPNOTSUPP;
1840 1903
1841 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 1904 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
1842 0xff); 1905 0xff);
@@ -1856,13 +1919,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
1856 case IB_FLOW_SPEC_VXLAN_TUNNEL: 1919 case IB_FLOW_SPEC_VXLAN_TUNNEL:
1857 if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, 1920 if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
1858 LAST_TUNNEL_FIELD)) 1921 LAST_TUNNEL_FIELD))
1859 return -ENOTSUPP; 1922 return -EOPNOTSUPP;
1860 1923
1861 MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, 1924 MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
1862 ntohl(ib_spec->tunnel.mask.tunnel_id)); 1925 ntohl(ib_spec->tunnel.mask.tunnel_id));
1863 MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, 1926 MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
1864 ntohl(ib_spec->tunnel.val.tunnel_id)); 1927 ntohl(ib_spec->tunnel.val.tunnel_id));
1865 break; 1928 break;
1929 case IB_FLOW_SPEC_ACTION_TAG:
1930 if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
1931 LAST_FLOW_TAG_FIELD))
1932 return -EOPNOTSUPP;
1933 if (ib_spec->flow_tag.tag_id >= BIT(24))
1934 return -EINVAL;
1935
1936 *tag_id = ib_spec->flow_tag.tag_id;
1937 break;
1866 default: 1938 default:
1867 return -EINVAL; 1939 return -EINVAL;
1868 } 1940 }
@@ -2046,6 +2118,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
2046 struct mlx5_flow_spec *spec; 2118 struct mlx5_flow_spec *spec;
2047 const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); 2119 const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
2048 unsigned int spec_index; 2120 unsigned int spec_index;
2121 u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
2049 int err = 0; 2122 int err = 0;
2050 2123
2051 if (!is_valid_attr(flow_attr)) 2124 if (!is_valid_attr(flow_attr))
@@ -2062,7 +2135,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
2062 2135
2063 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 2136 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
2064 err = parse_flow_attr(spec->match_criteria, 2137 err = parse_flow_attr(spec->match_criteria,
2065 spec->match_value, ib_flow); 2138 spec->match_value, ib_flow, &flow_tag);
2066 if (err < 0) 2139 if (err < 0)
2067 goto free; 2140 goto free;
2068 2141
@@ -2072,7 +2145,16 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
2072 spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); 2145 spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
2073 flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : 2146 flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
2074 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; 2147 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
2075 flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; 2148
2149 if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
2150 (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
2151 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
2152 mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
2153 flow_tag, flow_attr->type);
2154 err = -EINVAL;
2155 goto free;
2156 }
2157 flow_act.flow_tag = flow_tag;
2076 handler->rule = mlx5_add_flow_rules(ft, spec, 2158 handler->rule = mlx5_add_flow_rules(ft, spec,
2077 &flow_act, 2159 &flow_act,
2078 dst, 1); 2160 dst, 1);
@@ -2542,6 +2624,35 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
2542 ibdev->ib_active = false; 2624 ibdev->ib_active = false;
2543} 2625}
2544 2626
2627static int set_has_smi_cap(struct mlx5_ib_dev *dev)
2628{
2629 struct mlx5_hca_vport_context vport_ctx;
2630 int err;
2631 int port;
2632
2633 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
2634 dev->mdev->port_caps[port - 1].has_smi = false;
2635 if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2636 MLX5_CAP_PORT_TYPE_IB) {
2637 if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
2638 err = mlx5_query_hca_vport_context(dev->mdev, 0,
2639 port, 0,
2640 &vport_ctx);
2641 if (err) {
2642 mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
2643 port, err);
2644 return err;
2645 }
2646 dev->mdev->port_caps[port - 1].has_smi =
2647 vport_ctx.has_smi;
2648 } else {
2649 dev->mdev->port_caps[port - 1].has_smi = true;
2650 }
2651 }
2652 }
2653 return 0;
2654}
2655
2545static void get_ext_port_caps(struct mlx5_ib_dev *dev) 2656static void get_ext_port_caps(struct mlx5_ib_dev *dev)
2546{ 2657{
2547 int port; 2658 int port;
@@ -2566,6 +2677,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
2566 if (!dprops) 2677 if (!dprops)
2567 goto out; 2678 goto out;
2568 2679
2680 err = set_has_smi_cap(dev);
2681 if (err)
2682 goto out;
2683
2569 err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); 2684 err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
2570 if (err) { 2685 if (err) {
2571 mlx5_ib_warn(dev, "query_device failed %d\n", err); 2686 mlx5_ib_warn(dev, "query_device failed %d\n", err);
@@ -2573,6 +2688,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
2573 } 2688 }
2574 2689
2575 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { 2690 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
2691 memset(pprops, 0, sizeof(*pprops));
2576 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); 2692 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
2577 if (err) { 2693 if (err) {
2578 mlx5_ib_warn(dev, "query_port %d failed %d\n", 2694 mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@ -2867,11 +2983,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
2867 if (ll == IB_LINK_LAYER_INFINIBAND) 2983 if (ll == IB_LINK_LAYER_INFINIBAND)
2868 return RDMA_CORE_PORT_IBA_IB; 2984 return RDMA_CORE_PORT_IBA_IB;
2869 2985
2986 ret = RDMA_CORE_PORT_RAW_PACKET;
2987
2870 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) 2988 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
2871 return 0; 2989 return ret;
2872 2990
2873 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP)) 2991 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
2874 return 0; 2992 return ret;
2875 2993
2876 if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP) 2994 if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
2877 ret |= RDMA_CORE_PORT_IBA_ROCE; 2995 ret |= RDMA_CORE_PORT_IBA_ROCE;
@@ -2890,7 +3008,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
2890 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); 3008 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
2891 int err; 3009 int err;
2892 3010
2893 err = mlx5_ib_query_port(ibdev, port_num, &attr); 3011 immutable->core_cap_flags = get_core_cap_flags(ibdev);
3012
3013 err = ib_query_port(ibdev, port_num, &attr);
2894 if (err) 3014 if (err)
2895 return err; 3015 return err;
2896 3016
@@ -3011,13 +3131,102 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
3011 mlx5_nic_vport_disable_roce(dev->mdev); 3131 mlx5_nic_vport_disable_roce(dev->mdev);
3012} 3132}
3013 3133
3134struct mlx5_ib_q_counter {
3135 const char *name;
3136 size_t offset;
3137};
3138
3139#define INIT_Q_COUNTER(_name) \
3140 { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
3141
3142static const struct mlx5_ib_q_counter basic_q_cnts[] = {
3143 INIT_Q_COUNTER(rx_write_requests),
3144 INIT_Q_COUNTER(rx_read_requests),
3145 INIT_Q_COUNTER(rx_atomic_requests),
3146 INIT_Q_COUNTER(out_of_buffer),
3147};
3148
3149static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
3150 INIT_Q_COUNTER(out_of_sequence),
3151};
3152
3153static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
3154 INIT_Q_COUNTER(duplicate_request),
3155 INIT_Q_COUNTER(rnr_nak_retry_err),
3156 INIT_Q_COUNTER(packet_seq_err),
3157 INIT_Q_COUNTER(implied_nak_seq_err),
3158 INIT_Q_COUNTER(local_ack_timeout_err),
3159};
3160
3014static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) 3161static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
3015{ 3162{
3016 unsigned int i; 3163 unsigned int i;
3017 3164
3018 for (i = 0; i < dev->num_ports; i++) 3165 for (i = 0; i < dev->num_ports; i++) {
3019 mlx5_core_dealloc_q_counter(dev->mdev, 3166 mlx5_core_dealloc_q_counter(dev->mdev,
3020 dev->port[i].q_cnt_id); 3167 dev->port[i].q_cnts.set_id);
3168 kfree(dev->port[i].q_cnts.names);
3169 kfree(dev->port[i].q_cnts.offsets);
3170 }
3171}
3172
3173static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
3174 const char ***names,
3175 size_t **offsets,
3176 u32 *num)
3177{
3178 u32 num_counters;
3179
3180 num_counters = ARRAY_SIZE(basic_q_cnts);
3181
3182 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
3183 num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
3184
3185 if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
3186 num_counters += ARRAY_SIZE(retrans_q_cnts);
3187
3188 *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
3189 if (!*names)
3190 return -ENOMEM;
3191
3192 *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
3193 if (!*offsets)
3194 goto err_names;
3195
3196 *num = num_counters;
3197
3198 return 0;
3199
3200err_names:
3201 kfree(*names);
3202 return -ENOMEM;
3203}
3204
3205static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
3206 const char **names,
3207 size_t *offsets)
3208{
3209 int i;
3210 int j = 0;
3211
3212 for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
3213 names[j] = basic_q_cnts[i].name;
3214 offsets[j] = basic_q_cnts[i].offset;
3215 }
3216
3217 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
3218 for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
3219 names[j] = out_of_seq_q_cnts[i].name;
3220 offsets[j] = out_of_seq_q_cnts[i].offset;
3221 }
3222 }
3223
3224 if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
3225 for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
3226 names[j] = retrans_q_cnts[i].name;
3227 offsets[j] = retrans_q_cnts[i].offset;
3228 }
3229 }
3021} 3230}
3022 3231
3023static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) 3232static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
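The tables above replace the fixed ten-entry names[]/stats_offsets[] arrays (deleted in a later hunk of this file) with per-port arrays whose length depends on the out_of_seq_cnt and retransmission_q_counters capability bits. A small standalone check of the sizing arithmetic used by __mlx5_ib_alloc_q_counters(); the capability flags are plain ints here because this is only a sketch of the math.

#include <stdio.h>

/* Counts mirror the tables above: 4 basic, 1 out-of-sequence and
 * 5 retransmission-related counters. */
#define N_BASIC   4
#define N_OOS     1
#define N_RETRANS 5

static int num_counters(int has_oos_cap, int has_retrans_cap)
{
	int n = N_BASIC;

	if (has_oos_cap)
		n += N_OOS;
	if (has_retrans_cap)
		n += N_RETRANS;
	return n;
}

int main(void)
{
	/* A device reporting both capabilities ends up with the same ten
	 * counters the old static tables always assumed. */
	printf("%d %d %d\n", num_counters(0, 0), num_counters(1, 0),
	       num_counters(1, 1));	/* prints: 4 5 10 */
	return 0;
}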
@@ -3026,14 +3235,26 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
3026 int ret; 3235 int ret;
3027 3236
3028 for (i = 0; i < dev->num_ports; i++) { 3237 for (i = 0; i < dev->num_ports; i++) {
3238 struct mlx5_ib_port *port = &dev->port[i];
3239
3029 ret = mlx5_core_alloc_q_counter(dev->mdev, 3240 ret = mlx5_core_alloc_q_counter(dev->mdev,
3030 &dev->port[i].q_cnt_id); 3241 &port->q_cnts.set_id);
3031 if (ret) { 3242 if (ret) {
3032 mlx5_ib_warn(dev, 3243 mlx5_ib_warn(dev,
3033 "couldn't allocate queue counter for port %d, err %d\n", 3244 "couldn't allocate queue counter for port %d, err %d\n",
3034 i + 1, ret); 3245 i + 1, ret);
3035 goto dealloc_counters; 3246 goto dealloc_counters;
3036 } 3247 }
3248
3249 ret = __mlx5_ib_alloc_q_counters(dev,
3250 &port->q_cnts.names,
3251 &port->q_cnts.offsets,
3252 &port->q_cnts.num_counters);
3253 if (ret)
3254 goto dealloc_counters;
3255
3256 mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
3257 port->q_cnts.offsets);
3037 } 3258 }
3038 3259
3039 return 0; 3260 return 0;
@@ -3041,62 +3262,39 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
3041dealloc_counters: 3262dealloc_counters:
3042 while (--i >= 0) 3263 while (--i >= 0)
3043 mlx5_core_dealloc_q_counter(dev->mdev, 3264 mlx5_core_dealloc_q_counter(dev->mdev,
3044 dev->port[i].q_cnt_id); 3265 dev->port[i].q_cnts.set_id);
3045 3266
3046 return ret; 3267 return ret;
3047} 3268}
3048 3269
3049static const char * const names[] = {
3050 "rx_write_requests",
3051 "rx_read_requests",
3052 "rx_atomic_requests",
3053 "out_of_buffer",
3054 "out_of_sequence",
3055 "duplicate_request",
3056 "rnr_nak_retry_err",
3057 "packet_seq_err",
3058 "implied_nak_seq_err",
3059 "local_ack_timeout_err",
3060};
3061
3062static const size_t stats_offsets[] = {
3063 MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
3064 MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
3065 MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
3066 MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
3067 MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
3068 MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
3069 MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
3070 MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
3071 MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
3072 MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
3073};
3074
3075static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, 3270static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
3076 u8 port_num) 3271 u8 port_num)
3077{ 3272{
3078 BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets)); 3273 struct mlx5_ib_dev *dev = to_mdev(ibdev);
3274 struct mlx5_ib_port *port = &dev->port[port_num - 1];
3079 3275
3080 /* We support only per port stats */ 3276 /* We support only per port stats */
3081 if (port_num == 0) 3277 if (port_num == 0)
3082 return NULL; 3278 return NULL;
3083 3279
3084 return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names), 3280 return rdma_alloc_hw_stats_struct(port->q_cnts.names,
3281 port->q_cnts.num_counters,
3085 RDMA_HW_STATS_DEFAULT_LIFESPAN); 3282 RDMA_HW_STATS_DEFAULT_LIFESPAN);
3086} 3283}
3087 3284
3088static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 3285static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
3089 struct rdma_hw_stats *stats, 3286 struct rdma_hw_stats *stats,
3090 u8 port, int index) 3287 u8 port_num, int index)
3091{ 3288{
3092 struct mlx5_ib_dev *dev = to_mdev(ibdev); 3289 struct mlx5_ib_dev *dev = to_mdev(ibdev);
3290 struct mlx5_ib_port *port = &dev->port[port_num - 1];
3093 int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); 3291 int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
3094 void *out; 3292 void *out;
3095 __be32 val; 3293 __be32 val;
3096 int ret; 3294 int ret;
3097 int i; 3295 int i;
3098 3296
3099 if (!port || !stats) 3297 if (!stats)
3100 return -ENOSYS; 3298 return -ENOSYS;
3101 3299
3102 out = mlx5_vzalloc(outlen); 3300 out = mlx5_vzalloc(outlen);
@@ -3104,18 +3302,19 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
3104 return -ENOMEM; 3302 return -ENOMEM;
3105 3303
3106 ret = mlx5_core_query_q_counter(dev->mdev, 3304 ret = mlx5_core_query_q_counter(dev->mdev,
3107 dev->port[port - 1].q_cnt_id, 0, 3305 port->q_cnts.set_id, 0,
3108 out, outlen); 3306 out, outlen);
3109 if (ret) 3307 if (ret)
3110 goto free; 3308 goto free;
3111 3309
3112 for (i = 0; i < ARRAY_SIZE(names); i++) { 3310 for (i = 0; i < port->q_cnts.num_counters; i++) {
3113 val = *(__be32 *)(out + stats_offsets[i]); 3311 val = *(__be32 *)(out + port->q_cnts.offsets[i]);
3114 stats->value[i] = (u64)be32_to_cpu(val); 3312 stats->value[i] = (u64)be32_to_cpu(val);
3115 } 3313 }
3314
3116free: 3315free:
3117 kvfree(out); 3316 kvfree(out);
3118 return ARRAY_SIZE(names); 3317 return port->q_cnts.num_counters;
3119} 3318}
3120 3319
3121static void *mlx5_ib_add(struct mlx5_core_dev *mdev) 3320static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
@@ -3267,8 +3466,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
3267 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); 3466 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
3268 } 3467 }
3269 3468
3270 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) && 3469 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
3271 MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
3272 dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; 3470 dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
3273 dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; 3471 dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
3274 } 3472 }
@@ -3322,9 +3520,11 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
3322 if (err) 3520 if (err)
3323 goto err_rsrc; 3521 goto err_rsrc;
3324 3522
3325 err = mlx5_ib_alloc_q_counters(dev); 3523 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
3326 if (err) 3524 err = mlx5_ib_alloc_q_counters(dev);
3327 goto err_odp; 3525 if (err)
3526 goto err_odp;
3527 }
3328 3528
3329 dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); 3529 dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
3330 if (!dev->mdev->priv.uar) 3530 if (!dev->mdev->priv.uar)
@@ -3373,7 +3573,8 @@ err_uar_page:
3373 mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); 3573 mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
3374 3574
3375err_q_cnt: 3575err_q_cnt:
3376 mlx5_ib_dealloc_q_counters(dev); 3576 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
3577 mlx5_ib_dealloc_q_counters(dev);
3377 3578
3378err_odp: 3579err_odp:
3379 mlx5_ib_odp_remove_one(dev); 3580 mlx5_ib_odp_remove_one(dev);
@@ -3406,7 +3607,8 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
3406 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); 3607 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
3407 mlx5_free_bfreg(dev->mdev, &dev->bfreg); 3608 mlx5_free_bfreg(dev->mdev, &dev->bfreg);
3408 mlx5_put_uars_page(dev->mdev, mdev->priv.uar); 3609 mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
3409 mlx5_ib_dealloc_q_counters(dev); 3610 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
3611 mlx5_ib_dealloc_q_counters(dev);
3410 destroy_umrc_res(dev); 3612 destroy_umrc_res(dev);
3411 mlx5_ib_odp_remove_one(dev); 3613 mlx5_ib_odp_remove_one(dev);
3412 destroy_dev_resources(&dev->devr); 3614 destroy_dev_resources(&dev->devr);
@@ -3430,8 +3632,7 @@ static int __init mlx5_ib_init(void)
3430{ 3632{
3431 int err; 3633 int err;
3432 3634
3433 if (deprecated_prof_sel != 2) 3635 mlx5_ib_odp_init();
3434 pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
3435 3636
3436 err = mlx5_register_interface(&mlx5_ib_interface); 3637 err = mlx5_register_interface(&mlx5_ib_interface);
3437 3638
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e1a4b93dce6b..3cd064b5f0bf 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -202,6 +202,7 @@ struct mlx5_ib_flow_db {
202#define MLX5_IB_UPD_XLT_ADDR BIT(3) 202#define MLX5_IB_UPD_XLT_ADDR BIT(3)
203#define MLX5_IB_UPD_XLT_PD BIT(4) 203#define MLX5_IB_UPD_XLT_PD BIT(4)
204#define MLX5_IB_UPD_XLT_ACCESS BIT(5) 204#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
205#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
205 206
206/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. 207/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
207 * 208 *
@@ -220,6 +221,10 @@ struct wr_list {
220 u16 next; 221 u16 next;
221}; 222};
222 223
224enum mlx5_ib_rq_flags {
225 MLX5_IB_RQ_CVLAN_STRIPPING = 1 << 0,
226};
227
223struct mlx5_ib_wq { 228struct mlx5_ib_wq {
224 u64 *wrid; 229 u64 *wrid;
225 u32 *wr_data; 230 u32 *wr_data;
@@ -308,6 +313,7 @@ struct mlx5_ib_rq {
308 struct mlx5_db *doorbell; 313 struct mlx5_db *doorbell;
309 u32 tirn; 314 u32 tirn;
310 u8 state; 315 u8 state;
316 u32 flags;
311}; 317};
312 318
313struct mlx5_ib_sq { 319struct mlx5_ib_sq {
@@ -392,6 +398,7 @@ enum mlx5_ib_qp_flags {
392 MLX5_IB_QP_SQPN_QP1 = 1 << 6, 398 MLX5_IB_QP_SQPN_QP1 = 1 << 6,
393 MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, 399 MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
394 MLX5_IB_QP_RSS = 1 << 8, 400 MLX5_IB_QP_RSS = 1 << 8,
401 MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
395}; 402};
396 403
397struct mlx5_umr_wr { 404struct mlx5_umr_wr {
@@ -497,6 +504,10 @@ struct mlx5_ib_mr {
497 int live; 504 int live;
498 void *descs_alloc; 505 void *descs_alloc;
499 int access_flags; /* Needed for rereg MR */ 506 int access_flags; /* Needed for rereg MR */
507
508 struct mlx5_ib_mr *parent;
509 atomic_t num_leaf_free;
510 wait_queue_head_t q_leaf_free;
500}; 511};
501 512
502struct mlx5_ib_mw { 513struct mlx5_ib_mw {
@@ -535,6 +546,10 @@ struct mlx5_cache_ent {
535 struct dentry *dir; 546 struct dentry *dir;
536 char name[4]; 547 char name[4];
537 u32 order; 548 u32 order;
549 u32 xlt;
550 u32 access_mode;
551 u32 page;
552
538 u32 size; 553 u32 size;
539 u32 cur; 554 u32 cur;
540 u32 miss; 555 u32 miss;
@@ -549,6 +564,7 @@ struct mlx5_cache_ent {
549 struct work_struct work; 564 struct work_struct work;
550 struct delayed_work dwork; 565 struct delayed_work dwork;
551 int pending; 566 int pending;
567 struct completion compl;
552}; 568};
553 569
554struct mlx5_mr_cache { 570struct mlx5_mr_cache {
@@ -579,8 +595,15 @@ struct mlx5_ib_resources {
579 struct mutex mutex; 595 struct mutex mutex;
580}; 596};
581 597
598struct mlx5_ib_q_counters {
599 const char **names;
600 size_t *offsets;
601 u32 num_counters;
602 u16 set_id;
603};
604
582struct mlx5_ib_port { 605struct mlx5_ib_port {
583 u16 q_cnt_id; 606 struct mlx5_ib_q_counters q_cnts;
584}; 607};
585 608
586struct mlx5_roce { 609struct mlx5_roce {
@@ -619,6 +642,7 @@ struct mlx5_ib_dev {
619 * being used by a page fault handler. 642 * being used by a page fault handler.
620 */ 643 */
621 struct srcu_struct mr_srcu; 644 struct srcu_struct mr_srcu;
645 u32 null_mkey;
622#endif 646#endif
623 struct mlx5_ib_flow_db flow_db; 647 struct mlx5_ib_flow_db flow_db;
624 /* protect resources needed as part of reset flow */ 648 /* protect resources needed as part of reset flow */
@@ -771,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
771int mlx5_ib_dealloc_mw(struct ib_mw *mw); 795int mlx5_ib_dealloc_mw(struct ib_mw *mw);
772int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, 796int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
773 int page_shift, int flags); 797 int page_shift, int flags);
798struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
799 int access_flags);
800void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
774int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 801int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
775 u64 length, u64 virt_addr, int access_flags, 802 u64 length, u64 virt_addr, int access_flags,
776 struct ib_pd *pd, struct ib_udata *udata); 803 struct ib_pd *pd, struct ib_udata *udata);
@@ -824,7 +851,9 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
824int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); 851int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
825int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); 852int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
826int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); 853int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
827int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); 854
855struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
856void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
828int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 857int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
829 struct ib_mr_status *mr_status); 858 struct ib_mr_status *mr_status);
830struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, 859struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -848,6 +877,9 @@ int __init mlx5_ib_odp_init(void);
848void mlx5_ib_odp_cleanup(void); 877void mlx5_ib_odp_cleanup(void);
849void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, 878void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
850 unsigned long end); 879 unsigned long end);
880void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
881void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
882 size_t nentries, struct mlx5_ib_mr *mr, int flags);
851#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 883#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
852static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) 884static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
853{ 885{
@@ -855,9 +887,13 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
855} 887}
856 888
857static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } 889static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
858static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {} 890static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
859static inline int mlx5_ib_odp_init(void) { return 0; } 891static inline int mlx5_ib_odp_init(void) { return 0; }
860static inline void mlx5_ib_odp_cleanup(void) {} 892static inline void mlx5_ib_odp_cleanup(void) {}
893static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
894static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
895 size_t nentries, struct mlx5_ib_mr *mr,
896 int flags) {}
861 897
862#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 898#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
863 899
@@ -872,6 +908,8 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
872 908
873__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, 909__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
874 int index); 910 int index);
911int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
912 int index, enum ib_gid_type *gid_type);
875 913
876/* GSI QP helper functions */ 914/* GSI QP helper functions */
877struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, 915struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8cf2a67f9fb0..3c1f483d003f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -49,6 +49,7 @@ enum {
49 49
50static int clean_mr(struct mlx5_ib_mr *mr); 50static int clean_mr(struct mlx5_ib_mr *mr);
51static int use_umr(struct mlx5_ib_dev *dev, int order); 51static int use_umr(struct mlx5_ib_dev *dev, int order);
52static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
52 53
53static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 54static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
54{ 55{
@@ -149,6 +150,9 @@ static void reg_mr_callback(int status, void *context)
149 if (err) 150 if (err)
150 pr_err("Error inserting to mkey tree. 0x%x\n", -err); 151 pr_err("Error inserting to mkey tree. 0x%x\n", -err);
151 write_unlock_irqrestore(&table->lock, flags); 152 write_unlock_irqrestore(&table->lock, flags);
153
154 if (!completion_done(&ent->compl))
155 complete(&ent->compl);
152} 156}
153 157
154static int add_keys(struct mlx5_ib_dev *dev, int c, int num) 158static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -157,7 +161,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
157 struct mlx5_cache_ent *ent = &cache->ent[c]; 161 struct mlx5_cache_ent *ent = &cache->ent[c];
158 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 162 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
159 struct mlx5_ib_mr *mr; 163 struct mlx5_ib_mr *mr;
160 int npages = 1 << ent->order;
161 void *mkc; 164 void *mkc;
162 u32 *in; 165 u32 *in;
163 int err = 0; 166 int err = 0;
@@ -185,11 +188,11 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
185 188
186 MLX5_SET(mkc, mkc, free, 1); 189 MLX5_SET(mkc, mkc, free, 1);
187 MLX5_SET(mkc, mkc, umr_en, 1); 190 MLX5_SET(mkc, mkc, umr_en, 1);
188 MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); 191 MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
189 192
190 MLX5_SET(mkc, mkc, qpn, 0xffffff); 193 MLX5_SET(mkc, mkc, qpn, 0xffffff);
191 MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2); 194 MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
192 MLX5_SET(mkc, mkc, log_page_size, 12); 195 MLX5_SET(mkc, mkc, log_page_size, ent->page);
193 196
194 spin_lock_irq(&ent->lock); 197 spin_lock_irq(&ent->lock);
195 ent->pending++; 198 ent->pending++;
@@ -447,6 +450,42 @@ static void cache_work_func(struct work_struct *work)
447 __cache_work_func(ent); 450 __cache_work_func(ent);
448} 451}
449 452
453struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
454{
455 struct mlx5_mr_cache *cache = &dev->cache;
456 struct mlx5_cache_ent *ent;
457 struct mlx5_ib_mr *mr;
458 int err;
459
460 if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
461 mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
462 return NULL;
463 }
464
465 ent = &cache->ent[entry];
466 while (1) {
467 spin_lock_irq(&ent->lock);
468 if (list_empty(&ent->head)) {
469 spin_unlock_irq(&ent->lock);
470
471 err = add_keys(dev, entry, 1);
472 if (err && err != -EAGAIN)
473 return ERR_PTR(err);
474
475 wait_for_completion(&ent->compl);
476 } else {
477 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
478 list);
479 list_del(&mr->list);
480 ent->cur--;
481 spin_unlock_irq(&ent->lock);
482 if (ent->cur < ent->limit)
483 queue_work(cache->wq, &ent->work);
484 return mr;
485 }
486 }
487}
488
450static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) 489static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
451{ 490{
452 struct mlx5_mr_cache *cache = &dev->cache; 491 struct mlx5_mr_cache *cache = &dev->cache;
@@ -456,12 +495,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
456 int i; 495 int i;
457 496
458 c = order2idx(dev, order); 497 c = order2idx(dev, order);
459 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { 498 if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
460 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); 499 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
461 return NULL; 500 return NULL;
462 } 501 }
463 502
464 for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) { 503 for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
465 ent = &cache->ent[i]; 504 ent = &cache->ent[i];
466 505
467 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); 506 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
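With this change the order-based lookup only walks the UMR-capable slice of the cache; the entries past MAX_UMR_CACHE_ENTRY are addressed directly by index through the new mlx5_mr_cache_alloc() and are reserved for the implicit-ODP MTT/KSM mkeys (see the odp.c changes below). A sketch of the order-to-index mapping, derived only from the ent->order = i + 2 relation visible in the cache-init hunk further down; the helper names are hypothetical.

/* Hypothetical sketch: entry i of the cache serves mkeys of up to
 * 2^(i + 2) pages, so a registration of a given page-count order maps
 * to index (order - 2); anything past MAX_UMR_CACHE_ENTRY is not
 * reachable through this order-based path.
 */
static int order_to_cache_index(int order)
{
	return order - 2;		/* inverse of ent->order = i + 2 */
}

static bool order_served_by_umr_cache(int order, int max_umr_cache_entry)
{
	int c = order_to_cache_index(order);

	return c >= 0 && c <= max_umr_cache_entry;
}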
@@ -488,7 +527,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
488 return mr; 527 return mr;
489} 528}
490 529
491static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 530void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
492{ 531{
493 struct mlx5_mr_cache *cache = &dev->cache; 532 struct mlx5_mr_cache *cache = &dev->cache;
494 struct mlx5_cache_ent *ent; 533 struct mlx5_cache_ent *ent;
@@ -500,6 +539,10 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
500 mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); 539 mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
501 return; 540 return;
502 } 541 }
542
543 if (unreg_umr(dev, mr))
544 return;
545
503 ent = &cache->ent[c]; 546 ent = &cache->ent[c];
504 spin_lock_irq(&ent->lock); 547 spin_lock_irq(&ent->lock);
505 list_add_tail(&mr->list, &ent->head); 548 list_add_tail(&mr->list, &ent->head);
@@ -602,7 +645,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
602{ 645{
603 struct mlx5_mr_cache *cache = &dev->cache; 646 struct mlx5_mr_cache *cache = &dev->cache;
604 struct mlx5_cache_ent *ent; 647 struct mlx5_cache_ent *ent;
605 int limit;
606 int err; 648 int err;
607 int i; 649 int i;
608 650
@@ -615,26 +657,35 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
615 657
616 setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev); 658 setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
617 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 659 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
618 INIT_LIST_HEAD(&cache->ent[i].head);
619 spin_lock_init(&cache->ent[i].lock);
620
621 ent = &cache->ent[i]; 660 ent = &cache->ent[i];
622 INIT_LIST_HEAD(&ent->head); 661 INIT_LIST_HEAD(&ent->head);
623 spin_lock_init(&ent->lock); 662 spin_lock_init(&ent->lock);
624 ent->order = i + 2; 663 ent->order = i + 2;
625 ent->dev = dev; 664 ent->dev = dev;
665 ent->limit = 0;
626 666
627 if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && 667 init_completion(&ent->compl);
628 mlx5_core_is_pf(dev->mdev) &&
629 use_umr(dev, ent->order))
630 limit = dev->mdev->profile->mr_cache[i].limit;
631 else
632 limit = 0;
633
634 INIT_WORK(&ent->work, cache_work_func); 668 INIT_WORK(&ent->work, cache_work_func);
635 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 669 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
636 ent->limit = limit;
637 queue_work(cache->wq, &ent->work); 670 queue_work(cache->wq, &ent->work);
671
672 if (i > MAX_UMR_CACHE_ENTRY) {
673 mlx5_odp_init_mr_cache_entry(ent);
674 continue;
675 }
676
677 if (!use_umr(dev, ent->order))
678 continue;
679
680 ent->page = PAGE_SHIFT;
681 ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
682 MLX5_IB_UMR_OCTOWORD;
683 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
684 if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
685 mlx5_core_is_pf(dev->mdev))
686 ent->limit = dev->mdev->profile->mr_cache[i].limit;
687 else
688 ent->limit = 0;
638 } 689 }
639 690
640 err = mlx5_mr_cache_debugfs_init(dev); 691 err = mlx5_mr_cache_debugfs_init(dev);
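The new ent->xlt value replaces the old (npages + 1) / 2 octoword sizing: an order-n entry maps 1 << n pages, each MTT descriptor takes 8 bytes, and UMR translation sizes are counted in 16-byte octowords. A standalone check of that arithmetic; the 8-byte mlx5_mtt and 16-byte MLX5_IB_UMR_OCTOWORD sizes are assumptions consistent with both formulas, not taken from this diff.

#include <stdio.h>

#define MTT_SIZE	8	/* sizeof(struct mlx5_mtt), assumed */
#define UMR_OCTOWORD	16	/* MLX5_IB_UMR_OCTOWORD, assumed */

int main(void)
{
	for (int order = 2; order <= 5; order++) {
		unsigned int npages = 1u << order;
		unsigned int xlt = npages * MTT_SIZE / UMR_OCTOWORD;

		/* matches the old (npages + 1) / 2 rounding for powers of two */
		printf("order %d: %u pages -> %u octowords (old formula %u)\n",
		       order, npages, xlt, (npages + 1) / 2);
	}
	return 0;
}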
@@ -758,7 +809,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
758static int use_umr(struct mlx5_ib_dev *dev, int order) 809static int use_umr(struct mlx5_ib_dev *dev, int order)
759{ 810{
760 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) 811 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
761 return order < MAX_MR_CACHE_ENTRIES + 2; 812 return order <= MAX_UMR_CACHE_ENTRY + 2;
762 return order <= MLX5_MAX_UMR_SHIFT; 813 return order <= MLX5_MAX_UMR_SHIFT;
763} 814}
764 815
@@ -871,7 +922,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
871 MLX5_IB_UPD_XLT_ENABLE); 922 MLX5_IB_UPD_XLT_ENABLE);
872 923
873 if (err) { 924 if (err) {
874 free_cached_mr(dev, mr); 925 mlx5_mr_cache_free(dev, mr);
875 return ERR_PTR(err); 926 return ERR_PTR(err);
876 } 927 }
877 928
@@ -886,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
886{ 937{
887 struct mlx5_ib_dev *dev = mr->dev; 938 struct mlx5_ib_dev *dev = mr->dev;
888 struct ib_umem *umem = mr->umem; 939 struct ib_umem *umem = mr->umem;
940 if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
941 mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
942 return npages;
943 }
889 944
890 npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx); 945 npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
891 946
@@ -919,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
919 struct mlx5_umr_wr wr; 974 struct mlx5_umr_wr wr;
920 struct ib_sge sg; 975 struct ib_sge sg;
921 int err = 0; 976 int err = 0;
922 int desc_size = sizeof(struct mlx5_mtt); 977 int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
978 ? sizeof(struct mlx5_klm)
979 : sizeof(struct mlx5_mtt);
923 const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; 980 const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
924 const int page_mask = page_align - 1; 981 const int page_mask = page_align - 1;
925 size_t pages_mapped = 0; 982 size_t pages_mapped = 0;
@@ -1091,6 +1148,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
1091 goto err_2; 1148 goto err_2;
1092 } 1149 }
1093 mr->mmkey.type = MLX5_MKEY_MR; 1150 mr->mmkey.type = MLX5_MKEY_MR;
1151 mr->desc_size = sizeof(struct mlx5_mtt);
1094 mr->umem = umem; 1152 mr->umem = umem;
1095 mr->dev = dev; 1153 mr->dev = dev;
1096 mr->live = 1; 1154 mr->live = 1;
@@ -1136,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1136 1194
1137 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1195 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1138 start, virt_addr, length, access_flags); 1196 start, virt_addr, length, access_flags);
1197
1198#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1199 if (!start && length == U64_MAX) {
1200 if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
1201 !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1202 return ERR_PTR(-EINVAL);
1203
1204 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
1205 return &mr->ibmr;
1206 }
1207#endif
1208
1139 err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, 1209 err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
1140 &page_shift, &ncont, &order); 1210 &page_shift, &ncont, &order);
1141 1211
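The new early return recognises an implicit registration: a zero start with a length of U64_MAX, which is only accepted together with IB_ACCESS_ON_DEMAND and the IB_ODP_SUPPORT_IMPLICIT capability added in odp.c. From user space this is normally reached by registering the whole address space; a hedged libibverbs sketch follows, and the NULL/SIZE_MAX convention is an assumption about the matching user-space support rather than something shown in this diff.

#include <stdio.h>
#include <stdint.h>
#include <infiniband/verbs.h>

/* Sketch: request an implicit ODP MR spanning the whole address space.
 * Assumes the provider advertises implicit ODP support. */
static struct ibv_mr *reg_implicit_odp_mr(struct ibv_pd *pd)
{
	struct ibv_mr *mr;

	mr = ibv_reg_mr(pd, NULL, SIZE_MAX,
			IBV_ACCESS_ON_DEMAND | IBV_ACCESS_LOCAL_WRITE);
	if (!mr)
		perror("ibv_reg_mr(implicit ODP)");
	return mr;
}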
@@ -1398,12 +1468,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
1398 return err; 1468 return err;
1399 } 1469 }
1400 } else { 1470 } else {
1401 err = unreg_umr(dev, mr); 1471 mlx5_mr_cache_free(dev, mr);
1402 if (err) {
1403 mlx5_ib_warn(dev, "failed unregister\n");
1404 return err;
1405 }
1406 free_cached_mr(dev, mr);
1407 } 1472 }
1408 1473
1409 if (!umred) 1474 if (!umred)
@@ -1426,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1426 /* Wait for all running page-fault handlers to finish. */ 1491 /* Wait for all running page-fault handlers to finish. */
1427 synchronize_srcu(&dev->mr_srcu); 1492 synchronize_srcu(&dev->mr_srcu);
1428 /* Destroy all page mappings */ 1493 /* Destroy all page mappings */
1429 mlx5_ib_invalidate_range(umem, ib_umem_start(umem), 1494 if (umem->odp_data->page_list)
1430 ib_umem_end(umem)); 1495 mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1496 ib_umem_end(umem));
1497 else
1498 mlx5_ib_free_implicit_mr(mr);
1431 /* 1499 /*
1432 * We kill the umem before the MR for ODP, 1500 * We kill the umem before the MR for ODP,
1433 * so that there will not be any invalidations in 1501 * so that there will not be any invalidations in
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index e5bc267aca73..d7b12f0750e2 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -34,6 +34,7 @@
34#include <rdma/ib_umem_odp.h> 34#include <rdma/ib_umem_odp.h>
35 35
36#include "mlx5_ib.h" 36#include "mlx5_ib.h"
37#include "cmd.h"
37 38
38#define MAX_PREFETCH_LEN (4*1024*1024U) 39#define MAX_PREFETCH_LEN (4*1024*1024U)
39 40
@@ -41,6 +42,140 @@
41 * a pagefault. */ 42 * a pagefault. */
42#define MMU_NOTIFIER_TIMEOUT 1000 43#define MMU_NOTIFIER_TIMEOUT 1000
43 44
45#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
46#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
47#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
48#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
49#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
50
51#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
52
53static u64 mlx5_imr_ksm_entries;
54
55static int check_parent(struct ib_umem_odp *odp,
56 struct mlx5_ib_mr *parent)
57{
58 struct mlx5_ib_mr *mr = odp->private;
59
60 return mr && mr->parent == parent;
61}
62
63static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
64{
65 struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
66 struct ib_ucontext *ctx = odp->umem->context;
67 struct rb_node *rb;
68
69 down_read(&ctx->umem_rwsem);
70 while (1) {
71 rb = rb_next(&odp->interval_tree.rb);
72 if (!rb)
73 goto not_found;
74 odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
75 if (check_parent(odp, parent))
76 goto end;
77 }
78not_found:
79 odp = NULL;
80end:
81 up_read(&ctx->umem_rwsem);
82 return odp;
83}
84
85static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
86 u64 start, u64 length,
87 struct mlx5_ib_mr *parent)
88{
89 struct ib_umem_odp *odp;
90 struct rb_node *rb;
91
92 down_read(&ctx->umem_rwsem);
93 odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
94 if (!odp)
95 goto end;
96
97 while (1) {
98 if (check_parent(odp, parent))
99 goto end;
100 rb = rb_next(&odp->interval_tree.rb);
101 if (!rb)
102 goto not_found;
103 odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
104 if (ib_umem_start(odp->umem) > start + length)
105 goto not_found;
106 }
107not_found:
108 odp = NULL;
109end:
110 up_read(&ctx->umem_rwsem);
111 return odp;
112}
113
114void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
115 size_t nentries, struct mlx5_ib_mr *mr, int flags)
116{
117 struct ib_pd *pd = mr->ibmr.pd;
118 struct ib_ucontext *ctx = pd->uobject->context;
119 struct mlx5_ib_dev *dev = to_mdev(pd->device);
120 struct ib_umem_odp *odp;
121 unsigned long va;
122 int i;
123
124 if (flags & MLX5_IB_UPD_XLT_ZAP) {
125 for (i = 0; i < nentries; i++, pklm++) {
126 pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
127 pklm->key = cpu_to_be32(dev->null_mkey);
128 pklm->va = 0;
129 }
130 return;
131 }
132
133 odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
134 nentries * MLX5_IMR_MTT_SIZE, mr);
135
136 for (i = 0; i < nentries; i++, pklm++) {
137 pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
138 va = (offset + i) * MLX5_IMR_MTT_SIZE;
139 if (odp && odp->umem->address == va) {
140 struct mlx5_ib_mr *mtt = odp->private;
141
142 pklm->key = cpu_to_be32(mtt->ibmr.lkey);
143 odp = odp_next(odp);
144 } else {
145 pklm->key = cpu_to_be32(dev->null_mkey);
146 }
147 mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
148 i, va, be32_to_cpu(pklm->key));
149 }
150}
151
152static void mr_leaf_free_action(struct work_struct *work)
153{
154 struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
155 int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
156 struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
157
158 mr->parent = NULL;
159 synchronize_srcu(&mr->dev->mr_srcu);
160
161 if (!READ_ONCE(odp->dying)) {
162 mr->parent = imr;
163 if (atomic_dec_and_test(&imr->num_leaf_free))
164 wake_up(&imr->q_leaf_free);
165 return;
166 }
167
168 ib_umem_release(odp->umem);
169 if (imr->live)
170 mlx5_ib_update_xlt(imr, idx, 1, 0,
171 MLX5_IB_UPD_XLT_INDIRECT |
172 MLX5_IB_UPD_XLT_ATOMIC);
173 mlx5_mr_cache_free(mr->dev, mr);
174
175 if (atomic_dec_and_test(&imr->num_leaf_free))
176 wake_up(&imr->q_leaf_free);
177}
178
44void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, 179void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
45 unsigned long end) 180 unsigned long end)
46{ 181{
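The constants added at the top of this hunk fix the geometry of an implicit-MR leaf: each leaf MTT MR spans 2^30 bytes of virtual address space, and the parent's KSM entries use the same granularity (MLX5_KSM_PAGE_SHIFT equals MLX5_IMR_MTT_SHIFT). A standalone check of the arithmetic, assuming 4 KiB pages (PAGE_SHIFT of 12).

#include <stdio.h>

#define PAGE_SHIFT		12			/* assumed 4 KiB pages */
#define MLX5_IMR_MTT_BITS	(30 - PAGE_SHIFT)
#define MLX5_IMR_MTT_SHIFT	(MLX5_IMR_MTT_BITS + PAGE_SHIFT)
#define MLX5_IMR_MTT_ENTRIES	(1ULL << MLX5_IMR_MTT_BITS)
#define MLX5_IMR_MTT_SIZE	(1ULL << MLX5_IMR_MTT_SHIFT)

int main(void)
{
	/* 18 bits -> 262144 pages of 4 KiB per leaf -> 1 GiB per leaf,
	 * so one KSM slot in the parent MR also covers 1 GiB. */
	printf("bits=%d entries=%llu leaf=%llu MiB\n",
	       MLX5_IMR_MTT_BITS,
	       (unsigned long long)MLX5_IMR_MTT_ENTRIES,
	       (unsigned long long)(MLX5_IMR_MTT_SIZE >> 20));
	return 0;
}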
@@ -111,6 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
111 */ 246 */
112 247
113 ib_umem_odp_unmap_dma_pages(umem, start, end); 248 ib_umem_odp_unmap_dma_pages(umem, start, end);
249
250 if (unlikely(!umem->npages && mr->parent &&
251 !umem->odp_data->dying)) {
252 WRITE_ONCE(umem->odp_data->dying, 1);
253 atomic_inc(&mr->parent->num_leaf_free);
254 schedule_work(&umem->odp_data->work);
255 }
114} 256}
115 257
116void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) 258void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -147,6 +289,11 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
147 if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic)) 289 if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
148 caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC; 290 caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
149 291
292 if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
293 MLX5_CAP_GEN(dev->mdev, null_mkey) &&
294 MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
295 caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
296
150 return; 297 return;
151} 298}
152 299
@@ -184,6 +331,197 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
184 wq_num); 331 wq_num);
185} 332}
186 333
334static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
335 struct ib_umem *umem,
336 bool ksm, int access_flags)
337{
338 struct mlx5_ib_dev *dev = to_mdev(pd->device);
339 struct mlx5_ib_mr *mr;
340 int err;
341
342 mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
343 MLX5_IMR_MTT_CACHE_ENTRY);
344
345 if (IS_ERR(mr))
346 return mr;
347
348 mr->ibmr.pd = pd;
349
350 mr->dev = dev;
351 mr->access_flags = access_flags;
352 mr->mmkey.iova = 0;
353 mr->umem = umem;
354
355 if (ksm) {
356 err = mlx5_ib_update_xlt(mr, 0,
357 mlx5_imr_ksm_entries,
358 MLX5_KSM_PAGE_SHIFT,
359 MLX5_IB_UPD_XLT_INDIRECT |
360 MLX5_IB_UPD_XLT_ZAP |
361 MLX5_IB_UPD_XLT_ENABLE);
362
363 } else {
364 err = mlx5_ib_update_xlt(mr, 0,
365 MLX5_IMR_MTT_ENTRIES,
366 PAGE_SHIFT,
367 MLX5_IB_UPD_XLT_ZAP |
368 MLX5_IB_UPD_XLT_ENABLE |
369 MLX5_IB_UPD_XLT_ATOMIC);
370 }
371
372 if (err)
373 goto fail;
374
375 mr->ibmr.lkey = mr->mmkey.key;
376 mr->ibmr.rkey = mr->mmkey.key;
377
378 mr->live = 1;
379
380 mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
381 mr->mmkey.key, dev->mdev, mr);
382
383 return mr;
384
385fail:
386 mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
387 mlx5_mr_cache_free(dev, mr);
388
389 return ERR_PTR(err);
390}
391
392static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
393 u64 io_virt, size_t bcnt)
394{
395 struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
396 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
397 struct ib_umem_odp *odp, *result = NULL;
398 u64 addr = io_virt & MLX5_IMR_MTT_MASK;
399 int nentries = 0, start_idx = 0, ret;
400 struct mlx5_ib_mr *mtt;
401 struct ib_umem *umem;
402
403 mutex_lock(&mr->umem->odp_data->umem_mutex);
404 odp = odp_lookup(ctx, addr, 1, mr);
405
406 mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
407 io_virt, bcnt, addr, odp);
408
409next_mr:
410 if (likely(odp)) {
411 if (nentries)
412 nentries++;
413 } else {
414 umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
415 if (IS_ERR(umem)) {
416 mutex_unlock(&mr->umem->odp_data->umem_mutex);
417 return ERR_CAST(umem);
418 }
419
420 mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
421 if (IS_ERR(mtt)) {
422 mutex_unlock(&mr->umem->odp_data->umem_mutex);
423 ib_umem_release(umem);
424 return ERR_CAST(mtt);
425 }
426
427 odp = umem->odp_data;
428 odp->private = mtt;
429 mtt->umem = umem;
430 mtt->mmkey.iova = addr;
431 mtt->parent = mr;
432 INIT_WORK(&odp->work, mr_leaf_free_action);
433
434 if (!nentries)
435 start_idx = addr >> MLX5_IMR_MTT_SHIFT;
436 nentries++;
437 }
438
439 odp->dying = 0;
440
441 /* Return first odp if region not covered by single one */
442 if (likely(!result))
443 result = odp;
444
445 addr += MLX5_IMR_MTT_SIZE;
446 if (unlikely(addr < io_virt + bcnt)) {
447 odp = odp_next(odp);
448 if (odp && odp->umem->address != addr)
449 odp = NULL;
450 goto next_mr;
451 }
452
453 if (unlikely(nentries)) {
454 ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
455 MLX5_IB_UPD_XLT_INDIRECT |
456 MLX5_IB_UPD_XLT_ATOMIC);
457 if (ret) {
458 mlx5_ib_err(dev, "Failed to update PAS\n");
459 result = ERR_PTR(ret);
460 }
461 }
462
463 mutex_unlock(&mr->umem->odp_data->umem_mutex);
464 return result;
465}
466
467struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
468 int access_flags)
469{
470 struct ib_ucontext *ctx = pd->ibpd.uobject->context;
471 struct mlx5_ib_mr *imr;
472 struct ib_umem *umem;
473
474 umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
475 if (IS_ERR(umem))
476 return ERR_CAST(umem);
477
478 imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
479 if (IS_ERR(imr)) {
480 ib_umem_release(umem);
481 return ERR_CAST(imr);
482 }
483
484 imr->umem = umem;
485 init_waitqueue_head(&imr->q_leaf_free);
486 atomic_set(&imr->num_leaf_free, 0);
487
488 return imr;
489}
490
491static int mr_leaf_free(struct ib_umem *umem, u64 start,
492 u64 end, void *cookie)
493{
494 struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
495
496 if (mr->parent != imr)
497 return 0;
498
499 ib_umem_odp_unmap_dma_pages(umem,
500 ib_umem_start(umem),
501 ib_umem_end(umem));
502
503 if (umem->odp_data->dying)
504 return 0;
505
506 WRITE_ONCE(umem->odp_data->dying, 1);
507 atomic_inc(&imr->num_leaf_free);
508 schedule_work(&umem->odp_data->work);
509
510 return 0;
511}
512
513void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
514{
515 struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
516
517 down_read(&ctx->umem_rwsem);
518 rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
519 mr_leaf_free, imr);
520 up_read(&ctx->umem_rwsem);
521
522 wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
523}
524
187/* 525/*
188 * Handle a single data segment in a page-fault WQE or RDMA region. 526 * Handle a single data segment in a page-fault WQE or RDMA region.
189 * 527 *
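implicit_mr_get_data() above walks, and lazily creates, the 1 GiB leaves backing a faulting range: the faulting address is truncated to a leaf boundary with MLX5_IMR_MTT_MASK, and the leaf's slot in the parent's KSM table is the address shifted right by MLX5_IMR_MTT_SHIFT, which is also the start_idx handed to mlx5_ib_update_xlt(). A small standalone sketch of that address arithmetic, again assuming 4 KiB pages.

#include <stdio.h>
#include <stdint.h>

#define MLX5_IMR_MTT_SHIFT	30	/* matches the definition above for 4 KiB pages */
#define MLX5_IMR_MTT_SIZE	(1ULL << MLX5_IMR_MTT_SHIFT)
#define MLX5_IMR_MTT_MASK	(~(MLX5_IMR_MTT_SIZE - 1))

int main(void)
{
	uint64_t io_virt   = 0x7f3a12345678ULL;	/* example faulting VA */
	uint64_t leaf_base = io_virt & MLX5_IMR_MTT_MASK;
	uint64_t ksm_slot  = io_virt >> MLX5_IMR_MTT_SHIFT;

	/* leaf_base is the 1 GiB-aligned start of the leaf umem,
	 * ksm_slot its index in the parent MR's KSM table. */
	printf("va=%#llx leaf=%#llx slot=%llu\n",
	       (unsigned long long)io_virt,
	       (unsigned long long)leaf_base,
	       (unsigned long long)ksm_slot);
	return 0;
}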
@@ -195,47 +533,43 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
195 * -EFAULT when there's an error mapping the requested pages. The caller will 533 * -EFAULT when there's an error mapping the requested pages. The caller will
196 * abort the page fault handling. 534 * abort the page fault handling.
197 */ 535 */
198static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev, 536static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
199 u32 key, u64 io_virt, size_t bcnt, 537 u32 key, u64 io_virt, size_t bcnt,
200 u32 *bytes_committed, 538 u32 *bytes_committed,
201 u32 *bytes_mapped) 539 u32 *bytes_mapped)
202{ 540{
203 int srcu_key; 541 int srcu_key;
204 unsigned int current_seq; 542 unsigned int current_seq = 0;
205 u64 start_idx; 543 u64 start_idx;
206 int npages = 0, ret = 0; 544 int npages = 0, ret = 0;
207 struct mlx5_ib_mr *mr; 545 struct mlx5_ib_mr *mr;
208 u64 access_mask = ODP_READ_ALLOWED_BIT; 546 u64 access_mask = ODP_READ_ALLOWED_BIT;
547 struct ib_umem_odp *odp;
548 int implicit = 0;
549 size_t size;
209 550
210 srcu_key = srcu_read_lock(&mib_dev->mr_srcu); 551 srcu_key = srcu_read_lock(&dev->mr_srcu);
211 mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key); 552 mr = mlx5_ib_odp_find_mr_lkey(dev, key);
212 /* 553 /*
213 * If we didn't find the MR, it means the MR was closed while we were 554 * If we didn't find the MR, it means the MR was closed while we were
214 * handling the ODP event. In this case we return -EFAULT so that the 555 * handling the ODP event. In this case we return -EFAULT so that the
215 * QP will be closed. 556 * QP will be closed.
216 */ 557 */
217 if (!mr || !mr->ibmr.pd) { 558 if (!mr || !mr->ibmr.pd) {
218 pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n", 559 mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
219 key); 560 key);
220 ret = -EFAULT; 561 ret = -EFAULT;
221 goto srcu_unlock; 562 goto srcu_unlock;
222 } 563 }
223 if (!mr->umem->odp_data) { 564 if (!mr->umem->odp_data) {
224 pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", 565 mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
225 key); 566 key);
226 if (bytes_mapped) 567 if (bytes_mapped)
227 *bytes_mapped += 568 *bytes_mapped +=
228 (bcnt - *bytes_committed); 569 (bcnt - *bytes_committed);
229 goto srcu_unlock; 570 goto srcu_unlock;
230 } 571 }
231 572
232 current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
233 /*
234 * Ensure the sequence number is valid for some time before we call
235 * gup.
236 */
237 smp_rmb();
238
239 /* 573 /*
240 * Avoid branches - this code will perform correctly 574 * Avoid branches - this code will perform correctly
241 * in all iterations (in iteration 2 and above, 575 * in all iterations (in iteration 2 and above,
@@ -244,63 +578,109 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
244 io_virt += *bytes_committed; 578 io_virt += *bytes_committed;
245 bcnt -= *bytes_committed; 579 bcnt -= *bytes_committed;
246 580
581 if (!mr->umem->odp_data->page_list) {
582 odp = implicit_mr_get_data(mr, io_virt, bcnt);
583
584 if (IS_ERR(odp)) {
585 ret = PTR_ERR(odp);
586 goto srcu_unlock;
587 }
588 mr = odp->private;
589 implicit = 1;
590
591 } else {
592 odp = mr->umem->odp_data;
593 }
594
595next_mr:
596 current_seq = READ_ONCE(odp->notifiers_seq);
597 /*
598 * Ensure the sequence number is valid for some time before we call
599 * gup.
600 */
601 smp_rmb();
602
603 size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
247 start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT; 604 start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
248 605
249 if (mr->umem->writable) 606 if (mr->umem->writable)
250 access_mask |= ODP_WRITE_ALLOWED_BIT; 607 access_mask |= ODP_WRITE_ALLOWED_BIT;
251 npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt, 608
252 access_mask, current_seq); 609 ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
253 if (npages < 0) { 610 access_mask, current_seq);
254 ret = npages; 611
612 if (ret < 0)
255 goto srcu_unlock; 613 goto srcu_unlock;
256 }
257 614
258 if (npages > 0) { 615 if (ret > 0) {
259 mutex_lock(&mr->umem->odp_data->umem_mutex); 616 int np = ret;
617
618 mutex_lock(&odp->umem_mutex);
260 if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) { 619 if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
261 /* 620 /*
262 * No need to check whether the MTTs really belong to 621 * No need to check whether the MTTs really belong to
263 * this MR, since ib_umem_odp_map_dma_pages already 622 * this MR, since ib_umem_odp_map_dma_pages already
264 * checks this. 623 * checks this.
265 */ 624 */
266 ret = mlx5_ib_update_xlt(mr, start_idx, npages, 625 ret = mlx5_ib_update_xlt(mr, start_idx, np,
267 PAGE_SHIFT, 626 PAGE_SHIFT,
268 MLX5_IB_UPD_XLT_ATOMIC); 627 MLX5_IB_UPD_XLT_ATOMIC);
269 } else { 628 } else {
270 ret = -EAGAIN; 629 ret = -EAGAIN;
271 } 630 }
272 mutex_unlock(&mr->umem->odp_data->umem_mutex); 631 mutex_unlock(&odp->umem_mutex);
273 if (ret < 0) { 632 if (ret < 0) {
274 if (ret != -EAGAIN) 633 if (ret != -EAGAIN)
275 pr_err("Failed to update mkey page tables\n"); 634 mlx5_ib_err(dev, "Failed to update mkey page tables\n");
276 goto srcu_unlock; 635 goto srcu_unlock;
277 } 636 }
278 637
279 if (bytes_mapped) { 638 if (bytes_mapped) {
280 u32 new_mappings = npages * PAGE_SIZE - 639 u32 new_mappings = np * PAGE_SIZE -
281 (io_virt - round_down(io_virt, PAGE_SIZE)); 640 (io_virt - round_down(io_virt, PAGE_SIZE));
282 *bytes_mapped += min_t(u32, new_mappings, bcnt); 641 *bytes_mapped += min_t(u32, new_mappings, size);
283 } 642 }
643
644 npages += np;
645 }
646
647 bcnt -= size;
648 if (unlikely(bcnt)) {
649 struct ib_umem_odp *next;
650
651 io_virt += size;
652 next = odp_next(odp);
653 if (unlikely(!next || next->umem->address != io_virt)) {
654 mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
655 io_virt, next);
656 ret = -EAGAIN;
657 goto srcu_unlock_no_wait;
658 }
659 odp = next;
660 mr = odp->private;
661 goto next_mr;
284 } 662 }
285 663
286srcu_unlock: 664srcu_unlock:
287 if (ret == -EAGAIN) { 665 if (ret == -EAGAIN) {
288 if (!mr->umem->odp_data->dying) { 666 if (implicit || !odp->dying) {
289 struct ib_umem_odp *odp_data = mr->umem->odp_data;
290 unsigned long timeout = 667 unsigned long timeout =
291 msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT); 668 msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
292 669
293 if (!wait_for_completion_timeout( 670 if (!wait_for_completion_timeout(
294 &odp_data->notifier_completion, 671 &odp->notifier_completion,
295 timeout)) { 672 timeout)) {
296 pr_warn("timeout waiting for mmu notifier completion\n"); 673 mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
674 current_seq, odp->notifiers_seq);
297 } 675 }
298 } else { 676 } else {
299 /* The MR is being killed, kill the QP as well. */ 677 /* The MR is being killed, kill the QP as well. */
300 ret = -EFAULT; 678 ret = -EFAULT;
301 } 679 }
302 } 680 }
303 srcu_read_unlock(&mib_dev->mr_srcu, srcu_key); 681
682srcu_unlock_no_wait:
683 srcu_read_unlock(&dev->mr_srcu, srcu_key);
304 *bytes_committed = 0; 684 *bytes_committed = 0;
305 return ret ? ret : npages; 685 return ret ? ret : npages;
306} 686}
@@ -618,8 +998,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
618 goto resolve_page_fault; 998 goto resolve_page_fault;
619 } else if (ret < 0 || total_wqe_bytes > bytes_mapped) { 999 } else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
620 if (ret != -ENOENT) 1000 if (ret != -ENOENT)
621 mlx5_ib_err(dev, "Error getting user pages for page fault. Error: %d\n", 1001 mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. type: 0x%x\n",
622 ret); 1002 ret, pfault->wqe.wq_num, pfault->type);
623 goto resolve_page_fault; 1003 goto resolve_page_fault;
624 } 1004 }
625 1005
@@ -627,7 +1007,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
627resolve_page_fault: 1007resolve_page_fault:
628 mlx5_ib_page_fault_resume(dev, pfault, resume_with_error); 1008 mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
629 mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n", 1009 mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
630 pfault->token, resume_with_error, 1010 pfault->wqe.wq_num, resume_with_error,
631 pfault->type); 1011 pfault->type);
632 free_page((unsigned long)buffer); 1012 free_page((unsigned long)buffer);
633} 1013}
@@ -700,10 +1080,9 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
700 ret = pagefault_single_data_segment(dev, rkey, address, 1080 ret = pagefault_single_data_segment(dev, rkey, address,
701 prefetch_len, 1081 prefetch_len,
702 &bytes_committed, NULL); 1082 &bytes_committed, NULL);
703 if (ret < 0) { 1083 if (ret < 0 && ret != -EAGAIN) {
704 mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", 1084 mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
705 ret, pfault->token, address, 1085 ret, pfault->token, address, prefetch_len);
706 prefetch_len);
707 } 1086 }
708 } 1087 }
709} 1088}
@@ -728,19 +1107,61 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
728 } 1107 }
729} 1108}
730 1109
731int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) 1110void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
1111{
1112 if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1113 return;
1114
1115 switch (ent->order - 2) {
1116 case MLX5_IMR_MTT_CACHE_ENTRY:
1117 ent->page = PAGE_SHIFT;
1118 ent->xlt = MLX5_IMR_MTT_ENTRIES *
1119 sizeof(struct mlx5_mtt) /
1120 MLX5_IB_UMR_OCTOWORD;
1121 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
1122 ent->limit = 0;
1123 break;
1124
1125 case MLX5_IMR_KSM_CACHE_ENTRY:
1126 ent->page = MLX5_KSM_PAGE_SHIFT;
1127 ent->xlt = mlx5_imr_ksm_entries *
1128 sizeof(struct mlx5_klm) /
1129 MLX5_IB_UMR_OCTOWORD;
1130 ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
1131 ent->limit = 0;
1132 break;
1133 }
1134}
1135
1136int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
732{ 1137{
733 int ret; 1138 int ret;
734 1139
735 ret = init_srcu_struct(&ibdev->mr_srcu); 1140 ret = init_srcu_struct(&dev->mr_srcu);
736 if (ret) 1141 if (ret)
737 return ret; 1142 return ret;
738 1143
1144 if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
1145 ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
1146 if (ret) {
1147 mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
1148 return ret;
1149 }
1150 }
1151
739 return 0; 1152 return 0;
740} 1153}
741 1154
742void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) 1155void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
1156{
1157 cleanup_srcu_struct(&dev->mr_srcu);
1158}
1159
1160int mlx5_ib_odp_init(void)
743{ 1161{
744 cleanup_srcu_struct(&ibdev->mr_srcu); 1162 mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
1163 MLX5_IMR_MTT_BITS);
1164
1165 return 0;
745} 1166}
746 1167
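mlx5_ib_odp_init() sizes the top-level KSM table so the implicit MR covers the whole user address space: get_order(TASK_SIZE) expresses the address-space size in page orders, and subtracting MLX5_IMR_MTT_BITS converts that into a count of 1 GiB leaves. A hedged worked example for x86-64, where TASK_SIZE is roughly 2^47 and PAGE_SHIFT is 12; both values are assumptions about the target architecture.

#include <stdio.h>

#define PAGE_SHIFT		12			/* assumed */
#define TASK_SIZE_ORDER		(47 - PAGE_SHIFT)	/* ~ get_order(2^47), assumed */
#define MLX5_IMR_MTT_BITS	(30 - PAGE_SHIFT)

int main(void)
{
	unsigned long long ksm_entries =
		1ULL << (TASK_SIZE_ORDER - MLX5_IMR_MTT_BITS);

	/* 2^17 slots, each backing a 1 GiB leaf -> 128 TiB of VA space */
	printf("ksm entries: %llu, coverage: %llu TiB\n",
	       ksm_entries, ksm_entries >> 10);
	return 0;
}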
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index e31bf11ae64f..ad8a2638e339 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -905,7 +905,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
905 else 905 else
906 qp->bf.bfreg = &dev->bfreg; 906 qp->bf.bfreg = &dev->bfreg;
907 907
908 qp->bf.buf_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); 908 /* We need to divide by two since each register is comprised of
909 * two buffers of identical size, namely odd and even
910 */
911 qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
909 uar_index = qp->bf.bfreg->index; 912 uar_index = qp->bf.bfreg->index;
910 913
911 err = calc_sq_size(dev, init_attr, qp); 914 err = calc_sq_size(dev, init_attr, qp);
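The blue-flame fix divides the register size by two because, as the new comment explains, each BF register is an even/odd pair of identically sized buffers and a doorbell burst can only use one of them. A standalone check of the arithmetic; the log_bf_reg_size value of 9 is only an illustrative assumption, the real value comes from the HCA capabilities.

#include <stdio.h>

int main(void)
{
	int log_bf_reg_size = 9;		/* illustrative; read from HCA caps */
	int reg_size = 1 << log_bf_reg_size;	/* 512 bytes for the whole register */
	int buf_size = reg_size / 2;		/* 256 bytes per even/odd buffer */

	printf("register %d B -> usable blue-flame buffer %d B\n",
	       reg_size, buf_size);
	return 0;
}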
@@ -1141,7 +1144,8 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1141 return -ENOMEM; 1144 return -ENOMEM;
1142 1145
1143 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); 1146 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
1144 MLX5_SET(rqc, rqc, vsd, 1); 1147 if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
1148 MLX5_SET(rqc, rqc, vsd, 1);
1145 MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); 1149 MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
1146 MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); 1150 MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
1147 MLX5_SET(rqc, rqc, flush_in_error_en, 1); 1151 MLX5_SET(rqc, rqc, flush_in_error_en, 1);
@@ -1238,6 +1242,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1238 if (qp->rq.wqe_cnt) { 1242 if (qp->rq.wqe_cnt) {
1239 rq->base.container_mibqp = qp; 1243 rq->base.container_mibqp = qp;
1240 1244
1245 if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING)
1246 rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
1241 err = create_raw_packet_qp_rq(dev, rq, in); 1247 err = create_raw_packet_qp_rq(dev, rq, in);
1242 if (err) 1248 if (err)
1243 goto err_destroy_sq; 1249 goto err_destroy_sq;
@@ -1559,6 +1565,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1559 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 1565 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1560 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; 1566 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
1561 1567
1568 if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) {
1569 if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
1570 MLX5_CAP_ETH(dev->mdev, vlan_cap)) ||
1571 (init_attr->qp_type != IB_QPT_RAW_PACKET))
1572 return -EOPNOTSUPP;
1573 qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
1574 }
1575
1562 if (pd && pd->uobject) { 1576 if (pd && pd->uobject) {
1563 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { 1577 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
1564 mlx5_ib_dbg(dev, "copy failed\n"); 1578 mlx5_ib_dbg(dev, "copy failed\n");
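The new create_qp_common() check wires IB_QP_CREATE_CVLAN_STRIPPING through to the raw-packet RQ (see the vsd handling earlier in this file): the flag is only honoured for IB_QPT_RAW_PACKET QPs on devices advertising eth_net_offloads and vlan_cap, and is rejected with -EOPNOTSUPP otherwise. A hedged kernel-side sketch of a consumer asking for the offload; the helper name, CQ/PD arguments and queue capacities are placeholders.

/* Hypothetical sketch (needs <rdma/ib_verbs.h>): create a raw packet QP
 * with CVLAN stripping requested. Returns an ERR_PTR on failure, e.g.
 * -EOPNOTSUPP when the device lacks the vlan offload capability. */
static struct ib_qp *create_cvlan_stripping_qp(struct ib_pd *pd,
					       struct ib_cq *cq)
{
	struct ib_qp_init_attr init_attr = {
		.qp_type	= IB_QPT_RAW_PACKET,
		.send_cq	= cq,
		.recv_cq	= cq,
		.create_flags	= IB_QP_CREATE_CVLAN_STRIPPING,
		.cap = {
			.max_send_wr	= 64,
			.max_recv_wr	= 64,
			.max_send_sge	= 1,
			.max_recv_sge	= 1,
		},
	};

	return ib_create_qp(pd, &init_attr);
}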
@@ -2198,6 +2212,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2198{ 2212{
2199 enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port); 2213 enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
2200 int err; 2214 int err;
2215 enum ib_gid_type gid_type;
2201 2216
2202 if (attr_mask & IB_QP_PKEY_INDEX) 2217 if (attr_mask & IB_QP_PKEY_INDEX)
2203 path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index : 2218 path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
@@ -2216,10 +2231,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (ll == IB_LINK_LAYER_ETHERNET) {
 		if (!(ah->ah_flags & IB_AH_GRH))
 			return -EINVAL;
+		err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
+					     &gid_type);
+		if (err)
+			return err;
 		memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
 		path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
 							  ah->grh.sgid_index);
 		path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+			path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f;
 	} else {
 		path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
 		path->fl_free_ar |=
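The DSCP value occupies the upper six bits of the GRH/IP traffic class, so for RoCE v2 (UDP encap) paths the driver now carries those bits into the address path. A small sketch of the extraction, with an illustrative value:

#include <linux/types.h>

/* traffic_class layout: DSCP in bits 7..2, ECN in bits 1..0.
 * e.g. traffic_class = 0x62 (DSCP 24 / CS3, ECT(0)) yields dscp = 24.
 */
static u8 example_dscp_from_traffic_class(u8 traffic_class)
{
	return (traffic_class >> 2) & 0x3f;
}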
@@ -2422,7 +2443,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 	if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
 		if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
 			MLX5_SET64(modify_rq_in, in, modify_bitmask,
-				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
+				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
 			MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
 		} else
 			pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
@@ -2777,7 +2798,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 					       qp->port) - 1;
 			mibport = &dev->port[port_num];
 			context->qp_counter_set_usr_page |=
-				cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
+				cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
 		}
 
 	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -2805,7 +2826,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 
 		raw_qp_param.operation = op;
 		if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
-			raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
+			raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
 			raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
 		}
 
@@ -3637,8 +3658,9 @@ static int set_psv_wr(struct ib_sig_domain *domain,
 		psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
 		break;
 	default:
-		pr_err("Bad signature type given.\n");
-		return 1;
+		pr_err("Bad signature type (%d) is given.\n",
+		       domain->sig_type);
+		return -EINVAL;
 	}
 
 	*seg += sizeof(*psv_seg);
@@ -3978,6 +4000,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			break;
 
 		case IB_QPT_SMI:
+			if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+				mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
+				err = -EPERM;
+				*bad_wr = wr;
+				goto out;
+			}
 		case MLX5_IB_QPT_HW_GSI:
 			set_datagram_seg(seg, wr);
 			seg += sizeof(struct mlx5_wqe_datagram_seg);
@@ -4579,6 +4607,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
 		     struct ib_wq_init_attr *init_attr)
 {
 	struct mlx5_ib_dev *dev;
+	int has_net_offloads;
 	__be64 *rq_pas0;
 	void *in;
 	void *rqc;
@@ -4610,9 +4639,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
 	MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
 	MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
 	MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
+	has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
+	if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+		if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+			mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+	} else {
+		MLX5_SET(rqc, rqc, vsd, 1);
+	}
+	if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
+		if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
+			mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+		MLX5_SET(rqc, rqc, scatter_fcs, 1);
+	}
 	rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
 	mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
 	err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+out:
 	kvfree(in);
 	return err;
 }
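From the verbs side, these offloads are requested through the new create_flags field of the WQ init attributes; a minimal sketch (the helper name and sizes are hypothetical), retrying without offloads if the device rejects them:

#include <linux/err.h>
#include <rdma/ib_verbs.h>

/* Hypothetical helper: create an RQ-type WQ that requests cvlan
 * stripping and FCS scattering, falling back when the caps are missing.
 */
static struct ib_wq *example_create_offload_wq(struct ib_pd *pd,
					       struct ib_cq *cq)
{
	struct ib_wq_init_attr attr = {
		.wq_type	= IB_WQT_RQ,
		.max_wr		= 128,
		.max_sge	= 1,
		.cq		= cq,
		.create_flags	= IB_WQ_FLAGS_CVLAN_STRIPPING |
				  IB_WQ_FLAGS_SCATTER_FCS,
	};
	struct ib_wq *wq = ib_create_wq(pd, &attr);

	if (IS_ERR(wq) && PTR_ERR(wq) == -EOPNOTSUPP) {
		/* Retry without the offloads the device rejected. */
		attr.create_flags = 0;
		wq = ib_create_wq(pd, &attr);
	}
	return wq;
}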
@@ -4896,10 +4944,37 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 	MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
 	MLX5_SET(rqc, rqc, state, wq_state);
 
+	if (wq_attr_mask & IB_WQ_FLAGS) {
+		if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+			if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+			      MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+				mlx5_ib_dbg(dev, "VLAN offloads are not "
+					    "supported\n");
+				err = -EOPNOTSUPP;
+				goto out;
+			}
+			MLX5_SET64(modify_rq_in, in, modify_bitmask,
+				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+			MLX5_SET(rqc, rqc, vsd,
+				 (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
+		}
+	}
+
+	if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+		if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
+			MLX5_SET64(modify_rq_in, in, modify_bitmask,
+				   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+			MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
+		} else
+			pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
+				     dev->ib_dev.name);
+	}
+
 	err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
-	kvfree(in);
 	if (!err)
 		rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
 
+out:
+	kvfree(in);
 	return err;
 }
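The modify path toggles the same offload at runtime: flags_mask selects the bit being changed and flags supplies its new value, which the driver maps onto the VSD (vlan strip disable) bit of the RQ context. A short sketch (helper name is hypothetical):

#include <rdma/ib_verbs.h>

/* Hypothetical helper: enable or disable cvlan stripping on a live WQ. */
static int example_set_wq_cvlan_stripping(struct ib_wq *wq, bool enable)
{
	struct ib_wq_attr attr = {
		.flags_mask	= IB_WQ_FLAGS_CVLAN_STRIPPING,
		.flags		= enable ? IB_WQ_FLAGS_CVLAN_STRIPPING : 0,
	};

	return ib_modify_wq(wq, &attr, IB_WQ_FLAGS);
}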
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6f4397ee1ed6..7cb145f9a6db 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 	int err;
 	int i;
 	struct mlx5_wqe_srq_next_seg *next;
-	int page_shift;
-	int npages;
 
 	err = mlx5_db_alloc(dev->mdev, &srq->db);
 	if (err) {
@@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 		err = -ENOMEM;
 		goto err_db;
 	}
-	page_shift = srq->buf.page_shift;
 
 	srq->head    = 0;
 	srq->tail    = srq->msrq.max - 1;
@@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 			cpu_to_be16((i + 1) & (srq->msrq.max - 1));
 	}
 
-	npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
-	mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
-		    buf_size, page_shift, srq->buf.npages, npages);
-	in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
+	mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
+	in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
 	if (!in->pas) {
 		err = -ENOMEM;
 		goto err_buf;
@@ -208,7 +203,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 	}
 	srq->wq_sig = !!srq_signature;
 
-	in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+	in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
 	    in->type == IB_SRQT_XRC)
 		in->user_index = MLX5_IB_DEFAULT_UIDX;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index d31708742ba5..ce163184e742 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -146,7 +146,7 @@ static int mthca_query_port(struct ib_device *ibdev,
 	if (!in_mad || !out_mad)
 		goto out;
 
-	memset(props, 0, sizeof *props);
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	init_query_mad(in_mad);
 	in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
@@ -212,7 +212,7 @@ static int mthca_modify_port(struct ib_device *ibdev,
 	if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
 		return -ERESTARTSYS;
 
-	err = mthca_query_port(ibdev, port, &attr);
+	err = ib_query_port(ibdev, port, &attr);
 	if (err)
 		goto out;
 
@@ -1166,13 +1166,14 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = mthca_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;
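The same reshuffle repeats across the drivers below: the immutable core_cap_flags are filled in before calling ib_query_port(), because the core query path now consults those flags (for example to recognize raw packet and usNIC ports). A generic sketch of the resulting shape (the callback name is hypothetical, the flags value depends on the driver):

#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>

/* Hypothetical driver callback following the reworked pattern. */
static int example_port_immutable(struct ib_device *ibdev, u8 port_num,
				  struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	/* Set the protocol first: ib_query_port() may look at it. */
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}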
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5a31f3c6a421..d3eae2f3e9f5 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -475,7 +475,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
 	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
 	struct net_device *netdev = nesvnic->netdev;
 
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	props->max_mtu = IB_MTU_4096;
 	props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -3660,13 +3660,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
+	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
 	err = nes_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 896071502739..3e43bdc81e7a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -93,15 +93,16 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
 	int err;
 
 	dev = get_ocrdma_dev(ibdev);
-	err = ocrdma_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+	if (ocrdma_is_udp_encap_supported(dev))
+		immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
-	if (ocrdma_is_udp_encap_supported(dev))
-		immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index e06ad7250963..bc9fb144e57b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -210,6 +210,7 @@ int ocrdma_query_port(struct ib_device *ibdev,
 	struct ocrdma_dev *dev;
 	struct net_device *netdev;
 
+	/* props being zeroed by the caller, avoid zeroing it here */
 	dev = get_ocrdma_dev(ibdev);
 	if (port > 1) {
 		pr_err("%s(%d) invalid_port=0x%x\n", __func__,
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 0c51657af151..6b3bb32803bd 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -238,8 +238,8 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
 	}
 
 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
-	memset(attr, 0, sizeof(*attr));
 
+	/* *attr being zeroed by the caller, avoid zeroing it here */
 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
 		attr->state = IB_PORT_ACTIVE;
 		attr->phys_state = 5;
@@ -3494,14 +3494,15 @@ int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = qedr_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
-				    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index b0b78e1cec92..6b56f1c01a07 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1220,6 +1220,7 @@ static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
 	enum ib_mtu mtu;
 	u16 lid = ppd->lid;
 
+	/* props being zeroed by the caller, avoid zeroing it here */
 	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
 	props->lmc = ppd->lmc;
 	props->state = dd->f_iblink_state(ppd->lastibcstat);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 0a89a955550b..4f5a45db08e1 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -321,7 +321,9 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = usnic_ib_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 69df8e353123..3284730d3c09 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -330,7 +330,7 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
 
 	mutex_lock(&us_ibdev->usdev_lock);
 	__ethtool_get_link_ksettings(us_ibdev->netdev, &cmd);
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	props->lid = 0;
 	props->lmc = 1;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 60cdb7719565..e03d2f6c1f90 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -132,13 +132,14 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	int err;
 
-	err = pvrdma_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 	return 0;
 }
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index c2aa52638dcb..fec17c49103b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -135,7 +135,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
 		return err;
 	}
 
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 
 	props->state = pvrdma_port_state_to_ib(resp->attrs.state);
 	props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
@@ -275,7 +275,7 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
 	}
 
 	mutex_lock(&vdev->port_mutex);
-	ret = pvrdma_query_port(ibdev, port, &attr);
+	ret = ib_query_port(ibdev, port, &attr);
 	if (ret)
 		goto out;
 
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index d430c2f7cec4..1165639a914b 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -165,7 +165,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
 		return -EINVAL;
 
 	rvp = rdi->ports[port_index];
-	memset(props, 0, sizeof(*props));
+	/* props being zeroed by the caller, avoid zeroing it here */
 	props->sm_lid = rvp->sm_lid;
 	props->sm_sl = rvp->sm_sl;
 	props->port_cap_flags = rvp->port_cap_flags;
@@ -326,13 +326,14 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
 	if (port_index < 0)
 		return -EINVAL;
 
-	err = rvt_query_port(ibdev, port_num, &attr);
+	immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+
+	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = rdi->dparms.core_cap_flags;
 	immutable->max_mad_size = rdi->dparms.max_mad_size;
 
 	return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index e4de37fb9aab..d2e2eff7a515 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -86,6 +86,7 @@ static int rxe_query_port(struct ib_device *dev,
 
 	port = &rxe->port;
 
+	/* *attr being zeroed by the caller, avoid zeroing it here */
 	*attr = port->attr;
 
 	mutex_lock(&rxe->usdev_lock);
@@ -261,13 +262,14 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
 	int err;
 	struct ib_port_attr attr;
 
-	err = rxe_query_port(dev, port_num, &attr);
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+	err = ib_query_port(dev, port_num, &attr);
 	if (err)
 		return err;
 
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index ce3d92106386..2478516a61e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1232,10 +1232,18 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 	fs_for_each_fte(fte, fg) {
 		nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
 		if (compare_match_value(&fg->mask, match_value, &fte->val) &&
-		    (flow_act->action & fte->action) &&
-		    flow_act->flow_tag == fte->flow_tag) {
+		    (flow_act->action & fte->action)) {
 			int old_action = fte->action;
 
+			if (fte->flow_tag != flow_act->flow_tag) {
+				mlx5_core_warn(get_dev(&fte->node),
+					       "FTE flow tag %u already exists with different flow tag %u\n",
+					       fte->flow_tag,
+					       flow_act->flow_tag);
+				handle = ERR_PTR(-EEXIST);
+				goto unlock_fte;
+			}
+
 			fte->action |= flow_act->action;
 			handle = add_rule_fte(fte, fg, dest, dest_num,
 					      old_action != flow_act->action);
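The effect is that two flow rules which collapse onto the same flow table entry must carry the same flow tag; a second rule with a different tag now fails with -EEXIST instead of silently keeping the first tag. An illustrative reduction of the new rule (not driver API, just the check in isolation):

#include <linux/errno.h>
#include <linux/types.h>

/* Illustrative only: mirrors the conflict rule added in add_rule_fg(). */
static int example_merge_flow_tag(u32 existing_tag, u32 new_tag)
{
	return existing_tag == new_tag ? 0 : -EEXIST;
}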
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1bc4641734da..2fcff6b4503f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -295,6 +295,7 @@ struct mlx5_port_caps {
 	int gid_table_len;
 	int pkey_table_len;
 	u8  ext_port_cap;
+	bool has_smi;
 };
 
 struct mlx5_cmd_mailbox {
@@ -1061,7 +1062,10 @@ enum {
 };
 
 enum {
-	MAX_MR_CACHE_ENTRIES	= 21,
+	MAX_UMR_CACHE_ENTRY    = 20,
+	MLX5_IMR_MTT_CACHE_ENTRY,
+	MLX5_IMR_KSM_CACHE_ENTRY,
+	MAX_MR_CACHE_ENTRIES
 };
 
 enum {
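Making MAX_MR_CACHE_ENTRIES the last enumerator keeps it self-adjusting: with MAX_UMR_CACHE_ENTRY at 20, the two implicit-MR entries land at 21 and 22 and the total grows from 21 to 23. A compile-time sketch of that arithmetic (names prefixed to mark it as an illustration):

/* Illustrative only: the values follow from C enum auto-increment. */
enum {
	EXAMPLE_MAX_UMR_CACHE_ENTRY = 20,
	EXAMPLE_IMR_MTT_CACHE_ENTRY,	/* 21 */
	EXAMPLE_IMR_KSM_CACHE_ENTRY,	/* 22 */
	EXAMPLE_MAX_MR_CACHE_ENTRIES	/* 23 */
};

_Static_assert(EXAMPLE_MAX_MR_CACHE_ENTRIES == 23,
	       "MR cache array grows from 21 to 23 entries");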
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index afcd4736d8df..838242697541 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5013,7 +5013,7 @@ struct mlx5_ifc_modify_rq_out_bits {
 
 enum {
 	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
-	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3,
+	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID = 1ULL << 3,
 };
 
 struct mlx5_ifc_modify_rq_in_bits {
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 3da0b167041b..542cd8b3414c 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -79,11 +79,15 @@ struct ib_umem_odp {
 
 	struct completion	notifier_completion;
 	int			dying;
+	struct work_struct	work;
 };
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 
 int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+				  unsigned long addr,
+				  size_t size);
 
 void ib_umem_odp_release(struct ib_umem *umem);
 
@@ -117,10 +121,12 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
 int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
 				  umem_call_back cb, void *cookie);
 
-struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
-					     u64 start, u64 last);
-struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
-					    u64 start, u64 last);
+/*
+ * Find first region intersecting with address range.
+ * Return NULL if not found
+ */
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+				       u64 addr, u64 length);
 
 static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
 					     unsigned long mmu_seq)
@@ -153,6 +159,13 @@ static inline int ib_umem_odp_get(struct ib_ucontext *context,
 	return -EINVAL;
 }
 
+static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+						unsigned long addr,
+						size_t size)
+{
+	return ERR_PTR(-EINVAL);
+}
+
 static inline void ib_umem_odp_release(struct ib_umem *umem) {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
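Because the stub returns ERR_PTR(-EINVAL), callers can use ib_alloc_odp_umem() unconditionally and rely on IS_ERR() when on-demand paging is compiled out. A minimal caller-side sketch (the wrapper name is hypothetical):

#include <linux/err.h>
#include <linux/printk.h>
#include <rdma/ib_umem_odp.h>

/* Hypothetical wrapper: allocate an ODP umem covering [addr, addr + size). */
static struct ib_umem *example_get_odp_umem(struct ib_ucontext *ctx,
					    unsigned long addr, size_t size)
{
	struct ib_umem *umem = ib_alloc_odp_umem(ctx, addr, size);

	/* With ODP compiled out the stub above yields ERR_PTR(-EINVAL). */
	if (IS_ERR(umem))
		pr_debug("ODP umem unavailable: %ld\n", PTR_ERR(umem));
	return umem;
}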
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8c61532cf521..89f5bd4e1d52 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -207,6 +207,7 @@ enum ib_device_cap_flags {
 	IB_DEVICE_MEM_WINDOW_TYPE_2A		= (1 << 23),
 	IB_DEVICE_MEM_WINDOW_TYPE_2B		= (1 << 24),
 	IB_DEVICE_RC_IP_CSUM			= (1 << 25),
+	/* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
 	IB_DEVICE_RAW_IP_CSUM			= (1 << 26),
 	/*
 	 * Devices should set IB_DEVICE_CROSS_CHANNEL if they
@@ -220,6 +221,7 @@ enum ib_device_cap_flags {
 	IB_DEVICE_ON_DEMAND_PAGING		= (1ULL << 31),
 	IB_DEVICE_SG_GAPS_REG			= (1ULL << 32),
 	IB_DEVICE_VIRTUAL_FUNCTION		= (1ULL << 33),
+	/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
 	IB_DEVICE_RAW_SCATTER_FCS		= (1ULL << 34),
 };
 
@@ -241,7 +243,8 @@ enum ib_atomic_cap {
 };
 
 enum ib_odp_general_cap_bits {
 	IB_ODP_SUPPORT		= 1 << 0,
+	IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
 };
 
 enum ib_odp_transport_cap_bits {
@@ -330,6 +333,7 @@ struct ib_device_attr {
 	uint64_t		hca_core_clock; /* in KHZ */
 	struct ib_rss_caps	rss_caps;
 	u32			max_wq_type_rq;
+	u32			raw_packet_caps; /* Use ib_raw_packet_caps enum */
 };
 
 enum ib_mtu {
@@ -499,6 +503,8 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 #define RDMA_CORE_CAP_PROT_ROCE         0x00200000
 #define RDMA_CORE_CAP_PROT_IWARP        0x00400000
 #define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
+#define RDMA_CORE_CAP_PROT_RAW_PACKET   0x01000000
+#define RDMA_CORE_CAP_PROT_USNIC        0x02000000
 
 #define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
 					| RDMA_CORE_CAP_IB_MAD \
@@ -522,6 +528,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 #define RDMA_CORE_PORT_INTEL_OPA       (RDMA_CORE_PORT_IBA_IB  \
 					| RDMA_CORE_CAP_OPA_MAD)
 
+#define RDMA_CORE_PORT_RAW_PACKET	(RDMA_CORE_CAP_PROT_RAW_PACKET)
+
+#define RDMA_CORE_PORT_USNIC		(RDMA_CORE_CAP_PROT_USNIC)
+
 struct ib_port_attr {
 	u64			subnet_prefix;
 	enum ib_port_state	state;
@@ -1019,6 +1029,7 @@ enum ib_qp_create_flags {
 	IB_QP_CREATE_SIGNATURE_EN		= 1 << 6,
 	IB_QP_CREATE_USE_GFP_NOIO		= 1 << 7,
 	IB_QP_CREATE_SCATTER_FCS		= 1 << 8,
+	IB_QP_CREATE_CVLAN_STRIPPING		= 1 << 9,
 	/* reserve bits 26-31 for low level drivers' internal use */
 	IB_QP_CREATE_RESERVED_START		= 1 << 26,
 	IB_QP_CREATE_RESERVED_END		= 1 << 31,
@@ -1470,6 +1481,18 @@ struct ib_srq {
 	} ext;
 };
 
+enum ib_raw_packet_caps {
+	/* Strip cvlan from incoming packet and report it in the matching work
+	 * completion is supported.
+	 */
+	IB_RAW_PACKET_CAP_CVLAN_STRIPPING	= (1 << 0),
+	/* Scatter FCS field of an incoming packet to host memory is supported.
+	 */
+	IB_RAW_PACKET_CAP_SCATTER_FCS		= (1 << 1),
+	/* Checksum offloads are supported (for both send and receive). */
+	IB_RAW_PACKET_CAP_IP_CSUM		= (1 << 2),
+};
+
 enum ib_wq_type {
 	IB_WQT_RQ
 };
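A consumer is expected to consult the new raw_packet_caps attribute before asking for the matching offloads; a short sketch (helper name is hypothetical, and it assumes device->attrs has already been populated by the provider):

#include <rdma/ib_verbs.h>

/* Hypothetical helper: derive WQ create_flags from the device's
 * advertised raw packet capabilities.
 */
static u32 example_wq_flags_from_caps(struct ib_device *device)
{
	u32 flags = 0;

	if (device->attrs.raw_packet_caps & IB_RAW_PACKET_CAP_CVLAN_STRIPPING)
		flags |= IB_WQ_FLAGS_CVLAN_STRIPPING;
	if (device->attrs.raw_packet_caps & IB_RAW_PACKET_CAP_SCATTER_FCS)
		flags |= IB_WQ_FLAGS_SCATTER_FCS;

	return flags;
}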
@@ -1493,6 +1516,11 @@ struct ib_wq {
 	atomic_t		usecnt;
 };
 
+enum ib_wq_flags {
+	IB_WQ_FLAGS_CVLAN_STRIPPING	= 1 << 0,
+	IB_WQ_FLAGS_SCATTER_FCS		= 1 << 1,
+};
+
 struct ib_wq_init_attr {
 	void		       *wq_context;
 	enum ib_wq_type	wq_type;
@@ -1500,16 +1528,20 @@ struct ib_wq_init_attr {
 	u32		max_sge;
 	struct	ib_cq	       *cq;
 	void		    (*event_handler)(struct ib_event *, void *);
+	u32		create_flags; /* Use enum ib_wq_flags */
 };
 
 enum ib_wq_attr_mask {
 	IB_WQ_STATE		= 1 << 0,
 	IB_WQ_CUR_STATE		= 1 << 1,
+	IB_WQ_FLAGS		= 1 << 2,
 };
 
 struct ib_wq_attr {
 	enum	ib_wq_state	wq_state;
 	enum	ib_wq_state	curr_wq_state;
+	u32			flags; /* Use enum ib_wq_flags */
+	u32			flags_mask; /* Use enum ib_wq_flags */
 };
 
 struct ib_rwq_ind_table {
@@ -1618,6 +1650,8 @@ enum ib_flow_spec_type {
 	IB_FLOW_SPEC_UDP		= 0x41,
 	IB_FLOW_SPEC_VXLAN_TUNNEL	= 0x50,
 	IB_FLOW_SPEC_INNER		= 0x100,
+	/* Actions */
+	IB_FLOW_SPEC_ACTION_TAG         = 0x1000,
 };
 #define IB_FLOW_SPEC_LAYER_MASK	0xF0
 #define IB_FLOW_SPEC_SUPPORT_LAYERS 8
@@ -1740,6 +1774,12 @@ struct ib_flow_spec_tunnel {
 	struct ib_flow_tunnel_filter  mask;
 };
 
+struct ib_flow_spec_action_tag {
+	enum ib_flow_spec_type	      type;
+	u16			      size;
+	u32                           tag_id;
+};
+
 union ib_flow_spec {
 	struct {
 		u32			type;
@@ -1751,6 +1791,7 @@ union ib_flow_spec {
 	struct ib_flow_spec_tcp_udp	tcp_udp;
 	struct ib_flow_spec_ipv6        ipv6;
 	struct ib_flow_spec_tunnel      tunnel;
+	struct ib_flow_spec_action_tag  flow_tag;
 };
 
 struct ib_flow_attr {
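The new action spec is appended to a flow rule like any other spec, and the tag_id set here is reported back in the completions of packets steered by the rule. A minimal kernel-side sketch of building such a rule, following the way existing in-kernel users lay out flow attributes and specs back to back (the helper is hypothetical and the Ethernet match is left as an empty wildcard for brevity):

#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

/* Hypothetical helper: steer all Ethernet traffic on @port to @qp and
 * tag the resulting completions with @tag.
 */
static struct ib_flow *example_create_tagged_flow(struct ib_qp *qp,
						  u8 port, u32 tag)
{
	struct {
		struct ib_flow_attr		attr;
		struct ib_flow_spec_eth		eth;
		struct ib_flow_spec_action_tag	tag;
	} *rule;
	struct ib_flow *flow;

	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
	if (!rule)
		return ERR_PTR(-ENOMEM);

	rule->attr.type		= IB_FLOW_ATTR_NORMAL;
	rule->attr.size		= sizeof(*rule);
	rule->attr.num_of_specs	= 2;
	rule->attr.port		= port;

	rule->eth.type		= IB_FLOW_SPEC_ETH;
	rule->eth.size		= sizeof(rule->eth);	/* empty mask: match all */

	rule->tag.type		= IB_FLOW_SPEC_ACTION_TAG;
	rule->tag.size		= sizeof(rule->tag);
	rule->tag.tag_id	= tag;

	flow = ib_create_flow(qp, &rule->attr, IB_FLOW_DOMAIN_USER);
	kfree(rule);
	return flow;
}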
@@ -2333,6 +2374,16 @@ static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
 		rdma_protocol_roce(device, port_num);
 }
 
+static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
+{
+	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
+}
+
+static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
+{
+	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
+}
+
 /**
  * rdma_cap_ib_mad - Check if the port of a device supports Infiniband
  * Management Datagrams.
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index f4f87cff6dc6..997f904c7692 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -246,7 +246,7 @@ struct ib_uverbs_ex_query_device_resp {
 	__u64 device_cap_flags_ex;
 	struct ib_uverbs_rss_caps rss_caps;
 	__u32  max_wq_type_rq;
-	__u32 reserved;
+	__u32 raw_packet_caps;
 };
 
 struct ib_uverbs_query_port {
@@ -934,6 +934,19 @@ struct ib_uverbs_flow_spec_ipv6 {
 	struct ib_uverbs_flow_ipv6_filter mask;
 };
 
+struct ib_uverbs_flow_spec_action_tag {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	__u32			      tag_id;
+	__u32			      reserved1;
+};
+
 struct ib_uverbs_flow_tunnel_filter {
 	__be32 tunnel_id;
 };
@@ -1053,6 +1066,8 @@ struct ib_uverbs_ex_create_wq {
 	__u32 cq_handle;
 	__u32 max_wr;
 	__u32 max_sge;
+	__u32 create_flags; /* Use enum ib_wq_flags */
+	__u32 reserved;
 };
 
 struct ib_uverbs_ex_create_wq_resp {
@@ -1081,6 +1096,8 @@ struct ib_uverbs_ex_modify_wq {
 	__u32 wq_handle;
 	__u32 wq_state;
 	__u32 curr_wq_state;
+	__u32 flags; /* Use enum ib_wq_flags */
+	__u32 flags_mask; /* Use enum ib_wq_flags */
 };
 
 /* Prevent memory allocation rather than max expected size */