author	Linus Torvalds <torvalds@linux-foundation.org>	2016-11-17 16:53:02 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-11-17 16:53:02 -0500
commit	57400d305201e1025ea0c20c851173146271bd1b (patch)
tree	16c025ab63f43f314f300735723f51ce9d96bbae
parent	bec1b089ab287d5df160205f5949114e5a3d3162 (diff)
parent	5c6b2aaf9316fd0983c0c999d920306ddc65bd2d (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma fixes from Doug Ledford:
 "First round of -rc fixes.

  Due to various issues, I've been away and couldn't send a pull
  request for about three weeks. A number of -rc patches built up in
  the meantime (some were there already from the early -rc stages).
  Obviously, there were way too many to send now, so I tried to pare
  the list down to the more important patches for the -rc cycle.

  Most of the code has had plenty of soak time in the various vendors'
  testing setups, so I doubt there will be another -rc pull request
  this cycle. I also tried to limit the patches to those with smaller
  footprints, so even though the shortlog is longer than I would like,
  the actual diffstat is mostly very small, with the exception of just
  three files that had more changes, and a couple of files with pure
  removals.

  Summary:

   - Misc Intel hfi1 fixes

   - Misc Mellanox mlx4, mlx5, and rxe fixes

   - A couple of cxgb4 fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (34 commits)
  iw_cxgb4: invalidate the mr when posting a read_w_inv wr
  iw_cxgb4: set *bad_wr for post_send/post_recv errors
  IB/rxe: Update qp state for user query
  IB/rxe: Clear queue buffer when modifying QP to reset
  IB/rxe: Fix handling of erroneous WR
  IB/rxe: Fix kernel panic in UDP tunnel with GRO and RX checksum
  IB/mlx4: Fix create CQ error flow
  IB/mlx4: Check gid_index return value
  IB/mlx5: Fix NULL pointer dereference on debug print
  IB/mlx5: Fix fatal error dispatching
  IB/mlx5: Resolve soft lock on massive reg MRs
  IB/mlx5: Use cache line size to select CQE stride
  IB/mlx5: Validate requested RQT size
  IB/mlx5: Fix memory leak in query device
  IB/core: Avoid unsigned int overflow in sg_alloc_table
  IB/core: Add missing check for addr_resolve callback return value
  IB/core: Set routable RoCE gid type for ipv4/ipv6 networks
  IB/cm: Mark stale CM id's whenever the mad agent was unregistered
  IB/uverbs: Fix leak of XRC target QPs
  IB/hfi1: Remove incorrect IS_ERR check
  ...
-rw-r--r--  drivers/infiniband/core/addr.c              11
-rw-r--r--  drivers/infiniband/core/cm.c               126
-rw-r--r--  drivers/infiniband/core/cma.c               21
-rw-r--r--  drivers/infiniband/core/umem.c               2
-rw-r--r--  drivers/infiniband/core/uverbs_main.c        7
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c            17
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h       2
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c           12
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c            20
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.c       72
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.h        4
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c           27
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h            3
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c         37
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c       19
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h            89
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c          104
-rw-r--r--  drivers/infiniband/hw/hfi1/pcie.c            3
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.c            13
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c              2
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c           19
-rw-r--r--  drivers/infiniband/hw/hfi1/sysfs.c          25
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_rx.h       60
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.c       2
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c              5
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c              5
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c              3
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c           11
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h         2
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c              6
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c             12
-rw-r--r--  drivers/infiniband/sw/rdmavt/dma.c           3
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c          8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c           2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.c        9
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.h        2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c         21
37 files changed, 391 insertions(+), 395 deletions(-)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index b136d3acc5bd..0f58f46dbad7 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -699,13 +699,16 @@ EXPORT_SYMBOL(rdma_addr_cancel);
 struct resolve_cb_context {
 	struct rdma_dev_addr *addr;
 	struct completion comp;
+	int status;
 };
 
 static void resolve_cb(int status, struct sockaddr *src_addr,
 		       struct rdma_dev_addr *addr, void *context)
 {
-	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
-	       rdma_dev_addr));
+	if (!status)
+		memcpy(((struct resolve_cb_context *)context)->addr,
+		       addr, sizeof(struct rdma_dev_addr));
+	((struct resolve_cb_context *)context)->status = status;
 	complete(&((struct resolve_cb_context *)context)->comp);
 }
 
@@ -743,6 +746,10 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 
 	wait_for_completion(&ctx.comp);
 
+	ret = ctx.status;
+	if (ret)
+		return ret;
+
 	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
 	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
 	if (!dev)
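The addr.c fix above propagates the resolve callback's status through the completion context instead of unconditionally trusting the copied address. Below is a minimal user-space sketch of the same pattern, using pthreads; the names (resolve_ctx, wait_for_resolve) are illustrative stand-ins, not kernel APIs.

/* Minimal user-space analogue of the resolve_cb fix: the async callback
 * records its status in the shared context, and the waiter checks that
 * status before trusting the copied result. */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

struct resolve_ctx {
	pthread_mutex_t lock;
	pthread_cond_t  done;
	int             completed;
	int             status;   /* set by the callback, checked by the waiter */
	char            addr[64]; /* copied only on success */
};

static void resolve_cb(int status, const char *addr, struct resolve_ctx *ctx)
{
	pthread_mutex_lock(&ctx->lock);
	if (!status)                       /* copy the result only on success */
		strncpy(ctx->addr, addr, sizeof(ctx->addr) - 1);
	ctx->status = status;              /* always propagate the status */
	ctx->completed = 1;
	pthread_cond_signal(&ctx->done);
	pthread_mutex_unlock(&ctx->lock);
}

static int wait_for_resolve(struct resolve_ctx *ctx)
{
	pthread_mutex_lock(&ctx->lock);
	while (!ctx->completed)
		pthread_cond_wait(&ctx->done, &ctx->lock);
	pthread_mutex_unlock(&ctx->lock);
	return ctx->status;                /* bail out before using ctx->addr */
}

int main(void)
{
	struct resolve_ctx ctx = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.done = PTHREAD_COND_INITIALIZER,
	};

	resolve_cb(-111 /* e.g. a connection failure */, "10.0.0.1", &ctx);
	printf("resolve status: %d\n", wait_for_resolve(&ctx));
	return 0;
}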
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c99525512b34..71c7c4c328ef 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -80,6 +80,8 @@ static struct ib_cm {
 	__be32 random_id_operand;
 	struct list_head timewait_list;
 	struct workqueue_struct *wq;
+	/* Sync on cm change port state */
+	spinlock_t state_lock;
 } cm;
 
 /* Counter indexes ordered by attribute ID */
@@ -161,6 +163,8 @@ struct cm_port {
 	struct ib_mad_agent *mad_agent;
 	struct kobject port_obj;
 	u8 port_num;
+	struct list_head cm_priv_prim_list;
+	struct list_head cm_priv_altr_list;
 	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
 };
 
@@ -241,6 +245,12 @@ struct cm_id_private {
 	u8 service_timeout;
 	u8 target_ack_delay;
 
+	struct list_head prim_list;
+	struct list_head altr_list;
+	/* Indicates that the send port mad is registered and av is set */
+	int prim_send_port_not_ready;
+	int altr_send_port_not_ready;
+
 	struct list_head work_list;
 	atomic_t work_count;
 };
@@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 	struct ib_mad_agent *mad_agent;
 	struct ib_mad_send_buf *m;
 	struct ib_ah *ah;
+	struct cm_av *av;
+	unsigned long flags, flags2;
+	int ret = 0;
 
+	/* don't let the port to be released till the agent is down */
+	spin_lock_irqsave(&cm.state_lock, flags2);
+	spin_lock_irqsave(&cm.lock, flags);
+	if (!cm_id_priv->prim_send_port_not_ready)
+		av = &cm_id_priv->av;
+	else if (!cm_id_priv->altr_send_port_not_ready &&
+		 (cm_id_priv->alt_av.port))
+		av = &cm_id_priv->alt_av;
+	else {
+		pr_info("%s: not valid CM id\n", __func__);
+		ret = -ENODEV;
+		spin_unlock_irqrestore(&cm.lock, flags);
+		goto out;
+	}
+	spin_unlock_irqrestore(&cm.lock, flags);
+	/* Make sure the port haven't released the mad yet */
 	mad_agent = cm_id_priv->av.port->mad_agent;
-	ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
-	if (IS_ERR(ah))
-		return PTR_ERR(ah);
+	if (!mad_agent) {
+		pr_info("%s: not a valid MAD agent\n", __func__);
+		ret = -ENODEV;
+		goto out;
+	}
+	ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+	if (IS_ERR(ah)) {
+		ret = PTR_ERR(ah);
+		goto out;
+	}
 
 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
-			       cm_id_priv->av.pkey_index,
+			       av->pkey_index,
 			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC,
 			       IB_MGMT_BASE_VERSION);
 	if (IS_ERR(m)) {
 		ib_destroy_ah(ah);
-		return PTR_ERR(m);
+		ret = PTR_ERR(m);
+		goto out;
 	}
 
 	/* Timeout set by caller if response is expected. */
@@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
 	atomic_inc(&cm_id_priv->refcount);
 	m->context[0] = cm_id_priv;
 	*msg = m;
-	return 0;
+
+out:
+	spin_unlock_irqrestore(&cm.state_lock, flags2);
+	return ret;
 }
 
 static int cm_alloc_response_msg(struct cm_port *port,
@@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
 			     grh, &av->ah_attr);
 }
 
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+			      struct cm_id_private *cm_id_priv)
 {
 	struct cm_device *cm_dev;
 	struct cm_port *port = NULL;
@@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 			 &av->ah_attr);
 	av->timeout = path->packet_life_time + 1;
 
-	return 0;
+	spin_lock_irqsave(&cm.lock, flags);
+	if (&cm_id_priv->av == av)
+		list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+	else if (&cm_id_priv->alt_av == av)
+		list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+	else
+		ret = -EINVAL;
+
+	spin_unlock_irqrestore(&cm.lock, flags);
+
+	return ret;
 }
 
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
 	spin_lock_init(&cm_id_priv->lock);
 	init_completion(&cm_id_priv->comp);
 	INIT_LIST_HEAD(&cm_id_priv->work_list);
+	INIT_LIST_HEAD(&cm_id_priv->prim_list);
+	INIT_LIST_HEAD(&cm_id_priv->altr_list);
 	atomic_set(&cm_id_priv->work_count, -1);
 	atomic_set(&cm_id_priv->refcount, 1);
 	return &cm_id_priv->id;
@@ -892,6 +945,15 @@ retest:
 		break;
 	}
 
+	spin_lock_irq(&cm.lock);
+	if (!list_empty(&cm_id_priv->altr_list) &&
+	    (!cm_id_priv->altr_send_port_not_ready))
+		list_del(&cm_id_priv->altr_list);
+	if (!list_empty(&cm_id_priv->prim_list) &&
+	    (!cm_id_priv->prim_send_port_not_ready))
+		list_del(&cm_id_priv->prim_list);
+	spin_unlock_irq(&cm.lock);
+
 	cm_free_id(cm_id->local_id);
 	cm_deref_id(cm_id_priv);
 	wait_for_completion(&cm_id_priv->comp);
@@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+				 cm_id_priv);
 	if (ret)
 		goto error1;
 	if (param->alternate_path) {
 		ret = cm_init_av_by_path(param->alternate_path,
-					 &cm_id_priv->alt_av);
+					 &cm_id_priv->alt_av, cm_id_priv);
 		if (ret)
 			goto error1;
 	}
@@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work)
 			dev_put(gid_attr.ndev);
 		}
 		work->path[0].gid_type = gid_attr.gid_type;
-		ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+		ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+					 cm_id_priv);
 	}
 	if (ret) {
 		int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work)
 		goto rejected;
 	}
 	if (req_msg->alt_local_lid) {
-		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
+					 cm_id_priv);
 		if (ret) {
 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
 				       &work->path[0].sgid,
@@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+				 cm_id_priv);
 	if (ret)
 		goto out;
 	cm_id_priv->alt_av.timeout =
@@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work)
 	cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
 				work->mad_recv_wc->recv_buf.grh,
 				&cm_id_priv->av);
-	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
+	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+			   cm_id_priv);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
 		return -EINVAL;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-	ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
+	ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
 	if (ret)
 		goto out;
 
@@ -3468,7 +3535,9 @@ out:
 static int cm_migrate(struct ib_cm_id *cm_id)
 {
 	struct cm_id_private *cm_id_priv;
+	struct cm_av tmp_av;
 	unsigned long flags;
+	int tmp_send_port_not_ready;
 	int ret = 0;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id)
 	    (cm_id->lap_state == IB_CM_LAP_UNINIT ||
 	     cm_id->lap_state == IB_CM_LAP_IDLE)) {
 		cm_id->lap_state = IB_CM_LAP_IDLE;
+		/* Swap address vector */
+		tmp_av = cm_id_priv->av;
 		cm_id_priv->av = cm_id_priv->alt_av;
+		cm_id_priv->alt_av = tmp_av;
+		/* Swap port send ready state */
+		tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
+		cm_id_priv->prim_send_port_not_ready =
+				cm_id_priv->altr_send_port_not_ready;
+		cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
 	} else
 		ret = -EINVAL;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device)
 		port->cm_dev = cm_dev;
 		port->port_num = i;
 
+		INIT_LIST_HEAD(&port->cm_priv_prim_list);
+		INIT_LIST_HEAD(&port->cm_priv_altr_list);
+
 		ret = cm_create_port_fs(port);
 		if (ret)
 			goto error1;
@@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 {
 	struct cm_device *cm_dev = client_data;
 	struct cm_port *port;
+	struct cm_id_private *cm_id_priv;
+	struct ib_mad_agent *cur_mad_agent;
 	struct ib_port_modify port_modify = {
 		.clr_port_cap_mask = IB_PORT_CM_SUP
 	};
@@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 
 		port = cm_dev->port[i-1];
 		ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+		/* Mark all the cm_id's as not valid */
+		spin_lock_irq(&cm.lock);
+		list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
+			cm_id_priv->altr_send_port_not_ready = 1;
+		list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
+			cm_id_priv->prim_send_port_not_ready = 1;
+		spin_unlock_irq(&cm.lock);
 		/*
 		 * We flush the queue here after the going_down set, this
 		 * verify that no new works will be queued in the recv handler,
 		 * after that we can call the unregister_mad_agent
 		 */
 		flush_workqueue(cm.wq);
-		ib_unregister_mad_agent(port->mad_agent);
+		spin_lock_irq(&cm.state_lock);
+		cur_mad_agent = port->mad_agent;
+		port->mad_agent = NULL;
+		spin_unlock_irq(&cm.state_lock);
+		ib_unregister_mad_agent(cur_mad_agent);
 		cm_remove_port_fs(port);
 	}
+
 	device_unregister(cm_dev->device);
 	kfree(cm_dev);
 }
@@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void)
 	INIT_LIST_HEAD(&cm.device_list);
 	rwlock_init(&cm.device_lock);
 	spin_lock_init(&cm.lock);
+	spin_lock_init(&cm.state_lock);
 	cm.listen_service_table = RB_ROOT;
 	cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
 	cm.remote_id_table = RB_ROOT;
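The cm.c changes above combine two ideas: per-port lists let cm_remove_one() mark every active cm_id's send path not-ready, and the new state_lock lets the remover fetch and clear the port's mad_agent pointer so that a concurrent sender either uses a still-registered agent or fails cleanly. Below is a minimal user-space sketch of that grab-and-clear handoff, with a pthread mutex standing in for cm.state_lock; all names are illustrative, not the kernel's.

/* Clear the shared pointer under a lock, release the object outside it.
 * A concurrent sender that re-checks the pointer under the same lock
 * either sees the agent or sees NULL and fails with an -ENODEV-style
 * error, never a use-after-free. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static int *mad_agent; /* shared; NULL once the port is going away */

static int send_mad(void)
{
	int ret = 0;

	pthread_mutex_lock(&state_lock);
	if (!mad_agent)                    /* port already torn down */
		ret = -1;                  /* -ENODEV in the driver */
	else
		printf("sent via agent %d\n", *mad_agent);
	pthread_mutex_unlock(&state_lock);
	return ret;
}

static void remove_port(void)
{
	int *cur;

	pthread_mutex_lock(&state_lock);
	cur = mad_agent;                   /* grab... */
	mad_agent = NULL;                  /* ...and clear, atomically w.r.t. senders */
	pthread_mutex_unlock(&state_lock);
	free(cur);                         /* "unregister" outside the lock */
}

int main(void)
{
	mad_agent = malloc(sizeof(*mad_agent));
	*mad_agent = 42;
	send_mad();                        /* succeeds */
	remove_port();
	if (send_mad())                    /* now fails cleanly */
		puts("send after removal rejected");
	return 0;
}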
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 89a6b0546804..2a6fc47a1dfb 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2438,6 +2438,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
 	return 0;
 }
 
+static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
+					   unsigned long supported_gids,
+					   enum ib_gid_type default_gid)
+{
+	if ((network_type == RDMA_NETWORK_IPV4 ||
+	     network_type == RDMA_NETWORK_IPV6) &&
+	    test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
+		return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+	return default_gid;
+}
+
 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 {
 	struct rdma_route *route = &id_priv->id.route;
@@ -2463,6 +2475,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	route->num_paths = 1;
 
 	if (addr->dev_addr.bound_dev_if) {
+		unsigned long supported_gids;
+
 		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
 		if (!ndev) {
 			ret = -ENODEV;
@@ -2486,7 +2500,12 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
 		route->path_rec->net = &init_net;
 		route->path_rec->ifindex = ndev->ifindex;
-		route->path_rec->gid_type = id_priv->gid_type;
+		supported_gids = roce_gid_type_mask_support(id_priv->id.device,
+							    id_priv->id.port_num);
+		route->path_rec->gid_type =
+			cma_route_gid_type(addr->dev_addr.network,
+					   supported_gids,
+					   id_priv->gid_type);
 	}
 	if (!ndev) {
 		ret = -ENODEV;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 224ad274ea0b..84b4eff90395 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -175,7 +175,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	cur_base = addr & PAGE_MASK;
 
-	if (npages == 0) {
+	if (npages == 0 || npages > UINT_MAX) {
 		ret = -EINVAL;
 		goto out;
 	}
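The one-line umem.c change guards against truncation: npages is computed from a user-supplied length as an unsigned long, but it is later handed to sg_alloc_table(), whose entry count is an unsigned int, so values above UINT_MAX would silently wrap to a much smaller table. A standalone sketch of the hazard on an LP64 system follows; alloc_table() here is a stand-in, not the kernel function.

/* Demonstrates the wraparound the npages > UINT_MAX check prevents. */
#include <limits.h>
#include <stdio.h>

static void alloc_table(unsigned int nents)
{
	printf("allocating %u entries\n", nents);
}

int main(void)
{
	unsigned long npages = (unsigned long)UINT_MAX + 2; /* from a huge user length */

	/* What an unsigned int parameter would see without the fix: */
	printf("npages = %lu, truncated nents = %u\n", npages, (unsigned int)npages);

	/* The fixed check in ib_umem_get() rejects the request up front. */
	if (npages == 0 || npages > UINT_MAX) {
		fprintf(stderr, "invalid page count, returning -EINVAL\n");
		return 1;
	}
	alloc_table((unsigned int)npages);
	return 0;
}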
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 0012fa58c105..44b1104eb168 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -262,12 +262,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 			container_of(uobj, struct ib_uqp_object, uevent.uobject);
 
 		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-		if (qp != qp->real_qp) {
-			ib_close_qp(qp);
-		} else {
+		if (qp == qp->real_qp)
 			ib_uverbs_detach_umcast(qp, uqp);
-			ib_destroy_qp(qp);
-		}
+		ib_destroy_qp(qp);
 		ib_uverbs_release_uevent(file, &uqp->uevent);
 		kfree(uqp);
 	}
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 867b8cf82be8..19c6477af19f 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -666,18 +666,6 @@ skip_cqe:
 	return ret;
 }
 
-static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
-{
-	struct c4iw_mr *mhp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rhp->lock, flags);
-	mhp = get_mhp(rhp, rkey >> 8);
-	if (mhp)
-		mhp->attr.state = 0;
-	spin_unlock_irqrestore(&rhp->lock, flags);
-}
-
 /*
  * Get one cq entry from c4iw and map it to openib.
  *
@@ -733,7 +721,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
 			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
 			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
-			invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
+			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
 		}
 	} else {
 		switch (CQE_OPCODE(&cqe)) {
@@ -762,7 +750,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 
 			/* Invalidate the MR if the fastreg failed */
 			if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
-				invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe));
+				c4iw_invalidate_mr(qhp->rhp,
+						   CQE_WRID_FR_STAG(&cqe));
 			break;
 		default:
 			printk(KERN_ERR MOD "Unexpected opcode %d "
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 7e7f79e55006..4788e1a46fde 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -999,6 +999,6 @@ extern int db_coalescing_threshold;
 extern int use_dsgl;
 void c4iw_drain_rq(struct ib_qp *qp);
 void c4iw_drain_sq(struct ib_qp *qp);
-
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
 
 #endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 80e27749420a..410408f886c1 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -770,3 +770,15 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
 	kfree(mhp);
 	return 0;
 }
+
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
+{
+	struct c4iw_mr *mhp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rhp->lock, flags);
+	mhp = get_mhp(rhp, rkey >> 8);
+	if (mhp)
+		mhp->attr.state = 0;
+	spin_unlock_irqrestore(&rhp->lock, flags);
+}
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index f57deba6717c..b7ac97b27c88 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -706,12 +706,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
 	return 0;
 }
 
-static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe,
-			  struct ib_send_wr *wr, u8 *len16)
+static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 {
-	struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8);
-
-	mhp->attr.state = 0;
 	wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
 	wqe->inv.r2 = 0;
 	*len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
@@ -797,11 +793,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	spin_lock_irqsave(&qhp->lock, flag);
 	if (t4_wq_in_error(&qhp->wq)) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
 		return -EINVAL;
 	}
 	num_wrs = t4_sq_avail(&qhp->wq);
 	if (num_wrs == 0) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
 		return -ENOMEM;
 	}
 	while (wr) {
@@ -840,10 +838,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		case IB_WR_RDMA_READ_WITH_INV:
 			fw_opcode = FW_RI_RDMA_READ_WR;
 			swsqe->opcode = FW_RI_READ_REQ;
-			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
+				c4iw_invalidate_mr(qhp->rhp,
+						   wr->sg_list[0].lkey);
 				fw_flags = FW_RI_RDMA_READ_INVALIDATE;
-			else
+			} else {
 				fw_flags = 0;
+			}
 			err = build_rdma_read(wqe, wr, &len16);
 			if (err)
 				break;
@@ -876,7 +877,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
 			fw_opcode = FW_RI_INV_LSTAG_WR;
 			swsqe->opcode = FW_RI_LOCAL_INV;
-			err = build_inv_stag(qhp->rhp, wqe, wr, &len16);
+			err = build_inv_stag(wqe, wr, &len16);
+			c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
 			break;
 		default:
 			PDBG("%s post of type=%d TBD!\n", __func__,
@@ -934,11 +936,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	spin_lock_irqsave(&qhp->lock, flag);
 	if (t4_wq_in_error(&qhp->wq)) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
 		return -EINVAL;
 	}
 	num_wrs = t4_rq_avail(&qhp->wq);
 	if (num_wrs == 0) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
 		return -ENOMEM;
 	}
 	while (wr) {
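The c4iw_post_send() and c4iw_post_recv() changes above honor the verbs error contract: on failure, *bad_wr must point at the first work request that was not posted, so the caller knows where the chain stopped. A pared-down sketch of a consumer relying on that contract follows; the struct and post_send() here are simplified stand-ins for the ib_verbs types, not the real API.

/* On error the caller walks from the returned bad_wr to see which WRs
 * never made it onto the queue. */
#include <stdio.h>

struct send_wr {
	unsigned long wr_id;
	struct send_wr *next;
};

/* Stand-in for a provider's post_send: rejects everything when the queue
 * is in error, and reports the first unposted WR through *bad_wr. */
static int post_send(int wq_in_error, struct send_wr *wr,
		     struct send_wr **bad_wr)
{
	if (wq_in_error) {
		*bad_wr = wr;   /* what the fixed early-exit paths now do */
		return -1;
	}
	return 0;
}

int main(void)
{
	struct send_wr wr2 = { .wr_id = 2, .next = NULL };
	struct send_wr wr1 = { .wr_id = 1, .next = &wr2 };
	struct send_wr *bad_wr = NULL;

	if (post_send(1, &wr1, &bad_wr))
		for (; bad_wr; bad_wr = bad_wr->next)
			printf("wr %lu was not posted\n", bad_wr->wr_id);
	return 0;
}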
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index a26a9a0bfc41..67ea85a56945 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -775,75 +775,3 @@ void hfi1_put_proc_affinity(int cpu)
 	}
 	mutex_unlock(&affinity->lock);
 }
-
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
-			   size_t count)
-{
-	struct hfi1_affinity_node *entry;
-	cpumask_var_t mask;
-	int ret, i;
-
-	mutex_lock(&node_affinity.lock);
-	entry = node_affinity_lookup(dd->node);
-
-	if (!entry) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-
-	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
-	if (!ret) {
-		ret = -ENOMEM;
-		goto unlock;
-	}
-
-	ret = cpulist_parse(buf, mask);
-	if (ret)
-		goto out;
-
-	if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
-		dd_dev_warn(dd, "Invalid CPU mask\n");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* reset the SDMA interrupt affinity details */
-	init_cpu_mask_set(&entry->def_intr);
-	cpumask_copy(&entry->def_intr.mask, mask);
-
-	/* Reassign the affinity for each SDMA interrupt. */
-	for (i = 0; i < dd->num_msix_entries; i++) {
-		struct hfi1_msix_entry *msix;
-
-		msix = &dd->msix_entries[i];
-		if (msix->type != IRQ_SDMA)
-			continue;
-
-		ret = get_irq_affinity(dd, msix);
-
-		if (ret)
-			break;
-	}
-out:
-	free_cpumask_var(mask);
-unlock:
-	mutex_unlock(&node_affinity.lock);
-	return ret ? ret : strnlen(buf, PAGE_SIZE);
-}
-
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
-{
-	struct hfi1_affinity_node *entry;
-
-	mutex_lock(&node_affinity.lock);
-	entry = node_affinity_lookup(dd->node);
-
-	if (!entry) {
-		mutex_unlock(&node_affinity.lock);
-		return -EINVAL;
-	}
-
-	cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
-	mutex_unlock(&node_affinity.lock);
-	return strnlen(buf, PAGE_SIZE);
-}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index b89ea3c0ee1a..42e63316afd1 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -102,10 +102,6 @@ int hfi1_get_proc_affinity(int);
 /* Release a CPU used by a user process. */
 void hfi1_put_proc_affinity(int);
 
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf);
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
-			   size_t count);
-
 struct hfi1_affinity_node {
 	int node;
 	struct cpu_mask_set def_intr;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9bf5f23544d4..24d0820873cf 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6301,19 +6301,8 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
 	/* leave shared count at zero for both global and VL15 */
 	write_global_credit(dd, vau, vl15buf, 0);
 
-	/* We may need some credits for another VL when sending packets
-	 * with the snoop interface. Dividing it down the middle for VL15
-	 * and VL0 should suffice.
-	 */
-	if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
-		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
-		    << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
-		write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
-		    << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
-	} else {
-		write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
-			<< SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
-	}
+	write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
+		  << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
 }
 
 /*
@@ -9915,9 +9904,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
 	u32 mask = ~((1U << ppd->lmc) - 1);
 	u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
 
-	if (dd->hfi1_snoop.mode_flag)
-		dd_dev_info(dd, "Set lid/lmc while snooping");
-
 	c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
 		| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
 	c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
@@ -12112,7 +12098,7 @@ static void update_synth_timer(unsigned long opaque)
 	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
 }
 
-#define C_MAX_NAME 13 /* 12 chars + one for /0 */
+#define C_MAX_NAME 16 /* 15 chars + one for /0 */
 static int init_cntrs(struct hfi1_devdata *dd)
 {
 	int i, rcv_ctxts, j;
@@ -14463,7 +14449,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	 * Any error printing is already done by the init code.
 	 * On return, we have the chip mapped.
 	 */
-	ret = hfi1_pcie_ddinit(dd, pdev, ent);
+	ret = hfi1_pcie_ddinit(dd, pdev);
 	if (ret < 0)
 		goto bail_free;
 
@@ -14691,6 +14677,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	if (ret)
 		goto bail_free_cntrs;
 
+	init_completion(&dd->user_comp);
+
+	/* The user refcount starts with one to inidicate an active device */
+	atomic_set(&dd->user_refcount, 1);
+
 	goto bail;
 
 bail_free_rcverr:
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 92345259a8f4..043fd21dc5f3 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -320,6 +320,9 @@
 /* DC_DC8051_CFG_MODE.GENERAL bits */
 #define DISABLE_SELF_GUID_CHECK 0x2
 
+/* Bad L2 frame error code */
+#define BAD_L2_ERR 0x6
+
 /*
  * Eager buffer minimum and maximum sizes supported by the hardware.
  * All power-of-two sizes in between are supported as well.
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 6563e4d38b80..c5efff29c147 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -599,7 +599,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 			 dd->rhf_offset;
 		struct rvt_qp *qp;
 		struct ib_header *hdr;
-		struct ib_other_headers *ohdr;
 		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 		u64 rhf = rhf_to_cpu(rhf_addr);
 		u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -615,18 +614,21 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 		if (etype != RHF_RCV_TYPE_IB)
 			goto next;
 
-		hdr = hfi1_get_msgheader(dd, rhf_addr);
+		packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
+		hdr = packet->hdr;
 
 		lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
-		if (lnh == HFI1_LRH_BTH)
-			ohdr = &hdr->u.oth;
-		else if (lnh == HFI1_LRH_GRH)
-			ohdr = &hdr->u.l.oth;
-		else
+		if (lnh == HFI1_LRH_BTH) {
+			packet->ohdr = &hdr->u.oth;
+		} else if (lnh == HFI1_LRH_GRH) {
+			packet->ohdr = &hdr->u.l.oth;
+			packet->rcv_flags |= HFI1_HAS_GRH;
+		} else {
 			goto next; /* just in case */
+		}
 
-		bth1 = be32_to_cpu(ohdr->bth[1]);
+		bth1 = be32_to_cpu(packet->ohdr->bth[1]);
 		is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
 
 		if (!is_ecn)
@@ -646,7 +648,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 
 		/* turn off BECN, FECN */
 		bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
-		ohdr->bth[1] = cpu_to_be32(bth1);
+		packet->ohdr->bth[1] = cpu_to_be32(bth1);
 next:
 		update_ps_mdata(&mdata, rcd);
 	}
@@ -1360,12 +1362,25 @@ int process_receive_ib(struct hfi1_packet *packet)
 
 int process_receive_bypass(struct hfi1_packet *packet)
 {
+	struct hfi1_devdata *dd = packet->rcd->dd;
+
 	if (unlikely(rhf_err_flags(packet->rhf)))
 		handle_eflags(packet);
 
-	dd_dev_err(packet->rcd->dd,
+	dd_dev_err(dd,
 		   "Bypass packets are not supported in normal operation. Dropping\n");
-	incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors);
+	incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
+	if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
+		u64 *flits = packet->ebuf;
+
+		if (flits && !(packet->rhf & RHF_LEN_ERR)) {
+			dd->err_info_rcvport.packet_flit1 = flits[0];
+			dd->err_info_rcvport.packet_flit2 =
+				packet->tlen > sizeof(flits[0]) ? flits[1] : 0;
+		}
+		dd->err_info_rcvport.status_and_code |=
+			(OPA_EI_STATUS_SMASK | BAD_L2_ERR);
+	}
 	return RHF_RCV_CONTINUE;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 677efa0e8cd6..bd786b7bd30b 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 					       struct hfi1_devdata,
 					       user_cdev);
 
+	if (!atomic_inc_not_zero(&dd->user_refcount))
+		return -ENXIO;
+
 	/* Just take a ref now. Not all opens result in a context assign */
 	kobject_get(&dd->kobj);
 
@@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 		fd->rec_cpu_num = -1; /* no cpu affinity by default */
 		fd->mm = current->mm;
 		atomic_inc(&fd->mm->mm_count);
-	}
+		fp->private_data = fd;
+	} else {
+		fp->private_data = NULL;
+
+		if (atomic_dec_and_test(&dd->user_refcount))
+			complete(&dd->user_comp);
 
-	fp->private_data = fd;
+		return -ENOMEM;
+	}
 
-	return fd ? 0 : -ENOMEM;
+	return 0;
 }
 
 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 done:
 	mmdrop(fdata->mm);
 	kobject_put(&dd->kobj);
+
+	if (atomic_dec_and_test(&dd->user_refcount))
+		complete(&dd->user_comp);
+
 	kfree(fdata);
 	return 0;
 }
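Together with the chip.c and hfi.h hunks, these file_ops.c changes implement a device-removal handshake: user_refcount starts at 1 (the device's own reference), each open takes a reference via atomic_inc_not_zero() so it fails with -ENXIO once teardown has begun, and the driver's removal path (not shown in this diff) waits on user_comp until the last file descriptor closes. Below is a user-space analogue using C11 atomics, with a POSIX semaphore standing in for the kernel completion; the names mirror the patch but the code is illustrative only.

#include <semaphore.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int user_refcount = 1;  /* 1 = the device's own reference */
static sem_t user_comp;               /* signalled when the count hits 0 */

static void put_ref(void)
{
	if (atomic_fetch_sub(&user_refcount, 1) == 1)
		sem_post(&user_comp);          /* last reference gone */
}

static int open_dev(void)
{
	int cur = atomic_load(&user_refcount);

	/* inc-not-zero: fail the open once teardown has started */
	do {
		if (cur == 0)
			return -1;             /* -ENXIO in the driver */
	} while (!atomic_compare_exchange_weak(&user_refcount, &cur, cur + 1));
	return 0;
}

static void remove_dev(void)
{
	put_ref();            /* drop the device's own reference */
	sem_wait(&user_comp); /* wait for the last open fd to close */
}

int main(void)
{
	sem_init(&user_comp, 0, 0);
	if (open_dev() == 0)
		put_ref();    /* the close path */
	remove_dev();
	puts("device safely removed");
	return 0;
}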
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 7eef11b316ff..cc87fd4e534b 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -367,26 +367,6 @@ struct hfi1_packet {
 	u8 etype;
 };
 
-/*
- * Private data for snoop/capture support.
- */
-struct hfi1_snoop_data {
-	int mode_flag;
-	struct cdev cdev;
-	struct device *class_dev;
-	/* protect snoop data */
-	spinlock_t snoop_lock;
-	struct list_head queue;
-	wait_queue_head_t waitq;
-	void *filter_value;
-	int (*filter_callback)(void *hdr, void *data, void *value);
-	u64 dcc_cfg; /* saved value of DCC Cfg register */
-};
-
-/* snoop mode_flag values */
-#define HFI1_PORT_SNOOP_MODE 1U
-#define HFI1_PORT_CAPTURE_MODE 2U
-
 struct rvt_sge_state;
 
 /*
@@ -613,8 +593,6 @@ struct hfi1_pportdata {
 	struct mutex hls_lock;
 	u32 host_link_state;
 
-	spinlock_t sdma_alllock ____cacheline_aligned_in_smp;
-
 	u32 lstate;	/* logical link state */
 
 	/* these are the "32 bit" regs */
@@ -1104,8 +1082,6 @@ struct hfi1_devdata {
 	char *portcntrnames;
 	size_t portcntrnameslen;
 
-	struct hfi1_snoop_data hfi1_snoop;
-
 	struct err_info_rcvport err_info_rcvport;
 	struct err_info_constraint err_info_rcv_constraint;
 	struct err_info_constraint err_info_xmit_constraint;
@@ -1141,8 +1117,8 @@ struct hfi1_devdata {
 	rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
 
 	/*
-	 * Handlers for outgoing data so that snoop/capture does not
-	 * have to have its hooks in the send path
+	 * Capability to have different send engines simply by changing a
+	 * pointer value.
 	 */
 	send_routine process_pio_send;
 	send_routine process_dma_send;
@@ -1174,6 +1150,10 @@ struct hfi1_devdata {
 	spinlock_t aspm_lock;
 	/* Number of verbs contexts which have disabled ASPM */
 	atomic_t aspm_disabled_cnt;
+	/* Keeps track of user space clients */
+	atomic_t user_refcount;
+	/* Used to wait for outstanding user space clients before dev removal */
+	struct completion user_comp;
 
 	struct hfi1_affinity *affinity;
 	struct rhashtable sdma_rht;
@@ -1221,8 +1201,6 @@ struct hfi1_devdata *hfi1_lookup(int unit);
 extern u32 hfi1_cpulist_count;
 extern unsigned long *hfi1_cpulist;
 
-extern unsigned int snoop_drop_send;
-extern unsigned int snoop_force_capture;
 int hfi1_init(struct hfi1_devdata *, int);
 int hfi1_count_units(int *npresentp, int *nupp);
 int hfi1_count_active_units(void);
@@ -1557,13 +1535,6 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
 void reset_link_credits(struct hfi1_devdata *dd);
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
 
-int snoop_recv_handler(struct hfi1_packet *packet);
-int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-			   u64 pbc);
-int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-			   u64 pbc);
-void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
-			   u64 pbc, const void *from, size_t count);
 int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
 
 static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
@@ -1763,8 +1734,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
 
 int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
 void hfi1_pcie_cleanup(struct pci_dev *);
-int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *,
-		     const struct pci_device_id *);
+int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
 void hfi1_pcie_flr(struct hfi1_devdata *);
 int pcie_speeds(struct hfi1_devdata *);
@@ -1799,8 +1769,6 @@ int kdeth_process_expected(struct hfi1_packet *packet);
 int kdeth_process_eager(struct hfi1_packet *packet);
 int process_receive_invalid(struct hfi1_packet *packet);
 
-extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8];
-
 void update_sge(struct rvt_sge_state *ss, u32 length);
 
 /* global module parameter variables */
@@ -1827,9 +1795,6 @@ extern struct mutex hfi1_mutex;
 #define DRIVER_NAME		"hfi1"
 #define HFI1_USER_MINOR_BASE	0
 #define HFI1_TRACE_MINOR	127
-#define HFI1_DIAGPKT_MINOR	128
-#define HFI1_DIAG_MINOR_BASE	129
-#define HFI1_SNOOP_CAPTURE_BASE	200
 #define HFI1_NMINORS		255
 
 #define PCI_VENDOR_ID_INTEL	0x8086
@@ -1848,7 +1813,13 @@ extern struct mutex hfi1_mutex;
 static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
 						  u16 ctxt_type)
 {
-	u64 base_sc_integrity =
+	u64 base_sc_integrity;
+
+	/* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+	if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+		return 0;
+
+	base_sc_integrity =
 	SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
 	| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
 	| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
@@ -1863,7 +1834,6 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
 	| SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
 	| SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK
 	| SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK
-	| SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
 	| SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK
 	| SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;
 
@@ -1872,18 +1842,23 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
 	else
 		base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
 
-	if (is_ax(dd))
-		/* turn off send-side job key checks - A0 */
-		return base_sc_integrity &
-		       ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+	/* turn on send-side job key checks if !A0 */
+	if (!is_ax(dd))
+		base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
 	return base_sc_integrity;
 }
 
 static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 {
-	u64 base_sdma_integrity =
+	u64 base_sdma_integrity;
+
+	/* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+	if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+		return 0;
+
+	base_sdma_integrity =
 	SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
-	| SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
 	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
 	| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
 	| SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
@@ -1895,14 +1870,18 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 	| SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
 	| SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK
 	| SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK
-	| SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
 	| SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK
 	| SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK;
 
-	if (is_ax(dd))
-		/* turn off send-side job key checks - A0 */
-		return base_sdma_integrity &
-		       ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+	if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
+		base_sdma_integrity |=
+		SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK;
+
+	/* turn on send-side job key checks if !A0 */
+	if (!is_ax(dd))
+		base_sdma_integrity |=
+		SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
 	return base_sdma_integrity;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 60db61536fed..e3b5bc93bc70 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -144,6 +144,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		struct hfi1_ctxtdata *rcd;
 
 		ppd = dd->pport + (i % dd->num_pports);
+
+		/* dd->rcd[i] gets assigned inside the callee */
 		rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
 		if (!rcd) {
 			dd_dev_err(dd,
@@ -169,8 +171,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		if (!rcd->sc) {
 			dd_dev_err(dd,
 				   "Unable to allocate kernel send context, failing\n");
-			dd->rcd[rcd->ctxt] = NULL;
-			hfi1_free_ctxtdata(dd, rcd);
 			goto nomem;
 		}
 
@@ -178,9 +178,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		if (ret < 0) {
 			dd_dev_err(dd,
 				   "Failed to setup kernel receive context, failing\n");
-			sc_free(rcd->sc);
-			dd->rcd[rcd->ctxt] = NULL;
-			hfi1_free_ctxtdata(dd, rcd);
 			ret = -EFAULT;
 			goto bail;
 		}
@@ -196,6 +193,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 nomem:
 	ret = -ENOMEM;
 bail:
+	if (dd->rcd) {
+		for (i = 0; i < dd->num_rcv_contexts; ++i)
+			hfi1_free_ctxtdata(dd, dd->rcd[i]);
+	}
 	kfree(dd->rcd);
 	dd->rcd = NULL;
 	return ret;
@@ -216,7 +217,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
 	    dd->num_rcv_contexts - dd->first_user_ctxt)
 		kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
 				 (dd->num_rcv_contexts - dd->first_user_ctxt));
-	rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
+	rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
 	if (rcd) {
 		u32 rcvtids, max_entries;
 
@@ -261,13 +262,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
 		}
 		rcd->eager_base = base * dd->rcv_entries.group_size;
 
-		/* Validate and initialize Rcv Hdr Q variables */
-		if (rcvhdrcnt % HDRQ_INCREMENT) {
-			dd_dev_err(dd,
-				   "ctxt%u: header queue count %d must be divisible by %lu\n",
-				   rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
-			goto bail;
-		}
 		rcd->rcvhdrq_cnt = rcvhdrcnt;
 		rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
 		/*
@@ -506,7 +500,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
 	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
 
 	mutex_init(&ppd->hls_lock);
-	spin_lock_init(&ppd->sdma_alllock);
 	spin_lock_init(&ppd->qsfp_info.qsfp_lock);
 
 	ppd->qsfp_info.ppd = ppd;
@@ -1399,28 +1392,43 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
 	hfi1_free_devdata(dd);
 }
 
+static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+{
+	if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+		hfi1_early_err(dev, "Receive header queue count too small\n");
+		return -EINVAL;
+	}
+
+	if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+		hfi1_early_err(dev,
+			       "Receive header queue count cannot be greater than %u\n",
+			       HFI1_MAX_HDRQ_EGRBUF_CNT);
+		return -EINVAL;
+	}
+
+	if (thecnt % HDRQ_INCREMENT) {
+		hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
+			       thecnt, HDRQ_INCREMENT);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret = 0, j, pidx, initfail;
-	struct hfi1_devdata *dd = ERR_PTR(-EINVAL);
+	struct hfi1_devdata *dd;
 	struct hfi1_pportdata *ppd;
 
 	/* First, lock the non-writable module parameters */
 	HFI1_CAP_LOCK();
 
 	/* Validate some global module parameters */
-	if (rcvhdrcnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
-		hfi1_early_err(&pdev->dev, "Header queue count too small\n");
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
-		hfi1_early_err(&pdev->dev,
-			       "Receive header queue count cannot be greater than %u\n",
-			       HFI1_MAX_HDRQ_EGRBUF_CNT);
-		ret = -EINVAL;
+	ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+	if (ret)
 		goto bail;
-	}
+
 	/* use the encoding function as a sanitization check */
 	if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
 		hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
@@ -1461,26 +1469,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto bail;
 
-	/*
-	 * Do device-specific initialization, function table setup, dd
-	 * allocation, etc.
-	 */
-	switch (ent->device) {
-	case PCI_DEVICE_ID_INTEL0:
-	case PCI_DEVICE_ID_INTEL1:
-		dd = hfi1_init_dd(pdev, ent);
-		break;
-	default:
+	if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+	      ent->device == PCI_DEVICE_ID_INTEL1)) {
 		hfi1_early_err(&pdev->dev,
 			       "Failing on unknown Intel deviceid 0x%x\n",
 			       ent->device);
 		ret = -ENODEV;
+		goto clean_bail;
 	}
 
-	if (IS_ERR(dd))
+	/*
+	 * Do device-specific initialization, function table setup, dd
+	 * allocation, etc.
+	 */
+	dd = hfi1_init_dd(pdev, ent);
+
+	if (IS_ERR(dd)) {
 		ret = PTR_ERR(dd);
-	if (ret)
 		goto clean_bail; /* error already printed */
+	}
 
 	ret = create_workqueues(dd);
 	if (ret)
@@ -1538,12 +1545,31 @@ bail:
 	return ret;
 }
 
+static void wait_for_clients(struct hfi1_devdata *dd)
+{
+	/*
+	 * Remove the device init value and complete the device if there is
+	 * no clients or wait for active clients to finish.
+	 */
+	if (atomic_dec_and_test(&dd->user_refcount))
+		complete(&dd->user_comp);
+
+	wait_for_completion(&dd->user_comp);
+}
+
 static void remove_one(struct pci_dev *pdev)
 {
 	struct hfi1_devdata *dd = pci_get_drvdata(pdev);
 
 	/* close debugfs files before ib unregister */
 	hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+
+	/* remove the /dev hfi1 interface */
+	hfi1_device_remove(dd);
+
+	/* wait for existing user space clients to finish */
+	wait_for_clients(dd);
+
 	/* unregister from IB core */
 	hfi1_unregister_ib_device(dd);
 
@@ -1558,8 +1584,6 @@ static void remove_one(struct pci_dev *pdev)
 	/* wait until all of our (qsfp) queue_work() calls complete */
 	flush_workqueue(ib_wq);
 
-	hfi1_device_remove(dd);
-
 	postinit_cleanup(dd);
 }
 
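Note on the init.c teardown ordering above: the new wait_for_clients() pairs an atomic reference count with a completion, so remove_one() first takes the /dev interface away (no new opens), then blocks until the last existing client drops its reference. A minimal sketch of the idiom, with hypothetical names (my_dev, my_open, my_release, my_teardown are illustrative, not part of this driver):

	#include <linux/atomic.h>
	#include <linux/completion.h>

	struct my_dev {
		atomic_t user_refcount;		/* starts at 1: the "device init" reference */
		struct completion user_comp;
	};

	static void my_dev_init(struct my_dev *d)
	{
		atomic_set(&d->user_refcount, 1);
		init_completion(&d->user_comp);
	}

	static int my_open(struct my_dev *d)
	{
		/* refuse new clients once teardown has dropped the init reference */
		return atomic_inc_not_zero(&d->user_refcount) ? 0 : -ENXIO;
	}

	static void my_release(struct my_dev *d)
	{
		if (atomic_dec_and_test(&d->user_refcount))
			complete(&d->user_comp);
	}

	static void my_teardown(struct my_dev *d)
	{
		/* drop the init reference; the last one out fires the completion */
		if (atomic_dec_and_test(&d->user_refcount))
			complete(&d->user_comp);
		wait_for_completion(&d->user_comp);
	}
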
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 89c68da1c273..4ac8f330c5cb 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -157,8 +157,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev)
  * fields required to re-initialize after a chip reset, or for
  * various other purposes
  */
-int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
-		     const struct pci_device_id *ent)
+int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 {
 	unsigned long len;
 	resource_size_t addr;
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 50a3a36d9363..d89b8745d4c1 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -668,19 +668,12 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
 void set_pio_integrity(struct send_context *sc)
 {
 	struct hfi1_devdata *dd = sc->dd;
-	u64 reg = 0;
 	u32 hw_context = sc->hw_context;
 	int type = sc->type;
 
-	/*
-	 * No integrity checks if HFI1_CAP_NO_INTEGRITY is set, or if
-	 * we're snooping.
-	 */
-	if (likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
-	    dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE)
-		reg = hfi1_pkt_default_send_ctxt_mask(dd, type);
-
-	write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), reg);
+	write_kctxt_csr(dd, hw_context,
+			SC(CHECK_ENABLE),
+			hfi1_pkt_default_send_ctxt_mask(dd, type));
 }
 
 static u32 get_buffers_allocated(struct send_context *sc)
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 8bc5013f39a1..83198a8a8797 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -89,7 +89,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
 
 	lockdep_assert_held(&qp->s_lock);
 	qp->s_flags |= RVT_S_WAIT_RNR;
-	qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
+	priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to);
 	add_timer(&priv->s_rnr_timer);
 }
 
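The rc.c one-liner fixes a mismatch: the expiry was written to qp->s_timer while priv->s_rnr_timer was the timer being armed, so the RNR timer fired at whatever stale expiry it held. Writing .expires and calling add_timer() on the same object avoids that; mod_timer() folds both into one call. A hedged sketch against the pre-4.15 timer API this kernel uses (my_qp and rnr_timeout are illustrative):

	#include <linux/timer.h>
	#include <linux/jiffies.h>
	#include <linux/printk.h>

	struct my_qp {
		struct timer_list rnr_timer;
	};

	static void rnr_timeout(unsigned long data)
	{
		struct my_qp *qp = (struct my_qp *)data;

		pr_debug("RNR timeout, rescheduling qp %p\n", qp);
	}

	static void arm_rnr_timer(struct my_qp *qp, u32 usecs)
	{
		/* sets the expiry and (re)arms the same timer in one step */
		mod_timer(&qp->rnr_timer, jiffies + usecs_to_jiffies(usecs));
	}
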
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index fd39bcaa062d..9cbe52d21077 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -2009,11 +2009,6 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
 	write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
 }
 
-#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
-(r &= ~SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
-
-#define SET_STATIC_RATE_CONTROL_SMASK(r) \
-(r |= SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
 /*
  * set_sdma_integrity
  *
@@ -2022,19 +2017,9 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
 static void set_sdma_integrity(struct sdma_engine *sde)
 {
 	struct hfi1_devdata *dd = sde->dd;
-	u64 reg;
-
-	if (unlikely(HFI1_CAP_IS_KSET(NO_INTEGRITY)))
-		return;
-
-	reg = hfi1_pkt_base_sdma_integrity(dd);
-
-	if (HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
-		CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
-	else
-		SET_STATIC_RATE_CONTROL_SMASK(reg);
 
-	write_sde_csr(sde, SD(CHECK_ENABLE), reg);
+	write_sde_csr(sde, SD(CHECK_ENABLE),
+		      hfi1_pkt_base_sdma_integrity(dd));
 }
 
 static void init_sdma_regs(
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index edba22461a9c..919a5474e651 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -49,7 +49,6 @@
 #include "hfi.h"
 #include "mad.h"
 #include "trace.h"
-#include "affinity.h"
 
 /*
  * Start of per-port congestion control structures and support code
@@ -623,27 +622,6 @@ static ssize_t show_tempsense(struct device *device,
 	return ret;
 }
 
-static ssize_t show_sdma_affinity(struct device *device,
-				  struct device_attribute *attr, char *buf)
-{
-	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-
-	return hfi1_get_sdma_affinity(dd, buf);
-}
-
-static ssize_t store_sdma_affinity(struct device *device,
-				   struct device_attribute *attr,
-				   const char *buf, size_t count)
-{
-	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-
-	return hfi1_set_sdma_affinity(dd, buf, count);
-}
-
 /*
  * end of per-unit (or driver, in some cases, but replicated
  * per unit) functions
@@ -658,8 +636,6 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
 static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity,
-		   store_sdma_affinity);
 
 static struct device_attribute *hfi1_attributes[] = {
 	&dev_attr_hw_rev,
@@ -670,7 +646,6 @@ static struct device_attribute *hfi1_attributes[] = {
 	&dev_attr_boardversion,
 	&dev_attr_tempsense,
 	&dev_attr_chip_reset,
-	&dev_attr_sdma_affinity,
 };
 
 int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 11e02b228922..f77e59fb43fe 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -253,66 +253,6 @@ TRACE_EVENT(hfi1_mmu_invalidate,
 	    )
 );
 
-#define SNOOP_PRN \
-	"slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \
-	"svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]"
-
-TRACE_EVENT(snoop_capture,
-	    TP_PROTO(struct hfi1_devdata *dd,
-		     int hdr_len,
-		     struct ib_header *hdr,
-		     int data_len,
-		     void *data),
-	    TP_ARGS(dd, hdr_len, hdr, data_len, data),
-	    TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u16, slid)
-		__field(u16, dlid)
-		__field(u32, qpn)
-		__field(u8, opcode)
-		__field(u8, sl)
-		__field(u16, pkey)
-		__field(u32, hdr_len)
-		__field(u32, data_len)
-		__field(u8, lnh)
-		__dynamic_array(u8, raw_hdr, hdr_len)
-		__dynamic_array(u8, raw_pkt, data_len)
-		),
-	    TP_fast_assign(
-		struct ib_other_headers *ohdr;
-
-		__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
-		if (__entry->lnh == HFI1_LRH_BTH)
-			ohdr = &hdr->u.oth;
-		else
-			ohdr = &hdr->u.l.oth;
-		DD_DEV_ASSIGN(dd);
-		__entry->slid = be16_to_cpu(hdr->lrh[3]);
-		__entry->dlid = be16_to_cpu(hdr->lrh[1]);
-		__entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
-		__entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
-		__entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
-		__entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff;
-		__entry->hdr_len = hdr_len;
-		__entry->data_len = data_len;
-		memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
-		memcpy(__get_dynamic_array(raw_pkt), data, data_len);
-		),
-	    TP_printk(
-		"[%s] " SNOOP_PRN,
-		__get_str(dev),
-		__entry->slid,
-		__entry->dlid,
-		__entry->qpn,
-		__entry->opcode,
-		show_ib_opcode(__entry->opcode),
-		__entry->sl,
-		__entry->pkey,
-		__entry->hdr_len,
-		__entry->data_len
-		)
-);
-
 #endif /* __HFI1_TRACE_RX_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index a761f804111e..77697d690f3e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1144,7 +1144,7 @@ static int pin_vector_pages(struct user_sdma_request *req,
 	rb_node = hfi1_mmu_rb_extract(pq->handler,
 				      (unsigned long)iovec->iov.iov_base,
 				      iovec->iov.iov_len);
-	if (rb_node && !IS_ERR(rb_node))
+	if (rb_node)
 		node = container_of(rb_node, struct sdma_mmu_node, rb);
 	else
 		rb_node = NULL;
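The user_sdma.c change drops an IS_ERR() test because hfi1_mmu_rb_extract() signals failure with a NULL pointer, never with an ERR_PTR(); testing both conventions at once suggests the caller does not know which one the callee follows. The two conventions are mutually exclusive per function, sketched below with illustrative lookup_null()/lookup_errptr():

	#include <linux/err.h>
	#include <linux/errno.h>

	struct node;

	struct node *lookup_null(unsigned long key);	/* NULL on failure */
	struct node *lookup_errptr(unsigned long key);	/* ERR_PTR(-errno) on failure */

	static int caller(unsigned long key)
	{
		struct node *n;

		n = lookup_null(key);
		if (!n)			/* IS_ERR(n) can never be true here */
			return -ENOENT;

		n = lookup_errptr(key);
		if (IS_ERR(n))		/* !n would miss the encoded errno */
			return PTR_ERR(n);

		return 0;
	}
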
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 5fc623362731..b9bf0759f10a 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -102,7 +102,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
 	if (vlan_tag < 0x1000)
 		vlan_tag |= (ah_attr->sl & 7) << 13;
 	ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
-	ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+	ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	ah->av.eth.gid_index = ret;
 	ah->av.eth.vlan = cpu_to_be16(vlan_tag);
 	ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
 	if (ah_attr->static_rate) {
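The ah.c fix captures the return of mlx4_ib_gid_index_to_real_index() in a signed variable and checks it before storing it in the narrow unsigned gid_index field; assigning directly, as the old code did, silently truncates a negative errno into a plausible-looking index. A standalone illustration of the truncation (resolve_index() is made up for the example):

	#include <stdio.h>
	#include <stdint.h>

	/* illustrative: a real index >= 0 on success, negative errno on failure */
	static int resolve_index(int requested)
	{
		return requested < 16 ? requested : -22;	/* -EINVAL */
	}

	int main(void)
	{
		uint8_t gid_index;
		int ret;

		gid_index = resolve_index(99);	/* old pattern: stores 234 */
		printf("truncated error looks like index %u\n", gid_index);

		ret = resolve_index(99);	/* fixed pattern */
		if (ret < 0)
			return 1;		/* propagate instead of storing */
		gid_index = ret;
		return 0;
	}
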
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 1ea686b9e0f9..6a0fec357dae 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -253,11 +253,14 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 	if (context)
 		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
 			err = -EFAULT;
-			goto err_dbmap;
+			goto err_cq_free;
 		}
 
 	return &cq->ibcq;
 
+err_cq_free:
+	mlx4_cq_free(dev->dev, &cq->mcq);
+
 err_dbmap:
 	if (context)
 		mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
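The cq.c fix adds an err_cq_free label because the ib_copy_to_udata() failure happens after the hardware CQ already exists; jumping to the old label leaked it. The general kernel idiom is a ladder of labels in reverse order of acquisition, so each failure point jumps to the label that unwinds exactly what has been set up so far. A minimal sketch (alloc_a/alloc_b/publish and their free_* counterparts are illustrative):

	int alloc_a(void);
	int alloc_b(void);
	int publish(void);
	void free_a(void);
	void free_b(void);

	static int setup(void)
	{
		int err;

		err = alloc_a();
		if (err)
			goto out;

		err = alloc_b();
		if (err)
			goto err_free_a;

		err = publish();
		if (err)
			goto err_free_b;	/* newest resource is freed first */

		return 0;

	err_free_b:
		free_b();
	err_free_a:
		free_a();
	out:
		return err;
	}
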
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 79d017baf6f4..fcd04b881ec1 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -932,8 +932,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 		if (err)
 			goto err_create;
 	} else {
-		/* for now choose 64 bytes till we have a proper interface */
-		cqe_size = 64;
+		cqe_size = cache_line_size() == 128 ? 128 : 64;
 		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
 				       &index, &inlen);
 		if (err)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 63036c731626..32b09f059c84 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2311,14 +2311,14 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 {
 	struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
 	struct ib_event ibev;
-
+	bool fatal = false;
 	u8 port = 0;
 
 	switch (event) {
 	case MLX5_DEV_EVENT_SYS_ERROR:
-		ibdev->ib_active = false;
 		ibev.event = IB_EVENT_DEVICE_FATAL;
 		mlx5_ib_handle_internal_error(ibdev);
+		fatal = true;
 		break;
 
 	case MLX5_DEV_EVENT_PORT_UP:
@@ -2370,6 +2370,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 
 	if (ibdev->ib_active)
 		ib_dispatch_event(&ibev);
+
+	if (fatal)
+		ibdev->ib_active = false;
 }
 
 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
@@ -3115,7 +3118,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	}
 	err = init_node_data(dev);
 	if (err)
-		goto err_dealloc;
+		goto err_free_port;
 
 	mutex_init(&dev->flow_db.lock);
 	mutex_init(&dev->cap_mask_mutex);
@@ -3125,7 +3128,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	if (ll == IB_LINK_LAYER_ETHERNET) {
 		err = mlx5_enable_roce(dev);
 		if (err)
-			goto err_dealloc;
+			goto err_free_port;
 	}
 
 	err = create_dev_resources(&dev->devr);
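The first two main.c hunks reorder the handling of a fatal event: the old code cleared ib_active inside the SYS_ERROR case, so the `if (ibdev->ib_active)` guard a few lines later suppressed the very IB_EVENT_DEVICE_FATAL it was about to send. Recording the condition in a local flag and clearing the state only after dispatch keeps the notification ordering right; roughly, with illustrative stand-ins (my_ibdev, my_event, dispatch):

	static void handle_event(struct my_ibdev *d, enum my_event event)
	{
		bool fatal = false;

		if (event == MY_EVENT_SYS_ERROR)
			fatal = true;		/* don't clear d->active yet */

		if (d->active)
			dispatch(d, event);	/* the fatal event still goes out */

		if (fatal)
			d->active = false;	/* now refuse further dispatches */
	}
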
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index dcdcd195fe53..7d689903c87c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -626,6 +626,8 @@ struct mlx5_ib_dev {
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
 	struct timer_list		delay_timer;
+	/* Prevents soft lock on massive reg MRs */
+	struct mutex			slow_path_mutex;
 	int				fill_delay;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	struct ib_odp_caps	odp_caps;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d4ad672b905b..4e9012463c37 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -610,6 +610,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 	int err;
 	int i;
 
+	mutex_init(&dev->slow_path_mutex);
 	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
 	if (!cache->wq) {
 		mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -1182,9 +1183,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		goto error;
 	}
 
-	if (!mr)
+	if (!mr) {
+		mutex_lock(&dev->slow_path_mutex);
 		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
 				page_shift, access_flags);
+		mutex_unlock(&dev->slow_path_mutex);
+	}
 
 	if (IS_ERR(mr)) {
 		err = PTR_ERR(mr);
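The mr.c hunks add slow_path_mutex around reg_create(), the non-cached MR path: with massive registrations in flight, many threads grinding through that path concurrently can hog CPUs long enough to trigger soft-lockup warnings, while the cached path stays lock-free. The shape of the pattern, sketched with illustrative get_obj()/make_fast()/make_slow():

	#include <linux/mutex.h>

	static DEFINE_MUTEX(slow_path_mutex);

	struct obj *make_fast(void);	/* e.g. grab from a pre-filled cache */
	struct obj *make_slow(void);	/* expensive fallback */

	static struct obj *get_obj(void)
	{
		struct obj *o = make_fast();

		if (!o) {
			/* serialize the expensive fallback rather than letting
			 * every caller burn a CPU in it simultaneously
			 */
			mutex_lock(&slow_path_mutex);
			o = make_slow();
			mutex_unlock(&slow_path_mutex);
		}
		return o;
	}
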
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7ce97daf26c6..d1e921816bfe 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2051,8 +2051,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 
 	mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
 		    qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
-		    to_mcq(init_attr->recv_cq)->mcq.cqn,
-		    to_mcq(init_attr->send_cq)->mcq.cqn);
+		    init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
+		    init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1);
 
 	qp->trans_qp.xrcdn = xrcdn;
 
@@ -4814,6 +4814,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
 					 udata->inlen))
 		return ERR_PTR(-EOPNOTSUPP);
 
+	if (init_attr->log_ind_tbl_size >
+	    MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
+		mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
+			    init_attr->log_ind_tbl_size,
+			    MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
+		return ERR_PTR(-EINVAL);
+	}
+
 	min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
 	if (udata->outlen && udata->outlen < min_resp_len)
 		return ERR_PTR(-EINVAL);
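The second qp.c hunk rejects an indirection table whose log2 size exceeds the device's reported log_max_rqt_size before anything is allocated; a user-controlled size should be validated against the capability up front rather than passed through for firmware to fail on later. Reduced to its essentials (validate_rqt_size() is an illustrative wrapper, not a driver function):

	static int validate_rqt_size(struct mlx5_ib_dev *dev, u32 log_requested)
	{
		u32 log_max = MLX5_CAP_GEN(dev->mdev, log_max_rqt_size);

		if (log_requested > log_max) {
			mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
				    log_requested, log_max);
			return -EINVAL;
		}
		return 0;
	}
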
diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
index 01f71caa3ac4..f2cefb0d9180 100644
--- a/drivers/infiniband/sw/rdmavt/dma.c
+++ b/drivers/infiniband/sw/rdmavt/dma.c
@@ -90,9 +90,6 @@ static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
 	if (WARN_ON(!valid_dma_direction(direction)))
 		return BAD_DMA_ADDRESS;
 
-	if (offset + size > PAGE_SIZE)
-		return BAD_DMA_ADDRESS;
-
 	addr = (u64)page_address(page);
 	if (addr)
 		addr += offset;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index b8258e4f0aea..ffff5a54cb34 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -243,10 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
 {
 	int err;
 	struct socket *sock;
-	struct udp_port_cfg udp_cfg;
-	struct udp_tunnel_sock_cfg tnl_cfg;
-
-	memset(&udp_cfg, 0, sizeof(udp_cfg));
+	struct udp_port_cfg udp_cfg = {0};
+	struct udp_tunnel_sock_cfg tnl_cfg = {0};
 
 	if (ipv6) {
 		udp_cfg.family = AF_INET6;
@@ -264,10 +262,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
 		return ERR_PTR(err);
 	}
 
-	tnl_cfg.sk_user_data = NULL;
 	tnl_cfg.encap_type = 1;
 	tnl_cfg.encap_rcv = rxe_udp_encap_recv;
-	tnl_cfg.encap_destroy = NULL;
 
 	/* Setup UDP tunnel */
 	setup_udp_tunnel_sock(net, sock, &tnl_cfg);
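In the rxe_net.c hunks the old code memset udp_cfg but only assigned individual tnl_cfg members, leaving the rest of tnl_cfg as uninitialized stack memory (the GRO-related callbacks among them); `= {0}` zero-initializes every member of both structures, so the piecemeal NULL stores become redundant and are dropped. A standalone illustration of the difference (tunnel_cfg here is a made-up struct, not the real udp_tunnel_sock_cfg):

	#include <stdio.h>

	struct tunnel_cfg {
		void *sk_user_data;
		int encap_type;
		void (*encap_rcv)(void);
		void (*gro_receive)(void);	/* easy to forget in piecemeal init */
	};

	int main(void)
	{
		struct tunnel_cfg safe = {0};	/* every member zeroed */
		struct tunnel_cfg risky;	/* indeterminate contents */

		risky.sk_user_data = NULL;	/* piecemeal stores cover only the */
		risky.encap_type = 1;		/* members someone thought of; */
		risky.encap_rcv = NULL;		/* risky.gro_receive is still garbage */

		printf("safe.gro_receive == NULL: %d\n", safe.gro_receive == 0);
		return 0;
	}
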
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b8036cfbce04..c3e60e4bde6e 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -522,6 +522,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
 	if (qp->sq.queue) {
 		__rxe_do_task(&qp->comp.task);
 		__rxe_do_task(&qp->req.task);
+		rxe_queue_reset(qp->sq.queue);
 	}
 
 	/* cleanup attributes */
@@ -573,6 +574,7 @@ void rxe_qp_error(struct rxe_qp *qp)
 {
 	qp->req.state = QP_STATE_ERROR;
 	qp->resp.state = QP_STATE_ERROR;
+	qp->attr.qp_state = IB_QPS_ERR;
 
 	/* drain work and packet queues */
 	rxe_run_task(&qp->resp.task, 1);
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index 08274254eb88..d14bf496d62d 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -84,6 +84,15 @@ err1:
 	return -EINVAL;
 }
 
+inline void rxe_queue_reset(struct rxe_queue *q)
+{
+	/* queue is comprised from header and the memory
+	 * of the actual queue. See "struct rxe_queue_buf" in rxe_queue.h
+	 * reset only the queue itself and not the management header
+	 */
+	memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf));
+}
+
 struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
 				 int *num_elem,
 				 unsigned int elem_size)
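rxe_queue_reset() above wipes only the data region of the queue buffer: buf_size covers the management header plus the ring, and the header (which user space may have mmap'ed) must survive a transition to RESET. The layout assumption, sketched with an illustrative struct (the field names are not the real rxe_queue_buf):

	#include <string.h>

	struct queue_buf {
		unsigned int log2_elem_size;
		unsigned int index_mask;
		/* ... other management fields shared with user space ... */
		unsigned char data[];		/* ring storage follows the header */
	};

	static void queue_reset(struct queue_buf *buf, size_t buf_size)
	{
		/* zero the ring contents, preserve the shared header */
		memset(buf->data, 0, buf_size - sizeof(*buf));
	}
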
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 239fd609c31e..8c8641c87817 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -84,6 +84,8 @@ int do_mmap_info(struct rxe_dev *rxe,
 		 size_t buf_size,
 		 struct rxe_mmap_info **ip_p);
 
+void rxe_queue_reset(struct rxe_queue *q);
+
 struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
 				 int *num_elem,
 				 unsigned int elem_size);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 832846b73ea0..22bd9630dcd9 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -696,7 +696,8 @@ next_wqe:
 						       qp->req.wqe_index);
 			wqe->state = wqe_state_done;
 			wqe->status = IB_WC_SUCCESS;
-			goto complete;
+			__rxe_do_task(&qp->comp.task);
+			return 0;
 		}
 		payload = mtu;
 	}
@@ -745,13 +746,17 @@ err:
 	wqe->status = IB_WC_LOC_PROT_ERR;
 	wqe->state = wqe_state_error;
 
-complete:
-	if (qp_type(qp) != IB_QPT_RC) {
-		while (rxe_completer(qp) == 0)
-			;
-	}
-
-	return 0;
+	/*
+	 * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
+	 * ---------8<---------8<-------------
+	 * ...Note that if a completion error occurs, a Work Completion
+	 * will always be generated, even if the signaling
+	 * indicator requests an Unsignaled Completion.
+	 * ---------8<---------8<-------------
+	 */
+	wqe->wr.send_flags |= IB_SEND_SIGNALED;
+	__rxe_do_task(&qp->comp.task);
+	return -EAGAIN;
 
 exit:
 	return -EAGAIN;
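The closing rxe_req.c hunk encodes the IBA rule its new comment quotes: a completion error must generate a Work Completion even for a work request that asked for an unsignaled completion. Forcing IB_SEND_SIGNALED on the failed WQE before kicking the completer is the mechanism; in outline (my_wqe, run_completer and the constants are shortened illustrations, not the rxe symbols):

	static int fail_wqe(struct my_wqe *wqe, int status)
	{
		wqe->status = status;
		wqe->state = WQE_STATE_ERROR;
		/* IBA 10.7.3.1: errors always produce a Work Completion */
		wqe->send_flags |= SEND_SIGNALED;
		run_completer();	/* consumes the WQE, posts the WC */
		return -EAGAIN;		/* stop processing this QP for now */
	}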