Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig                           11
-rw-r--r--  drivers/infiniband/core/cm.c                          5
-rw-r--r--  drivers/infiniband/core/netlink.c                     2
-rw-r--r--  drivers/infiniband/core/sysfs.c                       1
-rw-r--r--  drivers/infiniband/core/ucma.c                        2
-rw-r--r--  drivers/infiniband/core/uverbs.h                     36
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c                109
-rw-r--r--  drivers/infiniband/core/uverbs_main.c               128
-rw-r--r--  drivers/infiniband/core/verbs.c                      17
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c                  4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_user_sdma.c         7
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c                       9
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c                     8
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c                      25
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c                     3
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h                  6
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c                     167
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c                      21
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c                      8
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c                 2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma.h                53
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c              2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c            7
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c           6
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c              11
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.h                  14
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_sdma.c             6
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h                10
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h                  4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c              14
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c              24
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c            12
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c       29
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_netlink.c          4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_vlan.c            10
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c                 500
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h                  21
37 files changed, 894 insertions(+), 404 deletions(-)
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index b84791f03a27..5ceda710f516 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -31,17 +31,6 @@ config INFINIBAND_USER_ACCESS
 	  libibverbs, libibcm and a hardware driver library from
 	  <http://www.openfabrics.org/git/>.
 
-config INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-	bool "Experimental and unstable ABI for userspace access to flow steering verbs"
-	depends on INFINIBAND_USER_ACCESS
-	depends on STAGING
-	---help---
-	  The final ABI for userspace access to flow steering verbs
-	  has not been defined. To use the current ABI, *WHICH WILL
-	  CHANGE IN THE FUTURE*, say Y here.
-
-	  If unsure, say N.
-
 config INFINIBAND_USER_MEM
 	bool
 	depends on INFINIBAND_USER_ACCESS != n
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 784b97cb05b0..f2ef7ef0f36f 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -383,14 +383,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
 	int id;
-	static int next_id;
 
 	idr_preload(GFP_KERNEL);
 	spin_lock_irqsave(&cm.lock, flags);
 
-	id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT);
-	if (id >= 0)
-		next_id = max(id + 1, 0);
+	id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
 
 	spin_unlock_irqrestore(&cm.lock, flags);
 	idr_preload_end();
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index da06abde9e0d..a1e9cba84944 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -148,7 +148,7 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	list_for_each_entry(client, &client_list, list) {
 		if (client->index == index) {
 			if (op < 0 || op >= client->nops ||
-			    !client->cb_table[RDMA_NL_GET_OP(op)].dump)
+			    !client->cb_table[op].dump)
 				return -EINVAL;
 
 			{
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index cde1e7b5b85d..faad2caf22b1 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -612,6 +612,7 @@ static ssize_t show_node_type(struct device *device,
 	switch (dev->node_type) {
 	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);
 	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type);
+	case RDMA_NODE_USNIC:	  return sprintf(buf, "%d: usNIC\n", dev->node_type);
 	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
 	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
 	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 826016b013ca..ab8b1c30b36b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -57,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 static unsigned int max_backlog = 1024;
 
 static struct ctl_table_header *ucma_ctl_table_hdr;
-static ctl_table ucma_ctl_table[] = {
+static struct ctl_table ucma_ctl_table[] = {
 	{
 		.procname	= "max_backlog",
 		.data		= &max_backlog,
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index d8f9c6c272d7..bdc842e9faef 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -47,6 +47,14 @@
 #include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\
+	do {								\
+		(udata)->inbuf  = (void __user *) (ibuf);		\
+		(udata)->outbuf = (void __user *) (obuf);		\
+		(udata)->inlen  = (ilen);				\
+		(udata)->outlen = (olen);				\
+	} while (0)
+
 /*
  * Our lifetime rules for these structs are the following:
  *
@@ -178,6 +186,22 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
 
+struct ib_uverbs_flow_spec {
+	union {
+		union {
+			struct ib_uverbs_flow_spec_hdr hdr;
+			struct {
+				__u32 type;
+				__u16 size;
+				__u16 reserved;
+			};
+		};
+		struct ib_uverbs_flow_spec_eth     eth;
+		struct ib_uverbs_flow_spec_ipv4    ipv4;
+		struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
+	};
+};
+
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
 				 const char __user *buf, int in_len,	\
@@ -217,9 +241,13 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
 IB_UVERBS_DECLARE_CMD(create_xsrq);
 IB_UVERBS_DECLARE_CMD(open_xrcd);
 IB_UVERBS_DECLARE_CMD(close_xrcd);
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-IB_UVERBS_DECLARE_CMD(create_flow);
-IB_UVERBS_DECLARE_CMD(destroy_flow);
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+
+#define IB_UVERBS_DECLARE_EX_CMD(name)				\
+	int ib_uverbs_ex_##name(struct ib_uverbs_file *file,	\
+				struct ib_udata *ucore,		\
+				struct ib_udata *uhw)
+
+IB_UVERBS_DECLARE_EX_CMD(create_flow);
+IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2f0f01b70e3b..65f6e7dc380c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -54,17 +54,7 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
 static struct uverbs_lock_class ah_lock_class	= { .name = "AH-uobj" };
 static struct uverbs_lock_class srq_lock_class	= { .name = "SRQ-uobj" };
 static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
 static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
-
-#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\
-	do {								\
-		(udata)->inbuf  = (void __user *) (ibuf);		\
-		(udata)->outbuf = (void __user *) (obuf);		\
-		(udata)->inlen  = (ilen);				\
-		(udata)->outlen = (olen);				\
-	} while (0)
 
 /*
  * The ib_uobject locking scheme is as follows:
@@ -939,13 +929,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
 		return -EINVAL;
 
-	/*
-	 * Local write permission is required if remote write or
-	 * remote atomic permission is also requested.
-	 */
-	if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
-	    !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
-		return -EINVAL;
+	ret = ib_check_mr_access(cmd.access_flags);
+	if (ret)
+		return ret;
 
 	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
 	if (!uobj)
@@ -2128,6 +2114,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 			}
 			next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;
 			next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+			if (next->opcode == IB_WR_SEND_WITH_IMM)
+				next->ex.imm_data =
+					(__be32 __force) user_wr->ex.imm_data;
 		} else {
 			switch (next->opcode) {
 			case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -2601,8 +2590,7 @@ out_put:
 	return ret ? ret : in_len;
 }
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
 				union ib_flow_spec *ib_spec)
 {
 	ib_spec->type = kern_spec->type;
@@ -2642,28 +2630,31 @@ static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
 	return 0;
 }
 
-ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
-			      const char __user *buf, int in_len,
-			      int out_len)
+int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
+			     struct ib_udata *ucore,
+			     struct ib_udata *uhw)
 {
 	struct ib_uverbs_create_flow	  cmd;
 	struct ib_uverbs_create_flow_resp resp;
 	struct ib_uobject		  *uobj;
 	struct ib_flow			  *flow_id;
-	struct ib_kern_flow_attr	  *kern_flow_attr;
+	struct ib_uverbs_flow_attr	  *kern_flow_attr;
 	struct ib_flow_attr		  *flow_attr;
 	struct ib_qp			  *qp;
 	int err = 0;
 	void *kern_spec;
 	void *ib_spec;
 	int i;
-	int kern_attr_size;
 
-	if (out_len < sizeof(resp))
+	if (ucore->outlen < sizeof(resp))
 		return -ENOSPC;
 
-	if (copy_from_user(&cmd, buf, sizeof(cmd)))
-		return -EFAULT;
+	err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+	if (err)
+		return err;
+
+	ucore->inbuf += sizeof(cmd);
+	ucore->inlen -= sizeof(cmd);
 
 	if (cmd.comp_mask)
 		return -EINVAL;
@@ -2672,32 +2663,27 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 	    !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
 		return -EPERM;
 
-	if (cmd.flow_attr.num_of_specs < 0 ||
-	    cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+	if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
 		return -EINVAL;
 
-	kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
-			 sizeof(struct ib_uverbs_cmd_hdr_ex);
-
-	if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
-	    kern_attr_size < 0 || kern_attr_size >
-	    (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+	if (cmd.flow_attr.size > ucore->inlen ||
+	    cmd.flow_attr.size >
+	    (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
 		return -EINVAL;
 
 	if (cmd.flow_attr.num_of_specs) {
-		kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+		kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
+					 GFP_KERNEL);
 		if (!kern_flow_attr)
 			return -ENOMEM;
 
 		memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
-		if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
-				   kern_attr_size)) {
-			err = -EFAULT;
+		err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+					 cmd.flow_attr.size);
+		if (err)
 			goto err_free_attr;
-		}
 	} else {
 		kern_flow_attr = &cmd.flow_attr;
-		kern_attr_size = sizeof(cmd.flow_attr);
 	}
@@ -2714,7 +2700,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 		goto err_uobj;
 	}
 
-	flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+	flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
 	if (!flow_attr) {
 		err = -ENOMEM;
 		goto err_put;
@@ -2729,19 +2715,22 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 
 	kern_spec = kern_flow_attr + 1;
 	ib_spec = flow_attr + 1;
-	for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+	for (i = 0; i < flow_attr->num_of_specs &&
+	     cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
+	     cmd.flow_attr.size >=
+	     ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
 		err = kern_spec_to_ib_spec(kern_spec, ib_spec);
 		if (err)
 			goto err_free;
 		flow_attr->size +=
 			((union ib_flow_spec *) ib_spec)->size;
-		kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size;
-		kern_spec += ((struct ib_kern_spec *) kern_spec)->size;
+		cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
+		kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
 		ib_spec += ((union ib_flow_spec *) ib_spec)->size;
 	}
-	if (kern_attr_size) {
-		pr_warn("create flow failed, %d bytes left from uverb cmd\n",
-			kern_attr_size);
+	if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
+		pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+			i, cmd.flow_attr.size);
 		goto err_free;
 	}
 	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
@@ -2760,11 +2749,10 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 	memset(&resp, 0, sizeof(resp));
 	resp.flow_handle = uobj->id;
 
-	if (copy_to_user((void __user *)(unsigned long) cmd.response,
-			 &resp, sizeof(resp))) {
-		err = -EFAULT;
+	err = ib_copy_to_udata(ucore,
+			       &resp, sizeof(resp));
+	if (err)
 		goto err_copy;
-	}
 
 	put_qp_read(qp);
 	mutex_lock(&file->mutex);
@@ -2777,7 +2765,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 	kfree(flow_attr);
 	if (cmd.flow_attr.num_of_specs)
 		kfree(kern_flow_attr);
-	return in_len;
+	return 0;
 err_copy:
 	idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
 destroy_flow:
@@ -2794,16 +2782,18 @@ err_free_attr:
 	return err;
 }
 
-ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
-			       const char __user *buf, int in_len,
-			       int out_len) {
+int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
+			      struct ib_udata *ucore,
+			      struct ib_udata *uhw)
+{
 	struct ib_uverbs_destroy_flow	cmd;
 	struct ib_flow			*flow_id;
 	struct ib_uobject		*uobj;
 	int				ret;
 
-	if (copy_from_user(&cmd, buf, sizeof(cmd)))
-		return -EFAULT;
+	ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+	if (ret)
+		return ret;
 
 	uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
 			      file->ucontext);
@@ -2825,9 +2815,8 @@ ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
 
 	put_uobj(uobj);
 
-	return ret ? ret : in_len;
+	return ret;
 }
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
 
 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 				struct ib_uverbs_create_xsrq *cmd,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2df31f68ea09..34386943ebcf 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -115,10 +115,13 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
 	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
 	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-	[IB_USER_VERBS_CMD_CREATE_FLOW]		= ib_uverbs_create_flow,
-	[IB_USER_VERBS_CMD_DESTROY_FLOW]	= ib_uverbs_destroy_flow
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+};
+
+static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
+				    struct ib_udata *ucore,
+				    struct ib_udata *uhw) = {
+	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
+	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
@@ -589,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 {
 	struct ib_uverbs_file *file = filp->private_data;
 	struct ib_uverbs_cmd_hdr hdr;
+	__u32 flags;
 
 	if (count < sizeof hdr)
 		return -EINVAL;
@@ -596,45 +600,105 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 	if (copy_from_user(&hdr, buf, sizeof hdr))
 		return -EFAULT;
 
-	if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
-	    !uverbs_cmd_table[hdr.command])
-		return -EINVAL;
+	flags = (hdr.command &
+		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
 
-	if (!file->ucontext &&
-	    hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
-		return -EINVAL;
+	if (!flags) {
+		__u32 command;
 
-	if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
-		return -ENOSYS;
+		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+					   IB_USER_VERBS_CMD_COMMAND_MASK))
+			return -EINVAL;
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-	if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
-		struct ib_uverbs_cmd_hdr_ex hdr_ex;
+		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
 
-		if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
-			return -EFAULT;
+		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
+		    !uverbs_cmd_table[command])
+			return -EINVAL;
 
-		if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
+		if (!file->ucontext &&
+		    command != IB_USER_VERBS_CMD_GET_CONTEXT)
 			return -EINVAL;
 
-		return uverbs_cmd_table[hdr.command](file,
-						     buf + sizeof(hdr_ex),
-						     (hdr_ex.in_words +
-						      hdr_ex.provider_in_words) * 4,
-						     (hdr_ex.out_words +
-						      hdr_ex.provider_out_words) * 4);
-	} else {
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+		if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
+			return -ENOSYS;
+
 		if (hdr.in_words * 4 != count)
 			return -EINVAL;
 
-		return uverbs_cmd_table[hdr.command](file,
+		return uverbs_cmd_table[command](file,
 						 buf + sizeof(hdr),
 						 hdr.in_words * 4,
 						 hdr.out_words * 4);
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
+
+	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+		__u32 command;
+
+		struct ib_uverbs_ex_cmd_hdr ex_hdr;
+		struct ib_udata ucore;
+		struct ib_udata uhw;
+		int err;
+		size_t written_count = count;
+
+		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+					   IB_USER_VERBS_CMD_COMMAND_MASK))
+			return -EINVAL;
+
+		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
+		    !uverbs_ex_cmd_table[command])
+			return -ENOSYS;
+
+		if (!file->ucontext)
+			return -EINVAL;
+
+		if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
+			return -ENOSYS;
+
+		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+			return -EINVAL;
+
+		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+			return -EFAULT;
+
+		count -= sizeof(hdr) + sizeof(ex_hdr);
+		buf += sizeof(hdr) + sizeof(ex_hdr);
+
+		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
+			return -EINVAL;
+
+		if (ex_hdr.response) {
+			if (!hdr.out_words && !ex_hdr.provider_out_words)
+				return -EINVAL;
+		} else {
+			if (hdr.out_words || ex_hdr.provider_out_words)
+				return -EINVAL;
+		}
+
+		INIT_UDATA(&ucore,
+			   (hdr.in_words) ? buf : 0,
+			   (unsigned long)ex_hdr.response,
+			   hdr.in_words * 8,
+			   hdr.out_words * 8);
+
+		INIT_UDATA(&uhw,
+			   (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
+			   (ex_hdr.provider_out_words) ? (unsigned long)ex_hdr.response + ucore.outlen : 0,
+			   ex_hdr.provider_in_words * 8,
+			   ex_hdr.provider_out_words * 8);
+
+		err = uverbs_ex_cmd_table[command](file,
+						   &ucore,
+						   &uhw);
+
+		if (err)
+			return err;
+
+		return written_count;
 	}
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+
+	return -ENOSYS;
 }
 
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index a321df28bab2..d4f6ddf72ffa 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -114,6 +114,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
 		return RDMA_TRANSPORT_IB;
 	case RDMA_NODE_RNIC:
 		return RDMA_TRANSPORT_IWARP;
+	case RDMA_NODE_USNIC:
+		return RDMA_TRANSPORT_USNIC;
 	default:
 		BUG();
 		return 0;
@@ -130,6 +132,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
 	case RDMA_TRANSPORT_IB:
 		return IB_LINK_LAYER_INFINIBAND;
 	case RDMA_TRANSPORT_IWARP:
+	case RDMA_TRANSPORT_USNIC:
 		return IB_LINK_LAYER_ETHERNET;
 	default:
 		return IB_LINK_LAYER_UNSPECIFIED;
@@ -958,6 +961,11 @@ EXPORT_SYMBOL(ib_resize_cq);
 struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
 {
 	struct ib_mr *mr;
+	int err;
+
+	err = ib_check_mr_access(mr_access_flags);
+	if (err)
+		return ERR_PTR(err);
 
 	mr = pd->device->get_dma_mr(pd, mr_access_flags);
 
@@ -980,6 +988,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
 			     u64 *iova_start)
 {
 	struct ib_mr *mr;
+	int err;
+
+	err = ib_check_mr_access(mr_access_flags);
+	if (err)
+		return ERR_PTR(err);
 
 	if (!pd->device->reg_phys_mr)
 		return ERR_PTR(-ENOSYS);
@@ -1010,6 +1023,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,
 	struct ib_pd    *old_pd;
 	int ret;
 
+	ret = ib_check_mr_access(mr_access_flags);
+	if (ret)
+		return ret;
+
 	if (!mr->device->rereg_phys_mr)
 		return -ENOSYS;
 
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 33d2cc6ab562..4a033853312e 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -602,10 +602,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	     rdev->lldi.vr->qp.size,
 	     rdev->lldi.vr->cq.start,
 	     rdev->lldi.vr->cq.size);
-	PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
+	PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
 	     "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
 	     (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
-	     (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2),
+	     (u64)pci_resource_start(rdev->lldi.pdev, 2),
 	     rdev->lldi.db_reg,
 	     rdev->lldi.gts_reg,
 	     rdev->qpshift, rdev->qpmask,
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
index f5cb13b21445..cc04b7ba3488 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -280,9 +280,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
 	int j;
 	int ret;
 
-	ret = get_user_pages(current, current->mm, addr,
-			     npages, 0, 1, pages, NULL);
-
+	ret = get_user_pages_fast(addr, npages, 0, pages);
 	if (ret != npages) {
 		int i;
 
@@ -811,10 +809,7 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
 	while (dim) {
 		const int mxp = 8;
 
-		down_write(&current->mm->mmap_sem);
 		ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
-		up_write(&current->mm->mmap_sem);
-
 		if (ret <= 0)
 			goto done_unlock;
 		else {
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index d5e60f44ba5a..66dbf8062374 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -324,7 +324,7 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
 	u32 i;
 
 	i = cq->mcq.cons_index;
-	while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+	while (get_sw_cqe(cq, i))
 		++i;
 
 	return i - cq->mcq.cons_index;
@@ -365,7 +365,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 
 	mutex_lock(&cq->resize_mutex);
 
-	if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+	if (entries < 1) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -376,6 +376,11 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		goto out;
 	}
 
+	if (entries > dev->dev->caps.max_cqes) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	if (ibcq->uobject) {
 		err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
 		if (err)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f0612645de99..1aad9b3e6bdd 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1691,11 +1691,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 		ibdev->ib_dev.create_flow	= mlx4_ib_create_flow;
 		ibdev->ib_dev.destroy_flow	= mlx4_ib_destroy_flow;
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-		ibdev->ib_dev.uverbs_cmd_mask	|=
-			(1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
-			(1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+		ibdev->ib_dev.uverbs_ex_cmd_mask	|=
+			(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
 	}
 
 	mlx4_ib_alloc_eqs(dev, ibdev);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 344ab03948a3..b72627429745 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -556,7 +556,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 		goto err_db;
 	}
 	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
-	(*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+	(*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
 	*index = to_mucontext(context)->uuari.uars[0].index;
 
@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	}
 	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	*index = dev->mdev.priv.uuari.uars[0].index;
 
 	return 0;
@@ -653,8 +653,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
 	int eqn;
 	int err;
 
+	if (entries < 0)
+		return ERR_PTR(-EINVAL);
+
 	entries = roundup_pow_of_two(entries + 1);
-	if (entries < 1 || entries > dev->mdev.caps.max_cqes)
+	if (entries > dev->mdev.caps.max_cqes)
 		return ERR_PTR(-EINVAL);
 
 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -747,17 +750,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq)
 	return 0;
 }
 
-static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
-			u32 rsn)
+static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
 {
-	u32 lrsn;
-
-	if (srq)
-		lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
-	else
-		lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
-
-	return rsn == lrsn;
+	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
 }
 
 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
@@ -787,8 +782,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
 	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
 		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
-		if (is_equal_rsn(cqe64, srq, rsn)) {
-			if (srq)
+		if (is_equal_rsn(cqe64, rsn)) {
+			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
 				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
 			++nfreed;
 		} else if (nfreed) {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b1a6cb3a2809..306534109627 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -745,7 +745,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
 	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	seg->start_addr = 0;
 
-	err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
+	err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
+				    NULL, NULL, NULL);
 	if (err) {
 		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
 		goto err_in;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 836be9157242..4c134d93d4fc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -262,6 +262,9 @@ struct mlx5_ib_mr {
 	int			npages;
 	struct completion	done;
 	enum ib_wc_status	status;
+	struct mlx5_ib_dev     *dev;
+	struct mlx5_create_mkey_mbox_out out;
+	unsigned long		start;
 };
 
 struct mlx5_ib_fast_reg_page_list {
@@ -323,6 +326,7 @@ struct mlx5_cache_ent {
 	struct mlx5_ib_dev     *dev;
 	struct work_struct	work;
 	struct delayed_work	dwork;
+	int			pending;
 };
 
 struct mlx5_mr_cache {
@@ -358,6 +362,8 @@ struct mlx5_ib_dev {
 	spinlock_t		mr_lock;
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
+	struct timer_list		delay_timer;
+	int				fill_delay;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3453580b1eb2..039c3e40fcb4 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -35,11 +35,12 @@
 #include <linux/random.h>
 #include <linux/debugfs.h>
 #include <linux/export.h>
+#include <linux/delay.h>
 #include <rdma/ib_umem.h>
 #include "mlx5_ib.h"
 
 enum {
-	DEF_CACHE_SIZE	= 10,
+	MAX_PENDING_REG_MR = 8,
 };
 
 enum {
@@ -63,6 +64,51 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
 	return order - cache->ent[0].order;
 }
 
+static void reg_mr_callback(int status, void *context)
+{
+	struct mlx5_ib_mr *mr = context;
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct mlx5_mr_cache *cache = &dev->cache;
+	int c = order2idx(dev, mr->order);
+	struct mlx5_cache_ent *ent = &cache->ent[c];
+	u8 key;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ent->lock, flags);
+	ent->pending--;
+	spin_unlock_irqrestore(&ent->lock, flags);
+	if (status) {
+		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+		kfree(mr);
+		dev->fill_delay = 1;
+		mod_timer(&dev->delay_timer, jiffies + HZ);
+		return;
+	}
+
+	if (mr->out.hdr.status) {
+		mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
+			     mr->out.hdr.status,
+			     be32_to_cpu(mr->out.hdr.syndrome));
+		kfree(mr);
+		dev->fill_delay = 1;
+		mod_timer(&dev->delay_timer, jiffies + HZ);
+		return;
+	}
+
+	spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
+	key = dev->mdev.priv.mkey_key++;
+	spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
+	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+
+	cache->last_add = jiffies;
+
+	spin_lock_irqsave(&ent->lock, flags);
+	list_add_tail(&mr->list, &ent->head);
+	ent->cur++;
+	ent->size++;
+	spin_unlock_irqrestore(&ent->lock, flags);
+}
+
 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -78,36 +124,39 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		return -ENOMEM;
 
 	for (i = 0; i < num; i++) {
+		if (ent->pending >= MAX_PENDING_REG_MR) {
+			err = -EAGAIN;
+			break;
+		}
+
 		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 		if (!mr) {
 			err = -ENOMEM;
-			goto out;
+			break;
 		}
 		mr->order = ent->order;
 		mr->umred = 1;
+		mr->dev = dev;
 		in->seg.status = 1 << 6;
 		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
 		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
 		in->seg.log2_page_size = 12;
 
+		spin_lock_irq(&ent->lock);
+		ent->pending++;
+		spin_unlock_irq(&ent->lock);
+		mr->start = jiffies;
 		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
-					    sizeof(*in));
+					    sizeof(*in), reg_mr_callback,
+					    mr, &mr->out);
 		if (err) {
 			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
 			kfree(mr);
-			goto out;
+			break;
 		}
-		cache->last_add = jiffies;
-
-		spin_lock(&ent->lock);
-		list_add_tail(&mr->list, &ent->head);
-		ent->cur++;
-		ent->size++;
-		spin_unlock(&ent->lock);
 	}
 
-out:
 	kfree(in);
 	return err;
 }
@@ -121,16 +170,16 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 	int i;
 
 	for (i = 0; i < num; i++) {
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (list_empty(&ent->head)) {
-			spin_unlock(&ent->lock);
+			spin_unlock_irq(&ent->lock);
 			return;
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 		list_del(&mr->list);
 		ent->cur--;
 		ent->size--;
-		spin_unlock(&ent->lock);
+		spin_unlock_irq(&ent->lock);
 		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
 		if (err)
 			mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -162,9 +211,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 		return -EINVAL;
 
 	if (var > ent->size) {
-		err = add_keys(dev, c, var - ent->size);
-		if (err)
-			return err;
+		do {
+			err = add_keys(dev, c, var - ent->size);
+			if (err && err != -EAGAIN)
+				return err;
+
+			usleep_range(3000, 5000);
+		} while (err);
 	} else if (var < ent->size) {
 		remove_keys(dev, c, ent->size - var);
 	}
@@ -280,23 +333,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 	struct mlx5_ib_dev *dev = ent->dev;
 	struct mlx5_mr_cache *cache = &dev->cache;
 	int i = order2idx(dev, ent->order);
+	int err;
 
 	if (cache->stopped)
 		return;
 
 	ent = &dev->cache.ent[i];
-	if (ent->cur < 2 * ent->limit) {
-		add_keys(dev, i, 1);
-		if (ent->cur < 2 * ent->limit)
-			queue_work(cache->wq, &ent->work);
+	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
+		err = add_keys(dev, i, 1);
+		if (ent->cur < 2 * ent->limit) {
+			if (err == -EAGAIN) {
+				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
+					    i + 2);
+				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
+			} else if (err) {
+				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
+					     i + 2, err);
+				queue_delayed_work(cache->wq, &ent->dwork,
+						   msecs_to_jiffies(1000));
+			} else {
+				queue_work(cache->wq, &ent->work);
+			}
+		}
 	} else if (ent->cur > 2 * ent->limit) {
 		if (!someone_adding(cache) &&
-		    time_after(jiffies, cache->last_add + 60 * HZ)) {
+		    time_after(jiffies, cache->last_add + 300 * HZ)) {
 			remove_keys(dev, i, 1);
 			if (ent->cur > ent->limit)
 				queue_work(cache->wq, &ent->work);
 		} else {
-			queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
+			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
 		}
 	}
 }
@@ -336,18 +403,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 
 		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
 
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (!list_empty(&ent->head)) {
 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 					      list);
 			list_del(&mr->list);
 			ent->cur--;
-			spin_unlock(&ent->lock);
+			spin_unlock_irq(&ent->lock);
 			if (ent->cur < ent->limit)
 				queue_work(cache->wq, &ent->work);
 			break;
 		}
-		spin_unlock(&ent->lock);
+		spin_unlock_irq(&ent->lock);
 
 		queue_work(cache->wq, &ent->work);
 
@@ -374,12 +441,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		return;
 	}
 	ent = &cache->ent[c];
-	spin_lock(&ent->lock);
+	spin_lock_irq(&ent->lock);
 	list_add_tail(&mr->list, &ent->head);
 	ent->cur++;
 	if (ent->cur > 2 * ent->limit)
 		shrink = 1;
-	spin_unlock(&ent->lock);
+	spin_unlock_irq(&ent->lock);
 
 	if (shrink)
 		queue_work(cache->wq, &ent->work);
@@ -394,16 +461,16 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 
 	cancel_delayed_work(&ent->dwork);
 	while (1) {
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (list_empty(&ent->head)) {
-			spin_unlock(&ent->lock);
+			spin_unlock_irq(&ent->lock);
 			return;
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 		list_del(&mr->list);
 		ent->cur--;
 		ent->size--;
-		spin_unlock(&ent->lock);
+		spin_unlock_irq(&ent->lock);
 		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
 		if (err)
 			mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -464,12 +531,18 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 	debugfs_remove_recursive(dev->cache.root);
 }
 
+static void delay_time_func(unsigned long ctx)
+{
+	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
+
+	dev->fill_delay = 0;
+}
+
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
 	int limit;
-	int size;
 	int err;
 	int i;
 
@@ -479,6 +552,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		return -ENOMEM;
 	}
 
+	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 		INIT_LIST_HEAD(&cache->ent[i].head);
 		spin_lock_init(&cache->ent[i].lock);
@@ -489,13 +563,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		ent->order = i + 2;
 		ent->dev = dev;
 
-		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
-			size = dev->mdev.profile->mr_cache[i].size;
+		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
 			limit = dev->mdev.profile->mr_cache[i].limit;
-		} else {
-			size = DEF_CACHE_SIZE;
+		else
 			limit = 0;
-		}
+
 		INIT_WORK(&ent->work, cache_work_func);
 		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 		ent->limit = limit;
@@ -522,6 +594,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 		clean_keys(dev, i);
 
 	destroy_workqueue(dev->cache.wq);
+	del_timer_sync(&dev->delay_timer);
 
 	return 0;
 }
@@ -551,7 +624,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	seg->start_addr = 0;
 
-	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
+	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
+				    NULL);
 	if (err)
 		goto err_in;
 
@@ -660,14 +734,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	int err;
 	int i;
 
-	for (i = 0; i < 10; i++) {
+	for (i = 0; i < 1; i++) {
 		mr = alloc_cached_mr(dev, order);
 		if (mr)
 			break;
 
 		err = add_keys(dev, order2idx(dev, order), 1);
-		if (err) {
-			mlx5_ib_warn(dev, "add_keys failed\n");
+		if (err && err != -EAGAIN) {
+			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
 			break;
 		}
 	}
@@ -759,8 +833,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
 	in->seg.log2_page_size = page_shift;
 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
-	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
+	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
+							 1 << page_shift));
+	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
+				    NULL, NULL);
 	if (err) {
 		mlx5_ib_warn(dev, "create mkey failed\n");
 		goto err_2;
@@ -944,7 +1020,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
 	 *	      TBD not needed - issue 197292 */
 	in->seg.log2_page_size = PAGE_SHIFT;
 
-	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
+	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
+				    NULL, NULL);
 	kfree(in);
 	if (err)
 		goto err_free;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 5659ea880741..7c6b4ba49bec 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
551 } 551 }
552 mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); 552 mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
553 (*in)->ctx.log_pg_sz_remote_qpn = 553 (*in)->ctx.log_pg_sz_remote_qpn =
554 cpu_to_be32((page_shift - PAGE_SHIFT) << 24); 554 cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
555 (*in)->ctx.params2 = cpu_to_be32(offset << 6); 555 (*in)->ctx.params2 = cpu_to_be32(offset << 6);
556 556
557 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); 557 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -648,7 +648,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
648 goto err_buf; 648 goto err_buf;
649 } 649 }
650 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); 650 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
651 (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24); 651 (*in)->ctx.log_pg_sz_remote_qpn =
652 cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
652 /* Set "fast registration enabled" for all kernel QPs */ 653 /* Set "fast registration enabled" for all kernel QPs */
653 (*in)->ctx.params1 |= cpu_to_be32(1 << 11); 654 (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
654 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); 655 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
@@ -1317,9 +1318,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
1317 MLX5_QP_OPTPAR_RAE | 1318 MLX5_QP_OPTPAR_RAE |
1318 MLX5_QP_OPTPAR_RWE | 1319 MLX5_QP_OPTPAR_RWE |
1319 MLX5_QP_OPTPAR_RNR_TIMEOUT | 1320 MLX5_QP_OPTPAR_RNR_TIMEOUT |
1320 MLX5_QP_OPTPAR_PM_STATE, 1321 MLX5_QP_OPTPAR_PM_STATE |
1322 MLX5_QP_OPTPAR_ALT_ADDR_PATH,
1321 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | 1323 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
1322 MLX5_QP_OPTPAR_PM_STATE, 1324 MLX5_QP_OPTPAR_PM_STATE |
1325 MLX5_QP_OPTPAR_ALT_ADDR_PATH,
1323 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | 1326 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
1324 MLX5_QP_OPTPAR_SRQN | 1327 MLX5_QP_OPTPAR_SRQN |
1325 MLX5_QP_OPTPAR_CQN_RCV, 1328 MLX5_QP_OPTPAR_CQN_RCV,
@@ -1550,7 +1553,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1550 mlx5_cur = to_mlx5_state(cur_state); 1553 mlx5_cur = to_mlx5_state(cur_state);
1551 mlx5_new = to_mlx5_state(new_state); 1554 mlx5_new = to_mlx5_state(new_state);
1552 mlx5_st = to_mlx5_st(ibqp->qp_type); 1555 mlx5_st = to_mlx5_st(ibqp->qp_type);
1553 if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) 1556 if (mlx5_st < 0)
1554 goto out; 1557 goto out;
1555 1558
1556 optpar = ib_mask_to_mlx5_opt(attr_mask); 1559 optpar = ib_mask_to_mlx5_opt(attr_mask);
@@ -1744,6 +1747,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
1744 MLX5_MKEY_MASK_PD | 1747 MLX5_MKEY_MASK_PD |
1745 MLX5_MKEY_MASK_LR | 1748 MLX5_MKEY_MASK_LR |
1746 MLX5_MKEY_MASK_LW | 1749 MLX5_MKEY_MASK_LW |
1750 MLX5_MKEY_MASK_KEY |
1747 MLX5_MKEY_MASK_RR | 1751 MLX5_MKEY_MASK_RR |
1748 MLX5_MKEY_MASK_RW | 1752 MLX5_MKEY_MASK_RW |
1749 MLX5_MKEY_MASK_A | 1753 MLX5_MKEY_MASK_A |
@@ -1800,7 +1804,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
1800 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); 1804 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1801 seg->len = cpu_to_be64(wr->wr.fast_reg.length); 1805 seg->len = cpu_to_be64(wr->wr.fast_reg.length);
1802 seg->log2_page_size = wr->wr.fast_reg.page_shift; 1806 seg->log2_page_size = wr->wr.fast_reg.page_shift;
1803 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 1807 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
1808 mlx5_mkey_variant(wr->wr.fast_reg.rkey));
1804} 1809}
1805 1810
1806static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, 1811static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -1913,6 +1918,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
1913 if (unlikely((*seg == qp->sq.qend))) 1918 if (unlikely((*seg == qp->sq.qend)))
1914 *seg = mlx5_get_send_wqe(qp, 0); 1919 *seg = mlx5_get_send_wqe(qp, 0);
1915 if (!li) { 1920 if (!li) {
1921 if (unlikely(wr->wr.fast_reg.page_list_len >
1922 wr->wr.fast_reg.page_list->max_page_list_len))
1923 return -ENOMEM;
1924
1916 set_frwr_pages(*seg, wr, mdev, pd, writ); 1925 set_frwr_pages(*seg, wr, mdev, pd, writ);
1917 *seg += sizeof(struct mlx5_wqe_data_seg); 1926 *seg += sizeof(struct mlx5_wqe_data_seg);
1918 *size += (sizeof(struct mlx5_wqe_data_seg) / 16); 1927 *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
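
The length check added to set_frwr_li_wr() fails a fast-register work request early when it names more pages than its page list can hold, instead of overrunning the WQE. A hedged caller-side sketch using the 3.x fast-reg API (sizes illustrative):

static int post_frwr_sketch(struct ib_device *ibdev, struct ib_qp *qp,
			    int npages)
{
	struct ib_fast_reg_page_list *pl;
	struct ib_send_wr wr, *bad_wr;

	pl = ib_alloc_fast_reg_page_list(ibdev, 16); /* max_page_list_len */
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	memset(&wr, 0, sizeof(wr));
	wr.opcode = IB_WR_FAST_REG_MR;
	wr.wr.fast_reg.page_list = pl;
	wr.wr.fast_reg.page_list_len = npages;	/* > 16 now gets -ENOMEM */
	/* ... iova_start, length, rkey, access flags ... */

	/* ib_free_fast_reg_page_list(pl) once the WR has completed */
	return ib_post_send(qp, &wr, &bad_wr);
}
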
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 0aa478bc291a..210b3eaf188a 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
123 goto err_in; 123 goto err_in;
124 } 124 }
125 125
126 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; 126 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
127 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); 127 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
128 128
129 return 0; 129 return 0;
@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
192 } 192 }
193 srq->wq_sig = !!srq_signature; 193 srq->wq_sig = !!srq_signature;
194 194
195 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; 195 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
196 196
197 return 0; 197 return 0;
198 198
@@ -390,9 +390,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
390 mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); 390 mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
391 ib_umem_release(msrq->umem); 391 ib_umem_release(msrq->umem);
392 } else { 392 } else {
393 kfree(msrq->wrid); 393 destroy_srq_kernel(dev, msrq);
394 mlx5_buf_free(&dev->mdev, &msrq->buf);
395 mlx5_db_free(&dev->mdev, &msrq->db);
396 } 394 }
397 395
398 kfree(srq); 396 kfree(srq);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5b53ca5a2284..8308e3634767 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2834,7 +2834,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2834 init_attr->qp_context = nesqp->ibqp.qp_context; 2834 init_attr->qp_context = nesqp->ibqp.qp_context;
2835 init_attr->send_cq = nesqp->ibqp.send_cq; 2835 init_attr->send_cq = nesqp->ibqp.send_cq;
2836 init_attr->recv_cq = nesqp->ibqp.recv_cq; 2836 init_attr->recv_cq = nesqp->ibqp.recv_cq;
2837 init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq; 2837 init_attr->srq = nesqp->ibqp.srq;
2838 init_attr->cap = attr->cap; 2838 init_attr->cap = attr->cap;
2839 2839
2840 return 0; 2840 return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index adc11d14f878..294dd27b601e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -122,6 +122,32 @@ struct mqe_ctx {
122 bool cmd_done; 122 bool cmd_done;
123}; 123};
124 124
125struct ocrdma_hw_mr {
126 u32 lkey;
127 u8 fr_mr;
128 u8 remote_atomic;
129 u8 remote_rd;
130 u8 remote_wr;
131 u8 local_rd;
132 u8 local_wr;
133 u8 mw_bind;
134 u8 rsvd;
135 u64 len;
136 struct ocrdma_pbl *pbl_table;
137 u32 num_pbls;
138 u32 num_pbes;
139 u32 pbl_size;
140 u32 pbe_size;
141 u64 fbo;
142 u64 va;
143};
144
145struct ocrdma_mr {
146 struct ib_mr ibmr;
147 struct ib_umem *umem;
148 struct ocrdma_hw_mr hwmr;
149};
150
125struct ocrdma_dev { 151struct ocrdma_dev {
126 struct ib_device ibdev; 152 struct ib_device ibdev;
127 struct ocrdma_dev_attr attr; 153 struct ocrdma_dev_attr attr;
@@ -169,7 +195,7 @@ struct ocrdma_dev {
169 struct list_head entry; 195 struct list_head entry;
170 struct rcu_head rcu; 196 struct rcu_head rcu;
171 int id; 197 int id;
172 u64 stag_arr[OCRDMA_MAX_STAG]; 198 struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG];
173 u16 pvid; 199 u16 pvid;
174}; 200};
175 201
@@ -294,31 +320,6 @@ struct ocrdma_qp {
294 u16 db_cache; 320 u16 db_cache;
295}; 321};
296 322
297struct ocrdma_hw_mr {
298 u32 lkey;
299 u8 fr_mr;
300 u8 remote_atomic;
301 u8 remote_rd;
302 u8 remote_wr;
303 u8 local_rd;
304 u8 local_wr;
305 u8 mw_bind;
306 u8 rsvd;
307 u64 len;
308 struct ocrdma_pbl *pbl_table;
309 u32 num_pbls;
310 u32 num_pbes;
311 u32 pbl_size;
312 u32 pbe_size;
313 u64 fbo;
314 u64 va;
315};
316
317struct ocrdma_mr {
318 struct ib_mr ibmr;
319 struct ib_umem *umem;
320 struct ocrdma_hw_mr hwmr;
321};
322 323
323struct ocrdma_ucontext { 324struct ocrdma_ucontext {
324 struct ib_ucontext ibucontext; 325 struct ib_ucontext ibucontext;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 50219ab2279d..56bf32fcb62c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1783,7 +1783,7 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
1783 u32 max_sges = attrs->cap.max_send_sge; 1783 u32 max_sges = attrs->cap.max_send_sge;
1784 1784
1785 /* QP1 may exceed 127 */ 1785 /* QP1 may exceed 127 */
1786 max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1, 1786 max_wqe_allocated = min_t(u32, attrs->cap.max_send_wr + 1,
1787 dev->attr.max_wqe); 1787 dev->attr.max_wqe);
1788 1788
1789 status = ocrdma_build_q_conf(&max_wqe_allocated, 1789 status = ocrdma_build_q_conf(&max_wqe_allocated,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 0ce7674621ea..91443bcb9e0e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -452,9 +452,6 @@ static void ocrdma_remove_free(struct rcu_head *rcu)
452{ 452{
453 struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu); 453 struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
454 454
455 ocrdma_free_resources(dev);
456 ocrdma_cleanup_hw(dev);
457
458 idr_remove(&ocrdma_dev_id, dev->id); 455 idr_remove(&ocrdma_dev_id, dev->id);
459 kfree(dev->mbx_cmd); 456 kfree(dev->mbx_cmd);
460 ib_dealloc_device(&dev->ibdev); 457 ib_dealloc_device(&dev->ibdev);
@@ -470,6 +467,10 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
470 spin_lock(&ocrdma_devlist_lock); 467 spin_lock(&ocrdma_devlist_lock);
471 list_del_rcu(&dev->entry); 468 list_del_rcu(&dev->entry);
472 spin_unlock(&ocrdma_devlist_lock); 469 spin_unlock(&ocrdma_devlist_lock);
470
471 ocrdma_free_resources(dev);
472 ocrdma_cleanup_hw(dev);
473
473 call_rcu(&dev->rcu, ocrdma_remove_free); 474 call_rcu(&dev->rcu, ocrdma_remove_free);
474} 475}
475 476
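
Moving ocrdma_free_resources() and ocrdma_cleanup_hw() out of the RCU callback follows the usual call_rcu() discipline: unpublish under the lock, do sleepable teardown synchronously, and defer only atomic-safe frees. A generic sketch of the pattern (struct foo is illustrative):

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	struct list_head entry;
	struct rcu_head rcu;
};

static void foo_free_rcu(struct rcu_head *rcu)
{
	struct foo *f = container_of(rcu, struct foo, rcu);

	kfree(f);			/* RCU callbacks must not sleep */
}

static void foo_remove(struct foo *f, spinlock_t *lock)
{
	spin_lock(lock);
	list_del_rcu(&f->entry);	/* unpublish from RCU readers */
	spin_unlock(lock);

	/* sleepable teardown goes here, synchronously */
	call_rcu(&f->rcu, foo_free_rcu);
}
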
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 69f1d1221a6b..7686dceadd29 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1981,9 +1981,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1981 1981
1982 wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); 1982 wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
1983 1983
1984 if ((wr->wr.fast_reg.page_list_len > 1984 if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr)
1985 qp->dev->attr.max_pages_per_frmr) ||
1986 (wr->wr.fast_reg.length > 0xffffffffULL))
1987 return -EINVAL; 1985 return -EINVAL;
1988 1986
1989 hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); 1987 hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
@@ -2839,7 +2837,7 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
2839 goto mbx_err; 2837 goto mbx_err;
2840 mr->ibmr.rkey = mr->hwmr.lkey; 2838 mr->ibmr.rkey = mr->hwmr.lkey;
2841 mr->ibmr.lkey = mr->hwmr.lkey; 2839 mr->ibmr.lkey = mr->hwmr.lkey;
2842 dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr; 2840 dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr;
2843 return &mr->ibmr; 2841 return &mr->ibmr;
2844mbx_err: 2842mbx_err:
2845 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); 2843 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
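
With stag_arr now an array of struct ocrdma_mr pointers rather than u64 values, lookups on the completion path need no cast from unsigned long. A hedged sketch of the lookup this typing enables (the helper name is illustrative):

static struct ocrdma_mr *ocrdma_stag_to_mr_sketch(struct ocrdma_dev *dev,
						  u32 lkey)
{
	/* same index as the store above: bits 8 and up of the lkey,
	 * masked to the power-of-two table size */
	return dev->stag_arr[(lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
}
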
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 016e7429adf6..5bfc02f450e6 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -6190,21 +6190,20 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
6190{ 6190{
6191 struct qib_devdata *dd; 6191 struct qib_devdata *dd;
6192 unsigned long val; 6192 unsigned long val;
6193 int ret; 6193 char *n;
6194
6195 if (strlen(str) >= MAX_ATTEN_LEN) { 6194 if (strlen(str) >= MAX_ATTEN_LEN) {
6196 pr_info("txselect_values string too long\n"); 6195 pr_info("txselect_values string too long\n");
6197 return -ENOSPC; 6196 return -ENOSPC;
6198 } 6197 }
6199 ret = kstrtoul(str, 0, &val); 6198 val = simple_strtoul(str, &n, 0);
6200 if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + 6199 if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
6201 TXDDS_MFG_SZ)) { 6200 TXDDS_MFG_SZ)) {
6202 pr_info("txselect_values must start with a number < %d\n", 6201 pr_info("txselect_values must start with a number < %d\n",
6203 TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); 6202 TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
6204 return ret ? ret : -EINVAL; 6203 return -EINVAL;
6205 } 6204 }
6206
6207 strcpy(txselect_list, str); 6205 strcpy(txselect_list, str);
6206
6208 list_for_each_entry(dd, &qib_dev_list, list) 6207 list_for_each_entry(dd, &qib_dev_list, list)
6209 if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) 6208 if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
6210 set_no_qsfp_atten(dd, 1); 6209 set_no_qsfp_atten(dd, 1);
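
txselect_values is a number followed by a comma-separated list, and kstrtoul() fails on any trailing text, so the stricter helper rejected every valid multi-field string. simple_strtoul() parses just the leading number and reports where it stopped. A minimal sketch of the distinction:

/* kstrtoul("12,3,4", 0, &v) returns -EINVAL; this succeeds with
 * v == 12 and leaves the end pointer at the ','.
 */
static int parse_leading_ulong(const char *str, unsigned long *v)
{
	char *end;

	*v = simple_strtoul(str, &end, 0);
	if (end == str)			/* no digits consumed */
		return -EINVAL;
	return 0;
}
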
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 28874f8606f8..941d4d50d8e7 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -54,7 +54,7 @@ struct ib_node_info {
54 __be32 revision; 54 __be32 revision;
55 u8 local_port_num; 55 u8 local_port_num;
56 u8 vendor_id[3]; 56 u8 vendor_id[3];
57} __attribute__ ((packed)); 57} __packed;
58 58
59struct ib_mad_notice_attr { 59struct ib_mad_notice_attr {
60 u8 generic_type; 60 u8 generic_type;
@@ -73,7 +73,7 @@ struct ib_mad_notice_attr {
73 __be16 reserved; 73 __be16 reserved;
74 __be16 lid; /* where violation happened */ 74 __be16 lid; /* where violation happened */
75 u8 port_num; /* where violation happened */ 75 u8 port_num; /* where violation happened */
76 } __attribute__ ((packed)) ntc_129_131; 76 } __packed ntc_129_131;
77 77
78 struct { 78 struct {
79 __be16 reserved; 79 __be16 reserved;
@@ -83,14 +83,14 @@ struct ib_mad_notice_attr {
83 __be32 new_cap_mask; /* new capability mask */ 83 __be32 new_cap_mask; /* new capability mask */
84 u8 reserved3; 84 u8 reserved3;
85 u8 change_flags; /* low 3 bits only */ 85 u8 change_flags; /* low 3 bits only */
86 } __attribute__ ((packed)) ntc_144; 86 } __packed ntc_144;
87 87
88 struct { 88 struct {
89 __be16 reserved; 89 __be16 reserved;
90 __be16 lid; /* lid where sys guid changed */ 90 __be16 lid; /* lid where sys guid changed */
91 __be16 reserved2; 91 __be16 reserved2;
92 __be64 new_sys_guid; 92 __be64 new_sys_guid;
93 } __attribute__ ((packed)) ntc_145; 93 } __packed ntc_145;
94 94
95 struct { 95 struct {
96 __be16 reserved; 96 __be16 reserved;
@@ -104,7 +104,7 @@ struct ib_mad_notice_attr {
104 u8 reserved3; 104 u8 reserved3;
105 u8 dr_trunc_hop; 105 u8 dr_trunc_hop;
106 u8 dr_rtn_path[30]; 106 u8 dr_rtn_path[30];
107 } __attribute__ ((packed)) ntc_256; 107 } __packed ntc_256;
108 108
109 struct { 109 struct {
110 __be16 reserved; 110 __be16 reserved;
@@ -115,7 +115,7 @@ struct ib_mad_notice_attr {
115 __be32 qp2; /* high 8 bits reserved */ 115 __be32 qp2; /* high 8 bits reserved */
116 union ib_gid gid1; 116 union ib_gid gid1;
117 union ib_gid gid2; 117 union ib_gid gid2;
118 } __attribute__ ((packed)) ntc_257_258; 118 } __packed ntc_257_258;
119 119
120 } details; 120 } details;
121}; 121};
@@ -209,7 +209,7 @@ struct ib_pma_portcounters_cong {
209 __be64 port_rcv_packets; 209 __be64 port_rcv_packets;
210 __be64 port_xmit_wait; 210 __be64 port_xmit_wait;
211 __be64 port_adr_events; 211 __be64 port_adr_events;
212} __attribute__ ((packed)); 212} __packed;
213 213
214#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00 214#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00
215#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01 215#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01
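
__packed, from <linux/compiler.h>, is the kernel's preferred spelling of the same GCC attribute; the conversion above is purely cosmetic. The two forms are equivalent:

#include <linux/compiler.h>
#include <linux/types.h>

struct wire_hdr {
	u8	op;
	__be32	len;	/* no compiler padding inserted before this field */
} __packed;		/* expands to __attribute__((packed)) */
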
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index d0a0ea0c14d6..165aee2ca8a0 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -594,8 +594,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
594 else 594 else
595 j = npages; 595 j = npages;
596 596
597 ret = get_user_pages(current, current->mm, addr, 597 ret = get_user_pages_fast(addr, j, 0, pages);
598 j, 0, 1, pages, NULL);
599 if (ret != j) { 598 if (ret != j) {
600 i = 0; 599 i = 0;
601 j = ret; 600 j = ret;
@@ -1294,11 +1293,8 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
1294 int mxp = 8; 1293 int mxp = 8;
1295 int ndesc = 0; 1294 int ndesc = 0;
1296 1295
1297 down_write(&current->mm->mmap_sem);
1298 ret = qib_user_sdma_queue_pkts(dd, ppd, pq, 1296 ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
1299 iov, dim, &list, &mxp, &ndesc); 1297 iov, dim, &list, &mxp, &ndesc);
1300 up_write(&current->mm->mmap_sem);
1301
1302 if (ret < 0) 1298 if (ret < 0)
1303 goto done_unlock; 1299 goto done_unlock;
1304 else { 1300 else {
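
get_user_pages_fast() handles mmap_sem internally (and avoids it entirely on its fast path), which is also why the explicit down_write()/up_write() around the writev path above could be dropped. A hedged sketch of the pin-or-undo pattern with the 3.x signature:

static int pin_pages_sketch(unsigned long addr, int npages,
			    struct page **pages)
{
	/* third argument 0 == read-only access, as in the hunk above */
	int got = get_user_pages_fast(addr, npages, 0, pages);

	if (got < 0)
		return got;
	if (got != npages) {		/* partial pin: undo and fail */
		while (got--)
			put_page(pages[got]);
		return -EFAULT;
	}
	return 0;
}
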
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 012e2c7575ad..a01c7d2cf541 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -150,14 +150,14 @@ struct ib_reth {
150 __be64 vaddr; 150 __be64 vaddr;
151 __be32 rkey; 151 __be32 rkey;
152 __be32 length; 152 __be32 length;
153} __attribute__ ((packed)); 153} __packed;
154 154
155struct ib_atomic_eth { 155struct ib_atomic_eth {
156 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ 156 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
157 __be32 rkey; 157 __be32 rkey;
158 __be64 swap_data; 158 __be64 swap_data;
159 __be64 compare_data; 159 __be64 compare_data;
160} __attribute__ ((packed)); 160} __packed;
161 161
162struct qib_other_headers { 162struct qib_other_headers {
163 __be32 bth[3]; 163 __be32 bth[3];
@@ -178,7 +178,7 @@ struct qib_other_headers {
178 __be32 aeth; 178 __be32 aeth;
179 struct ib_atomic_eth atomic_eth; 179 struct ib_atomic_eth atomic_eth;
180 } u; 180 } u;
181} __attribute__ ((packed)); 181} __packed;
182 182
183/* 183/*
184 * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes 184 * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
@@ -195,12 +195,12 @@ struct qib_ib_header {
195 } l; 195 } l;
196 struct qib_other_headers oth; 196 struct qib_other_headers oth;
197 } u; 197 } u;
198} __attribute__ ((packed)); 198} __packed;
199 199
200struct qib_pio_header { 200struct qib_pio_header {
201 __le32 pbc[2]; 201 __le32 pbc[2];
202 struct qib_ib_header hdr; 202 struct qib_ib_header hdr;
203} __attribute__ ((packed)); 203} __packed;
204 204
205/* 205/*
206 * There is one struct qib_mcast for each multicast GID. 206 * There is one struct qib_mcast for each multicast GID.
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index eb71aaa26a9a..c639f90cfda4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -101,6 +101,7 @@ enum {
101 IPOIB_MCAST_FLAG_SENDONLY = 1, 101 IPOIB_MCAST_FLAG_SENDONLY = 1,
102 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ 102 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
103 IPOIB_MCAST_FLAG_ATTACHED = 3, 103 IPOIB_MCAST_FLAG_ATTACHED = 3,
104 IPOIB_MCAST_JOIN_STARTED = 4,
104 105
105 MAX_SEND_CQE = 16, 106 MAX_SEND_CQE = 16,
106 IPOIB_CM_COPYBREAK = 256, 107 IPOIB_CM_COPYBREAK = 256,
@@ -151,6 +152,7 @@ struct ipoib_mcast {
151 struct sk_buff_head pkt_queue; 152 struct sk_buff_head pkt_queue;
152 153
153 struct net_device *dev; 154 struct net_device *dev;
155 struct completion done;
154}; 156};
155 157
156struct ipoib_rx_buf { 158struct ipoib_rx_buf {
@@ -299,7 +301,7 @@ struct ipoib_dev_priv {
299 301
300 unsigned long flags; 302 unsigned long flags;
301 303
302 struct mutex vlan_mutex; 304 struct rw_semaphore vlan_rwsem;
303 305
304 struct rb_root path_tree; 306 struct rb_root path_tree;
305 struct list_head path_list; 307 struct list_head path_list;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 7a3175400b2a..1377f85911c2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -140,7 +140,8 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
140static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, 140static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
141 struct ipoib_cm_rx_buf *rx_ring, 141 struct ipoib_cm_rx_buf *rx_ring,
142 int id, int frags, 142 int id, int frags,
143 u64 mapping[IPOIB_CM_RX_SG]) 143 u64 mapping[IPOIB_CM_RX_SG],
144 gfp_t gfp)
144{ 145{
145 struct ipoib_dev_priv *priv = netdev_priv(dev); 146 struct ipoib_dev_priv *priv = netdev_priv(dev);
146 struct sk_buff *skb; 147 struct sk_buff *skb;
@@ -164,7 +165,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
164 } 165 }
165 166
166 for (i = 0; i < frags; i++) { 167 for (i = 0; i < frags; i++) {
167 struct page *page = alloc_page(GFP_ATOMIC); 168 struct page *page = alloc_page(gfp);
168 169
169 if (!page) 170 if (!page)
170 goto partial_error; 171 goto partial_error;
@@ -382,7 +383,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
382 383
383 for (i = 0; i < ipoib_recvq_size; ++i) { 384 for (i = 0; i < ipoib_recvq_size; ++i) {
384 if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, 385 if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
385 rx->rx_ring[i].mapping)) { 386 rx->rx_ring[i].mapping,
387 GFP_KERNEL)) {
386 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 388 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
387 ret = -ENOMEM; 389 ret = -ENOMEM;
388 goto err_count; 390 goto err_count;
@@ -639,7 +641,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
639 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, 641 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
640 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; 642 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
641 643
642 newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping); 644 newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
645 mapping, GFP_ATOMIC);
643 if (unlikely(!newskb)) { 646 if (unlikely(!newskb)) {
644 /* 647 /*
645 * If we can't allocate a new RX buffer, dump 648 * If we can't allocate a new RX buffer, dump
@@ -1556,7 +1559,8 @@ int ipoib_cm_dev_init(struct net_device *dev)
1556 for (i = 0; i < ipoib_recvq_size; ++i) { 1559 for (i = 0; i < ipoib_recvq_size; ++i) {
1557 if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i, 1560 if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
1558 priv->cm.num_frags - 1, 1561 priv->cm.num_frags - 1,
1559 priv->cm.srq_ring[i].mapping)) { 1562 priv->cm.srq_ring[i].mapping,
1563 GFP_KERNEL)) {
1560 ipoib_warn(priv, "failed to allocate " 1564 ipoib_warn(priv, "failed to allocate "
1561 "receive buffer %d\n", i); 1565 "receive buffer %d\n", i);
1562 ipoib_cm_dev_cleanup(dev); 1566 ipoib_cm_dev_cleanup(dev);
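
Threading a gfp_t through ipoib_cm_alloc_rx_skb() lets ring setup, which runs in process context and may sleep, use GFP_KERNEL, while the completion handler's refill keeps GFP_ATOMIC. A sketch of the shape (the 12-byte slack follows the driver's convention; error handling is abbreviated):

static struct sk_buff *alloc_rx_skb_sketch(int frags, gfp_t gfp)
{
	struct sk_buff *skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
	int i;

	if (!skb)
		return NULL;

	for (i = 0; i < frags; i++) {
		/* GFP_KERNEL from init paths, GFP_ATOMIC from the
		 * RX completion handler */
		struct page *page = alloc_page(gfp);

		if (!page)
			goto err;
		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
	}
	return skb;

err:
	dev_kfree_skb_any(skb);		/* frees the attached pages too */
	return NULL;
}
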
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 196b1d13cbcb..6a7003ddb0be 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -685,15 +685,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
685 ret = ipoib_ib_post_receives(dev); 685 ret = ipoib_ib_post_receives(dev);
686 if (ret) { 686 if (ret) {
687 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); 687 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
688 ipoib_ib_dev_stop(dev, 1); 688 goto dev_stop;
689 return -1;
690 } 689 }
691 690
692 ret = ipoib_cm_dev_open(dev); 691 ret = ipoib_cm_dev_open(dev);
693 if (ret) { 692 if (ret) {
694 ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret); 693 ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
695 ipoib_ib_dev_stop(dev, 1); 694 goto dev_stop;
696 return -1;
697 } 695 }
698 696
699 clear_bit(IPOIB_STOP_REAPER, &priv->flags); 697 clear_bit(IPOIB_STOP_REAPER, &priv->flags);
@@ -704,6 +702,11 @@ int ipoib_ib_dev_open(struct net_device *dev)
704 napi_enable(&priv->napi); 702 napi_enable(&priv->napi);
705 703
706 return 0; 704 return 0;
705dev_stop:
706 if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
707 napi_enable(&priv->napi);
708 ipoib_ib_dev_stop(dev, 1);
709 return -1;
707} 710}
708 711
709static void ipoib_pkey_dev_check_presence(struct net_device *dev) 712static void ipoib_pkey_dev_check_presence(struct net_device *dev)
@@ -746,10 +749,8 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
746 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { 749 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
747 mutex_lock(&pkey_mutex); 750 mutex_lock(&pkey_mutex);
748 set_bit(IPOIB_PKEY_STOP, &priv->flags); 751 set_bit(IPOIB_PKEY_STOP, &priv->flags);
749 cancel_delayed_work(&priv->pkey_poll_task); 752 cancel_delayed_work_sync(&priv->pkey_poll_task);
750 mutex_unlock(&pkey_mutex); 753 mutex_unlock(&pkey_mutex);
751 if (flush)
752 flush_workqueue(ipoib_workqueue);
753 } 754 }
754 755
755 ipoib_mcast_stop_thread(dev, flush); 756 ipoib_mcast_stop_thread(dev, flush);
@@ -974,7 +975,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
974 u16 new_index; 975 u16 new_index;
975 int result; 976 int result;
976 977
977 mutex_lock(&priv->vlan_mutex); 978 down_read(&priv->vlan_rwsem);
978 979
979 /* 980 /*
980 * Flush any child interfaces too -- they might be up even if 981 * Flush any child interfaces too -- they might be up even if
@@ -983,7 +984,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
983 list_for_each_entry(cpriv, &priv->child_intfs, list) 984 list_for_each_entry(cpriv, &priv->child_intfs, list)
984 __ipoib_ib_dev_flush(cpriv, level); 985 __ipoib_ib_dev_flush(cpriv, level);
985 986
986 mutex_unlock(&priv->vlan_mutex); 987 up_read(&priv->vlan_rwsem);
987 988
988 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) { 989 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
989 /* for non-child devices must check/update the pkey value here */ 990 /* for non-child devices must check/update the pkey value here */
@@ -1081,6 +1082,11 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
1081 struct ipoib_dev_priv *priv = netdev_priv(dev); 1082 struct ipoib_dev_priv *priv = netdev_priv(dev);
1082 1083
1083 ipoib_dbg(priv, "cleaning up ib_dev\n"); 1084 ipoib_dbg(priv, "cleaning up ib_dev\n");
1085 /*
1086 * We must make sure there are no more (path) completions
1087 * that may wish to touch priv fields that are no longer valid
1088 */
1089 ipoib_flush_paths(dev);
1084 1090
1085 ipoib_mcast_stop_thread(dev, 1); 1091 ipoib_mcast_stop_thread(dev, 1);
1086 ipoib_mcast_dev_flush(dev); 1092 ipoib_mcast_dev_flush(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 82cec1af902c..d64ed05fb082 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -119,7 +119,7 @@ int ipoib_open(struct net_device *dev)
119 struct ipoib_dev_priv *cpriv; 119 struct ipoib_dev_priv *cpriv;
120 120
121 /* Bring up any child interfaces too */ 121 /* Bring up any child interfaces too */
122 mutex_lock(&priv->vlan_mutex); 122 down_read(&priv->vlan_rwsem);
123 list_for_each_entry(cpriv, &priv->child_intfs, list) { 123 list_for_each_entry(cpriv, &priv->child_intfs, list) {
124 int flags; 124 int flags;
125 125
@@ -129,7 +129,7 @@ int ipoib_open(struct net_device *dev)
129 129
130 dev_change_flags(cpriv->dev, flags | IFF_UP); 130 dev_change_flags(cpriv->dev, flags | IFF_UP);
131 } 131 }
132 mutex_unlock(&priv->vlan_mutex); 132 up_read(&priv->vlan_rwsem);
133 } 133 }
134 134
135 netif_start_queue(dev); 135 netif_start_queue(dev);
@@ -162,7 +162,7 @@ static int ipoib_stop(struct net_device *dev)
162 struct ipoib_dev_priv *cpriv; 162 struct ipoib_dev_priv *cpriv;
163 163
164 /* Bring down any child interfaces too */ 164 /* Bring down any child interfaces too */
165 mutex_lock(&priv->vlan_mutex); 165 down_read(&priv->vlan_rwsem);
166 list_for_each_entry(cpriv, &priv->child_intfs, list) { 166 list_for_each_entry(cpriv, &priv->child_intfs, list) {
167 int flags; 167 int flags;
168 168
@@ -172,7 +172,7 @@ static int ipoib_stop(struct net_device *dev)
172 172
173 dev_change_flags(cpriv->dev, flags & ~IFF_UP); 173 dev_change_flags(cpriv->dev, flags & ~IFF_UP);
174 } 174 }
175 mutex_unlock(&priv->vlan_mutex); 175 up_read(&priv->vlan_rwsem);
176 } 176 }
177 177
178 return 0; 178 return 0;
@@ -1350,7 +1350,7 @@ void ipoib_setup(struct net_device *dev)
1350 1350
1351 ipoib_set_ethtool_ops(dev); 1351 ipoib_set_ethtool_ops(dev);
1352 1352
1353 netif_napi_add(dev, &priv->napi, ipoib_poll, 100); 1353 netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
1354 1354
1355 dev->watchdog_timeo = HZ; 1355 dev->watchdog_timeo = HZ;
1356 1356
@@ -1372,7 +1372,7 @@ void ipoib_setup(struct net_device *dev)
1372 1372
1373 spin_lock_init(&priv->lock); 1373 spin_lock_init(&priv->lock);
1374 1374
1375 mutex_init(&priv->vlan_mutex); 1375 init_rwsem(&priv->vlan_rwsem);
1376 1376
1377 INIT_LIST_HEAD(&priv->path_list); 1377 INIT_LIST_HEAD(&priv->path_list);
1378 INIT_LIST_HEAD(&priv->child_intfs); 1378 INIT_LIST_HEAD(&priv->child_intfs);
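
The mutex-to-rwsem conversion reflects the access pattern: open, stop and flush only walk child_intfs and can now run concurrently as readers, while child add and delete take the semaphore exclusively. Both sides, in sketch form:

static void open_children_sketch(struct ipoib_dev_priv *priv)
{
	struct ipoib_dev_priv *cpriv;

	down_read(&priv->vlan_rwsem);		/* shared: list is stable */
	list_for_each_entry(cpriv, &priv->child_intfs, list)
		dev_change_flags(cpriv->dev, cpriv->dev->flags | IFF_UP);
	up_read(&priv->vlan_rwsem);
}

static void add_child_sketch(struct ipoib_dev_priv *ppriv,
			     struct ipoib_dev_priv *priv)
{
	down_write(&ppriv->vlan_rwsem);		/* exclusive: list mutates */
	list_add_tail(&priv->list, &ppriv->child_intfs);
	up_write(&ppriv->vlan_rwsem);
}
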
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index cecb98a4c662..d4e005720d01 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -386,8 +386,10 @@ static int ipoib_mcast_join_complete(int status,
386 mcast->mcmember.mgid.raw, status); 386 mcast->mcmember.mgid.raw, status);
387 387
388 /* We trap for port events ourselves. */ 388 /* We trap for port events ourselves. */
389 if (status == -ENETRESET) 389 if (status == -ENETRESET) {
390 return 0; 390 status = 0;
391 goto out;
392 }
391 393
392 if (!status) 394 if (!status)
393 status = ipoib_mcast_join_finish(mcast, &multicast->rec); 395 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
@@ -407,7 +409,8 @@ static int ipoib_mcast_join_complete(int status,
407 if (mcast == priv->broadcast) 409 if (mcast == priv->broadcast)
408 queue_work(ipoib_workqueue, &priv->carrier_on_task); 410 queue_work(ipoib_workqueue, &priv->carrier_on_task);
409 411
410 return 0; 412 status = 0;
413 goto out;
411 } 414 }
412 415
413 if (mcast->logcount++ < 20) { 416 if (mcast->logcount++ < 20) {
@@ -434,7 +437,8 @@ static int ipoib_mcast_join_complete(int status,
434 mcast->backoff * HZ); 437 mcast->backoff * HZ);
435 spin_unlock_irq(&priv->lock); 438 spin_unlock_irq(&priv->lock);
436 mutex_unlock(&mcast_mutex); 439 mutex_unlock(&mcast_mutex);
437 440out:
441 complete(&mcast->done);
438 return status; 442 return status;
439} 443}
440 444
@@ -484,11 +488,15 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
484 } 488 }
485 489
486 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 490 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
491 init_completion(&mcast->done);
492 set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
493
487 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, 494 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
488 &rec, comp_mask, GFP_KERNEL, 495 &rec, comp_mask, GFP_KERNEL,
489 ipoib_mcast_join_complete, mcast); 496 ipoib_mcast_join_complete, mcast);
490 if (IS_ERR(mcast->mc)) { 497 if (IS_ERR(mcast->mc)) {
491 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 498 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
499 complete(&mcast->done);
492 ret = PTR_ERR(mcast->mc); 500 ret = PTR_ERR(mcast->mc);
493 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); 501 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
494 502
@@ -510,10 +518,18 @@ void ipoib_mcast_join_task(struct work_struct *work)
510 struct ipoib_dev_priv *priv = 518 struct ipoib_dev_priv *priv =
511 container_of(work, struct ipoib_dev_priv, mcast_task.work); 519 container_of(work, struct ipoib_dev_priv, mcast_task.work);
512 struct net_device *dev = priv->dev; 520 struct net_device *dev = priv->dev;
521 struct ib_port_attr port_attr;
513 522
514 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) 523 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
515 return; 524 return;
516 525
526 if (ib_query_port(priv->ca, priv->port, &port_attr) ||
527 port_attr.state != IB_PORT_ACTIVE) {
528 ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
529 port_attr.state);
530 return;
531 }
532
517 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) 533 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
518 ipoib_warn(priv, "ib_query_gid() failed\n"); 534 ipoib_warn(priv, "ib_query_gid() failed\n");
519 else 535 else
@@ -751,6 +767,11 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
751 767
752 spin_unlock_irqrestore(&priv->lock, flags); 768 spin_unlock_irqrestore(&priv->lock, flags);
753 769
 770 /* wait for in-flight joins to finish before leaving/freeing */
771 list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
772 if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
773 wait_for_completion(&mcast->done);
774
754 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 775 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
755 ipoib_mcast_leave(dev, mcast); 776 ipoib_mcast_leave(dev, mcast);
756 ipoib_mcast_free(mcast); 777 ipoib_mcast_free(mcast);
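
The completion added in these hunks closes a use-after-free window: the flush path could previously free an mcast whose join callback was still running. The invariant is that every join marked JOIN_STARTED signals done exactly once, from the callback or from the ib_sa_join_multicast() error path, and the flush waits before freeing. In sketch form:

static void start_join_sketch(struct ipoib_mcast *mcast)
{
	init_completion(&mcast->done);
	set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
	/* ib_sa_join_multicast(); both the join-complete callback and
	 * its error path end with complete(&mcast->done) */
}

static void flush_one_sketch(struct ipoib_mcast *mcast)
{
	if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
		wait_for_completion(&mcast->done);
	/* only now is it safe to leave and free the mcast */
}
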
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index f81abe16cf09..c29b5c838833 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -142,10 +142,10 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
142 priv = netdev_priv(dev); 142 priv = netdev_priv(dev);
143 ppriv = netdev_priv(priv->parent); 143 ppriv = netdev_priv(priv->parent);
144 144
145 mutex_lock(&ppriv->vlan_mutex); 145 down_write(&ppriv->vlan_rwsem);
146 unregister_netdevice_queue(dev, head); 146 unregister_netdevice_queue(dev, head);
147 list_del(&priv->list); 147 list_del(&priv->list);
148 mutex_unlock(&ppriv->vlan_mutex); 148 up_write(&ppriv->vlan_rwsem);
149} 149}
150 150
151static size_t ipoib_get_size(const struct net_device *dev) 151static size_t ipoib_get_size(const struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 8292554bccb5..9fad7b5ac8b9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -140,7 +140,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
140 if (!rtnl_trylock()) 140 if (!rtnl_trylock())
141 return restart_syscall(); 141 return restart_syscall();
142 142
143 mutex_lock(&ppriv->vlan_mutex); 143 down_write(&ppriv->vlan_rwsem);
144 144
145 /* 145 /*
146 * First ensure this isn't a duplicate. We check the parent device and 146 * First ensure this isn't a duplicate. We check the parent device and
@@ -163,7 +163,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
163 result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); 163 result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
164 164
165out: 165out:
166 mutex_unlock(&ppriv->vlan_mutex); 166 up_write(&ppriv->vlan_rwsem);
167 167
168 if (result) 168 if (result)
169 free_netdev(priv->dev); 169 free_netdev(priv->dev);
@@ -185,7 +185,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
185 185
186 if (!rtnl_trylock()) 186 if (!rtnl_trylock())
187 return restart_syscall(); 187 return restart_syscall();
188 mutex_lock(&ppriv->vlan_mutex); 188
189 down_write(&ppriv->vlan_rwsem);
189 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { 190 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
190 if (priv->pkey == pkey && 191 if (priv->pkey == pkey &&
191 priv->child_type == IPOIB_LEGACY_CHILD) { 192 priv->child_type == IPOIB_LEGACY_CHILD) {
@@ -195,7 +196,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
195 break; 196 break;
196 } 197 }
197 } 198 }
198 mutex_unlock(&ppriv->vlan_mutex); 199 up_write(&ppriv->vlan_rwsem);
200
199 rtnl_unlock(); 201 rtnl_unlock();
200 202
201 if (dev) { 203 if (dev) {
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index f93baf8254c4..a88631918e85 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -46,6 +46,7 @@
46#include <scsi/scsi.h> 46#include <scsi/scsi.h>
47#include <scsi/scsi_device.h> 47#include <scsi/scsi_device.h>
48#include <scsi/scsi_dbg.h> 48#include <scsi/scsi_dbg.h>
49#include <scsi/scsi_tcq.h>
49#include <scsi/srp.h> 50#include <scsi/srp.h>
50#include <scsi/scsi_transport_srp.h> 51#include <scsi/scsi_transport_srp.h>
51 52
@@ -86,6 +87,32 @@ module_param(topspin_workarounds, int, 0444);
86MODULE_PARM_DESC(topspin_workarounds, 87MODULE_PARM_DESC(topspin_workarounds,
87 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); 88 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
88 89
90static struct kernel_param_ops srp_tmo_ops;
91
92static int srp_reconnect_delay = 10;
93module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
94 S_IRUGO | S_IWUSR);
95MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
96
97static int srp_fast_io_fail_tmo = 15;
98module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
99 S_IRUGO | S_IWUSR);
100MODULE_PARM_DESC(fast_io_fail_tmo,
101 "Number of seconds between the observation of a transport"
102 " layer error and failing all I/O. \"off\" means that this"
103 " functionality is disabled.");
104
105static int srp_dev_loss_tmo = 600;
106module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
107 S_IRUGO | S_IWUSR);
108MODULE_PARM_DESC(dev_loss_tmo,
109 "Maximum number of seconds that the SRP transport should"
110 " insulate transport layer errors. After this time has been"
111 " exceeded the SCSI host is removed. Should be"
112 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
113 " if fast_io_fail_tmo has not been set. \"off\" means that"
114 " this functionality is disabled.");
115
89static void srp_add_one(struct ib_device *device); 116static void srp_add_one(struct ib_device *device);
90static void srp_remove_one(struct ib_device *device); 117static void srp_remove_one(struct ib_device *device);
91static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); 118static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
@@ -102,6 +129,48 @@ static struct ib_client srp_client = {
102 129
103static struct ib_sa_client srp_sa_client; 130static struct ib_sa_client srp_sa_client;
104 131
132static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
133{
134 int tmo = *(int *)kp->arg;
135
136 if (tmo >= 0)
137 return sprintf(buffer, "%d", tmo);
138 else
139 return sprintf(buffer, "off");
140}
141
142static int srp_tmo_set(const char *val, const struct kernel_param *kp)
143{
144 int tmo, res;
145
146 if (strncmp(val, "off", 3) != 0) {
147 res = kstrtoint(val, 0, &tmo);
148 if (res)
149 goto out;
150 } else {
151 tmo = -1;
152 }
153 if (kp->arg == &srp_reconnect_delay)
154 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
155 srp_dev_loss_tmo);
156 else if (kp->arg == &srp_fast_io_fail_tmo)
157 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
158 else
159 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
160 tmo);
161 if (res)
162 goto out;
163 *(int *)kp->arg = tmo;
164
165out:
166 return res;
167}
168
169static struct kernel_param_ops srp_tmo_ops = {
170 .get = srp_tmo_get,
171 .set = srp_tmo_set,
172};
173
105static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) 174static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
106{ 175{
107 return (struct srp_target_port *) host->hostdata; 176 return (struct srp_target_port *) host->hostdata;
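
module_param_cb() with private kernel_param_ops is what lets these timeouts accept either an integer or the literal "off" (stored as -1), with validation before the store. A self-contained sketch of the same pattern; the parameter name, default and 0644 mode are illustrative:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/string.h>

static int my_tmo = 15;			/* -1 means "off" */

static int my_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo, res;

	if (strncmp(val, "off", 3) == 0) {
		tmo = -1;
	} else {
		res = kstrtoint(val, 0, &tmo);
		if (res)
			return res;
	}
	/* cross-parameter validation, like srp_tmo_valid(), goes here */
	*(int *)kp->arg = tmo;
	return 0;
}

static int my_tmo_get(char *buffer, const struct kernel_param *kp)
{
	int tmo = *(int *)kp->arg;

	return tmo >= 0 ? sprintf(buffer, "%d", tmo)
			: sprintf(buffer, "off");
}

static const struct kernel_param_ops my_tmo_ops = {
	.set = my_tmo_set,
	.get = my_tmo_get,
};
module_param_cb(my_tmo, &my_tmo_ops, &my_tmo, 0644);
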
@@ -231,16 +300,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
231 return -ENOMEM; 300 return -ENOMEM;
232 301
233 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, 302 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
234 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 303 srp_recv_completion, NULL, target,
235 target->comp_vector); 304 target->queue_size, target->comp_vector);
236 if (IS_ERR(recv_cq)) { 305 if (IS_ERR(recv_cq)) {
237 ret = PTR_ERR(recv_cq); 306 ret = PTR_ERR(recv_cq);
238 goto err; 307 goto err;
239 } 308 }
240 309
241 send_cq = ib_create_cq(target->srp_host->srp_dev->dev, 310 send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
242 srp_send_completion, NULL, target, SRP_SQ_SIZE, 311 srp_send_completion, NULL, target,
243 target->comp_vector); 312 target->queue_size, target->comp_vector);
244 if (IS_ERR(send_cq)) { 313 if (IS_ERR(send_cq)) {
245 ret = PTR_ERR(send_cq); 314 ret = PTR_ERR(send_cq);
246 goto err_recv_cq; 315 goto err_recv_cq;
@@ -249,8 +318,8 @@ static int srp_create_target_ib(struct srp_target_port *target)
249 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); 318 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
250 319
251 init_attr->event_handler = srp_qp_event; 320 init_attr->event_handler = srp_qp_event;
252 init_attr->cap.max_send_wr = SRP_SQ_SIZE; 321 init_attr->cap.max_send_wr = target->queue_size;
253 init_attr->cap.max_recv_wr = SRP_RQ_SIZE; 322 init_attr->cap.max_recv_wr = target->queue_size;
254 init_attr->cap.max_recv_sge = 1; 323 init_attr->cap.max_recv_sge = 1;
255 init_attr->cap.max_send_sge = 1; 324 init_attr->cap.max_send_sge = 1;
256 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 325 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -296,6 +365,10 @@ err:
296 return ret; 365 return ret;
297} 366}
298 367
368/*
369 * Note: this function may be called without srp_alloc_iu_bufs() having been
370 * invoked. Hence the target->[rt]x_ring checks.
371 */
299static void srp_free_target_ib(struct srp_target_port *target) 372static void srp_free_target_ib(struct srp_target_port *target)
300{ 373{
301 int i; 374 int i;
@@ -307,10 +380,18 @@ static void srp_free_target_ib(struct srp_target_port *target)
307 target->qp = NULL; 380 target->qp = NULL;
308 target->send_cq = target->recv_cq = NULL; 381 target->send_cq = target->recv_cq = NULL;
309 382
310 for (i = 0; i < SRP_RQ_SIZE; ++i) 383 if (target->rx_ring) {
311 srp_free_iu(target->srp_host, target->rx_ring[i]); 384 for (i = 0; i < target->queue_size; ++i)
312 for (i = 0; i < SRP_SQ_SIZE; ++i) 385 srp_free_iu(target->srp_host, target->rx_ring[i]);
313 srp_free_iu(target->srp_host, target->tx_ring[i]); 386 kfree(target->rx_ring);
387 target->rx_ring = NULL;
388 }
389 if (target->tx_ring) {
390 for (i = 0; i < target->queue_size; ++i)
391 srp_free_iu(target->srp_host, target->tx_ring[i]);
392 kfree(target->tx_ring);
393 target->tx_ring = NULL;
394 }
314} 395}
315 396
316static void srp_path_rec_completion(int status, 397static void srp_path_rec_completion(int status,
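
Because the rings are now kzalloc'ed pointer arrays sized from target->queue_size and allocated lazily in srp_alloc_iu_bufs(), teardown must tolerate a target whose rings never came into existence, per the comment above. A hedged sketch of the free side (assuming srp_free_iu() ignores NULL entries, which the partially-filled error path relies on):

static void free_iu_ring_sketch(struct srp_host *host,
				struct srp_iu **ring, int n)
{
	int i;

	if (!ring)			/* connection never reached REP */
		return;

	for (i = 0; i < n; i++)
		srp_free_iu(host, ring[i]);	/* NULL entries skipped */
	kfree(ring);
}
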
@@ -390,7 +471,7 @@ static int srp_send_req(struct srp_target_port *target)
390 req->param.responder_resources = 4; 471 req->param.responder_resources = 4;
391 req->param.remote_cm_response_timeout = 20; 472 req->param.remote_cm_response_timeout = 20;
392 req->param.local_cm_response_timeout = 20; 473 req->param.local_cm_response_timeout = 20;
393 req->param.retry_count = 7; 474 req->param.retry_count = target->tl_retry_count;
394 req->param.rnr_retry_count = 7; 475 req->param.rnr_retry_count = 7;
395 req->param.max_cm_retries = 15; 476 req->param.max_cm_retries = 15;
396 477
@@ -496,7 +577,11 @@ static void srp_free_req_data(struct srp_target_port *target)
496 struct srp_request *req; 577 struct srp_request *req;
497 int i; 578 int i;
498 579
499 for (i = 0, req = target->req_ring; i < SRP_CMD_SQ_SIZE; ++i, ++req) { 580 if (!target->req_ring)
581 return;
582
583 for (i = 0; i < target->req_ring_size; ++i) {
584 req = &target->req_ring[i];
500 kfree(req->fmr_list); 585 kfree(req->fmr_list);
501 kfree(req->map_page); 586 kfree(req->map_page);
502 if (req->indirect_dma_addr) { 587 if (req->indirect_dma_addr) {
@@ -506,6 +591,50 @@ static void srp_free_req_data(struct srp_target_port *target)
506 } 591 }
507 kfree(req->indirect_desc); 592 kfree(req->indirect_desc);
508 } 593 }
594
595 kfree(target->req_ring);
596 target->req_ring = NULL;
597}
598
599static int srp_alloc_req_data(struct srp_target_port *target)
600{
601 struct srp_device *srp_dev = target->srp_host->srp_dev;
602 struct ib_device *ibdev = srp_dev->dev;
603 struct srp_request *req;
604 dma_addr_t dma_addr;
605 int i, ret = -ENOMEM;
606
607 INIT_LIST_HEAD(&target->free_reqs);
608
609 target->req_ring = kzalloc(target->req_ring_size *
610 sizeof(*target->req_ring), GFP_KERNEL);
611 if (!target->req_ring)
612 goto out;
613
614 for (i = 0; i < target->req_ring_size; ++i) {
615 req = &target->req_ring[i];
616 req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
617 GFP_KERNEL);
618 req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *),
619 GFP_KERNEL);
620 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
621 if (!req->fmr_list || !req->map_page || !req->indirect_desc)
622 goto out;
623
624 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
625 target->indirect_size,
626 DMA_TO_DEVICE);
627 if (ib_dma_mapping_error(ibdev, dma_addr))
628 goto out;
629
630 req->indirect_dma_addr = dma_addr;
631 req->index = i;
632 list_add_tail(&req->list, &target->free_reqs);
633 }
634 ret = 0;
635
636out:
637 return ret;
509} 638}
510 639
511/** 640/**
@@ -528,12 +657,20 @@ static void srp_remove_target(struct srp_target_port *target)
528 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 657 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
529 658
530 srp_del_scsi_host_attr(target->scsi_host); 659 srp_del_scsi_host_attr(target->scsi_host);
660 srp_rport_get(target->rport);
531 srp_remove_host(target->scsi_host); 661 srp_remove_host(target->scsi_host);
532 scsi_remove_host(target->scsi_host); 662 scsi_remove_host(target->scsi_host);
533 srp_disconnect_target(target); 663 srp_disconnect_target(target);
534 ib_destroy_cm_id(target->cm_id); 664 ib_destroy_cm_id(target->cm_id);
535 srp_free_target_ib(target); 665 srp_free_target_ib(target);
666 cancel_work_sync(&target->tl_err_work);
667 srp_rport_put(target->rport);
536 srp_free_req_data(target); 668 srp_free_req_data(target);
669
670 spin_lock(&target->srp_host->target_lock);
671 list_del(&target->list);
672 spin_unlock(&target->srp_host->target_lock);
673
537 scsi_host_put(target->scsi_host); 674 scsi_host_put(target->scsi_host);
538} 675}
539 676
@@ -545,10 +682,6 @@ static void srp_remove_work(struct work_struct *work)
545 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 682 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
546 683
547 srp_remove_target(target); 684 srp_remove_target(target);
548
549 spin_lock(&target->srp_host->target_lock);
550 list_del(&target->list);
551 spin_unlock(&target->srp_host->target_lock);
552} 685}
553 686
554static void srp_rport_delete(struct srp_rport *rport) 687static void srp_rport_delete(struct srp_rport *rport)
@@ -686,23 +819,42 @@ static void srp_free_req(struct srp_target_port *target,
686 spin_unlock_irqrestore(&target->lock, flags); 819 spin_unlock_irqrestore(&target->lock, flags);
687} 820}
688 821
689static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) 822static void srp_finish_req(struct srp_target_port *target,
823 struct srp_request *req, int result)
690{ 824{
691 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); 825 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
692 826
693 if (scmnd) { 827 if (scmnd) {
694 srp_free_req(target, req, scmnd, 0); 828 srp_free_req(target, req, scmnd, 0);
695 scmnd->result = DID_RESET << 16; 829 scmnd->result = result;
696 scmnd->scsi_done(scmnd); 830 scmnd->scsi_done(scmnd);
697 } 831 }
698} 832}
699 833
700static int srp_reconnect_target(struct srp_target_port *target) 834static void srp_terminate_io(struct srp_rport *rport)
701{ 835{
702 struct Scsi_Host *shost = target->scsi_host; 836 struct srp_target_port *target = rport->lld_data;
703 int i, ret; 837 int i;
704 838
705 scsi_target_block(&shost->shost_gendev); 839 for (i = 0; i < target->req_ring_size; ++i) {
840 struct srp_request *req = &target->req_ring[i];
841 srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
842 }
843}
844
845/*
846 * It is up to the caller to ensure that srp_rport_reconnect() calls are
847 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
848 * srp_reset_device() or srp_reset_host() calls will occur while this function
849 * is in progress. One way to realize that is not to call this function
850 * directly but to call srp_reconnect_rport() instead since that last function
851 * serializes calls of this function via rport->mutex and also blocks
852 * srp_queuecommand() calls before invoking this function.
853 */
854static int srp_rport_reconnect(struct srp_rport *rport)
855{
856 struct srp_target_port *target = rport->lld_data;
857 int i, ret;
706 858
707 srp_disconnect_target(target); 859 srp_disconnect_target(target);
708 /* 860 /*
@@ -721,41 +873,21 @@ static int srp_reconnect_target(struct srp_target_port *target)
721 else 873 else
722 srp_create_target_ib(target); 874 srp_create_target_ib(target);
723 875
724 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 876 for (i = 0; i < target->req_ring_size; ++i) {
725 struct srp_request *req = &target->req_ring[i]; 877 struct srp_request *req = &target->req_ring[i];
726 if (req->scmnd) 878 srp_finish_req(target, req, DID_RESET << 16);
727 srp_reset_req(target, req);
728 } 879 }
729 880
730 INIT_LIST_HEAD(&target->free_tx); 881 INIT_LIST_HEAD(&target->free_tx);
731 for (i = 0; i < SRP_SQ_SIZE; ++i) 882 for (i = 0; i < target->queue_size; ++i)
732 list_add(&target->tx_ring[i]->list, &target->free_tx); 883 list_add(&target->tx_ring[i]->list, &target->free_tx);
733 884
734 if (ret == 0) 885 if (ret == 0)
735 ret = srp_connect_target(target); 886 ret = srp_connect_target(target);
736 887
737 scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : 888 if (ret == 0)
738 SDEV_TRANSPORT_OFFLINE); 889 shost_printk(KERN_INFO, target->scsi_host,
739 target->transport_offline = !!ret; 890 PFX "reconnect succeeded\n");
740
741 if (ret)
742 goto err;
743
744 shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
745
746 return ret;
747
748err:
749 shost_printk(KERN_ERR, target->scsi_host,
750 PFX "reconnect failed (%d), removing target port.\n", ret);
751
752 /*
753 * We couldn't reconnect, so kill our target port off.
754 * However, we have to defer the real removal because we
755 * are in the context of the SCSI error handler now, which
756 * will deadlock if we call scsi_remove_host().
757 */
758 srp_queue_remove_work(target);
759 891
760 return ret; 892 return ret;
761} 893}
@@ -1302,15 +1434,30 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
1302 PFX "Recv failed with error code %d\n", res); 1434 PFX "Recv failed with error code %d\n", res);
1303} 1435}
1304 1436
1305static void srp_handle_qp_err(enum ib_wc_status wc_status, 1437/**
1306 enum ib_wc_opcode wc_opcode, 1438 * srp_tl_err_work() - handle a transport layer error
1439 *
1440 * Note: This function may get invoked before the rport has been created,
1441 * hence the target->rport test.
1442 */
1443static void srp_tl_err_work(struct work_struct *work)
1444{
1445 struct srp_target_port *target;
1446
1447 target = container_of(work, struct srp_target_port, tl_err_work);
1448 if (target->rport)
1449 srp_start_tl_fail_timers(target->rport);
1450}
1451
1452static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err,
1307 struct srp_target_port *target) 1453 struct srp_target_port *target)
1308{ 1454{
1309 if (target->connected && !target->qp_in_error) { 1455 if (target->connected && !target->qp_in_error) {
1310 shost_printk(KERN_ERR, target->scsi_host, 1456 shost_printk(KERN_ERR, target->scsi_host,
1311 PFX "failed %s status %d\n", 1457 PFX "failed %s status %d\n",
1312 wc_opcode & IB_WC_RECV ? "receive" : "send", 1458 send_err ? "send" : "receive",
1313 wc_status); 1459 wc_status);
1460 queue_work(system_long_wq, &target->tl_err_work);
1314 } 1461 }
1315 target->qp_in_error = true; 1462 target->qp_in_error = true;
1316} 1463}
@@ -1325,7 +1472,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1325 if (likely(wc.status == IB_WC_SUCCESS)) { 1472 if (likely(wc.status == IB_WC_SUCCESS)) {
1326 srp_handle_recv(target, &wc); 1473 srp_handle_recv(target, &wc);
1327 } else { 1474 } else {
1328 srp_handle_qp_err(wc.status, wc.opcode, target); 1475 srp_handle_qp_err(wc.status, false, target);
1329 } 1476 }
1330 } 1477 }
1331} 1478}
@@ -1341,7 +1488,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1341 iu = (struct srp_iu *) (uintptr_t) wc.wr_id; 1488 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1342 list_add(&iu->list, &target->free_tx); 1489 list_add(&iu->list, &target->free_tx);
1343 } else { 1490 } else {
1344 srp_handle_qp_err(wc.status, wc.opcode, target); 1491 srp_handle_qp_err(wc.status, true, target);
1345 } 1492 }
1346 } 1493 }
1347} 1494}
@@ -1349,17 +1496,29 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1349static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 1496static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1350{ 1497{
1351 struct srp_target_port *target = host_to_target(shost); 1498 struct srp_target_port *target = host_to_target(shost);
1499 struct srp_rport *rport = target->rport;
1352 struct srp_request *req; 1500 struct srp_request *req;
1353 struct srp_iu *iu; 1501 struct srp_iu *iu;
1354 struct srp_cmd *cmd; 1502 struct srp_cmd *cmd;
1355 struct ib_device *dev; 1503 struct ib_device *dev;
1356 unsigned long flags; 1504 unsigned long flags;
1357 int len; 1505 int len, result;
1506 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1507
1508 /*
1509 * The SCSI EH thread is the only context from which srp_queuecommand()
1510 * can get invoked for blocked devices (SDEV_BLOCK /
1511 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
1512 * locking the rport mutex if invoked from inside the SCSI EH.
1513 */
1514 if (in_scsi_eh)
1515 mutex_lock(&rport->mutex);
1358 1516
1359 if (unlikely(target->transport_offline)) { 1517 result = srp_chkready(target->rport);
1360 scmnd->result = DID_NO_CONNECT << 16; 1518 if (unlikely(result)) {
1519 scmnd->result = result;
1361 scmnd->scsi_done(scmnd); 1520 scmnd->scsi_done(scmnd);
1362 return 0; 1521 goto unlock_rport;
1363 } 1522 }
1364 1523
1365 spin_lock_irqsave(&target->lock, flags); 1524 spin_lock_irqsave(&target->lock, flags);
@@ -1404,6 +1563,10 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1404 goto err_unmap; 1563 goto err_unmap;
1405 } 1564 }
1406 1565
1566unlock_rport:
1567 if (in_scsi_eh)
1568 mutex_unlock(&rport->mutex);
1569
1407 return 0; 1570 return 0;
1408 1571
1409err_unmap: 1572err_unmap:
@@ -1418,14 +1581,30 @@ err_iu:
1418err_unlock: 1581err_unlock:
1419 spin_unlock_irqrestore(&target->lock, flags); 1582 spin_unlock_irqrestore(&target->lock, flags);
1420 1583
1584 if (in_scsi_eh)
1585 mutex_unlock(&rport->mutex);
1586
1421 return SCSI_MLQUEUE_HOST_BUSY; 1587 return SCSI_MLQUEUE_HOST_BUSY;
1422} 1588}
1423 1589
1590/*
1591 * Note: the resources allocated in this function are freed in
1592 * srp_free_target_ib().
1593 */
1424static int srp_alloc_iu_bufs(struct srp_target_port *target) 1594static int srp_alloc_iu_bufs(struct srp_target_port *target)
1425{ 1595{
1426 int i; 1596 int i;
1427 1597
1428 for (i = 0; i < SRP_RQ_SIZE; ++i) { 1598 target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring),
1599 GFP_KERNEL);
1600 if (!target->rx_ring)
1601 goto err_no_ring;
1602 target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring),
1603 GFP_KERNEL);
1604 if (!target->tx_ring)
1605 goto err_no_ring;
1606
1607 for (i = 0; i < target->queue_size; ++i) {
1429 target->rx_ring[i] = srp_alloc_iu(target->srp_host, 1608 target->rx_ring[i] = srp_alloc_iu(target->srp_host,
1430 target->max_ti_iu_len, 1609 target->max_ti_iu_len,
1431 GFP_KERNEL, DMA_FROM_DEVICE); 1610 GFP_KERNEL, DMA_FROM_DEVICE);
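
The in_scsi_eh test above keeps the hot path lock-free: the SCSI error-handler thread is the only context that may queue to a blocked device, so only that caller must serialize against srp_rport_reconnect() through rport->mutex. The resulting shape, in sketch form:

static int queuecommand_sketch(struct Scsi_Host *shost,
			       struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(shost);
	struct srp_rport *rport = target->rport;
	bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;

	if (in_scsi_eh)			/* rare: EH thread only */
		mutex_lock(&rport->mutex);

	/* ... srp_chkready(), build and post the SRP_CMD ... */

	if (in_scsi_eh)
		mutex_unlock(&rport->mutex);
	return 0;
}
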
@@ -1433,7 +1612,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
1433 goto err; 1612 goto err;
1434 } 1613 }
1435 1614
1436 for (i = 0; i < SRP_SQ_SIZE; ++i) { 1615 for (i = 0; i < target->queue_size; ++i) {
1437 target->tx_ring[i] = srp_alloc_iu(target->srp_host, 1616 target->tx_ring[i] = srp_alloc_iu(target->srp_host,
1438 target->max_iu_len, 1617 target->max_iu_len,
1439 GFP_KERNEL, DMA_TO_DEVICE); 1618 GFP_KERNEL, DMA_TO_DEVICE);
@@ -1446,16 +1625,18 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
1446 return 0; 1625 return 0;
1447 1626
1448err: 1627err:
1449 for (i = 0; i < SRP_RQ_SIZE; ++i) { 1628 for (i = 0; i < target->queue_size; ++i) {
1450 srp_free_iu(target->srp_host, target->rx_ring[i]); 1629 srp_free_iu(target->srp_host, target->rx_ring[i]);
1451 target->rx_ring[i] = NULL;
1452 }
1453
1454 for (i = 0; i < SRP_SQ_SIZE; ++i) {
1455 srp_free_iu(target->srp_host, target->tx_ring[i]); 1630 srp_free_iu(target->srp_host, target->tx_ring[i]);
1456 target->tx_ring[i] = NULL;
1457 } 1631 }
1458 1632
1633
1634err_no_ring:
1635 kfree(target->tx_ring);
1636 target->tx_ring = NULL;
1637 kfree(target->rx_ring);
1638 target->rx_ring = NULL;
1639
1459 return -ENOMEM; 1640 return -ENOMEM;
1460} 1641}
1461 1642
@@ -1506,6 +1687,9 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1506 target->scsi_host->can_queue 1687 target->scsi_host->can_queue
1507 = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE, 1688 = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
1508 target->scsi_host->can_queue); 1689 target->scsi_host->can_queue);
1690 target->scsi_host->cmd_per_lun
1691 = min_t(int, target->scsi_host->can_queue,
1692 target->scsi_host->cmd_per_lun);
1509 } else { 1693 } else {
1510 shost_printk(KERN_WARNING, target->scsi_host, 1694 shost_printk(KERN_WARNING, target->scsi_host,
1511 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 1695 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
@@ -1513,7 +1697,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1513 goto error; 1697 goto error;
1514 } 1698 }
1515 1699
1516 if (!target->rx_ring[0]) { 1700 if (!target->rx_ring) {
1517 ret = srp_alloc_iu_bufs(target); 1701 ret = srp_alloc_iu_bufs(target);
1518 if (ret) 1702 if (ret)
1519 goto error; 1703 goto error;
@@ -1533,7 +1717,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1533 if (ret) 1717 if (ret)
1534 goto error_free; 1718 goto error_free;
1535 1719
1536 for (i = 0; i < SRP_RQ_SIZE; i++) { 1720 for (i = 0; i < target->queue_size; i++) {
1537 struct srp_iu *iu = target->rx_ring[i]; 1721 struct srp_iu *iu = target->rx_ring[i];
1538 ret = srp_post_recv(target, iu); 1722 ret = srp_post_recv(target, iu);
1539 if (ret) 1723 if (ret)
@@ -1672,6 +1856,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1672 if (ib_send_cm_drep(cm_id, NULL, 0)) 1856 if (ib_send_cm_drep(cm_id, NULL, 0))
1673 shost_printk(KERN_ERR, target->scsi_host, 1857 shost_printk(KERN_ERR, target->scsi_host,
1674 PFX "Sending CM DREP failed\n"); 1858 PFX "Sending CM DREP failed\n");
1859 queue_work(system_long_wq, &target->tl_err_work);
1675 break; 1860 break;
1676 1861
1677 case IB_CM_TIMEWAIT_EXIT: 1862 case IB_CM_TIMEWAIT_EXIT:
@@ -1698,9 +1883,61 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 	return 0;
 }
 
+/**
+ * srp_change_queue_type - change device queue tag type
+ * @sdev: scsi device struct
+ * @tag_type: requested tag type
+ *
+ * Returns queue tag type.
+ */
+static int
+srp_change_queue_type(struct scsi_device *sdev, int tag_type)
+{
+	if (sdev->tagged_supported) {
+		scsi_set_tag_type(sdev, tag_type);
+		if (tag_type)
+			scsi_activate_tcq(sdev, sdev->queue_depth);
+		else
+			scsi_deactivate_tcq(sdev, sdev->queue_depth);
+	} else
+		tag_type = 0;
+
+	return tag_type;
+}
+
+/**
+ * srp_change_queue_depth - set device queue depth
+ * @sdev: scsi device struct
+ * @qdepth: requested queue depth
+ * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP
+ * (see include/scsi/scsi_host.h for definition)
+ *
+ * Returns queue depth.
+ */
+static int
+srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
+{
+	struct Scsi_Host *shost = sdev->host;
+	int max_depth;
+	if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) {
+		max_depth = shost->can_queue;
+		if (!sdev->tagged_supported)
+			max_depth = 1;
+		if (qdepth > max_depth)
+			qdepth = max_depth;
+		scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
+	} else if (reason == SCSI_QDEPTH_QFULL)
+		scsi_track_queue_full(sdev, qdepth);
+	else
+		return -EOPNOTSUPP;
+
+	return sdev->queue_depth;
+}
+
 static int srp_send_tsk_mgmt(struct srp_target_port *target,
 			     u64 req_tag, unsigned int lun, u8 func)
 {
+	struct srp_rport *rport = target->rport;
 	struct ib_device *dev = target->srp_host->srp_dev->dev;
 	struct srp_iu *iu;
 	struct srp_tsk_mgmt *tsk_mgmt;
@@ -1710,12 +1947,20 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 
 	init_completion(&target->tsk_mgmt_done);
 
+	/*
+	 * Lock the rport mutex so that srp_create_target_ib() cannot be
+	 * invoked while a task management function is being sent.
+	 */
+	mutex_lock(&rport->mutex);
 	spin_lock_irq(&target->lock);
 	iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
 	spin_unlock_irq(&target->lock);
 
-	if (!iu)
+	if (!iu) {
+		mutex_unlock(&rport->mutex);
+
 		return -1;
+	}
 
 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
 				   DMA_TO_DEVICE);
@@ -1732,8 +1977,11 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 				   DMA_TO_DEVICE);
 	if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
 		srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
+		mutex_unlock(&rport->mutex);
+
 		return -1;
 	}
+	mutex_unlock(&rport->mutex);
 
 	if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
 					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
@@ -1751,11 +1999,11 @@ static int srp_abort(struct scsi_cmnd *scmnd)
 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
 
 	if (!req || !srp_claim_req(target, req, scmnd))
-		return FAILED;
+		return SUCCESS;
 	if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
 			      SRP_TSK_ABORT_TASK) == 0)
 		ret = SUCCESS;
-	else if (target->transport_offline)
+	else if (target->rport->state == SRP_RPORT_LOST)
 		ret = FAST_IO_FAIL;
 	else
 		ret = FAILED;
@@ -1779,10 +2027,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
 	if (target->tsk_mgmt_status)
 		return FAILED;
 
-	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+	for (i = 0; i < target->req_ring_size; ++i) {
 		struct srp_request *req = &target->req_ring[i];
 		if (req->scmnd && req->scmnd->device == scmnd->device)
-			srp_reset_req(target, req);
+			srp_finish_req(target, req, DID_RESET << 16);
 	}
 
 	return SUCCESS;
@@ -1791,14 +2039,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
 static int srp_reset_host(struct scsi_cmnd *scmnd)
 {
 	struct srp_target_port *target = host_to_target(scmnd->device->host);
-	int ret = FAILED;
 
 	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
 
-	if (!srp_reconnect_target(target))
-		ret = SUCCESS;
-
-	return ret;
+	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
 }
 
 static int srp_slave_configure(struct scsi_device *sdev)
@@ -1851,6 +2095,14 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
 }
 
+static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+	return sprintf(buf, "%pI6\n", target->path.sgid.raw);
+}
+
 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
@@ -1907,6 +2159,14 @@ static ssize_t show_comp_vector(struct device *dev,
 	return sprintf(buf, "%d\n", target->comp_vector);
 }
 
+static ssize_t show_tl_retry_count(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+	return sprintf(buf, "%d\n", target->tl_retry_count);
+}
+
 static ssize_t show_cmd_sg_entries(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
@@ -1927,6 +2187,7 @@ static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
+static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
@@ -1934,6 +2195,7 @@ static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
+static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
 
@@ -1942,6 +2204,7 @@ static struct device_attribute *srp_host_attrs[] = {
 	&dev_attr_ioc_guid,
 	&dev_attr_service_id,
 	&dev_attr_pkey,
+	&dev_attr_sgid,
 	&dev_attr_dgid,
 	&dev_attr_orig_dgid,
 	&dev_attr_req_lim,
@@ -1949,6 +2212,7 @@ static struct device_attribute *srp_host_attrs[] = {
 	&dev_attr_local_ib_port,
 	&dev_attr_local_ib_device,
 	&dev_attr_comp_vector,
+	&dev_attr_tl_retry_count,
 	&dev_attr_cmd_sg_entries,
 	&dev_attr_allow_ext_sg,
 	NULL
@@ -1961,14 +2225,16 @@ static struct scsi_host_template srp_template = {
 	.slave_configure = srp_slave_configure,
 	.info = srp_target_info,
 	.queuecommand = srp_queuecommand,
+	.change_queue_depth = srp_change_queue_depth,
+	.change_queue_type = srp_change_queue_type,
 	.eh_abort_handler = srp_abort,
 	.eh_device_reset_handler = srp_reset_device,
 	.eh_host_reset_handler = srp_reset_host,
 	.skip_settle_delay = true,
 	.sg_tablesize = SRP_DEF_SG_TABLESIZE,
-	.can_queue = SRP_CMD_SQ_SIZE,
+	.can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
 	.this_id = -1,
-	.cmd_per_lun = SRP_CMD_SQ_SIZE,
+	.cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
 	.use_clustering = ENABLE_CLUSTERING,
 	.shost_attrs = srp_host_attrs
 };
@@ -1994,6 +2260,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
 	}
 
 	rport->lld_data = target;
+	target->rport = rport;
 
 	spin_lock(&host->target_lock);
 	list_add_tail(&target->list, &host->target_list);
@@ -2073,6 +2340,8 @@ enum {
 	SRP_OPT_ALLOW_EXT_SG = 1 << 10,
 	SRP_OPT_SG_TABLESIZE = 1 << 11,
 	SRP_OPT_COMP_VECTOR = 1 << 12,
+	SRP_OPT_TL_RETRY_COUNT = 1 << 13,
+	SRP_OPT_QUEUE_SIZE = 1 << 14,
 	SRP_OPT_ALL = (SRP_OPT_ID_EXT |
 		       SRP_OPT_IOC_GUID |
 		       SRP_OPT_DGID |
@@ -2094,6 +2363,8 @@ static const match_table_t srp_opt_tokens = {
 	{ SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
 	{ SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
 	{ SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
+	{ SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
+	{ SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
 	{ SRP_OPT_ERR, NULL }
 };
 
@@ -2188,13 +2459,25 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
 			target->scsi_host->max_sectors = token;
 			break;
 
+		case SRP_OPT_QUEUE_SIZE:
+			if (match_int(args, &token) || token < 1) {
+				pr_warn("bad queue_size parameter '%s'\n", p);
+				goto out;
+			}
+			target->scsi_host->can_queue = token;
+			target->queue_size = token + SRP_RSP_SQ_SIZE +
+					     SRP_TSK_MGMT_SQ_SIZE;
+			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
+				target->scsi_host->cmd_per_lun = token;
+			break;
+
 		case SRP_OPT_MAX_CMD_PER_LUN:
-			if (match_int(args, &token)) {
+			if (match_int(args, &token) || token < 1) {
 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
 					p);
 				goto out;
 			}
-			target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE);
+			target->scsi_host->cmd_per_lun = token;
 			break;
 
 		case SRP_OPT_IO_CLASS:
@@ -2257,6 +2540,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
 			target->comp_vector = token;
 			break;
 
+		case SRP_OPT_TL_RETRY_COUNT:
+			if (match_int(args, &token) || token < 2 || token > 7) {
+				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
+					p);
+				goto out;
+			}
+			target->tl_retry_count = token;
+			break;
+
 		default:
 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
 				p);
@@ -2273,6 +2565,12 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
 		pr_warn("target creation request is missing parameter '%s'\n",
 			srp_opt_tokens[i].pattern);
 
+	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
+	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
+		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
+			target->scsi_host->cmd_per_lun,
+			target->scsi_host->can_queue);
+
 out:
 	kfree(options);
 	return ret;
@@ -2287,8 +2585,7 @@ static ssize_t srp_create_target(struct device *dev,
 	struct Scsi_Host *target_host;
 	struct srp_target_port *target;
 	struct ib_device *ibdev = host->srp_dev->dev;
-	dma_addr_t dma_addr;
-	int i, ret;
+	int ret;
 
 	target_host = scsi_host_alloc(&srp_template,
 				      sizeof (struct srp_target_port));
@@ -2311,11 +2608,15 @@ static ssize_t srp_create_target(struct device *dev,
 	target->cmd_sg_cnt = cmd_sg_entries;
 	target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
 	target->allow_ext_sg = allow_ext_sg;
+	target->tl_retry_count = 7;
+	target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
 
 	ret = srp_parse_options(buf, target);
 	if (ret)
 		goto err;
 
+	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
+
 	if (!srp_conn_unique(target->srp_host, target)) {
 		shost_printk(KERN_INFO, target->scsi_host,
 			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
@@ -2339,31 +2640,13 @@ static ssize_t srp_create_target(struct device *dev,
 		     sizeof (struct srp_indirect_buf) +
 		     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
 
+	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
 	INIT_WORK(&target->remove_work, srp_remove_work);
 	spin_lock_init(&target->lock);
 	INIT_LIST_HEAD(&target->free_tx);
-	INIT_LIST_HEAD(&target->free_reqs);
-	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
-		struct srp_request *req = &target->req_ring[i];
-
-		req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof (void *),
-					GFP_KERNEL);
-		req->map_page = kmalloc(SRP_FMR_SIZE * sizeof (void *),
-					GFP_KERNEL);
-		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
-		if (!req->fmr_list || !req->map_page || !req->indirect_desc)
-			goto err_free_mem;
-
-		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
-					     target->indirect_size,
-					     DMA_TO_DEVICE);
-		if (ib_dma_mapping_error(ibdev, dma_addr))
-			goto err_free_mem;
-
-		req->indirect_dma_addr = dma_addr;
-		req->index = i;
-		list_add_tail(&req->list, &target->free_reqs);
-	}
+	ret = srp_alloc_req_data(target);
+	if (ret)
+		goto err_free_mem;
 
 	ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
 
@@ -2612,7 +2895,14 @@ static void srp_remove_one(struct ib_device *device)
 }
 
 static struct srp_function_template ib_srp_transport_functions = {
+	.has_rport_state = true,
+	.reset_timer_if_blocked = true,
+	.reconnect_delay = &srp_reconnect_delay,
+	.fast_io_fail_tmo = &srp_fast_io_fail_tmo,
+	.dev_loss_tmo = &srp_dev_loss_tmo,
+	.reconnect = srp_rport_reconnect,
 	.rport_delete = srp_rport_delete,
+	.terminate_rport_io = srp_terminate_io,
 };
 
 static int __init srp_init_module(void)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index e641088c14dc..575681063f38 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -57,14 +57,11 @@ enum {
 	SRP_MAX_LUN = 512,
 	SRP_DEF_SG_TABLESIZE = 12,
 
-	SRP_RQ_SHIFT = 6,
-	SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT,
-
-	SRP_SQ_SIZE = SRP_RQ_SIZE,
+	SRP_DEFAULT_QUEUE_SIZE = 1 << 6,
 	SRP_RSP_SQ_SIZE = 1,
-	SRP_REQ_SQ_SIZE = SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,
 	SRP_TSK_MGMT_SQ_SIZE = 1,
-	SRP_CMD_SQ_SIZE = SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE,
+	SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE -
+				  SRP_TSK_MGMT_SQ_SIZE,
 
 	SRP_TAG_NO_REQ = ~0U,
 	SRP_TAG_TSK_MGMT = 1U << 31,
@@ -140,7 +137,6 @@ struct srp_target_port {
 	unsigned int cmd_sg_cnt;
 	unsigned int indirect_size;
 	bool allow_ext_sg;
-	bool transport_offline;
 
 	/* Everything above this point is used in the hot path of
 	 * command processing. Try to keep them packed into cachelines.
@@ -153,10 +149,14 @@ struct srp_target_port {
 	u16 io_class;
 	struct srp_host *srp_host;
 	struct Scsi_Host *scsi_host;
+	struct srp_rport *rport;
 	char target_name[32];
 	unsigned int scsi_id;
 	unsigned int sg_tablesize;
+	int queue_size;
+	int req_ring_size;
 	int comp_vector;
+	int tl_retry_count;
 
 	struct ib_sa_path_rec path;
 	__be16 orig_dgid[8];
@@ -172,10 +172,11 @@ struct srp_target_port {
 
 	int zero_req_lim;
 
-	struct srp_iu *tx_ring[SRP_SQ_SIZE];
-	struct srp_iu *rx_ring[SRP_RQ_SIZE];
-	struct srp_request req_ring[SRP_CMD_SQ_SIZE];
+	struct srp_iu **tx_ring;
+	struct srp_iu **rx_ring;
+	struct srp_request *req_ring;
 
+	struct work_struct tl_err_work;
 	struct work_struct remove_work;
 
 	struct list_head list;