-rw-r--r--  Documentation/ABI/testing/configfs-rdma_cm  22
-rw-r--r--  Documentation/ABI/testing/sysfs-class-infiniband  16
-rw-r--r--  Documentation/infiniband/core_locking.txt  2
-rw-r--r--  Documentation/kernel-per-CPU-kthreads.txt  2
-rw-r--r--  MAINTAINERS  32
-rw-r--r--  block/Makefile  2
-rw-r--r--  drivers/infiniband/Kconfig  10
-rw-r--r--  drivers/infiniband/core/Makefile  4
-rw-r--r--  drivers/infiniband/core/addr.c  194
-rw-r--r--  drivers/infiniband/core/cache.c  345
-rw-r--r--  drivers/infiniband/core/cm.c  49
-rw-r--r--  drivers/infiniband/core/cma.c  265
-rw-r--r--  drivers/infiniband/core/cma_configfs.c  321
-rw-r--r--  drivers/infiniband/core/core_priv.h  45
-rw-r--r--  drivers/infiniband/core/cq.c  209
-rw-r--r--  drivers/infiniband/core/device.c  51
-rw-r--r--  drivers/infiniband/core/fmr_pool.c  20
-rw-r--r--  drivers/infiniband/core/mad.c  162
-rw-r--r--  drivers/infiniband/core/mad_priv.h  2
-rw-r--r--  drivers/infiniband/core/multicast.c  17
-rw-r--r--  drivers/infiniband/core/roce_gid_mgmt.c  81
-rw-r--r--  drivers/infiniband/core/sa_query.c  91
-rw-r--r--  drivers/infiniband/core/sysfs.c  377
-rw-r--r--  drivers/infiniband/core/ud_header.c  155
-rw-r--r--  drivers/infiniband/core/umem_odp.c  2
-rw-r--r--  drivers/infiniband/core/user_mad.c  1
-rw-r--r--  drivers/infiniband/core/uverbs.h  2
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c  38
-rw-r--r--  drivers/infiniband/core/uverbs_main.c  13
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c  1
-rw-r--r--  drivers/infiniband/core/verbs.c  238
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c  4
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cq.c  4
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_mem.c  102
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c  146
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h  15
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c  82
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c  14
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c  3
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c  57
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h  13
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c  251
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c  3
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c  5
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h  7
-rw-r--r--  drivers/infiniband/hw/cxgb4/user.h  2
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c  3
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c  3
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c  102
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h  10
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c  22
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c  318
-rw-r--r--  drivers/infiniband/hw/mlx4/srq.c  3
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c  32
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c  31
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c  447
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  121
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c  29
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c  1014
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c  41
-rw-r--r--  drivers/infiniband/hw/mlx5/user.h  63
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c  3
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c  84
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c  2
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.c  17
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.h  2
-rw-r--r--  drivers/infiniband/hw/nes/nes_utils.c  2
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c  216
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.h  4
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c  7
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c  1
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c  163
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.h  3
-rw-r--r--  drivers/infiniband/hw/qib/qib_mr.c  51
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c  46
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c  12
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h  4
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs_mcast.c  35
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_debugfs.c  5
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c  4
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.c  24
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.h  2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_vnic.c  54
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h  6
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c  21
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ethtool.c  14
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c  40
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c  45
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c  13
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h  156
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c  323
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c  179
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c  337
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c  118
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.h  41
-rw-r--r--  drivers/infiniband/ulp/isert/isert_proto.h  47
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c  205
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h  7
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c  443
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.h  53
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw.c  39
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4.h  5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/port.c  7
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/qp.c  26
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c  6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c  1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c  61
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/qp.c  233
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/srq.c  4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/transobj.c  50
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/vport.c  116
-rw-r--r--  drivers/scsi/Kconfig  1
-rw-r--r--  drivers/scsi/be2iscsi/Kconfig  1
-rw-r--r--  drivers/scsi/be2iscsi/be.h  4
-rw-r--r--  drivers/scsi/be2iscsi/be_iscsi.c  4
-rw-r--r--  drivers/scsi/be2iscsi/be_main.c  20
-rw-r--r--  drivers/scsi/ipr.c  25
-rw-r--r--  drivers/scsi/ipr.h  4
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c  21
-rw-r--r--  drivers/staging/rdma/amso1100/c2_cq.c  3
-rw-r--r--  drivers/staging/rdma/amso1100/c2_provider.c  72
-rw-r--r--  drivers/staging/rdma/ehca/ehca_classes.h  5
-rw-r--r--  drivers/staging/rdma/ehca/ehca_iverbs.h  16
-rw-r--r--  drivers/staging/rdma/ehca/ehca_main.c  4
-rw-r--r--  drivers/staging/rdma/ehca/ehca_mrmw.c  477
-rw-r--r--  drivers/staging/rdma/ehca/ehca_mrmw.h  5
-rw-r--r--  drivers/staging/rdma/ehca/ehca_reqs.c  1
-rw-r--r--  drivers/staging/rdma/hfi1/mr.c  51
-rw-r--r--  drivers/staging/rdma/hfi1/verbs.c  1
-rw-r--r--  drivers/staging/rdma/hfi1/verbs.h  4
-rw-r--r--  drivers/staging/rdma/ipath/ipath_mr.c  55
-rw-r--r--  drivers/staging/rdma/ipath/ipath_verbs.c  1
-rw-r--r--  drivers/staging/rdma/ipath/ipath_verbs.h  4
-rw-r--r--  include/linux/blk-iopoll.h  46
-rw-r--r--  include/linux/interrupt.h  2
-rw-r--r--  include/linux/irq_poll.h  25
-rw-r--r--  include/linux/mlx4/cmd.h  3
-rw-r--r--  include/linux/mlx4/device.h  15
-rw-r--r--  include/linux/mlx4/qp.h  15
-rw-r--r--  include/linux/mlx5/device.h  40
-rw-r--r--  include/linux/mlx5/driver.h  20
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h  48
-rw-r--r--  include/linux/mlx5/qp.h  46
-rw-r--r--  include/linux/mlx5/transobj.h (renamed from drivers/net/ethernet/mellanox/mlx5/core/transobj.h)  10
-rw-r--r--  include/linux/mlx5/vport.h  8
-rw-r--r--  include/linux/sunrpc/svc_rdma.h  39
-rw-r--r--  include/rdma/ib_addr.h  16
-rw-r--r--  include/rdma/ib_cache.h  4
-rw-r--r--  include/rdma/ib_mad.h  2
-rw-r--r--  include/rdma/ib_pack.h  45
-rw-r--r--  include/rdma/ib_pma.h  1
-rw-r--r--  include/rdma/ib_sa.h  3
-rw-r--r--  include/rdma/ib_verbs.h  356
-rw-r--r--  include/scsi/iser.h  78
-rw-r--r--  include/trace/events/irq.h  2
-rw-r--r--  lib/Kconfig  5
-rw-r--r--  lib/Makefile  1
-rw-r--r--  lib/irq_poll.c (renamed from block/blk-iopoll.c)  108
-rw-r--r--  net/rds/ib.c  34
-rw-r--r--  net/rds/iw.c  23
-rw-r--r--  net/sunrpc/xprt.c  1
-rw-r--r--  net/sunrpc/xprtrdma/Makefile  2
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c  7
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma.c  41
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c  371
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  56
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c  33
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c  360
-rw-r--r--  net/sunrpc/xprtrdma/transport.c  30
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c  24
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h  21
-rw-r--r--  tools/lib/traceevent/event-parse.c  2
-rw-r--r--  tools/perf/util/trace-event-parse.c  2
174 files changed, 7110 insertions, 4669 deletions
diff --git a/Documentation/ABI/testing/configfs-rdma_cm b/Documentation/ABI/testing/configfs-rdma_cm
new file mode 100644
index 000000000000..5c389aaf5291
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-rdma_cm
@@ -0,0 +1,22 @@
1What: /config/rdma_cm
2Date: November 29, 2015
3KernelVersion: 4.4.0
4Description:	Interface is used to configure RDMA-capable HCAs with respect to
5 RDMA-CM attributes.
6
7 Attributes are visible only when configfs is mounted. To mount
8 configfs in /config directory use:
9 # mount -t configfs none /config/
10
11 In order to set parameters related to a specific HCA, a directory
12 for this HCA has to be created:
13 mkdir -p /config/rdma_cm/<hca>
14
15
16What: /config/rdma_cm/<hca>/ports/<port-num>/default_roce_mode
17Date: November 29, 2015
18KernelVersion: 4.4.0
19Description: RDMA-CM based connections from HCA <hca> at port <port-num>
20 will be initiated with this RoCE type as default.
21 The possible RoCE types are either "IB/RoCE v1" or "RoCE v2".
22 This parameter has RW access.
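
A minimal usage sketch for the interface described above (the HCA name "mlx5_0" and port number "1" are hypothetical placeholders; substitute your own device and port, and note that the /config mount point must already exist). Creating the per-HCA directory is expected to expose the ports/<port-num>/default_roce_mode attribute, which can then be read back or written with one of the RoCE type strings listed above:

	# mount -t configfs none /config/
	# mkdir -p /config/rdma_cm/mlx5_0
	# cat /config/rdma_cm/mlx5_0/ports/1/default_roce_mode
	# echo "RoCE v2" > /config/rdma_cm/mlx5_0/ports/1/default_roce_mode
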
diff --git a/Documentation/ABI/testing/sysfs-class-infiniband b/Documentation/ABI/testing/sysfs-class-infiniband
new file mode 100644
index 000000000000..a86abe66a316
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-infiniband
@@ -0,0 +1,16 @@
1What: /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/ndevs/<gid-index>
2Date: November 29, 2015
3KernelVersion: 4.4.0
4Contact: linux-rdma@vger.kernel.org
5Description: The net-device's name associated with the GID resides
6 at index <gid-index>.
7
8What: /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/types/<gid-index>
9Date: November 29, 2015
10KernelVersion: 4.4.0
11Contact: linux-rdma@vger.kernel.org
12Description: The RoCE type of the associated GID resides at index <gid-index>.
 13		This could either be "IB/RoCE v1" for IB and RoCE v1 based GIDs
14 or "RoCE v2" for RoCE v2 based GIDs.
15
16
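
A short read-only sketch of the new gid_attrs entries (again, "mlx5_0", port "1" and GID index "0" are placeholders): the first command prints the name of the net device associated with the GID at that index, and the second prints its RoCE type, either "IB/RoCE v1" or "RoCE v2" as described above:

	# cat /sys/class/infiniband/mlx5_0/ports/1/gid_attrs/ndevs/0
	# cat /sys/class/infiniband/mlx5_0/ports/1/gid_attrs/types/0
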
diff --git a/Documentation/infiniband/core_locking.txt b/Documentation/infiniband/core_locking.txt
index e1678542279a..4b1f36b6ada0 100644
--- a/Documentation/infiniband/core_locking.txt
+++ b/Documentation/infiniband/core_locking.txt
@@ -15,7 +15,6 @@ Sleeping and interrupt context
15 modify_ah 15 modify_ah
16 query_ah 16 query_ah
17 destroy_ah 17 destroy_ah
18 bind_mw
19 post_send 18 post_send
20 post_recv 19 post_recv
21 poll_cq 20 poll_cq
@@ -31,7 +30,6 @@ Sleeping and interrupt context
31 ib_modify_ah 30 ib_modify_ah
32 ib_query_ah 31 ib_query_ah
33 ib_destroy_ah 32 ib_destroy_ah
34 ib_bind_mw
35 ib_post_send 33 ib_post_send
36 ib_post_recv 34 ib_post_recv
37 ib_req_notify_cq 35 ib_req_notify_cq
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index f4cbfe0ba108..edec3a3e648d 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -90,7 +90,7 @@ BLOCK_SOFTIRQ: Do all of the following:
90 from being initiated from tasks that might run on the CPU to 90 from being initiated from tasks that might run on the CPU to
91 be de-jittered. (It is OK to force this CPU offline and then 91 be de-jittered. (It is OK to force this CPU offline and then
92 bring it back online before you start your application.) 92 bring it back online before you start your application.)
93BLOCK_IOPOLL_SOFTIRQ: Do all of the following: 93IRQ_POLL_SOFTIRQ: Do all of the following:
941. Force block-device interrupts onto some other CPU. 941. Force block-device interrupts onto some other CPU.
952. Initiate any block I/O and block-I/O polling on other CPUs. 952. Initiate any block I/O and block-I/O polling on other CPUs.
963. Once your application has started, prevent CPU-hotplug operations 963. Once your application has started, prevent CPU-hotplug operations
diff --git a/MAINTAINERS b/MAINTAINERS
index 8f3f93c9571e..59373e5bb09e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7151,27 +7151,45 @@ W: https://linuxtv.org
7151S: Odd Fixes 7151S: Odd Fixes
7152F: drivers/media/radio/radio-miropcm20* 7152F: drivers/media/radio/radio-miropcm20*
7153 7153
7154Mellanox MLX5 core VPI driver 7154MELLANOX MLX4 core VPI driver
7155M: Eli Cohen <eli@mellanox.com> 7155M: Yishai Hadas <yishaih@mellanox.com>
7156L: netdev@vger.kernel.org 7156L: netdev@vger.kernel.org
7157L: linux-rdma@vger.kernel.org 7157L: linux-rdma@vger.kernel.org
7158W: http://www.mellanox.com 7158W: http://www.mellanox.com
7159Q: http://patchwork.ozlabs.org/project/netdev/list/ 7159Q: http://patchwork.ozlabs.org/project/netdev/list/
7160S: Supported
7161F: drivers/net/ethernet/mellanox/mlx4/
7162F: include/linux/mlx4/
7163
7164MELLANOX MLX4 IB driver
7165M: Yishai Hadas <yishaih@mellanox.com>
7166L: linux-rdma@vger.kernel.org
7167W: http://www.mellanox.com
7160Q: http://patchwork.kernel.org/project/linux-rdma/list/ 7168Q: http://patchwork.kernel.org/project/linux-rdma/list/
7161T: git git://openfabrics.org/~eli/connect-ib.git 7169S: Supported
7170F: drivers/infiniband/hw/mlx4/
7171F: include/linux/mlx4/
7172
7173MELLANOX MLX5 core VPI driver
7174M: Matan Barak <matanb@mellanox.com>
7175M: Leon Romanovsky <leonro@mellanox.com>
7176L: netdev@vger.kernel.org
7177L: linux-rdma@vger.kernel.org
7178W: http://www.mellanox.com
7179Q: http://patchwork.ozlabs.org/project/netdev/list/
7162S: Supported 7180S: Supported
7163F: drivers/net/ethernet/mellanox/mlx5/core/ 7181F: drivers/net/ethernet/mellanox/mlx5/core/
7164F: include/linux/mlx5/ 7182F: include/linux/mlx5/
7165 7183
7166Mellanox MLX5 IB driver 7184MELLANOX MLX5 IB driver
7167M: Eli Cohen <eli@mellanox.com> 7185M: Matan Barak <matanb@mellanox.com>
7186M: Leon Romanovsky <leonro@mellanox.com>
7168L: linux-rdma@vger.kernel.org 7187L: linux-rdma@vger.kernel.org
7169W: http://www.mellanox.com 7188W: http://www.mellanox.com
7170Q: http://patchwork.kernel.org/project/linux-rdma/list/ 7189Q: http://patchwork.kernel.org/project/linux-rdma/list/
7171T: git git://openfabrics.org/~eli/connect-ib.git
7172S: Supported 7190S: Supported
7173F: include/linux/mlx5/
7174F: drivers/infiniband/hw/mlx5/ 7191F: drivers/infiniband/hw/mlx5/
7192F: include/linux/mlx5/
7175 7193
7176MELEXIS MLX90614 DRIVER 7194MELEXIS MLX90614 DRIVER
7177M: Crt Mori <cmo@melexis.com> 7195M: Crt Mori <cmo@melexis.com>
diff --git a/block/Makefile b/block/Makefile
index db5f622c9d67..9eda2322b2d4 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
5obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ 5obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
6 blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ 6 blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ 7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
8 blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ 8 blk-lib.o blk-mq.o blk-mq-tag.o \
9 blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ 9 blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
10 genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ 10 genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
11 badblocks.o partitions/ 11 badblocks.o partitions/
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index aa26f3c3416b..8a8440c0eed1 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -5,6 +5,7 @@ menuconfig INFINIBAND
5 depends on NET 5 depends on NET
6 depends on INET 6 depends on INET
7 depends on m || IPV6 != m 7 depends on m || IPV6 != m
8 select IRQ_POLL
8 ---help--- 9 ---help---
9 Core support for InfiniBand (IB). Make sure to also select 10 Core support for InfiniBand (IB). Make sure to also select
10 any protocols you wish to use as well as drivers for your 11 any protocols you wish to use as well as drivers for your
@@ -54,6 +55,15 @@ config INFINIBAND_ADDR_TRANS
54 depends on INFINIBAND 55 depends on INFINIBAND
55 default y 56 default y
56 57
58config INFINIBAND_ADDR_TRANS_CONFIGFS
59 bool
60 depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
61 default y
62 ---help---
63 ConfigFS support for RDMA communication manager (CM).
 64	  This allows the user to configure the default GID type that the CM
 65	  uses for each device when initiating new connections.
66
57source "drivers/infiniband/hw/mthca/Kconfig" 67source "drivers/infiniband/hw/mthca/Kconfig"
58source "drivers/infiniband/hw/qib/Kconfig" 68source "drivers/infiniband/hw/qib/Kconfig"
59source "drivers/infiniband/hw/cxgb3/Kconfig" 69source "drivers/infiniband/hw/cxgb3/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d43a8994ac5c..f818538a7f4e 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
8obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ 8obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
9 $(user_access-y) 9 $(user_access-y)
10 10
11ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ 11ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \
12 device.o fmr_pool.o cache.o netlink.o \ 12 device.o fmr_pool.o cache.o netlink.o \
13 roce_gid_mgmt.o 13 roce_gid_mgmt.o
14ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o 14ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
@@ -24,6 +24,8 @@ iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
24 24
25rdma_cm-y := cma.o 25rdma_cm-y := cma.o
26 26
27rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
28
27rdma_ucm-y := ucma.o 29rdma_ucm-y := ucma.o
28 30
29ib_addr-y := addr.o 31ib_addr-y := addr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 34b1adad07aa..337353d86cfa 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -121,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
121} 121}
122EXPORT_SYMBOL(rdma_copy_addr); 122EXPORT_SYMBOL(rdma_copy_addr);
123 123
124int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, 124int rdma_translate_ip(const struct sockaddr *addr,
125 struct rdma_dev_addr *dev_addr,
125 u16 *vlan_id) 126 u16 *vlan_id)
126{ 127{
127 struct net_device *dev; 128 struct net_device *dev;
@@ -139,7 +140,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
139 switch (addr->sa_family) { 140 switch (addr->sa_family) {
140 case AF_INET: 141 case AF_INET:
141 dev = ip_dev_find(dev_addr->net, 142 dev = ip_dev_find(dev_addr->net,
142 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 143 ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
143 144
144 if (!dev) 145 if (!dev)
145 return ret; 146 return ret;
@@ -154,7 +155,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
154 rcu_read_lock(); 155 rcu_read_lock();
155 for_each_netdev_rcu(dev_addr->net, dev) { 156 for_each_netdev_rcu(dev_addr->net, dev) {
156 if (ipv6_chk_addr(dev_addr->net, 157 if (ipv6_chk_addr(dev_addr->net,
157 &((struct sockaddr_in6 *) addr)->sin6_addr, 158 &((const struct sockaddr_in6 *)addr)->sin6_addr,
158 dev, 1)) { 159 dev, 1)) {
159 ret = rdma_copy_addr(dev_addr, dev, NULL); 160 ret = rdma_copy_addr(dev_addr, dev, NULL);
160 if (vlan_id) 161 if (vlan_id)
@@ -198,7 +199,8 @@ static void queue_req(struct addr_req *req)
198 mutex_unlock(&lock); 199 mutex_unlock(&lock);
199} 200}
200 201
201static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr) 202static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
203 const void *daddr)
202{ 204{
203 struct neighbour *n; 205 struct neighbour *n;
204 int ret; 206 int ret;
@@ -222,8 +224,9 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, v
222} 224}
223 225
224static int addr4_resolve(struct sockaddr_in *src_in, 226static int addr4_resolve(struct sockaddr_in *src_in,
225 struct sockaddr_in *dst_in, 227 const struct sockaddr_in *dst_in,
226 struct rdma_dev_addr *addr) 228 struct rdma_dev_addr *addr,
229 struct rtable **prt)
227{ 230{
228 __be32 src_ip = src_in->sin_addr.s_addr; 231 __be32 src_ip = src_in->sin_addr.s_addr;
229 __be32 dst_ip = dst_in->sin_addr.s_addr; 232 __be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -243,33 +246,29 @@ static int addr4_resolve(struct sockaddr_in *src_in,
243 src_in->sin_family = AF_INET; 246 src_in->sin_family = AF_INET;
244 src_in->sin_addr.s_addr = fl4.saddr; 247 src_in->sin_addr.s_addr = fl4.saddr;
245 248
246 if (rt->dst.dev->flags & IFF_LOOPBACK) { 249 /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
247 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL); 250 * routable) and we could set the network type accordingly.
248 if (!ret) 251 */
249 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 252 if (rt->rt_uses_gateway)
250 goto put; 253 addr->network = RDMA_NETWORK_IPV4;
251 }
252 254
253 /* If the device does ARP internally, return 'done' */ 255 addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
254 if (rt->dst.dev->flags & IFF_NOARP) {
255 ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
256 goto put;
257 }
258 256
259 ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr); 257 *prt = rt;
260put: 258 return 0;
261 ip_rt_put(rt);
262out: 259out:
263 return ret; 260 return ret;
264} 261}
265 262
266#if IS_ENABLED(CONFIG_IPV6) 263#if IS_ENABLED(CONFIG_IPV6)
267static int addr6_resolve(struct sockaddr_in6 *src_in, 264static int addr6_resolve(struct sockaddr_in6 *src_in,
268 struct sockaddr_in6 *dst_in, 265 const struct sockaddr_in6 *dst_in,
269 struct rdma_dev_addr *addr) 266 struct rdma_dev_addr *addr,
267 struct dst_entry **pdst)
270{ 268{
271 struct flowi6 fl6; 269 struct flowi6 fl6;
272 struct dst_entry *dst; 270 struct dst_entry *dst;
271 struct rt6_info *rt;
273 int ret; 272 int ret;
274 273
275 memset(&fl6, 0, sizeof fl6); 274 memset(&fl6, 0, sizeof fl6);
@@ -281,6 +280,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
281 if ((ret = dst->error)) 280 if ((ret = dst->error))
282 goto put; 281 goto put;
283 282
283 rt = (struct rt6_info *)dst;
284 if (ipv6_addr_any(&fl6.saddr)) { 284 if (ipv6_addr_any(&fl6.saddr)) {
285 ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev, 285 ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
286 &fl6.daddr, 0, &fl6.saddr); 286 &fl6.daddr, 0, &fl6.saddr);
@@ -291,43 +291,111 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
291 src_in->sin6_addr = fl6.saddr; 291 src_in->sin6_addr = fl6.saddr;
292 } 292 }
293 293
294 if (dst->dev->flags & IFF_LOOPBACK) { 294 /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
295 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL); 295 * routable) and we could set the network type accordingly.
296 if (!ret) 296 */
297 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 297 if (rt->rt6i_flags & RTF_GATEWAY)
298 goto put; 298 addr->network = RDMA_NETWORK_IPV6;
299 }
300 299
301 /* If the device does ARP internally, return 'done' */ 300 addr->hoplimit = ip6_dst_hoplimit(dst);
302 if (dst->dev->flags & IFF_NOARP) {
303 ret = rdma_copy_addr(addr, dst->dev, NULL);
304 goto put;
305 }
306 301
307 ret = dst_fetch_ha(dst, addr, &fl6.daddr); 302 *pdst = dst;
303 return 0;
308put: 304put:
309 dst_release(dst); 305 dst_release(dst);
310 return ret; 306 return ret;
311} 307}
312#else 308#else
313static int addr6_resolve(struct sockaddr_in6 *src_in, 309static int addr6_resolve(struct sockaddr_in6 *src_in,
314 struct sockaddr_in6 *dst_in, 310 const struct sockaddr_in6 *dst_in,
315 struct rdma_dev_addr *addr) 311 struct rdma_dev_addr *addr,
312 struct dst_entry **pdst)
316{ 313{
317 return -EADDRNOTAVAIL; 314 return -EADDRNOTAVAIL;
318} 315}
319#endif 316#endif
320 317
318static int addr_resolve_neigh(struct dst_entry *dst,
319 const struct sockaddr *dst_in,
320 struct rdma_dev_addr *addr)
321{
322 if (dst->dev->flags & IFF_LOOPBACK) {
323 int ret;
324
325 ret = rdma_translate_ip(dst_in, addr, NULL);
326 if (!ret)
327 memcpy(addr->dst_dev_addr, addr->src_dev_addr,
328 MAX_ADDR_LEN);
329
330 return ret;
331 }
332
333 /* If the device doesn't do ARP internally */
334 if (!(dst->dev->flags & IFF_NOARP)) {
335 const struct sockaddr_in *dst_in4 =
336 (const struct sockaddr_in *)dst_in;
337 const struct sockaddr_in6 *dst_in6 =
338 (const struct sockaddr_in6 *)dst_in;
339
340 return dst_fetch_ha(dst, addr,
341 dst_in->sa_family == AF_INET ?
342 (const void *)&dst_in4->sin_addr.s_addr :
343 (const void *)&dst_in6->sin6_addr);
344 }
345
346 return rdma_copy_addr(addr, dst->dev, NULL);
347}
348
321static int addr_resolve(struct sockaddr *src_in, 349static int addr_resolve(struct sockaddr *src_in,
322 struct sockaddr *dst_in, 350 const struct sockaddr *dst_in,
323 struct rdma_dev_addr *addr) 351 struct rdma_dev_addr *addr,
352 bool resolve_neigh)
324{ 353{
354 struct net_device *ndev;
355 struct dst_entry *dst;
356 int ret;
357
325 if (src_in->sa_family == AF_INET) { 358 if (src_in->sa_family == AF_INET) {
326 return addr4_resolve((struct sockaddr_in *) src_in, 359 struct rtable *rt = NULL;
327 (struct sockaddr_in *) dst_in, addr); 360 const struct sockaddr_in *dst_in4 =
328 } else 361 (const struct sockaddr_in *)dst_in;
329 return addr6_resolve((struct sockaddr_in6 *) src_in, 362
330 (struct sockaddr_in6 *) dst_in, addr); 363 ret = addr4_resolve((struct sockaddr_in *)src_in,
364 dst_in4, addr, &rt);
365 if (ret)
366 return ret;
367
368 if (resolve_neigh)
369 ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
370
371 ndev = rt->dst.dev;
372 dev_hold(ndev);
373
374 ip_rt_put(rt);
375 } else {
376 const struct sockaddr_in6 *dst_in6 =
377 (const struct sockaddr_in6 *)dst_in;
378
379 ret = addr6_resolve((struct sockaddr_in6 *)src_in,
380 dst_in6, addr,
381 &dst);
382 if (ret)
383 return ret;
384
385 if (resolve_neigh)
386 ret = addr_resolve_neigh(dst, dst_in, addr);
387
388 ndev = dst->dev;
389 dev_hold(ndev);
390
391 dst_release(dst);
392 }
393
394 addr->bound_dev_if = ndev->ifindex;
395 addr->net = dev_net(ndev);
396 dev_put(ndev);
397
398 return ret;
331} 399}
332 400
333static void process_req(struct work_struct *work) 401static void process_req(struct work_struct *work)
@@ -343,7 +411,8 @@ static void process_req(struct work_struct *work)
343 if (req->status == -ENODATA) { 411 if (req->status == -ENODATA) {
344 src_in = (struct sockaddr *) &req->src_addr; 412 src_in = (struct sockaddr *) &req->src_addr;
345 dst_in = (struct sockaddr *) &req->dst_addr; 413 dst_in = (struct sockaddr *) &req->dst_addr;
346 req->status = addr_resolve(src_in, dst_in, req->addr); 414 req->status = addr_resolve(src_in, dst_in, req->addr,
415 true);
347 if (req->status && time_after_eq(jiffies, req->timeout)) 416 if (req->status && time_after_eq(jiffies, req->timeout))
348 req->status = -ETIMEDOUT; 417 req->status = -ETIMEDOUT;
349 else if (req->status == -ENODATA) 418 else if (req->status == -ENODATA)
@@ -403,7 +472,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
403 req->client = client; 472 req->client = client;
404 atomic_inc(&client->refcount); 473 atomic_inc(&client->refcount);
405 474
406 req->status = addr_resolve(src_in, dst_in, addr); 475 req->status = addr_resolve(src_in, dst_in, addr, true);
407 switch (req->status) { 476 switch (req->status) {
408 case 0: 477 case 0:
409 req->timeout = jiffies; 478 req->timeout = jiffies;
@@ -425,6 +494,26 @@ err:
425} 494}
426EXPORT_SYMBOL(rdma_resolve_ip); 495EXPORT_SYMBOL(rdma_resolve_ip);
427 496
497int rdma_resolve_ip_route(struct sockaddr *src_addr,
498 const struct sockaddr *dst_addr,
499 struct rdma_dev_addr *addr)
500{
501 struct sockaddr_storage ssrc_addr = {};
502 struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
503
504 if (src_addr) {
505 if (src_addr->sa_family != dst_addr->sa_family)
506 return -EINVAL;
507
508 memcpy(src_in, src_addr, rdma_addr_size(src_addr));
509 } else {
510 src_in->sa_family = dst_addr->sa_family;
511 }
512
513 return addr_resolve(src_in, dst_addr, addr, false);
514}
515EXPORT_SYMBOL(rdma_resolve_ip_route);
516
428void rdma_addr_cancel(struct rdma_dev_addr *addr) 517void rdma_addr_cancel(struct rdma_dev_addr *addr)
429{ 518{
430 struct addr_req *req, *temp_req; 519 struct addr_req *req, *temp_req;
@@ -456,8 +545,10 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
456 complete(&((struct resolve_cb_context *)context)->comp); 545 complete(&((struct resolve_cb_context *)context)->comp);
457} 546}
458 547
459int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, 548int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
460 u8 *dmac, u16 *vlan_id, int if_index) 549 const union ib_gid *dgid,
550 u8 *dmac, u16 *vlan_id, int *if_index,
551 int *hoplimit)
461{ 552{
462 int ret = 0; 553 int ret = 0;
463 struct rdma_dev_addr dev_addr; 554 struct rdma_dev_addr dev_addr;
@@ -475,7 +566,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
475 rdma_gid2ip(&dgid_addr._sockaddr, dgid); 566 rdma_gid2ip(&dgid_addr._sockaddr, dgid);
476 567
477 memset(&dev_addr, 0, sizeof(dev_addr)); 568 memset(&dev_addr, 0, sizeof(dev_addr));
478 dev_addr.bound_dev_if = if_index; 569 if (if_index)
570 dev_addr.bound_dev_if = *if_index;
479 dev_addr.net = &init_net; 571 dev_addr.net = &init_net;
480 572
481 ctx.addr = &dev_addr; 573 ctx.addr = &dev_addr;
@@ -491,12 +583,16 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
491 dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); 583 dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
492 if (!dev) 584 if (!dev)
493 return -ENODEV; 585 return -ENODEV;
586 if (if_index)
587 *if_index = dev_addr.bound_dev_if;
494 if (vlan_id) 588 if (vlan_id)
495 *vlan_id = rdma_vlan_dev_vlan_id(dev); 589 *vlan_id = rdma_vlan_dev_vlan_id(dev);
590 if (hoplimit)
591 *hoplimit = dev_addr.hoplimit;
496 dev_put(dev); 592 dev_put(dev);
497 return ret; 593 return ret;
498} 594}
499EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh); 595EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
500 596
501int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) 597int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
502{ 598{
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 89bebeada38b..53343ffbff7a 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -64,6 +64,7 @@ enum gid_attr_find_mask {
64 GID_ATTR_FIND_MASK_GID = 1UL << 0, 64 GID_ATTR_FIND_MASK_GID = 1UL << 0,
65 GID_ATTR_FIND_MASK_NETDEV = 1UL << 1, 65 GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
66 GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2, 66 GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2,
67 GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3,
67}; 68};
68 69
69enum gid_table_entry_props { 70enum gid_table_entry_props {
@@ -81,10 +82,6 @@ enum gid_table_write_action {
81}; 82};
82 83
83struct ib_gid_table_entry { 84struct ib_gid_table_entry {
84 /* This lock protects an entry from being
85 * read and written simultaneously.
86 */
87 rwlock_t lock;
88 unsigned long props; 85 unsigned long props;
89 union ib_gid gid; 86 union ib_gid gid;
90 struct ib_gid_attr attr; 87 struct ib_gid_attr attr;
@@ -109,28 +106,86 @@ struct ib_gid_table {
109 * are locked by this lock. 106 * are locked by this lock.
110 **/ 107 **/
111 struct mutex lock; 108 struct mutex lock;
109 /* This lock protects the table entries from being
110 * read and written simultaneously.
111 */
112 rwlock_t rwlock;
112 struct ib_gid_table_entry *data_vec; 113 struct ib_gid_table_entry *data_vec;
113}; 114};
114 115
116static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
117{
118 if (rdma_cap_roce_gid_table(ib_dev, port)) {
119 struct ib_event event;
120
121 event.device = ib_dev;
122 event.element.port_num = port;
123 event.event = IB_EVENT_GID_CHANGE;
124
125 ib_dispatch_event(&event);
126 }
127}
128
129static const char * const gid_type_str[] = {
130 [IB_GID_TYPE_IB] = "IB/RoCE v1",
131 [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
132};
133
134const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
135{
136 if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
137 return gid_type_str[gid_type];
138
139 return "Invalid GID type";
140}
141EXPORT_SYMBOL(ib_cache_gid_type_str);
142
143int ib_cache_gid_parse_type_str(const char *buf)
144{
145 unsigned int i;
146 size_t len;
147 int err = -EINVAL;
148
149 len = strlen(buf);
150 if (len == 0)
151 return -EINVAL;
152
153 if (buf[len - 1] == '\n')
154 len--;
155
156 for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
157 if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
158 len == strlen(gid_type_str[i])) {
159 err = i;
160 break;
161 }
162
163 return err;
164}
165EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
166
167/* This function expects that rwlock will be write locked in all
168 * scenarios and that lock will be locked in sleep-able (RoCE)
169 * scenarios.
170 */
115static int write_gid(struct ib_device *ib_dev, u8 port, 171static int write_gid(struct ib_device *ib_dev, u8 port,
116 struct ib_gid_table *table, int ix, 172 struct ib_gid_table *table, int ix,
117 const union ib_gid *gid, 173 const union ib_gid *gid,
118 const struct ib_gid_attr *attr, 174 const struct ib_gid_attr *attr,
119 enum gid_table_write_action action, 175 enum gid_table_write_action action,
120 bool default_gid) 176 bool default_gid)
177 __releases(&table->rwlock) __acquires(&table->rwlock)
121{ 178{
122 int ret = 0; 179 int ret = 0;
123 struct net_device *old_net_dev; 180 struct net_device *old_net_dev;
124 unsigned long flags;
125 181
 126	/* in rdma_cap_roce_gid_table, this function should be protected by a 182	/* in rdma_cap_roce_gid_table, this function should be protected by a
127 * sleep-able lock. 183 * sleep-able lock.
128 */ 184 */
129 write_lock_irqsave(&table->data_vec[ix].lock, flags);
130 185
131 if (rdma_cap_roce_gid_table(ib_dev, port)) { 186 if (rdma_cap_roce_gid_table(ib_dev, port)) {
132 table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; 187 table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
133 write_unlock_irqrestore(&table->data_vec[ix].lock, flags); 188 write_unlock_irq(&table->rwlock);
134 /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by 189 /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
135 * RoCE providers and thus only updates the cache. 190 * RoCE providers and thus only updates the cache.
136 */ 191 */
@@ -140,7 +195,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
140 else if (action == GID_TABLE_WRITE_ACTION_DEL) 195 else if (action == GID_TABLE_WRITE_ACTION_DEL)
141 ret = ib_dev->del_gid(ib_dev, port, ix, 196 ret = ib_dev->del_gid(ib_dev, port, ix,
142 &table->data_vec[ix].context); 197 &table->data_vec[ix].context);
143 write_lock_irqsave(&table->data_vec[ix].lock, flags); 198 write_lock_irq(&table->rwlock);
144 } 199 }
145 200
146 old_net_dev = table->data_vec[ix].attr.ndev; 201 old_net_dev = table->data_vec[ix].attr.ndev;
@@ -162,17 +217,6 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
162 217
163 table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID; 218 table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
164 219
165 write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
166
167 if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) {
168 struct ib_event event;
169
170 event.device = ib_dev;
171 event.element.port_num = port;
172 event.event = IB_EVENT_GID_CHANGE;
173
174 ib_dispatch_event(&event);
175 }
176 return ret; 220 return ret;
177} 221}
178 222
@@ -201,41 +245,58 @@ static int del_gid(struct ib_device *ib_dev, u8 port,
201 GID_TABLE_WRITE_ACTION_DEL, default_gid); 245 GID_TABLE_WRITE_ACTION_DEL, default_gid);
202} 246}
203 247
248/* rwlock should be read locked */
204static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, 249static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
205 const struct ib_gid_attr *val, bool default_gid, 250 const struct ib_gid_attr *val, bool default_gid,
206 unsigned long mask) 251 unsigned long mask, int *pempty)
207{ 252{
208 int i; 253 int i = 0;
254 int found = -1;
255 int empty = pempty ? -1 : 0;
209 256
210 for (i = 0; i < table->sz; i++) { 257 while (i < table->sz && (found < 0 || empty < 0)) {
211 unsigned long flags; 258 struct ib_gid_table_entry *data = &table->data_vec[i];
212 struct ib_gid_attr *attr = &table->data_vec[i].attr; 259 struct ib_gid_attr *attr = &data->attr;
260 int curr_index = i;
213 261
214 read_lock_irqsave(&table->data_vec[i].lock, flags); 262 i++;
215 263
216 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) 264 if (data->props & GID_TABLE_ENTRY_INVALID)
217 goto next; 265 continue;
266
267 if (empty < 0)
268 if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
269 !memcmp(attr, &zattr, sizeof(*attr)) &&
270 !data->props)
271 empty = curr_index;
272
273 if (found >= 0)
274 continue;
275
276 if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
277 attr->gid_type != val->gid_type)
278 continue;
218 279
219 if (mask & GID_ATTR_FIND_MASK_GID && 280 if (mask & GID_ATTR_FIND_MASK_GID &&
220 memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) 281 memcmp(gid, &data->gid, sizeof(*gid)))
221 goto next; 282 continue;
222 283
223 if (mask & GID_ATTR_FIND_MASK_NETDEV && 284 if (mask & GID_ATTR_FIND_MASK_NETDEV &&
224 attr->ndev != val->ndev) 285 attr->ndev != val->ndev)
225 goto next; 286 continue;
226 287
227 if (mask & GID_ATTR_FIND_MASK_DEFAULT && 288 if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
228 !!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) != 289 !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
229 default_gid) 290 default_gid)
230 goto next; 291 continue;
231 292
232 read_unlock_irqrestore(&table->data_vec[i].lock, flags); 293 found = curr_index;
233 return i;
234next:
235 read_unlock_irqrestore(&table->data_vec[i].lock, flags);
236 } 294 }
237 295
238 return -1; 296 if (pempty)
297 *pempty = empty;
298
299 return found;
239} 300}
240 301
241static void make_default_gid(struct net_device *dev, union ib_gid *gid) 302static void make_default_gid(struct net_device *dev, union ib_gid *gid)
@@ -252,6 +313,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
252 int ix; 313 int ix;
253 int ret = 0; 314 int ret = 0;
254 struct net_device *idev; 315 struct net_device *idev;
316 int empty;
255 317
256 table = ports_table[port - rdma_start_port(ib_dev)]; 318 table = ports_table[port - rdma_start_port(ib_dev)];
257 319
@@ -275,22 +337,25 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
275 } 337 }
276 338
277 mutex_lock(&table->lock); 339 mutex_lock(&table->lock);
340 write_lock_irq(&table->rwlock);
278 341
279 ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | 342 ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
280 GID_ATTR_FIND_MASK_NETDEV); 343 GID_ATTR_FIND_MASK_GID_TYPE |
344 GID_ATTR_FIND_MASK_NETDEV, &empty);
281 if (ix >= 0) 345 if (ix >= 0)
282 goto out_unlock; 346 goto out_unlock;
283 347
284 ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID | 348 if (empty < 0) {
285 GID_ATTR_FIND_MASK_DEFAULT);
286 if (ix < 0) {
287 ret = -ENOSPC; 349 ret = -ENOSPC;
288 goto out_unlock; 350 goto out_unlock;
289 } 351 }
290 352
291 add_gid(ib_dev, port, table, ix, gid, attr, false); 353 ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
354 if (!ret)
355 dispatch_gid_change_event(ib_dev, port);
292 356
293out_unlock: 357out_unlock:
358 write_unlock_irq(&table->rwlock);
294 mutex_unlock(&table->lock); 359 mutex_unlock(&table->lock);
295 return ret; 360 return ret;
296} 361}
@@ -305,17 +370,22 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
305 table = ports_table[port - rdma_start_port(ib_dev)]; 370 table = ports_table[port - rdma_start_port(ib_dev)];
306 371
307 mutex_lock(&table->lock); 372 mutex_lock(&table->lock);
373 write_lock_irq(&table->rwlock);
308 374
309 ix = find_gid(table, gid, attr, false, 375 ix = find_gid(table, gid, attr, false,
310 GID_ATTR_FIND_MASK_GID | 376 GID_ATTR_FIND_MASK_GID |
377 GID_ATTR_FIND_MASK_GID_TYPE |
311 GID_ATTR_FIND_MASK_NETDEV | 378 GID_ATTR_FIND_MASK_NETDEV |
312 GID_ATTR_FIND_MASK_DEFAULT); 379 GID_ATTR_FIND_MASK_DEFAULT,
380 NULL);
313 if (ix < 0) 381 if (ix < 0)
314 goto out_unlock; 382 goto out_unlock;
315 383
316 del_gid(ib_dev, port, table, ix, false); 384 if (!del_gid(ib_dev, port, table, ix, false))
385 dispatch_gid_change_event(ib_dev, port);
317 386
318out_unlock: 387out_unlock:
388 write_unlock_irq(&table->rwlock);
319 mutex_unlock(&table->lock); 389 mutex_unlock(&table->lock);
320 return 0; 390 return 0;
321} 391}
@@ -326,16 +396,24 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
326 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 396 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
327 struct ib_gid_table *table; 397 struct ib_gid_table *table;
328 int ix; 398 int ix;
399 bool deleted = false;
329 400
330 table = ports_table[port - rdma_start_port(ib_dev)]; 401 table = ports_table[port - rdma_start_port(ib_dev)];
331 402
332 mutex_lock(&table->lock); 403 mutex_lock(&table->lock);
404 write_lock_irq(&table->rwlock);
333 405
334 for (ix = 0; ix < table->sz; ix++) 406 for (ix = 0; ix < table->sz; ix++)
335 if (table->data_vec[ix].attr.ndev == ndev) 407 if (table->data_vec[ix].attr.ndev == ndev)
336 del_gid(ib_dev, port, table, ix, false); 408 if (!del_gid(ib_dev, port, table, ix, false))
409 deleted = true;
337 410
411 write_unlock_irq(&table->rwlock);
338 mutex_unlock(&table->lock); 412 mutex_unlock(&table->lock);
413
414 if (deleted)
415 dispatch_gid_change_event(ib_dev, port);
416
339 return 0; 417 return 0;
340} 418}
341 419
@@ -344,18 +422,14 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
344{ 422{
345 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 423 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
346 struct ib_gid_table *table; 424 struct ib_gid_table *table;
347 unsigned long flags;
348 425
349 table = ports_table[port - rdma_start_port(ib_dev)]; 426 table = ports_table[port - rdma_start_port(ib_dev)];
350 427
351 if (index < 0 || index >= table->sz) 428 if (index < 0 || index >= table->sz)
352 return -EINVAL; 429 return -EINVAL;
353 430
354 read_lock_irqsave(&table->data_vec[index].lock, flags); 431 if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
355 if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) {
356 read_unlock_irqrestore(&table->data_vec[index].lock, flags);
357 return -EAGAIN; 432 return -EAGAIN;
358 }
359 433
360 memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); 434 memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
361 if (attr) { 435 if (attr) {
@@ -364,7 +438,6 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
364 dev_hold(attr->ndev); 438 dev_hold(attr->ndev);
365 } 439 }
366 440
367 read_unlock_irqrestore(&table->data_vec[index].lock, flags);
368 return 0; 441 return 0;
369} 442}
370 443
@@ -378,17 +451,21 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
378 struct ib_gid_table *table; 451 struct ib_gid_table *table;
379 u8 p; 452 u8 p;
380 int local_index; 453 int local_index;
454 unsigned long flags;
381 455
382 for (p = 0; p < ib_dev->phys_port_cnt; p++) { 456 for (p = 0; p < ib_dev->phys_port_cnt; p++) {
383 table = ports_table[p]; 457 table = ports_table[p];
384 local_index = find_gid(table, gid, val, false, mask); 458 read_lock_irqsave(&table->rwlock, flags);
459 local_index = find_gid(table, gid, val, false, mask, NULL);
385 if (local_index >= 0) { 460 if (local_index >= 0) {
386 if (index) 461 if (index)
387 *index = local_index; 462 *index = local_index;
388 if (port) 463 if (port)
389 *port = p + rdma_start_port(ib_dev); 464 *port = p + rdma_start_port(ib_dev);
465 read_unlock_irqrestore(&table->rwlock, flags);
390 return 0; 466 return 0;
391 } 467 }
468 read_unlock_irqrestore(&table->rwlock, flags);
392 } 469 }
393 470
394 return -ENOENT; 471 return -ENOENT;
@@ -396,11 +473,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
396 473
397static int ib_cache_gid_find(struct ib_device *ib_dev, 474static int ib_cache_gid_find(struct ib_device *ib_dev,
398 const union ib_gid *gid, 475 const union ib_gid *gid,
476 enum ib_gid_type gid_type,
399 struct net_device *ndev, u8 *port, 477 struct net_device *ndev, u8 *port,
400 u16 *index) 478 u16 *index)
401{ 479{
402 unsigned long mask = GID_ATTR_FIND_MASK_GID; 480 unsigned long mask = GID_ATTR_FIND_MASK_GID |
403 struct ib_gid_attr gid_attr_val = {.ndev = ndev}; 481 GID_ATTR_FIND_MASK_GID_TYPE;
482 struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
404 483
405 if (ndev) 484 if (ndev)
406 mask |= GID_ATTR_FIND_MASK_NETDEV; 485 mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -411,14 +490,17 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
411 490
412int ib_find_cached_gid_by_port(struct ib_device *ib_dev, 491int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
413 const union ib_gid *gid, 492 const union ib_gid *gid,
493 enum ib_gid_type gid_type,
414 u8 port, struct net_device *ndev, 494 u8 port, struct net_device *ndev,
415 u16 *index) 495 u16 *index)
416{ 496{
417 int local_index; 497 int local_index;
418 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 498 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
419 struct ib_gid_table *table; 499 struct ib_gid_table *table;
420 unsigned long mask = GID_ATTR_FIND_MASK_GID; 500 unsigned long mask = GID_ATTR_FIND_MASK_GID |
421 struct ib_gid_attr val = {.ndev = ndev}; 501 GID_ATTR_FIND_MASK_GID_TYPE;
502 struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
503 unsigned long flags;
422 504
423 if (port < rdma_start_port(ib_dev) || 505 if (port < rdma_start_port(ib_dev) ||
424 port > rdma_end_port(ib_dev)) 506 port > rdma_end_port(ib_dev))
@@ -429,13 +511,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
429 if (ndev) 511 if (ndev)
430 mask |= GID_ATTR_FIND_MASK_NETDEV; 512 mask |= GID_ATTR_FIND_MASK_NETDEV;
431 513
432 local_index = find_gid(table, gid, &val, false, mask); 514 read_lock_irqsave(&table->rwlock, flags);
515 local_index = find_gid(table, gid, &val, false, mask, NULL);
433 if (local_index >= 0) { 516 if (local_index >= 0) {
434 if (index) 517 if (index)
435 *index = local_index; 518 *index = local_index;
519 read_unlock_irqrestore(&table->rwlock, flags);
436 return 0; 520 return 0;
437 } 521 }
438 522
523 read_unlock_irqrestore(&table->rwlock, flags);
439 return -ENOENT; 524 return -ENOENT;
440} 525}
441EXPORT_SYMBOL(ib_find_cached_gid_by_port); 526EXPORT_SYMBOL(ib_find_cached_gid_by_port);
@@ -472,6 +557,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
472 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 557 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
473 struct ib_gid_table *table; 558 struct ib_gid_table *table;
474 unsigned int i; 559 unsigned int i;
560 unsigned long flags;
475 bool found = false; 561 bool found = false;
476 562
477 if (!ports_table) 563 if (!ports_table)
@@ -484,11 +570,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
484 570
485 table = ports_table[port - rdma_start_port(ib_dev)]; 571 table = ports_table[port - rdma_start_port(ib_dev)];
486 572
573 read_lock_irqsave(&table->rwlock, flags);
487 for (i = 0; i < table->sz; i++) { 574 for (i = 0; i < table->sz; i++) {
488 struct ib_gid_attr attr; 575 struct ib_gid_attr attr;
489 unsigned long flags;
490 576
491 read_lock_irqsave(&table->data_vec[i].lock, flags);
492 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) 577 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
493 goto next; 578 goto next;
494 579
@@ -501,11 +586,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
501 found = true; 586 found = true;
502 587
503next: 588next:
504 read_unlock_irqrestore(&table->data_vec[i].lock, flags);
505
506 if (found) 589 if (found)
507 break; 590 break;
508 } 591 }
592 read_unlock_irqrestore(&table->rwlock, flags);
509 593
510 if (!found) 594 if (!found)
511 return -ENOENT; 595 return -ENOENT;
@@ -517,9 +601,9 @@ next:
517 601
518static struct ib_gid_table *alloc_gid_table(int sz) 602static struct ib_gid_table *alloc_gid_table(int sz)
519{ 603{
520 unsigned int i;
521 struct ib_gid_table *table = 604 struct ib_gid_table *table =
522 kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); 605 kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
606
523 if (!table) 607 if (!table)
524 return NULL; 608 return NULL;
525 609
@@ -530,9 +614,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
530 mutex_init(&table->lock); 614 mutex_init(&table->lock);
531 615
532 table->sz = sz; 616 table->sz = sz;
533 617 rwlock_init(&table->rwlock);
534 for (i = 0; i < sz; i++)
535 rwlock_init(&table->data_vec[i].lock);
536 618
537 return table; 619 return table;
538 620
@@ -553,30 +635,37 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
553 struct ib_gid_table *table) 635 struct ib_gid_table *table)
554{ 636{
555 int i; 637 int i;
638 bool deleted = false;
556 639
557 if (!table) 640 if (!table)
558 return; 641 return;
559 642
643 write_lock_irq(&table->rwlock);
560 for (i = 0; i < table->sz; ++i) { 644 for (i = 0; i < table->sz; ++i) {
561 if (memcmp(&table->data_vec[i].gid, &zgid, 645 if (memcmp(&table->data_vec[i].gid, &zgid,
562 sizeof(table->data_vec[i].gid))) 646 sizeof(table->data_vec[i].gid)))
563 del_gid(ib_dev, port, table, i, 647 if (!del_gid(ib_dev, port, table, i,
564 table->data_vec[i].props & 648 table->data_vec[i].props &
565 GID_ATTR_FIND_MASK_DEFAULT); 649 GID_ATTR_FIND_MASK_DEFAULT))
650 deleted = true;
566 } 651 }
652 write_unlock_irq(&table->rwlock);
653
654 if (deleted)
655 dispatch_gid_change_event(ib_dev, port);
567} 656}
568 657
569void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, 658void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
570 struct net_device *ndev, 659 struct net_device *ndev,
660 unsigned long gid_type_mask,
571 enum ib_cache_gid_default_mode mode) 661 enum ib_cache_gid_default_mode mode)
572{ 662{
573 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 663 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
574 union ib_gid gid; 664 union ib_gid gid;
575 struct ib_gid_attr gid_attr; 665 struct ib_gid_attr gid_attr;
666 struct ib_gid_attr zattr_type = zattr;
576 struct ib_gid_table *table; 667 struct ib_gid_table *table;
577 int ix; 668 unsigned int gid_type;
578 union ib_gid current_gid;
579 struct ib_gid_attr current_gid_attr = {};
580 669
581 table = ports_table[port - rdma_start_port(ib_dev)]; 670 table = ports_table[port - rdma_start_port(ib_dev)];
582 671
@@ -584,46 +673,82 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
584 memset(&gid_attr, 0, sizeof(gid_attr)); 673 memset(&gid_attr, 0, sizeof(gid_attr));
585 gid_attr.ndev = ndev; 674 gid_attr.ndev = ndev;
586 675
587 mutex_lock(&table->lock); 676 for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
588 ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT); 677 int ix;
589 678 union ib_gid current_gid;
590 /* Coudn't find default GID location */ 679 struct ib_gid_attr current_gid_attr = {};
591 WARN_ON(ix < 0); 680
592 681 if (1UL << gid_type & ~gid_type_mask)
593 if (!__ib_cache_gid_get(ib_dev, port, ix, 682 continue;
594 &current_gid, &current_gid_attr) && 683
595 mode == IB_CACHE_GID_DEFAULT_MODE_SET && 684 gid_attr.gid_type = gid_type;
596 !memcmp(&gid, &current_gid, sizeof(gid)) && 685
597 !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr))) 686 mutex_lock(&table->lock);
598 goto unlock; 687 write_lock_irq(&table->rwlock);
599 688 ix = find_gid(table, NULL, &gid_attr, true,
600 if ((memcmp(&current_gid, &zgid, sizeof(current_gid)) || 689 GID_ATTR_FIND_MASK_GID_TYPE |
601 memcmp(&current_gid_attr, &zattr, 690 GID_ATTR_FIND_MASK_DEFAULT,
602 sizeof(current_gid_attr))) && 691 NULL);
603 del_gid(ib_dev, port, table, ix, true)) { 692
604 pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n", 693 /* Coudn't find default GID location */
605 ix, gid.raw); 694 WARN_ON(ix < 0);
606 goto unlock; 695
607 } 696 zattr_type.gid_type = gid_type;
697
698 if (!__ib_cache_gid_get(ib_dev, port, ix,
699 &current_gid, &current_gid_attr) &&
700 mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
701 !memcmp(&gid, &current_gid, sizeof(gid)) &&
702 !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
703 goto release;
704
705 if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
706 memcmp(&current_gid_attr, &zattr_type,
707 sizeof(current_gid_attr))) {
708 if (del_gid(ib_dev, port, table, ix, true)) {
709 pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
710 ix, gid.raw);
711 goto release;
712 } else {
713 dispatch_gid_change_event(ib_dev, port);
714 }
715 }
608 716
609 if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) 717 if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
610 if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true)) 718 if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
611 pr_warn("ib_cache_gid: unable to add default gid %pI6\n", 719 pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
612 gid.raw); 720 gid.raw);
721 else
722 dispatch_gid_change_event(ib_dev, port);
723 }
613 724
614unlock: 725release:
615 if (current_gid_attr.ndev) 726 if (current_gid_attr.ndev)
616 dev_put(current_gid_attr.ndev); 727 dev_put(current_gid_attr.ndev);
617 mutex_unlock(&table->lock); 728 write_unlock_irq(&table->rwlock);
729 mutex_unlock(&table->lock);
730 }
618} 731}
619 732
620static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, 733static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
621 struct ib_gid_table *table) 734 struct ib_gid_table *table)
622{ 735{
623 if (rdma_protocol_roce(ib_dev, port)) { 736 unsigned int i;
624 struct ib_gid_table_entry *entry = &table->data_vec[0]; 737 unsigned long roce_gid_type_mask;
738 unsigned int num_default_gids;
739 unsigned int current_gid = 0;
740
741 roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
742 num_default_gids = hweight_long(roce_gid_type_mask);
743 for (i = 0; i < num_default_gids && i < table->sz; i++) {
744 struct ib_gid_table_entry *entry =
745 &table->data_vec[i];
625 746
626 entry->props |= GID_TABLE_ENTRY_DEFAULT; 747 entry->props |= GID_TABLE_ENTRY_DEFAULT;
748 current_gid = find_next_bit(&roce_gid_type_mask,
749 BITS_PER_LONG,
750 current_gid);
751 entry->attr.gid_type = current_gid++;
627 } 752 }
628 753
629 return 0; 754 return 0;
@@ -728,20 +853,30 @@ int ib_get_cached_gid(struct ib_device *device,
728 union ib_gid *gid, 853 union ib_gid *gid,
729 struct ib_gid_attr *gid_attr) 854 struct ib_gid_attr *gid_attr)
730{ 855{
856 int res;
857 unsigned long flags;
858 struct ib_gid_table **ports_table = device->cache.gid_cache;
859 struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
860
731 if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) 861 if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
732 return -EINVAL; 862 return -EINVAL;
733 863
734 return __ib_cache_gid_get(device, port_num, index, gid, gid_attr); 864 read_lock_irqsave(&table->rwlock, flags);
865 res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
866 read_unlock_irqrestore(&table->rwlock, flags);
867
868 return res;
735} 869}
736EXPORT_SYMBOL(ib_get_cached_gid); 870EXPORT_SYMBOL(ib_get_cached_gid);
737 871
738int ib_find_cached_gid(struct ib_device *device, 872int ib_find_cached_gid(struct ib_device *device,
739 const union ib_gid *gid, 873 const union ib_gid *gid,
874 enum ib_gid_type gid_type,
740 struct net_device *ndev, 875 struct net_device *ndev,
741 u8 *port_num, 876 u8 *port_num,
742 u16 *index) 877 u16 *index)
743{ 878{
744 return ib_cache_gid_find(device, gid, ndev, port_num, index); 879 return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
745} 880}
746EXPORT_SYMBOL(ib_find_cached_gid); 881EXPORT_SYMBOL(ib_find_cached_gid);
747 882
@@ -956,10 +1091,12 @@ static void ib_cache_update(struct ib_device *device,
956 1091
957 device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache; 1092 device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
958 if (!use_roce_gid_table) { 1093 if (!use_roce_gid_table) {
1094 write_lock(&table->rwlock);
959 for (i = 0; i < gid_cache->table_len; i++) { 1095 for (i = 0; i < gid_cache->table_len; i++) {
960 modify_gid(device, port, table, i, gid_cache->table + i, 1096 modify_gid(device, port, table, i, gid_cache->table + i,
961 &zattr, false); 1097 &zattr, false);
962 } 1098 }
1099 write_unlock(&table->rwlock);
963 } 1100 }
964 1101
965 device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc; 1102 device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
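
A minimal caller sketch of the widened lookup API above; everything prefixed example_ is illustrative and not part of the patch, and IB_GID_TYPE_IB is used only because it is one of the types this series passes around. Lookups that previously matched on GID value alone now have to name a GID type (and optionally a netdev) as well.

#include <linux/errno.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

/* Illustrative helper: find which port of "device" carries "gid" with the
 * plain IB GID type, ignoring the netdev (NULL matches any). */
static int example_find_port_for_gid(struct ib_device *device,
                                     const union ib_gid *gid)
{
        u8 port_num;
        u16 index;
        int ret;

        ret = ib_find_cached_gid(device, gid, IB_GID_TYPE_IB, NULL,
                                 &port_num, &index);
        if (ret)
                return ret;     /* no matching (gid, gid_type, ndev) entry */

        return port_num;
}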
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0a26dd6d9b19..1d92e091e22e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -364,7 +364,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
364 read_lock_irqsave(&cm.device_lock, flags); 364 read_lock_irqsave(&cm.device_lock, flags);
365 list_for_each_entry(cm_dev, &cm.device_list, list) { 365 list_for_each_entry(cm_dev, &cm.device_list, list) {
366 if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, 366 if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
367 ndev, &p, NULL)) { 367 path->gid_type, ndev, &p, NULL)) {
368 port = cm_dev->port[p-1]; 368 port = cm_dev->port[p-1];
369 break; 369 break;
370 } 370 }
@@ -782,11 +782,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
782 wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); 782 wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
783 783
784 /* Check if the device started its remove_one */ 784 /* Check if the device started its remove_one */
785 spin_lock_irq(&cm.lock); 785 spin_lock_irqsave(&cm.lock, flags);
786 if (!cm_dev->going_down) 786 if (!cm_dev->going_down)
787 queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, 787 queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
788 msecs_to_jiffies(wait_time)); 788 msecs_to_jiffies(wait_time));
789 spin_unlock_irq(&cm.lock); 789 spin_unlock_irqrestore(&cm.lock, flags);
790 790
791 cm_id_priv->timewait_info = NULL; 791 cm_id_priv->timewait_info = NULL;
792} 792}
@@ -1600,6 +1600,8 @@ static int cm_req_handler(struct cm_work *work)
1600 struct ib_cm_id *cm_id; 1600 struct ib_cm_id *cm_id;
1601 struct cm_id_private *cm_id_priv, *listen_cm_id_priv; 1601 struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1602 struct cm_req_msg *req_msg; 1602 struct cm_req_msg *req_msg;
1603 union ib_gid gid;
1604 struct ib_gid_attr gid_attr;
1603 int ret; 1605 int ret;
1604 1606
1605 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; 1607 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1639,11 +1641,31 @@ static int cm_req_handler(struct cm_work *work)
1639 cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); 1641 cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1640 1642
1641 memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); 1643 memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1642 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); 1644 work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
1645 ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
1646 work->port->port_num,
1647 cm_id_priv->av.ah_attr.grh.sgid_index,
1648 &gid, &gid_attr);
1649 if (!ret) {
1650 if (gid_attr.ndev) {
1651 work->path[0].ifindex = gid_attr.ndev->ifindex;
1652 work->path[0].net = dev_net(gid_attr.ndev);
1653 dev_put(gid_attr.ndev);
1654 }
1655 work->path[0].gid_type = gid_attr.gid_type;
1656 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1657 }
1643 if (ret) { 1658 if (ret) {
1644 ib_get_cached_gid(work->port->cm_dev->ib_device, 1659 int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
1645 work->port->port_num, 0, &work->path[0].sgid, 1660 work->port->port_num, 0,
1646 NULL); 1661 &work->path[0].sgid,
1662 &gid_attr);
1663 if (!err && gid_attr.ndev) {
1664 work->path[0].ifindex = gid_attr.ndev->ifindex;
1665 work->path[0].net = dev_net(gid_attr.ndev);
1666 dev_put(gid_attr.ndev);
1667 }
1668 work->path[0].gid_type = gid_attr.gid_type;
1647 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, 1669 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1648 &work->path[0].sgid, sizeof work->path[0].sgid, 1670 &work->path[0].sgid, sizeof work->path[0].sgid,
1649 NULL, 0); 1671 NULL, 0);
@@ -3482,6 +3504,7 @@ int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3482EXPORT_SYMBOL(ib_cm_notify); 3504EXPORT_SYMBOL(ib_cm_notify);
3483 3505
3484static void cm_recv_handler(struct ib_mad_agent *mad_agent, 3506static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3507 struct ib_mad_send_buf *send_buf,
3485 struct ib_mad_recv_wc *mad_recv_wc) 3508 struct ib_mad_recv_wc *mad_recv_wc)
3486{ 3509{
3487 struct cm_port *port = mad_agent->context; 3510 struct cm_port *port = mad_agent->context;
@@ -3731,16 +3754,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3731} 3754}
3732EXPORT_SYMBOL(ib_cm_init_qp_attr); 3755EXPORT_SYMBOL(ib_cm_init_qp_attr);
3733 3756
3734static void cm_get_ack_delay(struct cm_device *cm_dev)
3735{
3736 struct ib_device_attr attr;
3737
3738 if (ib_query_device(cm_dev->ib_device, &attr))
3739 cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3740 else
3741 cm_dev->ack_delay = attr.local_ca_ack_delay;
3742}
3743
3744static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, 3757static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3745 char *buf) 3758 char *buf)
3746{ 3759{
@@ -3852,7 +3865,7 @@ static void cm_add_one(struct ib_device *ib_device)
3852 return; 3865 return;
3853 3866
3854 cm_dev->ib_device = ib_device; 3867 cm_dev->ib_device = ib_device;
3855 cm_get_ack_delay(cm_dev); 3868 cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
3856 cm_dev->going_down = 0; 3869 cm_dev->going_down = 0;
3857 cm_dev->device = device_create(&cm_class, &ib_device->dev, 3870 cm_dev->device = device_create(&cm_class, &ib_device->dev,
3858 MKDEV(0, 0), NULL, 3871 MKDEV(0, 0), NULL,
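
The sgid_attr handling in cm_req_handler() above follows a pattern worth spelling out: when ib_get_cached_gid() fills a gid_attr with a non-NULL ndev, the cache has handed back a netdev reference that the caller must drop, which is why both the success and the reject paths call dev_put(). A hedged sketch of that pattern (example_* names are illustrative):

#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

/* Read one GID together with its attributes and release the netdev
 * reference the cache holds for us. */
static void example_read_sgid(struct ib_device *device, u8 port_num,
                              int sgid_index)
{
        union ib_gid gid;
        struct ib_gid_attr gid_attr;

        if (ib_get_cached_gid(device, port_num, sgid_index, &gid, &gid_attr))
                return;

        if (gid_attr.ndev) {
                pr_debug("sgid %pI6 -> %s, gid_type %d\n",
                         gid.raw, gid_attr.ndev->name, gid_attr.gid_type);
                dev_put(gid_attr.ndev); /* balance the cache's reference */
        }
}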
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2d762a2ecd81..9729639df407 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -38,6 +38,7 @@
38#include <linux/in6.h> 38#include <linux/in6.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/random.h> 40#include <linux/random.h>
41#include <linux/igmp.h>
41#include <linux/idr.h> 42#include <linux/idr.h>
42#include <linux/inetdevice.h> 43#include <linux/inetdevice.h>
43#include <linux/slab.h> 44#include <linux/slab.h>
@@ -60,6 +61,8 @@
60#include <rdma/ib_sa.h> 61#include <rdma/ib_sa.h>
61#include <rdma/iw_cm.h> 62#include <rdma/iw_cm.h>
62 63
64#include "core_priv.h"
65
63MODULE_AUTHOR("Sean Hefty"); 66MODULE_AUTHOR("Sean Hefty");
64MODULE_DESCRIPTION("Generic RDMA CM Agent"); 67MODULE_DESCRIPTION("Generic RDMA CM Agent");
65MODULE_LICENSE("Dual BSD/GPL"); 68MODULE_LICENSE("Dual BSD/GPL");
@@ -150,6 +153,7 @@ struct cma_device {
150 struct completion comp; 153 struct completion comp;
151 atomic_t refcount; 154 atomic_t refcount;
152 struct list_head id_list; 155 struct list_head id_list;
156 enum ib_gid_type *default_gid_type;
153}; 157};
154 158
155struct rdma_bind_list { 159struct rdma_bind_list {
@@ -185,6 +189,67 @@ enum {
185 CMA_OPTION_AFONLY, 189 CMA_OPTION_AFONLY,
186}; 190};
187 191
192void cma_ref_dev(struct cma_device *cma_dev)
193{
194 atomic_inc(&cma_dev->refcount);
195}
196
197struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
198 void *cookie)
199{
200 struct cma_device *cma_dev;
201 struct cma_device *found_cma_dev = NULL;
202
203 mutex_lock(&lock);
204
205 list_for_each_entry(cma_dev, &dev_list, list)
206 if (filter(cma_dev->device, cookie)) {
207 found_cma_dev = cma_dev;
208 break;
209 }
210
211 if (found_cma_dev)
212 cma_ref_dev(found_cma_dev);
213 mutex_unlock(&lock);
214 return found_cma_dev;
215}
216
217int cma_get_default_gid_type(struct cma_device *cma_dev,
218 unsigned int port)
219{
220 if (port < rdma_start_port(cma_dev->device) ||
221 port > rdma_end_port(cma_dev->device))
222 return -EINVAL;
223
224 return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
225}
226
227int cma_set_default_gid_type(struct cma_device *cma_dev,
228 unsigned int port,
229 enum ib_gid_type default_gid_type)
230{
231 unsigned long supported_gids;
232
233 if (port < rdma_start_port(cma_dev->device) ||
234 port > rdma_end_port(cma_dev->device))
235 return -EINVAL;
236
237 supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
238
239 if (!(supported_gids & 1 << default_gid_type))
240 return -EINVAL;
241
242 cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
243 default_gid_type;
244
245 return 0;
246}
247
248struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
249{
250 return cma_dev->device;
251}
252
188/* 253/*
189 * Device removal can occur at anytime, so we need extra handling to 254 * Device removal can occur at anytime, so we need extra handling to
190 * serialize notifying the user of device removal with other callbacks. 255 * serialize notifying the user of device removal with other callbacks.
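
cma_enum_devices_by_ibdev() returns its match with a reference already taken (via cma_ref_dev()), so every successful call has to be paired with cma_deref_dev(). A sketch of a caller, using an illustrative filter that matches on the ib_device pointer rather than on the device name the configfs code later in this series filters by:

#include <linux/errno.h>
#include "core_priv.h"

static bool example_filter_by_ibdev(struct ib_device *ib_dev, void *cookie)
{
        return ib_dev == cookie;
}

static int example_port_default_gid_type(struct ib_device *ib_dev,
                                         unsigned int port)
{
        struct cma_device *cma_dev;
        int gid_type;

        cma_dev = cma_enum_devices_by_ibdev(example_filter_by_ibdev, ib_dev);
        if (!cma_dev)
                return -ENODEV;

        gid_type = cma_get_default_gid_type(cma_dev, port);
        cma_deref_dev(cma_dev);         /* drop the reference taken above */

        return gid_type;                /* an ib_gid_type value, or -EINVAL */
}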
@@ -228,6 +293,7 @@ struct rdma_id_private {
228 u8 tos; 293 u8 tos;
229 u8 reuseaddr; 294 u8 reuseaddr;
230 u8 afonly; 295 u8 afonly;
296 enum ib_gid_type gid_type;
231}; 297};
232 298
233struct cma_multicast { 299struct cma_multicast {
@@ -239,6 +305,7 @@ struct cma_multicast {
239 void *context; 305 void *context;
240 struct sockaddr_storage addr; 306 struct sockaddr_storage addr;
241 struct kref mcref; 307 struct kref mcref;
308 bool igmp_joined;
242}; 309};
243 310
244struct cma_work { 311struct cma_work {
@@ -335,18 +402,48 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
335 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); 402 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
336} 403}
337 404
338static void cma_attach_to_dev(struct rdma_id_private *id_priv, 405static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
339 struct cma_device *cma_dev)
340{ 406{
341 atomic_inc(&cma_dev->refcount); 407 struct in_device *in_dev = NULL;
408
409 if (ndev) {
410 rtnl_lock();
411 in_dev = __in_dev_get_rtnl(ndev);
412 if (in_dev) {
413 if (join)
414 ip_mc_inc_group(in_dev,
415 *(__be32 *)(mgid->raw + 12));
416 else
417 ip_mc_dec_group(in_dev,
418 *(__be32 *)(mgid->raw + 12));
419 }
420 rtnl_unlock();
421 }
422 return (in_dev) ? 0 : -ENODEV;
423}
424
425static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
426 struct cma_device *cma_dev)
427{
428 cma_ref_dev(cma_dev);
342 id_priv->cma_dev = cma_dev; 429 id_priv->cma_dev = cma_dev;
430 id_priv->gid_type = 0;
343 id_priv->id.device = cma_dev->device; 431 id_priv->id.device = cma_dev->device;
344 id_priv->id.route.addr.dev_addr.transport = 432 id_priv->id.route.addr.dev_addr.transport =
345 rdma_node_get_transport(cma_dev->device->node_type); 433 rdma_node_get_transport(cma_dev->device->node_type);
346 list_add_tail(&id_priv->list, &cma_dev->id_list); 434 list_add_tail(&id_priv->list, &cma_dev->id_list);
347} 435}
348 436
349static inline void cma_deref_dev(struct cma_device *cma_dev) 437static void cma_attach_to_dev(struct rdma_id_private *id_priv,
438 struct cma_device *cma_dev)
439{
440 _cma_attach_to_dev(id_priv, cma_dev);
441 id_priv->gid_type =
442 cma_dev->default_gid_type[id_priv->id.port_num -
443 rdma_start_port(cma_dev->device)];
444}
445
446void cma_deref_dev(struct cma_device *cma_dev)
350{ 447{
351 if (atomic_dec_and_test(&cma_dev->refcount)) 448 if (atomic_dec_and_test(&cma_dev->refcount))
352 complete(&cma_dev->comp); 449 complete(&cma_dev->comp);
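
cma_igmp_send() above assumes the IPv4 group address sits in the last four bytes of the mapped 16-byte MGID, the same layout rdma_ip2gid() produces elsewhere in this file. A standalone sketch of that extraction (example_* is illustrative):

#include <linux/string.h>
#include <rdma/ib_verbs.h>

/* Pull the IPv4 group address back out of an IPv4-mapped multicast GID,
 * as done before calling ip_mc_inc_group()/ip_mc_dec_group(). */
static __be32 example_mgid_to_ipv4_group(const union ib_gid *mgid)
{
        __be32 group;

        memcpy(&group, mgid->raw + 12, sizeof(group));
        return group;
}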
@@ -441,6 +538,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
441} 538}
442 539
443static inline int cma_validate_port(struct ib_device *device, u8 port, 540static inline int cma_validate_port(struct ib_device *device, u8 port,
541 enum ib_gid_type gid_type,
444 union ib_gid *gid, int dev_type, 542 union ib_gid *gid, int dev_type,
445 int bound_if_index) 543 int bound_if_index)
446{ 544{
@@ -453,10 +551,25 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
453 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) 551 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
454 return ret; 552 return ret;
455 553
456 if (dev_type == ARPHRD_ETHER) 554 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
457 ndev = dev_get_by_index(&init_net, bound_if_index); 555 ndev = dev_get_by_index(&init_net, bound_if_index);
556 if (ndev && ndev->flags & IFF_LOOPBACK) {
557 pr_info("detected loopback device\n");
558 dev_put(ndev);
458 559
459 ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL); 560 if (!device->get_netdev)
561 return -EOPNOTSUPP;
562
563 ndev = device->get_netdev(device, port);
564 if (!ndev)
565 return -ENODEV;
566 }
567 } else {
568 gid_type = IB_GID_TYPE_IB;
569 }
570
571 ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
572 ndev, NULL);
460 573
461 if (ndev) 574 if (ndev)
462 dev_put(ndev); 575 dev_put(ndev);
@@ -490,7 +603,10 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
490 gidp = rdma_protocol_roce(cma_dev->device, port) ? 603 gidp = rdma_protocol_roce(cma_dev->device, port) ?
491 &iboe_gid : &gid; 604 &iboe_gid : &gid;
492 605
493 ret = cma_validate_port(cma_dev->device, port, gidp, 606 ret = cma_validate_port(cma_dev->device, port,
607 rdma_protocol_ib(cma_dev->device, port) ?
608 IB_GID_TYPE_IB :
609 listen_id_priv->gid_type, gidp,
494 dev_addr->dev_type, 610 dev_addr->dev_type,
495 dev_addr->bound_dev_if); 611 dev_addr->bound_dev_if);
496 if (!ret) { 612 if (!ret) {
@@ -509,8 +625,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
509 gidp = rdma_protocol_roce(cma_dev->device, port) ? 625 gidp = rdma_protocol_roce(cma_dev->device, port) ?
510 &iboe_gid : &gid; 626 &iboe_gid : &gid;
511 627
512 ret = cma_validate_port(cma_dev->device, port, gidp, 628 ret = cma_validate_port(cma_dev->device, port,
513 dev_addr->dev_type, 629 rdma_protocol_ib(cma_dev->device, port) ?
630 IB_GID_TYPE_IB :
631 cma_dev->default_gid_type[port - 1],
632 gidp, dev_addr->dev_type,
514 dev_addr->bound_dev_if); 633 dev_addr->bound_dev_if);
515 if (!ret) { 634 if (!ret) {
516 id_priv->id.port_num = port; 635 id_priv->id.port_num = port;
@@ -1437,8 +1556,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
1437 id_priv->id.port_num)) { 1556 id_priv->id.port_num)) {
1438 ib_sa_free_multicast(mc->multicast.ib); 1557 ib_sa_free_multicast(mc->multicast.ib);
1439 kfree(mc); 1558 kfree(mc);
1440 } else 1559 } else {
1560 if (mc->igmp_joined) {
1561 struct rdma_dev_addr *dev_addr =
1562 &id_priv->id.route.addr.dev_addr;
1563 struct net_device *ndev = NULL;
1564
1565 if (dev_addr->bound_dev_if)
1566 ndev = dev_get_by_index(&init_net,
1567 dev_addr->bound_dev_if);
1568 if (ndev) {
1569 cma_igmp_send(ndev,
1570 &mc->multicast.ib->rec.mgid,
1571 false);
1572 dev_put(ndev);
1573 }
1574 }
1441 kref_put(&mc->mcref, release_mc); 1575 kref_put(&mc->mcref, release_mc);
1576 }
1442 } 1577 }
1443} 1578}
1444 1579
@@ -1896,7 +2031,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1896 struct rdma_id_private *listen_id, *conn_id; 2031 struct rdma_id_private *listen_id, *conn_id;
1897 struct rdma_cm_event event; 2032 struct rdma_cm_event event;
1898 int ret; 2033 int ret;
1899 struct ib_device_attr attr;
1900 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2034 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
1901 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2035 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
1902 2036
@@ -1938,13 +2072,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1938 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2072 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
1939 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2073 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
1940 2074
1941 ret = ib_query_device(conn_id->id.device, &attr);
1942 if (ret) {
1943 mutex_unlock(&conn_id->handler_mutex);
1944 rdma_destroy_id(new_cm_id);
1945 goto out;
1946 }
1947
1948 memset(&event, 0, sizeof event); 2075 memset(&event, 0, sizeof event);
1949 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2076 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1950 event.param.conn.private_data = iw_event->private_data; 2077 event.param.conn.private_data = iw_event->private_data;
@@ -2051,7 +2178,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2051 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2178 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
2052 rdma_addr_size(cma_src_addr(id_priv))); 2179 rdma_addr_size(cma_src_addr(id_priv)));
2053 2180
2054 cma_attach_to_dev(dev_id_priv, cma_dev); 2181 _cma_attach_to_dev(dev_id_priv, cma_dev);
2055 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2182 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
2056 atomic_inc(&id_priv->refcount); 2183 atomic_inc(&id_priv->refcount);
2057 dev_id_priv->internal_id = 1; 2184 dev_id_priv->internal_id = 1;
@@ -2321,8 +2448,23 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2321 2448
2322 if (addr->dev_addr.bound_dev_if) { 2449 if (addr->dev_addr.bound_dev_if) {
2323 ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); 2450 ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
2451 if (!ndev)
2452 return -ENODEV;
2453
2454 if (ndev->flags & IFF_LOOPBACK) {
2455 dev_put(ndev);
2456 if (!id_priv->id.device->get_netdev)
2457 return -EOPNOTSUPP;
2458
2459 ndev = id_priv->id.device->get_netdev(id_priv->id.device,
2460 id_priv->id.port_num);
2461 if (!ndev)
2462 return -ENODEV;
2463 }
2464
2324 route->path_rec->net = &init_net; 2465 route->path_rec->net = &init_net;
2325 route->path_rec->ifindex = addr->dev_addr.bound_dev_if; 2466 route->path_rec->ifindex = ndev->ifindex;
2467 route->path_rec->gid_type = id_priv->gid_type;
2326 } 2468 }
2327 if (!ndev) { 2469 if (!ndev) {
2328 ret = -ENODEV; 2470 ret = -ENODEV;
@@ -2336,7 +2478,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2336 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2478 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
2337 &route->path_rec->dgid); 2479 &route->path_rec->dgid);
2338 2480
2339 route->path_rec->hop_limit = 1; 2481 /* Use the hint from the IP stack to select the GID type */

2482 if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
2483 route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
2484 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
2485 /* TODO: get the hoplimit from the inet/inet6 device */
2486 route->path_rec->hop_limit = addr->dev_addr.hoplimit;
2487 else
2488 route->path_rec->hop_limit = 1;
2340 route->path_rec->reversible = 1; 2489 route->path_rec->reversible = 1;
2341 route->path_rec->pkey = cpu_to_be16(0xffff); 2490 route->path_rec->pkey = cpu_to_be16(0xffff);
2342 route->path_rec->mtu_selector = IB_SA_EQ; 2491 route->path_rec->mtu_selector = IB_SA_EQ;
@@ -3534,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3534 event.status = status; 3683 event.status = status;
3535 event.param.ud.private_data = mc->context; 3684 event.param.ud.private_data = mc->context;
3536 if (!status) { 3685 if (!status) {
3686 struct rdma_dev_addr *dev_addr =
3687 &id_priv->id.route.addr.dev_addr;
3688 struct net_device *ndev =
3689 dev_get_by_index(&init_net, dev_addr->bound_dev_if);
3690 enum ib_gid_type gid_type =
3691 id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
3692 rdma_start_port(id_priv->cma_dev->device)];
3693
3537 event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 3694 event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
3538 ib_init_ah_from_mcmember(id_priv->id.device, 3695 ib_init_ah_from_mcmember(id_priv->id.device,
3539 id_priv->id.port_num, &multicast->rec, 3696 id_priv->id.port_num, &multicast->rec,
3697 ndev, gid_type,
3540 &event.param.ud.ah_attr); 3698 &event.param.ud.ah_attr);
3541 event.param.ud.qp_num = 0xFFFFFF; 3699 event.param.ud.qp_num = 0xFFFFFF;
3542 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 3700 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
3701 if (ndev)
3702 dev_put(ndev);
3543 } else 3703 } else
3544 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 3704 event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
3545 3705
@@ -3672,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3672{ 3832{
3673 struct iboe_mcast_work *work; 3833 struct iboe_mcast_work *work;
3674 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3834 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
3675 int err; 3835 int err = 0;
3676 struct sockaddr *addr = (struct sockaddr *)&mc->addr; 3836 struct sockaddr *addr = (struct sockaddr *)&mc->addr;
3677 struct net_device *ndev = NULL; 3837 struct net_device *ndev = NULL;
3838 enum ib_gid_type gid_type;
3678 3839
3679 if (cma_zero_addr((struct sockaddr *)&mc->addr)) 3840 if (cma_zero_addr((struct sockaddr *)&mc->addr))
3680 return -EINVAL; 3841 return -EINVAL;
@@ -3704,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
3704 mc->multicast.ib->rec.rate = iboe_get_rate(ndev); 3865 mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
3705 mc->multicast.ib->rec.hop_limit = 1; 3866 mc->multicast.ib->rec.hop_limit = 1;
3706 mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu); 3867 mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
3868
3869 gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
3870 rdma_start_port(id_priv->cma_dev->device)];
3871 if (addr->sa_family == AF_INET) {
3872 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
3873 err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
3874 true);
3875 if (!err) {
3876 mc->igmp_joined = true;
3877 mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
3878 }
3879 } else {
3880 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
3881 err = -ENOTSUPP;
3882 }
3707 dev_put(ndev); 3883 dev_put(ndev);
3708 if (!mc->multicast.ib->rec.mtu) { 3884 if (err || !mc->multicast.ib->rec.mtu) {
3709 err = -EINVAL; 3885 if (!err)
3886 err = -EINVAL;
3710 goto out2; 3887 goto out2;
3711 } 3888 }
3712 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 3889 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
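
The join path above only touches IGMP when the group address is IPv4 and the port's default GID type is the UDP-encapsulated (RoCE v2) one, and the igmp_joined flag added in this patch is what lets the leave paths reverse it. Condensed into an illustrative predicate:

#include <linux/socket.h>
#include <rdma/ib_verbs.h>

/* Illustrative predicate mirroring the checks in cma_iboe_join_multicast(). */
static bool example_needs_igmp_join(sa_family_t family,
                                    enum ib_gid_type gid_type)
{
        return family == AF_INET && gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
}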
@@ -3745,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3745 memcpy(&mc->addr, addr, rdma_addr_size(addr)); 3922 memcpy(&mc->addr, addr, rdma_addr_size(addr));
3746 mc->context = context; 3923 mc->context = context;
3747 mc->id_priv = id_priv; 3924 mc->id_priv = id_priv;
3748 3925 mc->igmp_joined = false;
3749 spin_lock(&id_priv->lock); 3926 spin_lock(&id_priv->lock);
3750 list_add(&mc->list, &id_priv->mc_list); 3927 list_add(&mc->list, &id_priv->mc_list);
3751 spin_unlock(&id_priv->lock); 3928 spin_unlock(&id_priv->lock);
@@ -3790,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
3790 if (rdma_cap_ib_mcast(id->device, id->port_num)) { 3967 if (rdma_cap_ib_mcast(id->device, id->port_num)) {
3791 ib_sa_free_multicast(mc->multicast.ib); 3968 ib_sa_free_multicast(mc->multicast.ib);
3792 kfree(mc); 3969 kfree(mc);
3793 } else if (rdma_protocol_roce(id->device, id->port_num)) 3970 } else if (rdma_protocol_roce(id->device, id->port_num)) {
3971 if (mc->igmp_joined) {
3972 struct rdma_dev_addr *dev_addr =
3973 &id->route.addr.dev_addr;
3974 struct net_device *ndev = NULL;
3975
3976 if (dev_addr->bound_dev_if)
3977 ndev = dev_get_by_index(&init_net,
3978 dev_addr->bound_dev_if);
3979 if (ndev) {
3980 cma_igmp_send(ndev,
3981 &mc->multicast.ib->rec.mgid,
3982 false);
3983 dev_put(ndev);
3984 }
3985 mc->igmp_joined = false;
3986 }
3794 kref_put(&mc->mcref, release_mc); 3987 kref_put(&mc->mcref, release_mc);
3795 3988 }
3796 return; 3989 return;
3797 } 3990 }
3798 } 3991 }
@@ -3861,12 +4054,27 @@ static void cma_add_one(struct ib_device *device)
3861{ 4054{
3862 struct cma_device *cma_dev; 4055 struct cma_device *cma_dev;
3863 struct rdma_id_private *id_priv; 4056 struct rdma_id_private *id_priv;
4057 unsigned int i;
4058 unsigned long supported_gids = 0;
3864 4059
3865 cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); 4060 cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
3866 if (!cma_dev) 4061 if (!cma_dev)
3867 return; 4062 return;
3868 4063
3869 cma_dev->device = device; 4064 cma_dev->device = device;
4065 cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
4066 sizeof(*cma_dev->default_gid_type),
4067 GFP_KERNEL);
4068 if (!cma_dev->default_gid_type) {
4069 kfree(cma_dev);
4070 return;
4071 }
4072 for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
4073 supported_gids = roce_gid_type_mask_support(device, i);
4074 WARN_ON(!supported_gids);
4075 cma_dev->default_gid_type[i - rdma_start_port(device)] =
4076 find_first_bit(&supported_gids, BITS_PER_LONG);
4077 }
3870 4078
3871 init_completion(&cma_dev->comp); 4079 init_completion(&cma_dev->comp);
3872 atomic_set(&cma_dev->refcount, 1); 4080 atomic_set(&cma_dev->refcount, 1);
@@ -3946,6 +4154,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
3946 mutex_unlock(&lock); 4154 mutex_unlock(&lock);
3947 4155
3948 cma_process_remove(cma_dev); 4156 cma_process_remove(cma_dev);
4157 kfree(cma_dev->default_gid_type);
3949 kfree(cma_dev); 4158 kfree(cma_dev);
3950} 4159}
3951 4160
@@ -4079,6 +4288,7 @@ static int __init cma_init(void)
4079 4288
4080 if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table)) 4289 if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
4081 printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n"); 4290 printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
4291 cma_configfs_init();
4082 4292
4083 return 0; 4293 return 0;
4084 4294
@@ -4093,6 +4303,7 @@ err_wq:
4093 4303
4094static void __exit cma_cleanup(void) 4304static void __exit cma_cleanup(void)
4095{ 4305{
4306 cma_configfs_exit();
4096 ibnl_remove_client(RDMA_NL_RDMA_CM); 4307 ibnl_remove_client(RDMA_NL_RDMA_CM);
4097 ib_unregister_client(&cma_client); 4308 ib_unregister_client(&cma_client);
4098 unregister_netdevice_notifier(&cma_nb); 4309 unregister_netdevice_notifier(&cma_nb);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
new file mode 100644
index 000000000000..18b112aa577e
--- /dev/null
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -0,0 +1,321 @@
1/*
2 * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <linux/configfs.h>
35#include <rdma/ib_verbs.h>
36#include "core_priv.h"
37
38struct cma_device;
39
40struct cma_dev_group;
41
42struct cma_dev_port_group {
43 unsigned int port_num;
44 struct cma_dev_group *cma_dev_group;
45 struct config_group group;
46};
47
48struct cma_dev_group {
49 char name[IB_DEVICE_NAME_MAX];
50 struct config_group device_group;
51 struct config_group ports_group;
52 struct config_group *default_dev_group[2];
53 struct config_group **default_ports_group;
54 struct cma_dev_port_group *ports;
55};
56
57static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
58{
59 struct config_group *group;
60
61 if (!item)
62 return NULL;
63
64 group = container_of(item, struct config_group, cg_item);
65 return container_of(group, struct cma_dev_port_group, group);
66}
67
68static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
69{
70 return !strcmp(ib_dev->name, cookie);
71}
72
73static int cma_configfs_params_get(struct config_item *item,
74 struct cma_device **pcma_dev,
75 struct cma_dev_port_group **pgroup)
76{
77 struct cma_dev_port_group *group = to_dev_port_group(item);
78 struct cma_device *cma_dev;
79
80 if (!group)
81 return -ENODEV;
82
83 cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
84 group->cma_dev_group->name);
85 if (!cma_dev)
86 return -ENODEV;
87
88 *pcma_dev = cma_dev;
89 *pgroup = group;
90
91 return 0;
92}
93
94static void cma_configfs_params_put(struct cma_device *cma_dev)
95{
96 cma_deref_dev(cma_dev);
97}
98
99static ssize_t default_roce_mode_show(struct config_item *item,
100 char *buf)
101{
102 struct cma_device *cma_dev;
103 struct cma_dev_port_group *group;
104 int gid_type;
105 ssize_t ret;
106
107 ret = cma_configfs_params_get(item, &cma_dev, &group);
108 if (ret)
109 return ret;
110
111 gid_type = cma_get_default_gid_type(cma_dev, group->port_num);
112 cma_configfs_params_put(cma_dev);
113
114 if (gid_type < 0)
115 return gid_type;
116
117 return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
118}
119
120static ssize_t default_roce_mode_store(struct config_item *item,
121 const char *buf, size_t count)
122{
123 struct cma_device *cma_dev;
124 struct cma_dev_port_group *group;
125 int gid_type = ib_cache_gid_parse_type_str(buf);
126 ssize_t ret;
127
128 if (gid_type < 0)
129 return -EINVAL;
130
131 ret = cma_configfs_params_get(item, &cma_dev, &group);
132 if (ret)
133 return ret;
134
135 ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
136
137 cma_configfs_params_put(cma_dev);
138
139 return !ret ? strnlen(buf, count) : ret;
140}
141
142CONFIGFS_ATTR(, default_roce_mode);
143
144static struct configfs_attribute *cma_configfs_attributes[] = {
145 &attr_default_roce_mode,
146 NULL,
147};
148
149static struct config_item_type cma_port_group_type = {
150 .ct_attrs = cma_configfs_attributes,
151 .ct_owner = THIS_MODULE
152};
153
154static int make_cma_ports(struct cma_dev_group *cma_dev_group,
155 struct cma_device *cma_dev)
156{
157 struct ib_device *ibdev;
158 unsigned int i;
159 unsigned int ports_num;
160 struct cma_dev_port_group *ports;
161 struct config_group **ports_group;
162 int err;
163
164 ibdev = cma_get_ib_dev(cma_dev);
165
166 if (!ibdev)
167 return -ENODEV;
168
169 ports_num = ibdev->phys_port_cnt;
170 ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
171 GFP_KERNEL);
172 ports_group = kcalloc(ports_num + 1, sizeof(*ports_group), GFP_KERNEL);
173
174 if (!ports || !ports_group) {
175 err = -ENOMEM;
176 goto free;
177 }
178
179 for (i = 0; i < ports_num; i++) {
180 char port_str[10];
181
182 ports[i].port_num = i + 1;
183 snprintf(port_str, sizeof(port_str), "%u", i + 1);
184 ports[i].cma_dev_group = cma_dev_group;
185 config_group_init_type_name(&ports[i].group,
186 port_str,
187 &cma_port_group_type);
188 ports_group[i] = &ports[i].group;
189 }
190 ports_group[i] = NULL;
191 cma_dev_group->default_ports_group = ports_group;
192 cma_dev_group->ports = ports;
193
194 return 0;
195free:
196 kfree(ports);
197 kfree(ports_group);
198 cma_dev_group->ports = NULL;
199 cma_dev_group->default_ports_group = NULL;
200 return err;
201}
202
203static void release_cma_dev(struct config_item *item)
204{
205 struct config_group *group = container_of(item, struct config_group,
206 cg_item);
207 struct cma_dev_group *cma_dev_group = container_of(group,
208 struct cma_dev_group,
209 device_group);
210
211 kfree(cma_dev_group);
212};
213
214static void release_cma_ports_group(struct config_item *item)
215{
216 struct config_group *group = container_of(item, struct config_group,
217 cg_item);
218 struct cma_dev_group *cma_dev_group = container_of(group,
219 struct cma_dev_group,
220 ports_group);
221
222 kfree(cma_dev_group->ports);
223 kfree(cma_dev_group->default_ports_group);
224 cma_dev_group->ports = NULL;
225 cma_dev_group->default_ports_group = NULL;
226};
227
228static struct configfs_item_operations cma_ports_item_ops = {
229 .release = release_cma_ports_group
230};
231
232static struct config_item_type cma_ports_group_type = {
233 .ct_item_ops = &cma_ports_item_ops,
234 .ct_owner = THIS_MODULE
235};
236
237static struct configfs_item_operations cma_device_item_ops = {
238 .release = release_cma_dev
239};
240
241static struct config_item_type cma_device_group_type = {
242 .ct_item_ops = &cma_device_item_ops,
243 .ct_owner = THIS_MODULE
244};
245
246static struct config_group *make_cma_dev(struct config_group *group,
247 const char *name)
248{
249 int err = -ENODEV;
250 struct cma_device *cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
251 (void *)name);
252 struct cma_dev_group *cma_dev_group = NULL;
253
254 if (!cma_dev)
255 goto fail;
256
257 cma_dev_group = kzalloc(sizeof(*cma_dev_group), GFP_KERNEL);
258
259 if (!cma_dev_group) {
260 err = -ENOMEM;
261 goto fail;
262 }
263
264 strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
265
266 err = make_cma_ports(cma_dev_group, cma_dev);
267 if (err)
268 goto fail;
269
270 cma_dev_group->ports_group.default_groups =
271 cma_dev_group->default_ports_group;
272 config_group_init_type_name(&cma_dev_group->ports_group, "ports",
273 &cma_ports_group_type);
274
275 cma_dev_group->device_group.default_groups
276 = cma_dev_group->default_dev_group;
277 cma_dev_group->default_dev_group[0] = &cma_dev_group->ports_group;
278 cma_dev_group->default_dev_group[1] = NULL;
279
280 config_group_init_type_name(&cma_dev_group->device_group, name,
281 &cma_device_group_type);
282
283 cma_deref_dev(cma_dev);
284 return &cma_dev_group->device_group;
285
286fail:
287 if (cma_dev)
288 cma_deref_dev(cma_dev);
289 kfree(cma_dev_group);
290 return ERR_PTR(err);
291}
292
293static struct configfs_group_operations cma_subsys_group_ops = {
294 .make_group = make_cma_dev,
295};
296
297static struct config_item_type cma_subsys_type = {
298 .ct_group_ops = &cma_subsys_group_ops,
299 .ct_owner = THIS_MODULE,
300};
301
302static struct configfs_subsystem cma_subsys = {
303 .su_group = {
304 .cg_item = {
305 .ci_namebuf = "rdma_cm",
306 .ci_type = &cma_subsys_type,
307 },
308 },
309};
310
311int __init cma_configfs_init(void)
312{
313 config_group_init(&cma_subsys.su_group);
314 mutex_init(&cma_subsys.su_mutex);
315 return configfs_register_subsystem(&cma_subsys);
316}
317
318void __exit cma_configfs_exit(void)
319{
320 configfs_unregister_subsystem(&cma_subsys);
321}
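
For completeness, a userspace sketch of driving this interface. The mount point /sys/kernel/config, the device name mlx5_0, and the string "RoCE v2" are all assumptions here: creating the device directory is what triggers make_cma_dev(), and the accepted strings are whatever ib_cache_gid_parse_type_str() recognizes.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
        const char *dev_dir = "/sys/kernel/config/rdma_cm/mlx5_0";
        char attr[256];
        int fd;

        /* mkdir of the device directory instantiates the per-device group. */
        if (mkdir(dev_dir, 0755) && errno != EEXIST)
                return 1;

        snprintf(attr, sizeof(attr), "%s/ports/1/default_roce_mode", dev_dir);
        fd = open(attr, O_WRONLY);
        if (fd < 0)
                return 1;

        if (write(fd, "RoCE v2", strlen("RoCE v2")) < 0) {
                close(fd);
                return 1;
        }

        return close(fd);
}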
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 5cf6eb716f00..eab32215756b 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -38,6 +38,32 @@
38 38
39#include <rdma/ib_verbs.h> 39#include <rdma/ib_verbs.h>
40 40
41#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
42int cma_configfs_init(void);
43void cma_configfs_exit(void);
44#else
45static inline int cma_configfs_init(void)
46{
47 return 0;
48}
49
50static inline void cma_configfs_exit(void)
51{
52}
53#endif
54struct cma_device;
55void cma_ref_dev(struct cma_device *cma_dev);
56void cma_deref_dev(struct cma_device *cma_dev);
57typedef bool (*cma_device_filter)(struct ib_device *, void *);
58struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
59 void *cookie);
60int cma_get_default_gid_type(struct cma_device *cma_dev,
61 unsigned int port);
62int cma_set_default_gid_type(struct cma_device *cma_dev,
63 unsigned int port,
64 enum ib_gid_type default_gid_type);
65struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
66
41int ib_device_register_sysfs(struct ib_device *device, 67int ib_device_register_sysfs(struct ib_device *device,
42 int (*port_callback)(struct ib_device *, 68 int (*port_callback)(struct ib_device *,
43 u8, struct kobject *)); 69 u8, struct kobject *));
@@ -70,8 +96,13 @@ enum ib_cache_gid_default_mode {
70 IB_CACHE_GID_DEFAULT_MODE_DELETE 96 IB_CACHE_GID_DEFAULT_MODE_DELETE
71}; 97};
72 98
99int ib_cache_gid_parse_type_str(const char *buf);
100
101const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
102
73void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, 103void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
74 struct net_device *ndev, 104 struct net_device *ndev,
105 unsigned long gid_type_mask,
75 enum ib_cache_gid_default_mode mode); 106 enum ib_cache_gid_default_mode mode);
76 107
77int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, 108int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
@@ -87,9 +118,23 @@ int roce_gid_mgmt_init(void);
87void roce_gid_mgmt_cleanup(void); 118void roce_gid_mgmt_cleanup(void);
88 119
89int roce_rescan_device(struct ib_device *ib_dev); 120int roce_rescan_device(struct ib_device *ib_dev);
121unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
90 122
91int ib_cache_setup_one(struct ib_device *device); 123int ib_cache_setup_one(struct ib_device *device);
92void ib_cache_cleanup_one(struct ib_device *device); 124void ib_cache_cleanup_one(struct ib_device *device);
93void ib_cache_release_one(struct ib_device *device); 125void ib_cache_release_one(struct ib_device *device);
94 126
127static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
128 struct net_device *upper)
129{
130 struct net_device *_upper = NULL;
131 struct list_head *iter;
132
133 netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
134 if (_upper == upper)
135 break;
136
137 return _upper == upper;
138}
139
95#endif /* _CORE_PRIV_H */ 140#endif /* _CORE_PRIV_H */
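
rdma_is_upper_dev_rcu() iterates the upper-device list with the _rcu walker, so callers need to be inside an RCU read-side section; a hedged usage sketch (example_* is illustrative):

#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include "core_priv.h"

/* Does "event_ndev" stand for "gid_ndev", either directly or as one of
 * its upper devices (bond, VLAN, ...)? */
static bool example_ndev_matches(struct net_device *gid_ndev,
                                 struct net_device *event_ndev)
{
        bool match;

        rcu_read_lock();
        match = gid_ndev == event_ndev ||
                rdma_is_upper_dev_rcu(gid_ndev, event_ndev);
        rcu_read_unlock();

        return match;
}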
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
new file mode 100644
index 000000000000..a754fc727de5
--- /dev/null
+++ b/drivers/infiniband/core/cq.c
@@ -0,0 +1,209 @@
1/*
2 * Copyright (c) 2015 HGST, a Western Digital Company.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#include <linux/module.h>
14#include <linux/err.h>
15#include <linux/slab.h>
16#include <rdma/ib_verbs.h>
17
18/* # of WCs to poll for with a single call to ib_poll_cq */
19#define IB_POLL_BATCH 16
20
21/* # of WCs to iterate over before yielding */
22#define IB_POLL_BUDGET_IRQ 256
23#define IB_POLL_BUDGET_WORKQUEUE 65536
24
25#define IB_POLL_FLAGS \
26 (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
27
28static int __ib_process_cq(struct ib_cq *cq, int budget)
29{
30 int i, n, completed = 0;
31
32 while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
33 for (i = 0; i < n; i++) {
34 struct ib_wc *wc = &cq->wc[i];
35
36 if (wc->wr_cqe)
37 wc->wr_cqe->done(cq, wc);
38 else
39 WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
40 }
41
42 completed += n;
43
44 if (n != IB_POLL_BATCH ||
45 (budget != -1 && completed >= budget))
46 break;
47 }
48
49 return completed;
50}
51
52/**
53 * ib_process_cq_direct - process a CQ in caller context
54 * @cq: CQ to process
55 * @budget: number of CQEs to poll for
56 *
57 * This function is used to process all outstanding CQ entries on a
58 * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
59 * context and does not ask for completion interrupts from the HCA.
60 *
61 * Note: for compatibility reasons -1 can be passed in %budget for unlimited
62 * polling. Do not use this feature in new code; it will be removed soon.
63 */
64int ib_process_cq_direct(struct ib_cq *cq, int budget)
65{
66 WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
67
68 return __ib_process_cq(cq, budget);
69}
70EXPORT_SYMBOL(ib_process_cq_direct);
71
72static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
73{
74 WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
75}
76
77static int ib_poll_handler(struct irq_poll *iop, int budget)
78{
79 struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
80 int completed;
81
82 completed = __ib_process_cq(cq, budget);
83 if (completed < budget) {
84 irq_poll_complete(&cq->iop);
85 if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
86 irq_poll_sched(&cq->iop);
87 }
88
89 return completed;
90}
91
92static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
93{
94 irq_poll_sched(&cq->iop);
95}
96
97static void ib_cq_poll_work(struct work_struct *work)
98{
99 struct ib_cq *cq = container_of(work, struct ib_cq, work);
100 int completed;
101
102 completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
103 if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
104 ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
105 queue_work(ib_comp_wq, &cq->work);
106}
107
108static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
109{
110 queue_work(ib_comp_wq, &cq->work);
111}
112
113/**
114 * ib_alloc_cq - allocate a completion queue
115 * @dev: device to allocate the CQ for
116 * @private: driver private data, accessible from cq->cq_context
117 * @nr_cqe: number of CQEs to allocate
118 * @comp_vector: HCA completion vectors for this CQ
119 * @poll_ctx: context to poll the CQ from.
120 *
121 * This is the proper interface to allocate a CQ for in-kernel users. A
122 * CQ allocated with this interface will automatically be polled from the
123 * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
124 * to use this CQ abstraction.
125 */
126struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
127 int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
128{
129 struct ib_cq_init_attr cq_attr = {
130 .cqe = nr_cqe,
131 .comp_vector = comp_vector,
132 };
133 struct ib_cq *cq;
134 int ret = -ENOMEM;
135
136 cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
137 if (IS_ERR(cq))
138 return cq;
139
140 cq->device = dev;
141 cq->uobject = NULL;
142 cq->event_handler = NULL;
143 cq->cq_context = private;
144 cq->poll_ctx = poll_ctx;
145 atomic_set(&cq->usecnt, 0);
146
147 cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
148 if (!cq->wc)
149 goto out_destroy_cq;
150
151 switch (cq->poll_ctx) {
152 case IB_POLL_DIRECT:
153 cq->comp_handler = ib_cq_completion_direct;
154 break;
155 case IB_POLL_SOFTIRQ:
156 cq->comp_handler = ib_cq_completion_softirq;
157
158 irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
159 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
160 break;
161 case IB_POLL_WORKQUEUE:
162 cq->comp_handler = ib_cq_completion_workqueue;
163 INIT_WORK(&cq->work, ib_cq_poll_work);
164 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
165 break;
166 default:
167 ret = -EINVAL;
168 goto out_free_wc;
169 }
170
171 return cq;
172
173out_free_wc:
174 kfree(cq->wc);
175out_destroy_cq:
176 cq->device->destroy_cq(cq);
177 return ERR_PTR(ret);
178}
179EXPORT_SYMBOL(ib_alloc_cq);
180
181/**
182 * ib_free_cq - free a completion queue
183 * @cq: completion queue to free.
184 */
185void ib_free_cq(struct ib_cq *cq)
186{
187 int ret;
188
189 if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
190 return;
191
192 switch (cq->poll_ctx) {
193 case IB_POLL_DIRECT:
194 break;
195 case IB_POLL_SOFTIRQ:
196 irq_poll_disable(&cq->iop);
197 break;
198 case IB_POLL_WORKQUEUE:
199 flush_work(&cq->work);
200 break;
201 default:
202 WARN_ON_ONCE(1);
203 }
204
205 kfree(cq->wc);
206 ret = cq->device->destroy_cq(cq);
207 WARN_ON_ONCE(ret);
208}
209EXPORT_SYMBOL(ib_free_cq);
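
A hedged consumer sketch of this API; example_* names are illustrative, and the QP setup and the eventual ib_post_send() call are elided. The key point is that each request embeds an ib_cqe whose done callback replaces the old wr_id cookie:

#include <linux/err.h>
#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Per-request context: embed an ib_cqe and recover it in the completion
 * handler via container_of(). */
struct example_req {
        struct ib_cqe   cqe;
        /* ... ULP state ... */
};

static void example_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
        struct example_req *req =
                container_of(wc->wr_cqe, struct example_req, cqe);

        if (wc->status != IB_WC_SUCCESS)
                pr_err("example send failed, status %d\n", wc->status);
        /* complete or free req here */
}

static struct ib_cq *example_create_cq(struct ib_device *dev)
{
        /* 128 CQEs on completion vector 0, polled from softirq context;
         * pair with ib_free_cq() on teardown. */
        return ib_alloc_cq(dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
}

static void example_prepare_send(struct example_req *req,
                                 struct ib_send_wr *wr)
{
        req->cqe.done = example_send_done;
        wr->wr_cqe = &req->cqe;         /* instead of wr->wr_id */
        wr->opcode = IB_WR_SEND;
}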
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 179e8134d57f..00da80e02154 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,7 @@ struct ib_client_data {
58 bool going_down; 58 bool going_down;
59}; 59};
60 60
61struct workqueue_struct *ib_comp_wq;
61struct workqueue_struct *ib_wq; 62struct workqueue_struct *ib_wq;
62EXPORT_SYMBOL_GPL(ib_wq); 63EXPORT_SYMBOL_GPL(ib_wq);
63 64
@@ -325,6 +326,7 @@ int ib_register_device(struct ib_device *device,
325{ 326{
326 int ret; 327 int ret;
327 struct ib_client *client; 328 struct ib_client *client;
329 struct ib_udata uhw = {.outlen = 0, .inlen = 0};
328 330
329 mutex_lock(&device_mutex); 331 mutex_lock(&device_mutex);
330 332
@@ -352,6 +354,13 @@ int ib_register_device(struct ib_device *device,
352 goto out; 354 goto out;
353 } 355 }
354 356
357 memset(&device->attrs, 0, sizeof(device->attrs));
358 ret = device->query_device(device, &device->attrs, &uhw);
359 if (ret) {
360 printk(KERN_WARNING "Couldn't query the device attributes\n");
361 goto out;
362 }
363
355 ret = ib_device_register_sysfs(device, port_callback); 364 ret = ib_device_register_sysfs(device, port_callback);
356 if (ret) { 365 if (ret) {
357 printk(KERN_WARNING "Couldn't register device %s with driver model\n", 366 printk(KERN_WARNING "Couldn't register device %s with driver model\n",
@@ -628,25 +637,6 @@ void ib_dispatch_event(struct ib_event *event)
628EXPORT_SYMBOL(ib_dispatch_event); 637EXPORT_SYMBOL(ib_dispatch_event);
629 638
630/** 639/**
631 * ib_query_device - Query IB device attributes
632 * @device:Device to query
633 * @device_attr:Device attributes
634 *
635 * ib_query_device() returns the attributes of a device through the
636 * @device_attr pointer.
637 */
638int ib_query_device(struct ib_device *device,
639 struct ib_device_attr *device_attr)
640{
641 struct ib_udata uhw = {.outlen = 0, .inlen = 0};
642
643 memset(device_attr, 0, sizeof(*device_attr));
644
645 return device->query_device(device, device_attr, &uhw);
646}
647EXPORT_SYMBOL(ib_query_device);
648
649/**
650 * ib_query_port - Query IB port attributes 640 * ib_query_port - Query IB port attributes
651 * @device:Device to query 641 * @device:Device to query
652 * @port_num:Port number to query 642 * @port_num:Port number to query
@@ -825,26 +815,31 @@ EXPORT_SYMBOL(ib_modify_port);
825 * a specified GID value occurs. 815 * a specified GID value occurs.
826 * @device: The device to query. 816 * @device: The device to query.
827 * @gid: The GID value to search for. 817 * @gid: The GID value to search for.
818 * @gid_type: Type of GID.
828 * @ndev: The ndev related to the GID to search for. 819 * @ndev: The ndev related to the GID to search for.
829 * @port_num: The port number of the device where the GID value was found. 820 * @port_num: The port number of the device where the GID value was found.
830 * @index: The index into the GID table where the GID was found. This 821 * @index: The index into the GID table where the GID was found. This
831 * parameter may be NULL. 822 * parameter may be NULL.
832 */ 823 */
833int ib_find_gid(struct ib_device *device, union ib_gid *gid, 824int ib_find_gid(struct ib_device *device, union ib_gid *gid,
834 struct net_device *ndev, u8 *port_num, u16 *index) 825 enum ib_gid_type gid_type, struct net_device *ndev,
826 u8 *port_num, u16 *index)
835{ 827{
836 union ib_gid tmp_gid; 828 union ib_gid tmp_gid;
837 int ret, port, i; 829 int ret, port, i;
838 830
839 for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { 831 for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
840 if (rdma_cap_roce_gid_table(device, port)) { 832 if (rdma_cap_roce_gid_table(device, port)) {
841 if (!ib_find_cached_gid_by_port(device, gid, port, 833 if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
842 ndev, index)) { 834 ndev, index)) {
843 *port_num = port; 835 *port_num = port;
844 return 0; 836 return 0;
845 } 837 }
846 } 838 }
847 839
840 if (gid_type != IB_GID_TYPE_IB)
841 continue;
842
848 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { 843 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
849 ret = ib_query_gid(device, port, i, &tmp_gid, NULL); 844 ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
850 if (ret) 845 if (ret)
@@ -954,10 +949,18 @@ static int __init ib_core_init(void)
954 if (!ib_wq) 949 if (!ib_wq)
955 return -ENOMEM; 950 return -ENOMEM;
956 951
952 ib_comp_wq = alloc_workqueue("ib-comp-wq",
953 WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
954 WQ_UNBOUND_MAX_ACTIVE);
955 if (!ib_comp_wq) {
956 ret = -ENOMEM;
957 goto err;
958 }
959
957 ret = class_register(&ib_class); 960 ret = class_register(&ib_class);
958 if (ret) { 961 if (ret) {
959 printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); 962 printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
960 goto err; 963 goto err_comp;
961 } 964 }
962 965
963 ret = ibnl_init(); 966 ret = ibnl_init();
@@ -972,7 +975,8 @@ static int __init ib_core_init(void)
972 975
973err_sysfs: 976err_sysfs:
974 class_unregister(&ib_class); 977 class_unregister(&ib_class);
975 978err_comp:
979 destroy_workqueue(ib_comp_wq);
976err: 980err:
977 destroy_workqueue(ib_wq); 981 destroy_workqueue(ib_wq);
978 return ret; 982 return ret;
@@ -983,6 +987,7 @@ static void __exit ib_core_cleanup(void)
983 ib_cache_cleanup(); 987 ib_cache_cleanup();
984 ibnl_cleanup(); 988 ibnl_cleanup();
985 class_unregister(&ib_class); 989 class_unregister(&ib_class);
990 destroy_workqueue(ib_comp_wq);
986 /* Make sure that any pending umem accounting work is done. */ 991 /* Make sure that any pending umem accounting work is done. */
987 destroy_workqueue(ib_wq); 992 destroy_workqueue(ib_wq);
988} 993}
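
Since ib_register_device() now caches the query_device() result in ib_device.attrs, consumers read device limits directly, as the cm and fmr_pool changes in this series do; a trivial sketch (example_* is illustrative):

#include <rdma/ib_verbs.h>

/* Read a cached device attribute instead of calling the removed
 * ib_query_device(). */
static u8 example_local_ack_delay(struct ib_device *dev)
{
        return dev->attrs.local_ca_ack_delay;
}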
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 9f5ad7cc33c8..6ac3683c144b 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -212,7 +212,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
212{ 212{
213 struct ib_device *device; 213 struct ib_device *device;
214 struct ib_fmr_pool *pool; 214 struct ib_fmr_pool *pool;
215 struct ib_device_attr *attr;
216 int i; 215 int i;
217 int ret; 216 int ret;
218 int max_remaps; 217 int max_remaps;
@@ -228,25 +227,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
228 return ERR_PTR(-ENOSYS); 227 return ERR_PTR(-ENOSYS);
229 } 228 }
230 229
231 attr = kmalloc(sizeof *attr, GFP_KERNEL); 230 if (!device->attrs.max_map_per_fmr)
232 if (!attr) {
233 printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
234 return ERR_PTR(-ENOMEM);
235 }
236
237 ret = ib_query_device(device, attr);
238 if (ret) {
239 printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
240 kfree(attr);
241 return ERR_PTR(ret);
242 }
243
244 if (!attr->max_map_per_fmr)
245 max_remaps = IB_FMR_MAX_REMAPS; 231 max_remaps = IB_FMR_MAX_REMAPS;
246 else 232 else
247 max_remaps = attr->max_map_per_fmr; 233 max_remaps = device->attrs.max_map_per_fmr;
248
249 kfree(attr);
250 234
251 pool = kmalloc(sizeof *pool, GFP_KERNEL); 235 pool = kmalloc(sizeof *pool, GFP_KERNEL);
252 if (!pool) { 236 if (!pool) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 2281de122038..9fa5bf33f5a3 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -84,6 +84,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
84 u8 mgmt_class); 84 u8 mgmt_class);
85static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, 85static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
86 struct ib_mad_agent_private *agent_priv); 86 struct ib_mad_agent_private *agent_priv);
87static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
88 struct ib_wc *wc);
89static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
87 90
88/* 91/*
89 * Returns a ib_mad_port_private structure or NULL for a device/port 92 * Returns a ib_mad_port_private structure or NULL for a device/port
@@ -681,7 +684,7 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
681 684
682 atomic_inc(&mad_snoop_priv->refcount); 685 atomic_inc(&mad_snoop_priv->refcount);
683 spin_unlock_irqrestore(&qp_info->snoop_lock, flags); 686 spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
684 mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, 687 mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
685 mad_recv_wc); 688 mad_recv_wc);
686 deref_snoop_agent(mad_snoop_priv); 689 deref_snoop_agent(mad_snoop_priv);
687 spin_lock_irqsave(&qp_info->snoop_lock, flags); 690 spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -689,12 +692,11 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
689 spin_unlock_irqrestore(&qp_info->snoop_lock, flags); 692 spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
690} 693}
691 694
692static void build_smp_wc(struct ib_qp *qp, 695static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
693 u64 wr_id, u16 slid, u16 pkey_index, u8 port_num, 696 u16 pkey_index, u8 port_num, struct ib_wc *wc)
694 struct ib_wc *wc)
695{ 697{
696 memset(wc, 0, sizeof *wc); 698 memset(wc, 0, sizeof *wc);
697 wc->wr_id = wr_id; 699 wc->wr_cqe = cqe;
698 wc->status = IB_WC_SUCCESS; 700 wc->status = IB_WC_SUCCESS;
699 wc->opcode = IB_WC_RECV; 701 wc->opcode = IB_WC_RECV;
700 wc->pkey_index = pkey_index; 702 wc->pkey_index = pkey_index;
@@ -832,7 +834,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
832 } 834 }
833 835
834 build_smp_wc(mad_agent_priv->agent.qp, 836 build_smp_wc(mad_agent_priv->agent.qp,
835 send_wr->wr.wr_id, drslid, 837 send_wr->wr.wr_cqe, drslid,
836 send_wr->pkey_index, 838 send_wr->pkey_index,
837 send_wr->port_num, &mad_wc); 839 send_wr->port_num, &mad_wc);
838 840
@@ -1039,7 +1041,9 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
1039 1041
1040 mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; 1042 mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
1041 1043
1042 mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr; 1044 mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
1045
1046 mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
1043 mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; 1047 mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
1044 mad_send_wr->send_wr.wr.num_sge = 2; 1048 mad_send_wr->send_wr.wr.num_sge = 2;
1045 mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; 1049 mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
@@ -1151,8 +1155,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
1151 1155
1152 /* Set WR ID to find mad_send_wr upon completion */ 1156 /* Set WR ID to find mad_send_wr upon completion */
1153 qp_info = mad_send_wr->mad_agent_priv->qp_info; 1157 qp_info = mad_send_wr->mad_agent_priv->qp_info;
1154 mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
1155 mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; 1158 mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
1159 mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
1160 mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
1156 1161
1157 mad_agent = mad_send_wr->send_buf.mad_agent; 1162 mad_agent = mad_send_wr->send_buf.mad_agent;
1158 sge = mad_send_wr->sg_list; 1163 sge = mad_send_wr->sg_list;
@@ -1982,9 +1987,9 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1982 /* user rmpp is in effect 1987 /* user rmpp is in effect
1983 * and this is an active RMPP MAD 1988 * and this is an active RMPP MAD
1984 */ 1989 */
1985 mad_recv_wc->wc->wr_id = 0; 1990 mad_agent_priv->agent.recv_handler(
1986 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, 1991 &mad_agent_priv->agent, NULL,
1987 mad_recv_wc); 1992 mad_recv_wc);
1988 atomic_dec(&mad_agent_priv->refcount); 1993 atomic_dec(&mad_agent_priv->refcount);
1989 } else { 1994 } else {
1990 /* not user rmpp, revert to normal behavior and 1995 /* not user rmpp, revert to normal behavior and
@@ -1998,9 +2003,10 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1998 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2003 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1999 2004
2000 /* Defined behavior is to complete response before request */ 2005 /* Defined behavior is to complete response before request */
2001 mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; 2006 mad_agent_priv->agent.recv_handler(
2002 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, 2007 &mad_agent_priv->agent,
2003 mad_recv_wc); 2008 &mad_send_wr->send_buf,
2009 mad_recv_wc);
2004 atomic_dec(&mad_agent_priv->refcount); 2010 atomic_dec(&mad_agent_priv->refcount);
2005 2011
2006 mad_send_wc.status = IB_WC_SUCCESS; 2012 mad_send_wc.status = IB_WC_SUCCESS;
@@ -2009,7 +2015,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
2009 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); 2015 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2010 } 2016 }
2011 } else { 2017 } else {
2012 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, 2018 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
2013 mad_recv_wc); 2019 mad_recv_wc);
2014 deref_mad_agent(mad_agent_priv); 2020 deref_mad_agent(mad_agent_priv);
2015 } 2021 }
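With the wr_id cookie gone, the agent's recv_handler callback is handed the matching ib_mad_send_buf directly (NULL for unsolicited MADs) instead of fishing a pointer out of wc->wr_id. A hedged consumer-side sketch of the new prototype (handler name and context use are illustrative):

	static void my_recv_handler(struct ib_mad_agent *agent,
				    struct ib_mad_send_buf *send_buf,
				    struct ib_mad_recv_wc *mad_recv_wc)
	{
		if (send_buf) {
			/* response to a request we sent; per-request state
			 * was stashed in send_buf->context[] */
		} else {
			/* unsolicited MAD */
		}
		ib_free_recv_mad(mad_recv_wc);	/* release the receive buffers */
	}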
@@ -2172,13 +2178,14 @@ handle_smi(struct ib_mad_port_private *port_priv,
2172 return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); 2178 return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
2173} 2179}
2174 2180
2175static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, 2181static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2176 struct ib_wc *wc)
2177{ 2182{
2183 struct ib_mad_port_private *port_priv = cq->cq_context;
2184 struct ib_mad_list_head *mad_list =
2185 container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2178 struct ib_mad_qp_info *qp_info; 2186 struct ib_mad_qp_info *qp_info;
2179 struct ib_mad_private_header *mad_priv_hdr; 2187 struct ib_mad_private_header *mad_priv_hdr;
2180 struct ib_mad_private *recv, *response = NULL; 2188 struct ib_mad_private *recv, *response = NULL;
2181 struct ib_mad_list_head *mad_list;
2182 struct ib_mad_agent_private *mad_agent; 2189 struct ib_mad_agent_private *mad_agent;
2183 int port_num; 2190 int port_num;
2184 int ret = IB_MAD_RESULT_SUCCESS; 2191 int ret = IB_MAD_RESULT_SUCCESS;
@@ -2186,7 +2193,17 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
2186 u16 resp_mad_pkey_index = 0; 2193 u16 resp_mad_pkey_index = 0;
2187 bool opa; 2194 bool opa;
2188 2195
2189 mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; 2196 if (list_empty_careful(&port_priv->port_list))
2197 return;
2198
2199 if (wc->status != IB_WC_SUCCESS) {
2200 /*
2201 * Receive errors indicate that the QP has entered the error
2202 * state - error handling/shutdown code will cleanup
2203 */
2204 return;
2205 }
2206
2190 qp_info = mad_list->mad_queue->qp_info; 2207 qp_info = mad_list->mad_queue->qp_info;
2191 dequeue_mad(mad_list); 2208 dequeue_mad(mad_list);
2192 2209
@@ -2227,7 +2244,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
2227 response = alloc_mad_private(mad_size, GFP_KERNEL); 2244 response = alloc_mad_private(mad_size, GFP_KERNEL);
2228 if (!response) { 2245 if (!response) {
2229 dev_err(&port_priv->device->dev, 2246 dev_err(&port_priv->device->dev,
2230 "ib_mad_recv_done_handler no memory for response buffer\n"); 2247 "%s: no memory for response buffer\n", __func__);
2231 goto out; 2248 goto out;
2232 } 2249 }
2233 2250
@@ -2413,11 +2430,12 @@ done:
2413 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2430 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2414} 2431}
2415 2432
2416static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv, 2433static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
2417 struct ib_wc *wc)
2418{ 2434{
2435 struct ib_mad_port_private *port_priv = cq->cq_context;
2436 struct ib_mad_list_head *mad_list =
2437 container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2419 struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; 2438 struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
2420 struct ib_mad_list_head *mad_list;
2421 struct ib_mad_qp_info *qp_info; 2439 struct ib_mad_qp_info *qp_info;
2422 struct ib_mad_queue *send_queue; 2440 struct ib_mad_queue *send_queue;
2423 struct ib_send_wr *bad_send_wr; 2441 struct ib_send_wr *bad_send_wr;
@@ -2425,7 +2443,14 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
2425 unsigned long flags; 2443 unsigned long flags;
2426 int ret; 2444 int ret;
2427 2445
2428 mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; 2446 if (list_empty_careful(&port_priv->port_list))
2447 return;
2448
2449 if (wc->status != IB_WC_SUCCESS) {
2450 if (!ib_mad_send_error(port_priv, wc))
2451 return;
2452 }
2453
2429 mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, 2454 mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2430 mad_list); 2455 mad_list);
2431 send_queue = mad_list->mad_queue; 2456 send_queue = mad_list->mad_queue;
@@ -2490,24 +2515,15 @@ static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
2490 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); 2515 spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
2491} 2516}
2492 2517
2493static void mad_error_handler(struct ib_mad_port_private *port_priv, 2518static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
2494 struct ib_wc *wc) 2519 struct ib_wc *wc)
2495{ 2520{
2496 struct ib_mad_list_head *mad_list; 2521 struct ib_mad_list_head *mad_list =
2497 struct ib_mad_qp_info *qp_info; 2522 container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2523 struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
2498 struct ib_mad_send_wr_private *mad_send_wr; 2524 struct ib_mad_send_wr_private *mad_send_wr;
2499 int ret; 2525 int ret;
2500 2526
2501 /* Determine if failure was a send or receive */
2502 mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2503 qp_info = mad_list->mad_queue->qp_info;
2504 if (mad_list->mad_queue == &qp_info->recv_queue)
2505 /*
2506 * Receive errors indicate that the QP has entered the error
2507 * state - error handling/shutdown code will cleanup
2508 */
2509 return;
2510
2511 /* 2527 /*
2512 * Send errors will transition the QP to SQE - move 2528 * Send errors will transition the QP to SQE - move
2513 * QP to RTS and repost flushed work requests 2529 * QP to RTS and repost flushed work requests
@@ -2522,10 +2538,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
2522 mad_send_wr->retry = 0; 2538 mad_send_wr->retry = 0;
2523 ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, 2539 ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
2524 &bad_send_wr); 2540 &bad_send_wr);
2525 if (ret) 2541 if (!ret)
2526 ib_mad_send_done_handler(port_priv, wc); 2542 return false;
2527 } else 2543 }
2528 ib_mad_send_done_handler(port_priv, wc);
2529 } else { 2544 } else {
2530 struct ib_qp_attr *attr; 2545 struct ib_qp_attr *attr;
2531 2546
@@ -2539,42 +2554,14 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
2539 kfree(attr); 2554 kfree(attr);
2540 if (ret) 2555 if (ret)
2541 dev_err(&port_priv->device->dev, 2556 dev_err(&port_priv->device->dev,
2542 "mad_error_handler - ib_modify_qp to RTS : %d\n", 2557 "%s - ib_modify_qp to RTS: %d\n",
2543 ret); 2558 __func__, ret);
2544 else 2559 else
2545 mark_sends_for_retry(qp_info); 2560 mark_sends_for_retry(qp_info);
2546 } 2561 }
2547 ib_mad_send_done_handler(port_priv, wc);
2548 } 2562 }
2549}
2550 2563
2551/* 2564 return true;
2552 * IB MAD completion callback
2553 */
2554static void ib_mad_completion_handler(struct work_struct *work)
2555{
2556 struct ib_mad_port_private *port_priv;
2557 struct ib_wc wc;
2558
2559 port_priv = container_of(work, struct ib_mad_port_private, work);
2560 ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2561
2562 while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
2563 if (wc.status == IB_WC_SUCCESS) {
2564 switch (wc.opcode) {
2565 case IB_WC_SEND:
2566 ib_mad_send_done_handler(port_priv, &wc);
2567 break;
2568 case IB_WC_RECV:
2569 ib_mad_recv_done_handler(port_priv, &wc);
2570 break;
2571 default:
2572 BUG_ON(1);
2573 break;
2574 }
2575 } else
2576 mad_error_handler(port_priv, &wc);
2577 }
2578} 2565}
2579 2566
2580static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) 2567static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
@@ -2716,7 +2703,7 @@ static void local_completions(struct work_struct *work)
2716 * before request 2703 * before request
2717 */ 2704 */
2718 build_smp_wc(recv_mad_agent->agent.qp, 2705 build_smp_wc(recv_mad_agent->agent.qp,
2719 (unsigned long) local->mad_send_wr, 2706 local->mad_send_wr->send_wr.wr.wr_cqe,
2720 be16_to_cpu(IB_LID_PERMISSIVE), 2707 be16_to_cpu(IB_LID_PERMISSIVE),
2721 local->mad_send_wr->send_wr.pkey_index, 2708 local->mad_send_wr->send_wr.pkey_index,
2722 recv_mad_agent->agent.port_num, &wc); 2709 recv_mad_agent->agent.port_num, &wc);
@@ -2744,6 +2731,7 @@ static void local_completions(struct work_struct *work)
2744 IB_MAD_SNOOP_RECVS); 2731 IB_MAD_SNOOP_RECVS);
2745 recv_mad_agent->agent.recv_handler( 2732 recv_mad_agent->agent.recv_handler(
2746 &recv_mad_agent->agent, 2733 &recv_mad_agent->agent,
2734 &local->mad_send_wr->send_buf,
2747 &local->mad_priv->header.recv_wc); 2735 &local->mad_priv->header.recv_wc);
2748 spin_lock_irqsave(&recv_mad_agent->lock, flags); 2736 spin_lock_irqsave(&recv_mad_agent->lock, flags);
2749 atomic_dec(&recv_mad_agent->refcount); 2737 atomic_dec(&recv_mad_agent->refcount);
@@ -2855,17 +2843,6 @@ static void timeout_sends(struct work_struct *work)
2855 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2843 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2856} 2844}
2857 2845
2858static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
2859{
2860 struct ib_mad_port_private *port_priv = cq->cq_context;
2861 unsigned long flags;
2862
2863 spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2864 if (!list_empty(&port_priv->port_list))
2865 queue_work(port_priv->wq, &port_priv->work);
2866 spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2867}
2868
2869/* 2846/*
2870 * Allocate receive MADs and post receive WRs for them 2847 * Allocate receive MADs and post receive WRs for them
2871 */ 2848 */
@@ -2913,8 +2890,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2913 break; 2890 break;
2914 } 2891 }
2915 mad_priv->header.mapping = sg_list.addr; 2892 mad_priv->header.mapping = sg_list.addr;
2916 recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
2917 mad_priv->header.mad_list.mad_queue = recv_queue; 2893 mad_priv->header.mad_list.mad_queue = recv_queue;
2894 mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
2895 recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
2918 2896
2919 /* Post receive WR */ 2897 /* Post receive WR */
2920 spin_lock_irqsave(&recv_queue->lock, flags); 2898 spin_lock_irqsave(&recv_queue->lock, flags);
@@ -3151,7 +3129,6 @@ static int ib_mad_port_open(struct ib_device *device,
3151 unsigned long flags; 3129 unsigned long flags;
3152 char name[sizeof "ib_mad123"]; 3130 char name[sizeof "ib_mad123"];
3153 int has_smi; 3131 int has_smi;
3154 struct ib_cq_init_attr cq_attr = {};
3155 3132
3156 if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) 3133 if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
3157 return -EFAULT; 3134 return -EFAULT;
@@ -3179,10 +3156,8 @@ static int ib_mad_port_open(struct ib_device *device,
3179 if (has_smi) 3156 if (has_smi)
3180 cq_size *= 2; 3157 cq_size *= 2;
3181 3158
3182 cq_attr.cqe = cq_size; 3159 port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
3183 port_priv->cq = ib_create_cq(port_priv->device, 3160 IB_POLL_WORKQUEUE);
3184 ib_mad_thread_completion_handler,
3185 NULL, port_priv, &cq_attr);
3186 if (IS_ERR(port_priv->cq)) { 3161 if (IS_ERR(port_priv->cq)) {
3187 dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); 3162 dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
3188 ret = PTR_ERR(port_priv->cq); 3163 ret = PTR_ERR(port_priv->cq);
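The ib_alloc_cq() call above is the new completion-queue helper that replaces the ib_create_cq()/ib_req_notify_cq()/ib_poll_cq() boilerplate deleted earlier in this file. A minimal usage sketch (assuming a driver-private pointer `priv` and <rdma/ib_verbs.h>):

	struct ib_cq *cq;

	cq = ib_alloc_cq(device, priv, nr_cqe, 0 /* comp_vector */,
			 IB_POLL_WORKQUEUE);	/* ->done() callbacks run from a workqueue */
	if (IS_ERR(cq))
		return PTR_ERR(cq);

	/* ... post WRs whose wr_cqe->done handlers consume completions ... */

	ib_free_cq(cq);		/* stops polling and destroys the CQ */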
@@ -3211,7 +3186,6 @@ static int ib_mad_port_open(struct ib_device *device,
3211 ret = -ENOMEM; 3186 ret = -ENOMEM;
3212 goto error8; 3187 goto error8;
3213 } 3188 }
3214 INIT_WORK(&port_priv->work, ib_mad_completion_handler);
3215 3189
3216 spin_lock_irqsave(&ib_mad_port_list_lock, flags); 3190 spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3217 list_add_tail(&port_priv->port_list, &ib_mad_port_list); 3191 list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -3238,7 +3212,7 @@ error7:
3238error6: 3212error6:
3239 ib_dealloc_pd(port_priv->pd); 3213 ib_dealloc_pd(port_priv->pd);
3240error4: 3214error4:
3241 ib_destroy_cq(port_priv->cq); 3215 ib_free_cq(port_priv->cq);
3242 cleanup_recv_queue(&port_priv->qp_info[1]); 3216 cleanup_recv_queue(&port_priv->qp_info[1]);
3243 cleanup_recv_queue(&port_priv->qp_info[0]); 3217 cleanup_recv_queue(&port_priv->qp_info[0]);
3244error3: 3218error3:
@@ -3271,7 +3245,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
3271 destroy_mad_qp(&port_priv->qp_info[1]); 3245 destroy_mad_qp(&port_priv->qp_info[1]);
3272 destroy_mad_qp(&port_priv->qp_info[0]); 3246 destroy_mad_qp(&port_priv->qp_info[0]);
3273 ib_dealloc_pd(port_priv->pd); 3247 ib_dealloc_pd(port_priv->pd);
3274 ib_destroy_cq(port_priv->cq); 3248 ib_free_cq(port_priv->cq);
3275 cleanup_recv_queue(&port_priv->qp_info[1]); 3249 cleanup_recv_queue(&port_priv->qp_info[1]);
3276 cleanup_recv_queue(&port_priv->qp_info[0]); 3250 cleanup_recv_queue(&port_priv->qp_info[0]);
3277 /* XXX: Handle deallocation of MAD registration tables */ 3251 /* XXX: Handle deallocation of MAD registration tables */
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 990698a6ab4b..28669f6419e1 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -64,6 +64,7 @@
64 64
65struct ib_mad_list_head { 65struct ib_mad_list_head {
66 struct list_head list; 66 struct list_head list;
67 struct ib_cqe cqe;
67 struct ib_mad_queue *mad_queue; 68 struct ib_mad_queue *mad_queue;
68}; 69};
69 70
@@ -204,7 +205,6 @@ struct ib_mad_port_private {
204 struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION]; 205 struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
205 struct list_head agent_list; 206 struct list_head agent_list;
206 struct workqueue_struct *wq; 207 struct workqueue_struct *wq;
207 struct work_struct work;
208 struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; 208 struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
209}; 209};
210 210
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index bb6685fb08c6..250937cb9a1a 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
723 723
724int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, 724int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
725 struct ib_sa_mcmember_rec *rec, 725 struct ib_sa_mcmember_rec *rec,
726 struct net_device *ndev,
727 enum ib_gid_type gid_type,
726 struct ib_ah_attr *ah_attr) 728 struct ib_ah_attr *ah_attr)
727{ 729{
728 int ret; 730 int ret;
729 u16 gid_index; 731 u16 gid_index;
730 u8 p; 732 u8 p;
731 733
732 ret = ib_find_cached_gid(device, &rec->port_gid, 734 if (rdma_protocol_roce(device, port_num)) {
733 NULL, &p, &gid_index); 735 ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
736 gid_type, port_num,
737 ndev,
738 &gid_index);
739 } else if (rdma_protocol_ib(device, port_num)) {
740 ret = ib_find_cached_gid(device, &rec->port_gid,
741 IB_GID_TYPE_IB, NULL, &p,
742 &gid_index);
743 } else {
744 ret = -EINVAL;
745 }
746
734 if (ret) 747 if (ret)
735 return ret; 748 return ret;
736 749
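A hypothetical caller sketch for the widened ib_init_ah_from_mcmember() signature; `ndev` and the gid_type value are placeholders for what the caller resolved when joining the group (on IB ports the extra arguments are not used for the lookup):

	struct ib_ah_attr ah_attr;
	struct ib_ah *ah;
	int ret;

	ret = ib_init_ah_from_mcmember(device, port_num, &rec,
				       ndev,	/* netdev the group was joined on (RoCE) */
				       IB_GID_TYPE_ROCE_UDP_ENCAP,	/* placeholder gid_type */
				       &ah_attr);
	if (!ret)
		ah = ib_create_ah(pd, &ah_attr);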
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 178f98482e13..06556c34606d 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -67,17 +67,53 @@ struct netdev_event_work {
67 struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ]; 67 struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ];
68}; 68};
69 69
70static const struct {
71 bool (*is_supported)(const struct ib_device *device, u8 port_num);
72 enum ib_gid_type gid_type;
73} PORT_CAP_TO_GID_TYPE[] = {
74 {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
75 {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
76};
77
78#define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
79
80unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
81{
82 int i;
83 unsigned int ret_flags = 0;
84
85 if (!rdma_protocol_roce(ib_dev, port))
86 return 1UL << IB_GID_TYPE_IB;
87
88 for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
89 if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
90 ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
91
92 return ret_flags;
93}
94EXPORT_SYMBOL(roce_gid_type_mask_support);
95
70static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev, 96static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
71 u8 port, union ib_gid *gid, 97 u8 port, union ib_gid *gid,
72 struct ib_gid_attr *gid_attr) 98 struct ib_gid_attr *gid_attr)
73{ 99{
74 switch (gid_op) { 100 int i;
75 case GID_ADD: 101 unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
76 ib_cache_gid_add(ib_dev, port, gid, gid_attr); 102
77 break; 103 for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
78 case GID_DEL: 104 if ((1UL << i) & gid_type_mask) {
79 ib_cache_gid_del(ib_dev, port, gid, gid_attr); 105 gid_attr->gid_type = i;
80 break; 106 switch (gid_op) {
107 case GID_ADD:
108 ib_cache_gid_add(ib_dev, port,
109 gid, gid_attr);
110 break;
111 case GID_DEL:
112 ib_cache_gid_del(ib_dev, port,
113 gid, gid_attr);
114 break;
115 }
116 }
81 } 117 }
82} 118}
83 119
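A short sketch of consuming the new roce_gid_type_mask_support() helper outside of update_gid(), for example to test whether a port can expose RoCE v2 GIDs (hypothetical check, not part of this patch):

	unsigned long mask = roce_gid_type_mask_support(ib_dev, port);

	if (mask & (1UL << IB_GID_TYPE_ROCE_UDP_ENCAP))
		/* port supports RoCE v2 (UDP-encapsulated) GIDs */;
	if (mask & (1UL << IB_GID_TYPE_ROCE))
		/* port supports RoCE v1 GIDs */;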
@@ -103,18 +139,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
103 return BONDING_SLAVE_STATE_NA; 139 return BONDING_SLAVE_STATE_NA;
104} 140}
105 141
106static bool is_upper_dev_rcu(struct net_device *dev, struct net_device *upper)
107{
108 struct net_device *_upper = NULL;
109 struct list_head *iter;
110
111 netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
112 if (_upper == upper)
113 break;
114
115 return _upper == upper;
116}
117
118#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \ 142#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
119 BONDING_SLAVE_STATE_NA) 143 BONDING_SLAVE_STATE_NA)
120static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, 144static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
@@ -132,7 +156,7 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
132 if (!real_dev) 156 if (!real_dev)
133 real_dev = event_ndev; 157 real_dev = event_ndev;
134 158
135 res = ((is_upper_dev_rcu(rdma_ndev, event_ndev) && 159 res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
136 (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) & 160 (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
137 REQUIRED_BOND_STATES)) || 161 REQUIRED_BOND_STATES)) ||
138 real_dev == rdma_ndev); 162 real_dev == rdma_ndev);
@@ -178,7 +202,7 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port,
178 return 1; 202 return 1;
179 203
180 rcu_read_lock(); 204 rcu_read_lock();
181 res = is_upper_dev_rcu(rdma_ndev, event_ndev); 205 res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
182 rcu_read_unlock(); 206 rcu_read_unlock();
183 207
184 return res; 208 return res;
@@ -203,10 +227,12 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
203 u8 port, struct net_device *event_ndev, 227 u8 port, struct net_device *event_ndev,
204 struct net_device *rdma_ndev) 228 struct net_device *rdma_ndev)
205{ 229{
230 unsigned long gid_type_mask;
231
206 rcu_read_lock(); 232 rcu_read_lock();
207 if (!rdma_ndev || 233 if (!rdma_ndev ||
208 ((rdma_ndev != event_ndev && 234 ((rdma_ndev != event_ndev &&
209 !is_upper_dev_rcu(rdma_ndev, event_ndev)) || 235 !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
210 is_eth_active_slave_of_bonding_rcu(rdma_ndev, 236 is_eth_active_slave_of_bonding_rcu(rdma_ndev,
211 netdev_master_upper_dev_get_rcu(rdma_ndev)) == 237 netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
212 BONDING_SLAVE_STATE_INACTIVE)) { 238 BONDING_SLAVE_STATE_INACTIVE)) {
@@ -215,7 +241,9 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
215 } 241 }
216 rcu_read_unlock(); 242 rcu_read_unlock();
217 243
218 ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, 244 gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
245
246 ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
219 IB_CACHE_GID_DEFAULT_MODE_SET); 247 IB_CACHE_GID_DEFAULT_MODE_SET);
220} 248}
221 249
@@ -234,12 +262,17 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
234 262
235 rcu_read_lock(); 263 rcu_read_lock();
236 264
237 if (is_upper_dev_rcu(rdma_ndev, event_ndev) && 265 if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
238 is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) == 266 is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
239 BONDING_SLAVE_STATE_INACTIVE) { 267 BONDING_SLAVE_STATE_INACTIVE) {
268 unsigned long gid_type_mask;
269
240 rcu_read_unlock(); 270 rcu_read_unlock();
241 271
272 gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
273
242 ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, 274 ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
275 gid_type_mask,
243 IB_CACHE_GID_DEFAULT_MODE_DELETE); 276 IB_CACHE_GID_DEFAULT_MODE_DELETE);
244 } else { 277 } else {
245 rcu_read_unlock(); 278 rcu_read_unlock();
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a95a32ba596e..f334090bb612 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -49,7 +49,9 @@
49#include <net/netlink.h> 49#include <net/netlink.h>
50#include <uapi/rdma/ib_user_sa.h> 50#include <uapi/rdma/ib_user_sa.h>
51#include <rdma/ib_marshall.h> 51#include <rdma/ib_marshall.h>
52#include <rdma/ib_addr.h>
52#include "sa.h" 53#include "sa.h"
54#include "core_priv.h"
53 55
54MODULE_AUTHOR("Roland Dreier"); 56MODULE_AUTHOR("Roland Dreier");
55MODULE_DESCRIPTION("InfiniBand subnet administration query support"); 57MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -715,7 +717,9 @@ static int ib_nl_handle_set_timeout(struct sk_buff *skb,
715 struct nlattr *tb[LS_NLA_TYPE_MAX]; 717 struct nlattr *tb[LS_NLA_TYPE_MAX];
716 int ret; 718 int ret;
717 719
718 if (!netlink_capable(skb, CAP_NET_ADMIN)) 720 if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
721 !(NETLINK_CB(skb).sk) ||
722 !netlink_capable(skb, CAP_NET_ADMIN))
719 return -EPERM; 723 return -EPERM;
720 724
721 ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), 725 ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
@@ -789,7 +793,9 @@ static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
789 int found = 0; 793 int found = 0;
790 int ret; 794 int ret;
791 795
792 if (!netlink_capable(skb, CAP_NET_ADMIN)) 796 if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
797 !(NETLINK_CB(skb).sk) ||
798 !netlink_capable(skb, CAP_NET_ADMIN))
793 return -EPERM; 799 return -EPERM;
794 800
795 spin_lock_irqsave(&ib_nl_request_lock, flags); 801 spin_lock_irqsave(&ib_nl_request_lock, flags);
@@ -996,7 +1002,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
996{ 1002{
997 int ret; 1003 int ret;
998 u16 gid_index; 1004 u16 gid_index;
999 int force_grh; 1005 int use_roce;
1006 struct net_device *ndev = NULL;
1000 1007
1001 memset(ah_attr, 0, sizeof *ah_attr); 1008 memset(ah_attr, 0, sizeof *ah_attr);
1002 ah_attr->dlid = be16_to_cpu(rec->dlid); 1009 ah_attr->dlid = be16_to_cpu(rec->dlid);
@@ -1006,16 +1013,71 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
1006 ah_attr->port_num = port_num; 1013 ah_attr->port_num = port_num;
1007 ah_attr->static_rate = rec->rate; 1014 ah_attr->static_rate = rec->rate;
1008 1015
1009 force_grh = rdma_cap_eth_ah(device, port_num); 1016 use_roce = rdma_cap_eth_ah(device, port_num);
1017
1018 if (use_roce) {
1019 struct net_device *idev;
1020 struct net_device *resolved_dev;
1021 struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
1022 .net = rec->net ? rec->net :
1023 &init_net};
1024 union {
1025 struct sockaddr _sockaddr;
1026 struct sockaddr_in _sockaddr_in;
1027 struct sockaddr_in6 _sockaddr_in6;
1028 } sgid_addr, dgid_addr;
1029
1030 if (!device->get_netdev)
1031 return -EOPNOTSUPP;
1032
1033 rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
1034 rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
1035
1036 /* validate the route */
1037 ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
1038 &dgid_addr._sockaddr, &dev_addr);
1039 if (ret)
1040 return ret;
1010 1041
1011 if (rec->hop_limit > 1 || force_grh) { 1042 if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
1012 struct net_device *ndev = ib_get_ndev_from_path(rec); 1043 dev_addr.network == RDMA_NETWORK_IPV6) &&
1044 rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
1045 return -EINVAL;
1046
1047 idev = device->get_netdev(device, port_num);
1048 if (!idev)
1049 return -ENODEV;
1050
1051 resolved_dev = dev_get_by_index(dev_addr.net,
1052 dev_addr.bound_dev_if);
1053 if (resolved_dev->flags & IFF_LOOPBACK) {
1054 dev_put(resolved_dev);
1055 resolved_dev = idev;
1056 dev_hold(resolved_dev);
1057 }
1058 ndev = ib_get_ndev_from_path(rec);
1059 rcu_read_lock();
1060 if ((ndev && ndev != resolved_dev) ||
1061 (resolved_dev != idev &&
1062 !rdma_is_upper_dev_rcu(idev, resolved_dev)))
1063 ret = -EHOSTUNREACH;
1064 rcu_read_unlock();
1065 dev_put(idev);
1066 dev_put(resolved_dev);
1067 if (ret) {
1068 if (ndev)
1069 dev_put(ndev);
1070 return ret;
1071 }
1072 }
1013 1073
1074 if (rec->hop_limit > 1 || use_roce) {
1014 ah_attr->ah_flags = IB_AH_GRH; 1075 ah_attr->ah_flags = IB_AH_GRH;
1015 ah_attr->grh.dgid = rec->dgid; 1076 ah_attr->grh.dgid = rec->dgid;
1016 1077
1017 ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num, 1078 ret = ib_find_cached_gid_by_port(device, &rec->sgid,
1018 &gid_index); 1079 rec->gid_type, port_num, ndev,
1080 &gid_index);
1019 if (ret) { 1081 if (ret) {
1020 if (ndev) 1082 if (ndev)
1021 dev_put(ndev); 1083 dev_put(ndev);
@@ -1029,9 +1091,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
1029 if (ndev) 1091 if (ndev)
1030 dev_put(ndev); 1092 dev_put(ndev);
1031 } 1093 }
1032 if (force_grh) { 1094
1095 if (use_roce)
1033 memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); 1096 memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
1034 } 1097
1035 return 0; 1098 return 0;
1036} 1099}
1037EXPORT_SYMBOL(ib_init_ah_from_path); 1100EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1157,6 +1220,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1157 mad->data, &rec); 1220 mad->data, &rec);
1158 rec.net = NULL; 1221 rec.net = NULL;
1159 rec.ifindex = 0; 1222 rec.ifindex = 0;
1223 rec.gid_type = IB_GID_TYPE_IB;
1160 memset(rec.dmac, 0, ETH_ALEN); 1224 memset(rec.dmac, 0, ETH_ALEN);
1161 query->callback(status, &rec, query->context); 1225 query->callback(status, &rec, query->context);
1162 } else 1226 } else
@@ -1609,14 +1673,15 @@ static void send_handler(struct ib_mad_agent *agent,
1609} 1673}
1610 1674
1611static void recv_handler(struct ib_mad_agent *mad_agent, 1675static void recv_handler(struct ib_mad_agent *mad_agent,
1676 struct ib_mad_send_buf *send_buf,
1612 struct ib_mad_recv_wc *mad_recv_wc) 1677 struct ib_mad_recv_wc *mad_recv_wc)
1613{ 1678{
1614 struct ib_sa_query *query; 1679 struct ib_sa_query *query;
1615 struct ib_mad_send_buf *mad_buf;
1616 1680
1617 mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id; 1681 if (!send_buf)
1618 query = mad_buf->context[0]; 1682 return;
1619 1683
1684 query = send_buf->context[0];
1620 if (query->callback) { 1685 if (query->callback) {
1621 if (mad_recv_wc->wc->status == IB_WC_SUCCESS) 1686 if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
1622 query->callback(query, 1687 query->callback(query,
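For context on the RoCE branch added to ib_init_ah_from_path() above: a GID on a RoCE port encodes an IP address (IPv4-mapped or native IPv6), so it can be turned into a sockaddr and handed to the IP routing code. A minimal sketch of that conversion, assuming `gid` already holds the address:

	union {
		struct sockaddr		_sockaddr;
		struct sockaddr_in	_sockaddr_in;	/* IPv4-mapped GID */
		struct sockaddr_in6	_sockaddr_in6;	/* native IPv6 GID */
	} addr;

	rdma_gid2ip(&addr._sockaddr, &gid);
	/* addr._sockaddr.sa_family is now AF_INET or AF_INET6; the address can
	 * be passed to rdma_resolve_ip_route() to validate reachability */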
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index b1f37d4095fa..3de93517efe4 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -37,15 +37,27 @@
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/stat.h> 38#include <linux/stat.h>
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/netdevice.h>
40 41
41#include <rdma/ib_mad.h> 42#include <rdma/ib_mad.h>
43#include <rdma/ib_pma.h>
42 44
45struct ib_port;
46
47struct gid_attr_group {
48 struct ib_port *port;
49 struct kobject kobj;
50 struct attribute_group ndev;
51 struct attribute_group type;
52};
43struct ib_port { 53struct ib_port {
44 struct kobject kobj; 54 struct kobject kobj;
45 struct ib_device *ibdev; 55 struct ib_device *ibdev;
56 struct gid_attr_group *gid_attr_group;
46 struct attribute_group gid_group; 57 struct attribute_group gid_group;
47 struct attribute_group pkey_group; 58 struct attribute_group pkey_group;
48 u8 port_num; 59 u8 port_num;
60 struct attribute_group *pma_table;
49}; 61};
50 62
51struct port_attribute { 63struct port_attribute {
@@ -65,6 +77,7 @@ struct port_table_attribute {
65 struct port_attribute attr; 77 struct port_attribute attr;
66 char name[8]; 78 char name[8];
67 int index; 79 int index;
80 __be16 attr_id;
68}; 81};
69 82
70static ssize_t port_attr_show(struct kobject *kobj, 83static ssize_t port_attr_show(struct kobject *kobj,
@@ -84,6 +97,24 @@ static const struct sysfs_ops port_sysfs_ops = {
84 .show = port_attr_show 97 .show = port_attr_show
85}; 98};
86 99
100static ssize_t gid_attr_show(struct kobject *kobj,
101 struct attribute *attr, char *buf)
102{
103 struct port_attribute *port_attr =
104 container_of(attr, struct port_attribute, attr);
105 struct ib_port *p = container_of(kobj, struct gid_attr_group,
106 kobj)->port;
107
108 if (!port_attr->show)
109 return -EIO;
110
111 return port_attr->show(p, port_attr, buf);
112}
113
114static const struct sysfs_ops gid_attr_sysfs_ops = {
115 .show = gid_attr_show
116};
117
87static ssize_t state_show(struct ib_port *p, struct port_attribute *unused, 118static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
88 char *buf) 119 char *buf)
89{ 120{
@@ -281,6 +312,46 @@ static struct attribute *port_default_attrs[] = {
281 NULL 312 NULL
282}; 313};
283 314
315static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
316{
317 if (!gid_attr->ndev)
318 return -EINVAL;
319
320 return sprintf(buf, "%s\n", gid_attr->ndev->name);
321}
322
323static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
324{
325 return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
326}
327
328static ssize_t _show_port_gid_attr(struct ib_port *p,
329 struct port_attribute *attr,
330 char *buf,
331 size_t (*print)(struct ib_gid_attr *gid_attr,
332 char *buf))
333{
334 struct port_table_attribute *tab_attr =
335 container_of(attr, struct port_table_attribute, attr);
336 union ib_gid gid;
337 struct ib_gid_attr gid_attr = {};
338 ssize_t ret;
339 va_list args;
340
341 ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
342 &gid_attr);
343 if (ret)
344 goto err;
345
346 ret = print(&gid_attr, buf);
347
348err:
349 if (gid_attr.ndev)
350 dev_put(gid_attr.ndev);
351 va_end(args);
352 return ret;
353}
354
284static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, 355static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
285 char *buf) 356 char *buf)
286{ 357{
@@ -296,6 +367,19 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
296 return sprintf(buf, "%pI6\n", gid.raw); 367 return sprintf(buf, "%pI6\n", gid.raw);
297} 368}
298 369
370static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
371 struct port_attribute *attr, char *buf)
372{
373 return _show_port_gid_attr(p, attr, buf, print_ndev);
374}
375
376static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
377 struct port_attribute *attr,
378 char *buf)
379{
380 return _show_port_gid_attr(p, attr, buf, print_gid_type);
381}
382
299static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, 383static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
300 char *buf) 384 char *buf)
301{ 385{
@@ -314,24 +398,32 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
314#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ 398#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
315struct port_table_attribute port_pma_attr_##_name = { \ 399struct port_table_attribute port_pma_attr_##_name = { \
316 .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \ 400 .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
317 .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \ 401 .index = (_offset) | ((_width) << 16) | ((_counter) << 24), \
402 .attr_id = IB_PMA_PORT_COUNTERS , \
318} 403}
319 404
320static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, 405#define PORT_PMA_ATTR_EXT(_name, _width, _offset) \
321 char *buf) 406struct port_table_attribute port_pma_attr_ext_##_name = { \
407 .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
408 .index = (_offset) | ((_width) << 16), \
409 .attr_id = IB_PMA_PORT_COUNTERS_EXT , \
410}
411
412/*
413 * Get a Perfmgmt MAD block of data.
414 * Returns error code or the number of bytes retrieved.
415 */
416static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
417 void *data, int offset, size_t size)
322{ 418{
323 struct port_table_attribute *tab_attr = 419 struct ib_mad *in_mad;
324 container_of(attr, struct port_table_attribute, attr); 420 struct ib_mad *out_mad;
325 int offset = tab_attr->index & 0xffff;
326 int width = (tab_attr->index >> 16) & 0xff;
327 struct ib_mad *in_mad = NULL;
328 struct ib_mad *out_mad = NULL;
329 size_t mad_size = sizeof(*out_mad); 421 size_t mad_size = sizeof(*out_mad);
330 u16 out_mad_pkey_index = 0; 422 u16 out_mad_pkey_index = 0;
331 ssize_t ret; 423 ssize_t ret;
332 424
333 if (!p->ibdev->process_mad) 425 if (!dev->process_mad)
334 return sprintf(buf, "N/A (no PMA)\n"); 426 return -ENOSYS;
335 427
336 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 428 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
337 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 429 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -344,12 +436,13 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
344 in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT; 436 in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
345 in_mad->mad_hdr.class_version = 1; 437 in_mad->mad_hdr.class_version = 1;
346 in_mad->mad_hdr.method = IB_MGMT_METHOD_GET; 438 in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
347 in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */ 439 in_mad->mad_hdr.attr_id = attr;
348 440
349 in_mad->data[41] = p->port_num; /* PortSelect field */ 441 if (attr != IB_PMA_CLASS_PORT_INFO)
442 in_mad->data[41] = port_num; /* PortSelect field */
350 443
351 if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY, 444 if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
352 p->port_num, NULL, NULL, 445 port_num, NULL, NULL,
353 (const struct ib_mad_hdr *)in_mad, mad_size, 446 (const struct ib_mad_hdr *)in_mad, mad_size,
354 (struct ib_mad_hdr *)out_mad, &mad_size, 447 (struct ib_mad_hdr *)out_mad, &mad_size,
355 &out_mad_pkey_index) & 448 &out_mad_pkey_index) &
@@ -358,31 +451,54 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
358 ret = -EINVAL; 451 ret = -EINVAL;
359 goto out; 452 goto out;
360 } 453 }
454 memcpy(data, out_mad->data + offset, size);
455 ret = size;
456out:
457 kfree(in_mad);
458 kfree(out_mad);
459 return ret;
460}
461
462static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
463 char *buf)
464{
465 struct port_table_attribute *tab_attr =
466 container_of(attr, struct port_table_attribute, attr);
467 int offset = tab_attr->index & 0xffff;
468 int width = (tab_attr->index >> 16) & 0xff;
469 ssize_t ret;
470 u8 data[8];
471
472 ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
473 40 + offset / 8, sizeof(data));
474 if (ret < 0)
475 return sprintf(buf, "N/A (no PMA)\n");
361 476
362 switch (width) { 477 switch (width) {
363 case 4: 478 case 4:
364 ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >> 479 ret = sprintf(buf, "%u\n", (*data >>
365 (4 - (offset % 8))) & 0xf); 480 (4 - (offset % 8))) & 0xf);
366 break; 481 break;
367 case 8: 482 case 8:
368 ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]); 483 ret = sprintf(buf, "%u\n", *data);
369 break; 484 break;
370 case 16: 485 case 16:
371 ret = sprintf(buf, "%u\n", 486 ret = sprintf(buf, "%u\n",
372 be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8))); 487 be16_to_cpup((__be16 *)data));
373 break; 488 break;
374 case 32: 489 case 32:
375 ret = sprintf(buf, "%u\n", 490 ret = sprintf(buf, "%u\n",
376 be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8))); 491 be32_to_cpup((__be32 *)data));
492 break;
493 case 64:
494 ret = sprintf(buf, "%llu\n",
495 be64_to_cpup((__be64 *)data));
377 break; 496 break;
497
378 default: 498 default:
379 ret = 0; 499 ret = 0;
380 } 500 }
381 501
382out:
383 kfree(in_mad);
384 kfree(out_mad);
385
386 return ret; 502 return ret;
387} 503}
388 504
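For reference, the packed .index built by PORT_PMA_ATTR/PORT_PMA_ATTR_EXT encodes where a counter sits inside the PortCounters/PortCountersExt attribute; show_pma_counter() unpacks it as sketched here (names mirror the code above):

	/* .index = offset_bits | (width_bits << 16) | (counter_id << 24) */
	int offset = tab_attr->index & 0xffff;		/* bit offset into the attribute */
	int width  = (tab_attr->index >> 16) & 0xff;	/* 4, 8, 16, 32 or 64 bits */
	int byte   = 40 + offset / 8;			/* MAD data starts at byte 40 */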
@@ -403,6 +519,18 @@ static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
403static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256); 519static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
404static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288); 520static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
405 521
522/*
523 * Counters added by extended set
524 */
525static PORT_PMA_ATTR_EXT(port_xmit_data , 64, 64);
526static PORT_PMA_ATTR_EXT(port_rcv_data , 64, 128);
527static PORT_PMA_ATTR_EXT(port_xmit_packets , 64, 192);
528static PORT_PMA_ATTR_EXT(port_rcv_packets , 64, 256);
529static PORT_PMA_ATTR_EXT(unicast_xmit_packets , 64, 320);
530static PORT_PMA_ATTR_EXT(unicast_rcv_packets , 64, 384);
531static PORT_PMA_ATTR_EXT(multicast_xmit_packets , 64, 448);
532static PORT_PMA_ATTR_EXT(multicast_rcv_packets , 64, 512);
533
406static struct attribute *pma_attrs[] = { 534static struct attribute *pma_attrs[] = {
407 &port_pma_attr_symbol_error.attr.attr, 535 &port_pma_attr_symbol_error.attr.attr,
408 &port_pma_attr_link_error_recovery.attr.attr, 536 &port_pma_attr_link_error_recovery.attr.attr,
@@ -423,11 +551,65 @@ static struct attribute *pma_attrs[] = {
423 NULL 551 NULL
424}; 552};
425 553
554static struct attribute *pma_attrs_ext[] = {
555 &port_pma_attr_symbol_error.attr.attr,
556 &port_pma_attr_link_error_recovery.attr.attr,
557 &port_pma_attr_link_downed.attr.attr,
558 &port_pma_attr_port_rcv_errors.attr.attr,
559 &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
560 &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
561 &port_pma_attr_port_xmit_discards.attr.attr,
562 &port_pma_attr_port_xmit_constraint_errors.attr.attr,
563 &port_pma_attr_port_rcv_constraint_errors.attr.attr,
564 &port_pma_attr_local_link_integrity_errors.attr.attr,
565 &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
566 &port_pma_attr_VL15_dropped.attr.attr,
567 &port_pma_attr_ext_port_xmit_data.attr.attr,
568 &port_pma_attr_ext_port_rcv_data.attr.attr,
569 &port_pma_attr_ext_port_xmit_packets.attr.attr,
570 &port_pma_attr_ext_port_rcv_packets.attr.attr,
571 &port_pma_attr_ext_unicast_rcv_packets.attr.attr,
572 &port_pma_attr_ext_unicast_xmit_packets.attr.attr,
573 &port_pma_attr_ext_multicast_rcv_packets.attr.attr,
574 &port_pma_attr_ext_multicast_xmit_packets.attr.attr,
575 NULL
576};
577
578static struct attribute *pma_attrs_noietf[] = {
579 &port_pma_attr_symbol_error.attr.attr,
580 &port_pma_attr_link_error_recovery.attr.attr,
581 &port_pma_attr_link_downed.attr.attr,
582 &port_pma_attr_port_rcv_errors.attr.attr,
583 &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
584 &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
585 &port_pma_attr_port_xmit_discards.attr.attr,
586 &port_pma_attr_port_xmit_constraint_errors.attr.attr,
587 &port_pma_attr_port_rcv_constraint_errors.attr.attr,
588 &port_pma_attr_local_link_integrity_errors.attr.attr,
589 &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
590 &port_pma_attr_VL15_dropped.attr.attr,
591 &port_pma_attr_ext_port_xmit_data.attr.attr,
592 &port_pma_attr_ext_port_rcv_data.attr.attr,
593 &port_pma_attr_ext_port_xmit_packets.attr.attr,
594 &port_pma_attr_ext_port_rcv_packets.attr.attr,
595 NULL
596};
597
426static struct attribute_group pma_group = { 598static struct attribute_group pma_group = {
427 .name = "counters", 599 .name = "counters",
428 .attrs = pma_attrs 600 .attrs = pma_attrs
429}; 601};
430 602
603static struct attribute_group pma_group_ext = {
604 .name = "counters",
605 .attrs = pma_attrs_ext
606};
607
608static struct attribute_group pma_group_noietf = {
609 .name = "counters",
610 .attrs = pma_attrs_noietf
611};
612
431static void ib_port_release(struct kobject *kobj) 613static void ib_port_release(struct kobject *kobj)
432{ 614{
433 struct ib_port *p = container_of(kobj, struct ib_port, kobj); 615 struct ib_port *p = container_of(kobj, struct ib_port, kobj);
@@ -451,12 +633,41 @@ static void ib_port_release(struct kobject *kobj)
451 kfree(p); 633 kfree(p);
452} 634}
453 635
636static void ib_port_gid_attr_release(struct kobject *kobj)
637{
638 struct gid_attr_group *g = container_of(kobj, struct gid_attr_group,
639 kobj);
640 struct attribute *a;
641 int i;
642
643 if (g->ndev.attrs) {
644 for (i = 0; (a = g->ndev.attrs[i]); ++i)
645 kfree(a);
646
647 kfree(g->ndev.attrs);
648 }
649
650 if (g->type.attrs) {
651 for (i = 0; (a = g->type.attrs[i]); ++i)
652 kfree(a);
653
654 kfree(g->type.attrs);
655 }
656
657 kfree(g);
658}
659
454static struct kobj_type port_type = { 660static struct kobj_type port_type = {
455 .release = ib_port_release, 661 .release = ib_port_release,
456 .sysfs_ops = &port_sysfs_ops, 662 .sysfs_ops = &port_sysfs_ops,
457 .default_attrs = port_default_attrs 663 .default_attrs = port_default_attrs
458}; 664};
459 665
666static struct kobj_type gid_attr_type = {
667 .sysfs_ops = &gid_attr_sysfs_ops,
668 .release = ib_port_gid_attr_release
669};
670
460static struct attribute ** 671static struct attribute **
461alloc_group_attrs(ssize_t (*show)(struct ib_port *, 672alloc_group_attrs(ssize_t (*show)(struct ib_port *,
462 struct port_attribute *, char *buf), 673 struct port_attribute *, char *buf),
@@ -500,6 +711,31 @@ err:
500 return NULL; 711 return NULL;
501} 712}
502 713
714/*
715 * Figure out which counter table to use depending on
716 * the device capabilities.
717 */
718static struct attribute_group *get_counter_table(struct ib_device *dev,
719 int port_num)
720{
721 struct ib_class_port_info cpi;
722
723 if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
724 &cpi, 40, sizeof(cpi)) >= 0) {
725
726 if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH)
727 /* We have extended counters */
728 return &pma_group_ext;
729
730 if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)

731 /* But not the IETF ones */
732 return &pma_group_noietf;
733 }
734
735 /* Fall back to normal counters */
736 return &pma_group;
737}
738
503static int add_port(struct ib_device *device, int port_num, 739static int add_port(struct ib_device *device, int port_num,
504 int (*port_callback)(struct ib_device *, 740 int (*port_callback)(struct ib_device *,
505 u8, struct kobject *)) 741 u8, struct kobject *))
@@ -528,9 +764,24 @@ static int add_port(struct ib_device *device, int port_num,
528 return ret; 764 return ret;
529 } 765 }
530 766
531 ret = sysfs_create_group(&p->kobj, &pma_group); 767 p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
532 if (ret) 768 if (!p->gid_attr_group) {
769 ret = -ENOMEM;
533 goto err_put; 770 goto err_put;
771 }
772
773 p->gid_attr_group->port = p;
774 ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
775 &p->kobj, "gid_attrs");
776 if (ret) {
777 kfree(p->gid_attr_group);
778 goto err_put;
779 }
780
781 p->pma_table = get_counter_table(device, port_num);
782 ret = sysfs_create_group(&p->kobj, p->pma_table);
783 if (ret)
784 goto err_put_gid_attrs;
534 785
535 p->gid_group.name = "gids"; 786 p->gid_group.name = "gids";
536 p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); 787 p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -543,12 +794,38 @@ static int add_port(struct ib_device *device, int port_num,
543 if (ret) 794 if (ret)
544 goto err_free_gid; 795 goto err_free_gid;
545 796
797 p->gid_attr_group->ndev.name = "ndevs";
798 p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
799 attr.gid_tbl_len);
800 if (!p->gid_attr_group->ndev.attrs) {
801 ret = -ENOMEM;
802 goto err_remove_gid;
803 }
804
805 ret = sysfs_create_group(&p->gid_attr_group->kobj,
806 &p->gid_attr_group->ndev);
807 if (ret)
808 goto err_free_gid_ndev;
809
810 p->gid_attr_group->type.name = "types";
811 p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
812 attr.gid_tbl_len);
813 if (!p->gid_attr_group->type.attrs) {
814 ret = -ENOMEM;
815 goto err_remove_gid_ndev;
816 }
817
818 ret = sysfs_create_group(&p->gid_attr_group->kobj,
819 &p->gid_attr_group->type);
820 if (ret)
821 goto err_free_gid_type;
822
546 p->pkey_group.name = "pkeys"; 823 p->pkey_group.name = "pkeys";
547 p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, 824 p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
548 attr.pkey_tbl_len); 825 attr.pkey_tbl_len);
549 if (!p->pkey_group.attrs) { 826 if (!p->pkey_group.attrs) {
550 ret = -ENOMEM; 827 ret = -ENOMEM;
551 goto err_remove_gid; 828 goto err_remove_gid_type;
552 } 829 }
553 830
554 ret = sysfs_create_group(&p->kobj, &p->pkey_group); 831 ret = sysfs_create_group(&p->kobj, &p->pkey_group);
@@ -576,6 +853,28 @@ err_free_pkey:
576 kfree(p->pkey_group.attrs); 853 kfree(p->pkey_group.attrs);
577 p->pkey_group.attrs = NULL; 854 p->pkey_group.attrs = NULL;
578 855
856err_remove_gid_type:
857 sysfs_remove_group(&p->gid_attr_group->kobj,
858 &p->gid_attr_group->type);
859
860err_free_gid_type:
861 for (i = 0; i < attr.gid_tbl_len; ++i)
862 kfree(p->gid_attr_group->type.attrs[i]);
863
864 kfree(p->gid_attr_group->type.attrs);
865 p->gid_attr_group->type.attrs = NULL;
866
867err_remove_gid_ndev:
868 sysfs_remove_group(&p->gid_attr_group->kobj,
869 &p->gid_attr_group->ndev);
870
871err_free_gid_ndev:
872 for (i = 0; i < attr.gid_tbl_len; ++i)
873 kfree(p->gid_attr_group->ndev.attrs[i]);
874
875 kfree(p->gid_attr_group->ndev.attrs);
876 p->gid_attr_group->ndev.attrs = NULL;
877
579err_remove_gid: 878err_remove_gid:
580 sysfs_remove_group(&p->kobj, &p->gid_group); 879 sysfs_remove_group(&p->kobj, &p->gid_group);
581 880
@@ -587,7 +886,10 @@ err_free_gid:
587 p->gid_group.attrs = NULL; 886 p->gid_group.attrs = NULL;
588 887
589err_remove_pma: 888err_remove_pma:
590 sysfs_remove_group(&p->kobj, &pma_group); 889 sysfs_remove_group(&p->kobj, p->pma_table);
890
891err_put_gid_attrs:
892 kobject_put(&p->gid_attr_group->kobj);
591 893
592err_put: 894err_put:
593 kobject_put(&p->kobj); 895 kobject_put(&p->kobj);
@@ -614,18 +916,12 @@ static ssize_t show_sys_image_guid(struct device *device,
614 struct device_attribute *dev_attr, char *buf) 916 struct device_attribute *dev_attr, char *buf)
615{ 917{
616 struct ib_device *dev = container_of(device, struct ib_device, dev); 918 struct ib_device *dev = container_of(device, struct ib_device, dev);
617 struct ib_device_attr attr;
618 ssize_t ret;
619
620 ret = ib_query_device(dev, &attr);
621 if (ret)
622 return ret;
623 919
624 return sprintf(buf, "%04x:%04x:%04x:%04x\n", 920 return sprintf(buf, "%04x:%04x:%04x:%04x\n",
625 be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]), 921 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
626 be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]), 922 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
627 be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]), 923 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
628 be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3])); 924 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
629} 925}
630 926
631static ssize_t show_node_guid(struct device *device, 927static ssize_t show_node_guid(struct device *device,
@@ -800,9 +1096,14 @@ static void free_port_list_attributes(struct ib_device *device)
800 list_for_each_entry_safe(p, t, &device->port_list, entry) { 1096 list_for_each_entry_safe(p, t, &device->port_list, entry) {
801 struct ib_port *port = container_of(p, struct ib_port, kobj); 1097 struct ib_port *port = container_of(p, struct ib_port, kobj);
802 list_del(&p->entry); 1098 list_del(&p->entry);
803 sysfs_remove_group(p, &pma_group); 1099 sysfs_remove_group(p, port->pma_table);
804 sysfs_remove_group(p, &port->pkey_group); 1100 sysfs_remove_group(p, &port->pkey_group);
805 sysfs_remove_group(p, &port->gid_group); 1101 sysfs_remove_group(p, &port->gid_group);
1102 sysfs_remove_group(&port->gid_attr_group->kobj,
1103 &port->gid_attr_group->ndev);
1104 sysfs_remove_group(&port->gid_attr_group->kobj,
1105 &port->gid_attr_group->type);
1106 kobject_put(&port->gid_attr_group->kobj);
806 kobject_put(p); 1107 kobject_put(p);
807 } 1108 }
808 1109
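The new gid_attrs kobject exposes, per GID index, the GID type and the associated net device. A hypothetical userspace sketch reading the new files (device, port and index names are placeholders; the layout follows /sys/class/infiniband/<dev>/ports/<port>/gid_attrs/):

	#include <stdio.h>

	int main(void)
	{
		char type[64] = "", ndev[64] = "";
		FILE *f;

		f = fopen("/sys/class/infiniband/mlx5_0/ports/1/gid_attrs/types/0", "r");
		if (f) {
			if (!fgets(type, sizeof(type), f))
				type[0] = '\0';
			fclose(f);
		}
		f = fopen("/sys/class/infiniband/mlx5_0/ports/1/gid_attrs/ndevs/0", "r");
		if (f) {
			if (!fgets(ndev, sizeof(ndev), f))
				ndev[0] = '\0';
			fclose(f);
		}
		printf("gid 0: type=%s ndev=%s", type, ndev);	/* values end with '\n' */
		return 0;
	}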
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 72feee620ebf..19837d270278 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -35,6 +35,7 @@
35#include <linux/string.h> 35#include <linux/string.h>
36#include <linux/export.h> 36#include <linux/export.h>
37#include <linux/if_ether.h> 37#include <linux/if_ether.h>
38#include <linux/ip.h>
38 39
39#include <rdma/ib_pack.h> 40#include <rdma/ib_pack.h>
40 41
@@ -116,6 +117,72 @@ static const struct ib_field vlan_table[] = {
116 .size_bits = 16 } 117 .size_bits = 16 }
117}; 118};
118 119
120static const struct ib_field ip4_table[] = {
121 { STRUCT_FIELD(ip4, ver),
122 .offset_words = 0,
123 .offset_bits = 0,
124 .size_bits = 4 },
125 { STRUCT_FIELD(ip4, hdr_len),
126 .offset_words = 0,
127 .offset_bits = 4,
128 .size_bits = 4 },
129 { STRUCT_FIELD(ip4, tos),
130 .offset_words = 0,
131 .offset_bits = 8,
132 .size_bits = 8 },
133 { STRUCT_FIELD(ip4, tot_len),
134 .offset_words = 0,
135 .offset_bits = 16,
136 .size_bits = 16 },
137 { STRUCT_FIELD(ip4, id),
138 .offset_words = 1,
139 .offset_bits = 0,
140 .size_bits = 16 },
141 { STRUCT_FIELD(ip4, frag_off),
142 .offset_words = 1,
143 .offset_bits = 16,
144 .size_bits = 16 },
145 { STRUCT_FIELD(ip4, ttl),
146 .offset_words = 2,
147 .offset_bits = 0,
148 .size_bits = 8 },
149 { STRUCT_FIELD(ip4, protocol),
150 .offset_words = 2,
151 .offset_bits = 8,
152 .size_bits = 8 },
153 { STRUCT_FIELD(ip4, check),
154 .offset_words = 2,
155 .offset_bits = 16,
156 .size_bits = 16 },
157 { STRUCT_FIELD(ip4, saddr),
158 .offset_words = 3,
159 .offset_bits = 0,
160 .size_bits = 32 },
161 { STRUCT_FIELD(ip4, daddr),
162 .offset_words = 4,
163 .offset_bits = 0,
164 .size_bits = 32 }
165};
166
167static const struct ib_field udp_table[] = {
168 { STRUCT_FIELD(udp, sport),
169 .offset_words = 0,
170 .offset_bits = 0,
171 .size_bits = 16 },
172 { STRUCT_FIELD(udp, dport),
173 .offset_words = 0,
174 .offset_bits = 16,
175 .size_bits = 16 },
176 { STRUCT_FIELD(udp, length),
177 .offset_words = 1,
178 .offset_bits = 0,
179 .size_bits = 16 },
180 { STRUCT_FIELD(udp, csum),
181 .offset_words = 1,
182 .offset_bits = 16,
183 .size_bits = 16 }
184};
185
119static const struct ib_field grh_table[] = { 186static const struct ib_field grh_table[] = {
120 { STRUCT_FIELD(grh, ip_version), 187 { STRUCT_FIELD(grh, ip_version),
121 .offset_words = 0, 188 .offset_words = 0,
@@ -213,26 +280,57 @@ static const struct ib_field deth_table[] = {
213 .size_bits = 24 } 280 .size_bits = 24 }
214}; 281};
215 282
283__sum16 ib_ud_ip4_csum(struct ib_ud_header *header)
284{
285 struct iphdr iph;
286
287 iph.ihl = 5;
288 iph.version = 4;
289 iph.tos = header->ip4.tos;
290 iph.tot_len = header->ip4.tot_len;
291 iph.id = header->ip4.id;
292 iph.frag_off = header->ip4.frag_off;
293 iph.ttl = header->ip4.ttl;
294 iph.protocol = header->ip4.protocol;
295 iph.check = 0;
296 iph.saddr = header->ip4.saddr;
297 iph.daddr = header->ip4.daddr;
298
299 return ip_fast_csum((u8 *)&iph, iph.ihl);
300}
301EXPORT_SYMBOL(ib_ud_ip4_csum);
302
216/** 303/**
217 * ib_ud_header_init - Initialize UD header structure 304 * ib_ud_header_init - Initialize UD header structure
218 * @payload_bytes:Length of packet payload 305 * @payload_bytes:Length of packet payload
219 * @lrh_present: specify if LRH is present 306 * @lrh_present: specify if LRH is present
220 * @eth_present: specify if Eth header is present 307 * @eth_present: specify if Eth header is present
221 * @vlan_present: packet is tagged vlan 308 * @vlan_present: packet is tagged vlan
222 * @grh_present:GRH flag (if non-zero, GRH will be included) 309 * @grh_present: GRH flag (if non-zero, GRH will be included)
310 * @ip_version: if non-zero, IP header, V4 or V6, will be included
311 * @udp_present :if non-zero, UDP header will be included
223 * @immediate_present: specify if immediate data is present 312 * @immediate_present: specify if immediate data is present
224 * @header:Structure to initialize 313 * @header:Structure to initialize
225 */ 314 */
226void ib_ud_header_init(int payload_bytes, 315int ib_ud_header_init(int payload_bytes,
227 int lrh_present, 316 int lrh_present,
228 int eth_present, 317 int eth_present,
229 int vlan_present, 318 int vlan_present,
230 int grh_present, 319 int grh_present,
231 int immediate_present, 320 int ip_version,
232 struct ib_ud_header *header) 321 int udp_present,
322 int immediate_present,
323 struct ib_ud_header *header)
233{ 324{
325 grh_present = grh_present && !ip_version;
234 memset(header, 0, sizeof *header); 326 memset(header, 0, sizeof *header);
235 327
328 /*
329 * UDP header without IP header doesn't make sense
330 */
331 if (udp_present && ip_version != 4 && ip_version != 6)
332 return -EINVAL;
333
236 if (lrh_present) { 334 if (lrh_present) {
237 u16 packet_length; 335 u16 packet_length;
238 336
@@ -252,7 +350,7 @@ void ib_ud_header_init(int payload_bytes,
252 if (vlan_present) 350 if (vlan_present)
253 header->eth.type = cpu_to_be16(ETH_P_8021Q); 351 header->eth.type = cpu_to_be16(ETH_P_8021Q);
254 352
255 if (grh_present) { 353 if (ip_version == 6 || grh_present) {
256 header->grh.ip_version = 6; 354 header->grh.ip_version = 6;
257 header->grh.payload_length = 355 header->grh.payload_length =
258 cpu_to_be16((IB_BTH_BYTES + 356 cpu_to_be16((IB_BTH_BYTES +
@@ -260,8 +358,30 @@ void ib_ud_header_init(int payload_bytes,
260 payload_bytes + 358 payload_bytes +
261 4 + /* ICRC */ 359 4 + /* ICRC */
262 3) & ~3); /* round up */ 360 3) & ~3); /* round up */
263 header->grh.next_header = 0x1b; 361 header->grh.next_header = udp_present ? IPPROTO_UDP : 0x1b;
362 }
363
364 if (ip_version == 4) {
365 int udp_bytes = udp_present ? IB_UDP_BYTES : 0;
366
367 header->ip4.ver = 4; /* version 4 */
368 header->ip4.hdr_len = 5; /* 5 words */
369 header->ip4.tot_len =
370 cpu_to_be16(IB_IP4_BYTES +
371 udp_bytes +
372 IB_BTH_BYTES +
373 IB_DETH_BYTES +
374 payload_bytes +
375 4); /* ICRC */
376 header->ip4.protocol = IPPROTO_UDP;
264 } 377 }
378 if (udp_present && ip_version)
379 header->udp.length =
380 cpu_to_be16(IB_UDP_BYTES +
381 IB_BTH_BYTES +
382 IB_DETH_BYTES +
383 payload_bytes +
384 4); /* ICRC */
265 385
266 if (immediate_present) 386 if (immediate_present)
267 header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; 387 header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
@@ -273,8 +393,11 @@ void ib_ud_header_init(int payload_bytes,
273 header->lrh_present = lrh_present; 393 header->lrh_present = lrh_present;
274 header->eth_present = eth_present; 394 header->eth_present = eth_present;
275 header->vlan_present = vlan_present; 395 header->vlan_present = vlan_present;
276 header->grh_present = grh_present; 396 header->grh_present = grh_present || (ip_version == 6);
397 header->ipv4_present = ip_version == 4;
398 header->udp_present = udp_present;
277 header->immediate_present = immediate_present; 399 header->immediate_present = immediate_present;
400 return 0;
278} 401}
279EXPORT_SYMBOL(ib_ud_header_init); 402EXPORT_SYMBOL(ib_ud_header_init);
280 403
@@ -311,6 +434,16 @@ int ib_ud_header_pack(struct ib_ud_header *header,
311 &header->grh, buf + len); 434 &header->grh, buf + len);
312 len += IB_GRH_BYTES; 435 len += IB_GRH_BYTES;
313 } 436 }
437 if (header->ipv4_present) {
438 ib_pack(ip4_table, ARRAY_SIZE(ip4_table),
439 &header->ip4, buf + len);
440 len += IB_IP4_BYTES;
441 }
442 if (header->udp_present) {
443 ib_pack(udp_table, ARRAY_SIZE(udp_table),
444 &header->udp, buf + len);
445 len += IB_UDP_BYTES;
446 }
314 447
315 ib_pack(bth_table, ARRAY_SIZE(bth_table), 448 ib_pack(bth_table, ARRAY_SIZE(bth_table),
316 &header->bth, buf + len); 449 &header->bth, buf + len);
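
The reworked ib_ud_header_init() above gains ip_version and udp_present so that RoCE v2 headers (UDP over IPv4/IPv6) can be built alongside the classic GRH-only RoCE v1 layout. Below is a hedged sketch of how a caller might now invoke it for an IPv4 RoCE v2 packet; only the argument order comes from the hunk, the payload size, vlan flag and error handling are illustrative assumptions.

/*
 * Hedged sketch: calling the reworked ib_ud_header_init() for a RoCE v2
 * (IPv4 + UDP) UD packet.  payload_bytes and vlan_tagged are assumed to
 * be supplied by the caller; they are not taken from the patch.
 */
struct ib_ud_header hdr;
int ret;

ret = ib_ud_header_init(payload_bytes,
			0,            /* lrh_present: no IB local route header */
			1,            /* eth_present */
			vlan_tagged,  /* vlan_present */
			0,            /* grh_present: superseded by ip_version here */
			4,            /* ip_version: build an IPv4 header */
			1,            /* udp_present: RoCE v2 rides on UDP */
			0,            /* immediate_present */
			&hdr);
if (ret)
	return ret;   /* -EINVAL when udp_present is set without IPv4/IPv6 */

Note that the function forces grh_present off whenever ip_version is non-zero, so existing RoCE v1 callers keep passing grh_present = 1 with ip_version = 0 and get the previous behaviour.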
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 40becdb3196e..e69bf266049d 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -232,7 +232,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
232 ib_ucontext_notifier_end_account(context); 232 ib_ucontext_notifier_end_account(context);
233} 233}
234 234
235static struct mmu_notifier_ops ib_umem_notifiers = { 235static const struct mmu_notifier_ops ib_umem_notifiers = {
236 .release = ib_umem_notifier_release, 236 .release = ib_umem_notifier_release,
237 .invalidate_page = ib_umem_notifier_invalidate_page, 237 .invalidate_page = ib_umem_notifier_invalidate_page,
238 .invalidate_range_start = ib_umem_notifier_invalidate_range_start, 238 .invalidate_range_start = ib_umem_notifier_invalidate_range_start,
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 57f281f8d686..415a3185cde7 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -210,6 +210,7 @@ static void send_handler(struct ib_mad_agent *agent,
210} 210}
211 211
212static void recv_handler(struct ib_mad_agent *agent, 212static void recv_handler(struct ib_mad_agent *agent,
213 struct ib_mad_send_buf *send_buf,
213 struct ib_mad_recv_wc *mad_recv_wc) 214 struct ib_mad_recv_wc *mad_recv_wc)
214{ 215{
215 struct ib_umad_file *file = agent->context; 216 struct ib_umad_file *file = agent->context;
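
The extra send_buf parameter threaded through recv_handler() here mirrors the core MAD change in this series: on a received response the agent is presumably handed the ib_mad_send_buf of the matching request (and NULL otherwise) rather than having to look it up itself. A hedged sketch of what an agent-side handler then looks like:

/*
 * Sketch of a receive handler using the new three-argument signature.
 * Treating send_buf == NULL as "unsolicited" is an assumption based on
 * the surrounding hunks, not a statement of the final API contract.
 */
static void my_recv_handler(struct ib_mad_agent *agent,
			    struct ib_mad_send_buf *send_buf,
			    struct ib_mad_recv_wc *mad_recv_wc)
{
	if (send_buf) {
		/* response to one of our requests; per-request state can be
		 * recovered from send_buf instead of walking a private list */
	}

	ib_free_recv_mad(mad_recv_wc);
}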
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 94bbd8c155fc..612ccfd39bf9 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -204,6 +204,8 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
204 struct ib_event *event); 204 struct ib_event *event);
205void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); 205void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
206 206
207int uverbs_dealloc_mw(struct ib_mw *mw);
208
207struct ib_uverbs_flow_spec { 209struct ib_uverbs_flow_spec {
208 union { 210 union {
209 union { 211 union {
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 1c02deab068f..6ffc9c4e93af 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -291,9 +291,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
291 struct ib_uverbs_get_context cmd; 291 struct ib_uverbs_get_context cmd;
292 struct ib_uverbs_get_context_resp resp; 292 struct ib_uverbs_get_context_resp resp;
293 struct ib_udata udata; 293 struct ib_udata udata;
294#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
295 struct ib_device_attr dev_attr;
296#endif
297 struct ib_ucontext *ucontext; 294 struct ib_ucontext *ucontext;
298 struct file *filp; 295 struct file *filp;
299 int ret; 296 int ret;
@@ -342,10 +339,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
342 ucontext->odp_mrs_count = 0; 339 ucontext->odp_mrs_count = 0;
343 INIT_LIST_HEAD(&ucontext->no_private_counters); 340 INIT_LIST_HEAD(&ucontext->no_private_counters);
344 341
345 ret = ib_query_device(ib_dev, &dev_attr); 342 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
346 if (ret)
347 goto err_free;
348 if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
349 ucontext->invalidate_range = NULL; 343 ucontext->invalidate_range = NULL;
350 344
351#endif 345#endif
@@ -447,8 +441,6 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
447{ 441{
448 struct ib_uverbs_query_device cmd; 442 struct ib_uverbs_query_device cmd;
449 struct ib_uverbs_query_device_resp resp; 443 struct ib_uverbs_query_device_resp resp;
450 struct ib_device_attr attr;
451 int ret;
452 444
453 if (out_len < sizeof resp) 445 if (out_len < sizeof resp)
454 return -ENOSPC; 446 return -ENOSPC;
@@ -456,12 +448,8 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
456 if (copy_from_user(&cmd, buf, sizeof cmd)) 448 if (copy_from_user(&cmd, buf, sizeof cmd))
457 return -EFAULT; 449 return -EFAULT;
458 450
459 ret = ib_query_device(ib_dev, &attr);
460 if (ret)
461 return ret;
462
463 memset(&resp, 0, sizeof resp); 451 memset(&resp, 0, sizeof resp);
464 copy_query_dev_fields(file, ib_dev, &resp, &attr); 452 copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
465 453
466 if (copy_to_user((void __user *) (unsigned long) cmd.response, 454 if (copy_to_user((void __user *) (unsigned long) cmd.response,
467 &resp, sizeof resp)) 455 &resp, sizeof resp))
@@ -986,11 +974,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
986 } 974 }
987 975
988 if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { 976 if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
989 struct ib_device_attr attr; 977 if (!(pd->device->attrs.device_cap_flags &
990 978 IB_DEVICE_ON_DEMAND_PAGING)) {
991 ret = ib_query_device(pd->device, &attr);
992 if (ret || !(attr.device_cap_flags &
993 IB_DEVICE_ON_DEMAND_PAGING)) {
994 pr_debug("ODP support not available\n"); 979 pr_debug("ODP support not available\n");
995 ret = -EINVAL; 980 ret = -EINVAL;
996 goto err_put; 981 goto err_put;
@@ -1008,7 +993,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
1008 mr->pd = pd; 993 mr->pd = pd;
1009 mr->uobject = uobj; 994 mr->uobject = uobj;
1010 atomic_inc(&pd->usecnt); 995 atomic_inc(&pd->usecnt);
1011 atomic_set(&mr->usecnt, 0);
1012 996
1013 uobj->object = mr; 997 uobj->object = mr;
1014 ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); 998 ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
@@ -1106,11 +1090,6 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
1106 } 1090 }
1107 } 1091 }
1108 1092
1109 if (atomic_read(&mr->usecnt)) {
1110 ret = -EBUSY;
1111 goto put_uobj_pd;
1112 }
1113
1114 old_pd = mr->pd; 1093 old_pd = mr->pd;
1115 ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, 1094 ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
1116 cmd.length, cmd.hca_va, 1095 cmd.length, cmd.hca_va,
@@ -1258,7 +1237,7 @@ err_copy:
1258 idr_remove_uobj(&ib_uverbs_mw_idr, uobj); 1237 idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
1259 1238
1260err_unalloc: 1239err_unalloc:
1261 ib_dealloc_mw(mw); 1240 uverbs_dealloc_mw(mw);
1262 1241
1263err_put: 1242err_put:
1264 put_pd_read(pd); 1243 put_pd_read(pd);
@@ -1287,7 +1266,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
1287 1266
1288 mw = uobj->object; 1267 mw = uobj->object;
1289 1268
1290 ret = ib_dealloc_mw(mw); 1269 ret = uverbs_dealloc_mw(mw);
1291 if (!ret) 1270 if (!ret)
1292 uobj->live = 0; 1271 uobj->live = 0;
1293 1272
@@ -1845,7 +1824,10 @@ static int create_qp(struct ib_uverbs_file *file,
1845 sizeof(cmd->create_flags)) 1824 sizeof(cmd->create_flags))
1846 attr.create_flags = cmd->create_flags; 1825 attr.create_flags = cmd->create_flags;
1847 1826
1848 if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { 1827 if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
1828 IB_QP_CREATE_CROSS_CHANNEL |
1829 IB_QP_CREATE_MANAGED_SEND |
1830 IB_QP_CREATE_MANAGED_RECV)) {
1849 ret = -EINVAL; 1831 ret = -EINVAL;
1850 goto err_put; 1832 goto err_put;
1851 } 1833 }
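
Several hunks above replace an on-stack ib_device_attr filled by ib_query_device() with the attributes the core now caches in ib_device::attrs at registration time. The consumer-side pattern reduces to a direct field read; a minimal sketch follows (the helper name is hypothetical, only the capability flag is taken from the hunk):

/* Minimal sketch of the cached-attribute check these hunks switch to;
 * dev_supports_odp() is an illustrative helper, not part of the patch. */
static bool dev_supports_odp(struct ib_device *ibdev)
{
	return !!(ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING);
}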
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index e3ef28861be6..39680aed99dd 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -133,6 +133,17 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
133static void ib_uverbs_add_one(struct ib_device *device); 133static void ib_uverbs_add_one(struct ib_device *device);
134static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); 134static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
135 135
136int uverbs_dealloc_mw(struct ib_mw *mw)
137{
138 struct ib_pd *pd = mw->pd;
139 int ret;
140
141 ret = mw->device->dealloc_mw(mw);
142 if (!ret)
143 atomic_dec(&pd->usecnt);
144 return ret;
145}
146
136static void ib_uverbs_release_dev(struct kobject *kobj) 147static void ib_uverbs_release_dev(struct kobject *kobj)
137{ 148{
138 struct ib_uverbs_device *dev = 149 struct ib_uverbs_device *dev =
@@ -224,7 +235,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
224 struct ib_mw *mw = uobj->object; 235 struct ib_mw *mw = uobj->object;
225 236
226 idr_remove_uobj(&ib_uverbs_mw_idr, uobj); 237 idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
227 ib_dealloc_mw(mw); 238 uverbs_dealloc_mw(mw);
228 kfree(uobj); 239 kfree(uobj);
229 } 240 }
230 241
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index 7d2f14c9bbef..af020f80d50f 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -144,5 +144,6 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
144 memset(dst->dmac, 0, sizeof(dst->dmac)); 144 memset(dst->dmac, 0, sizeof(dst->dmac));
145 dst->net = NULL; 145 dst->net = NULL;
146 dst->ifindex = 0; 146 dst->ifindex = 0;
147 dst->gid_type = IB_GID_TYPE_IB;
147} 148}
148EXPORT_SYMBOL(ib_copy_path_rec_from_user); 149EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 545906dec26d..5af6d024e053 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -229,12 +229,6 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
229struct ib_pd *ib_alloc_pd(struct ib_device *device) 229struct ib_pd *ib_alloc_pd(struct ib_device *device)
230{ 230{
231 struct ib_pd *pd; 231 struct ib_pd *pd;
232 struct ib_device_attr devattr;
233 int rc;
234
235 rc = ib_query_device(device, &devattr);
236 if (rc)
237 return ERR_PTR(rc);
238 232
239 pd = device->alloc_pd(device, NULL, NULL); 233 pd = device->alloc_pd(device, NULL, NULL);
240 if (IS_ERR(pd)) 234 if (IS_ERR(pd))
@@ -245,7 +239,7 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
245 pd->local_mr = NULL; 239 pd->local_mr = NULL;
246 atomic_set(&pd->usecnt, 0); 240 atomic_set(&pd->usecnt, 0);
247 241
248 if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) 242 if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
249 pd->local_dma_lkey = device->local_dma_lkey; 243 pd->local_dma_lkey = device->local_dma_lkey;
250 else { 244 else {
251 struct ib_mr *mr; 245 struct ib_mr *mr;
@@ -311,8 +305,61 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
311} 305}
312EXPORT_SYMBOL(ib_create_ah); 306EXPORT_SYMBOL(ib_create_ah);
313 307
308static int ib_get_header_version(const union rdma_network_hdr *hdr)
309{
310 const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
311 struct iphdr ip4h_checked;
312 const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;
313
314 /* If it's IPv6, the version must be 6, otherwise, the first
315 * 20 bytes (before the IPv4 header) are garbled.
316 */
317 if (ip6h->version != 6)
318 return (ip4h->version == 4) ? 4 : 0;
319 /* version may be 6 or 4 because the first 20 bytes could be garbled */
320
321 /* RoCE v2 requires no options, thus header length
322 * must be 5 words
323 */
324 if (ip4h->ihl != 5)
325 return 6;
326
327 /* Verify checksum.
328 * We can't write on scattered buffers so we need to copy to
329 * temp buffer.
330 */
331 memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
332 ip4h_checked.check = 0;
333 ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
334 /* if IPv4 header checksum is OK, believe it */
335 if (ip4h->check == ip4h_checked.check)
336 return 4;
337 return 6;
338}
339
340static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
341 u8 port_num,
342 const struct ib_grh *grh)
343{
344 int grh_version;
345
346 if (rdma_protocol_ib(device, port_num))
347 return RDMA_NETWORK_IB;
348
349 grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
350
351 if (grh_version == 4)
352 return RDMA_NETWORK_IPV4;
353
354 if (grh->next_hdr == IPPROTO_UDP)
355 return RDMA_NETWORK_IPV6;
356
357 return RDMA_NETWORK_ROCE_V1;
358}
359
314struct find_gid_index_context { 360struct find_gid_index_context {
315 u16 vlan_id; 361 u16 vlan_id;
362 enum ib_gid_type gid_type;
316}; 363};
317 364
318static bool find_gid_index(const union ib_gid *gid, 365static bool find_gid_index(const union ib_gid *gid,
@@ -322,6 +369,9 @@ static bool find_gid_index(const union ib_gid *gid,
322 struct find_gid_index_context *ctx = 369 struct find_gid_index_context *ctx =
323 (struct find_gid_index_context *)context; 370 (struct find_gid_index_context *)context;
324 371
372 if (ctx->gid_type != gid_attr->gid_type)
373 return false;
374
325 if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || 375 if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
326 (is_vlan_dev(gid_attr->ndev) && 376 (is_vlan_dev(gid_attr->ndev) &&
327 vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) 377 vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
@@ -332,14 +382,49 @@ static bool find_gid_index(const union ib_gid *gid,
332 382
333static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, 383static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
334 u16 vlan_id, const union ib_gid *sgid, 384 u16 vlan_id, const union ib_gid *sgid,
385 enum ib_gid_type gid_type,
335 u16 *gid_index) 386 u16 *gid_index)
336{ 387{
337 struct find_gid_index_context context = {.vlan_id = vlan_id}; 388 struct find_gid_index_context context = {.vlan_id = vlan_id,
389 .gid_type = gid_type};
338 390
339 return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, 391 return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
340 &context, gid_index); 392 &context, gid_index);
341} 393}
342 394
395static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
396 enum rdma_network_type net_type,
397 union ib_gid *sgid, union ib_gid *dgid)
398{
399 struct sockaddr_in src_in;
400 struct sockaddr_in dst_in;
401 __be32 src_saddr, dst_saddr;
402
403 if (!sgid || !dgid)
404 return -EINVAL;
405
406 if (net_type == RDMA_NETWORK_IPV4) {
407 memcpy(&src_in.sin_addr.s_addr,
408 &hdr->roce4grh.saddr, 4);
409 memcpy(&dst_in.sin_addr.s_addr,
410 &hdr->roce4grh.daddr, 4);
411 src_saddr = src_in.sin_addr.s_addr;
412 dst_saddr = dst_in.sin_addr.s_addr;
413 ipv6_addr_set_v4mapped(src_saddr,
414 (struct in6_addr *)sgid);
415 ipv6_addr_set_v4mapped(dst_saddr,
416 (struct in6_addr *)dgid);
417 return 0;
418 } else if (net_type == RDMA_NETWORK_IPV6 ||
419 net_type == RDMA_NETWORK_IB) {
420 *dgid = hdr->ibgrh.dgid;
421 *sgid = hdr->ibgrh.sgid;
422 return 0;
423 } else {
424 return -EINVAL;
425 }
426}
427
343int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, 428int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
344 const struct ib_wc *wc, const struct ib_grh *grh, 429 const struct ib_wc *wc, const struct ib_grh *grh,
345 struct ib_ah_attr *ah_attr) 430 struct ib_ah_attr *ah_attr)
@@ -347,33 +432,72 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
347 u32 flow_class; 432 u32 flow_class;
348 u16 gid_index; 433 u16 gid_index;
349 int ret; 434 int ret;
435 enum rdma_network_type net_type = RDMA_NETWORK_IB;
436 enum ib_gid_type gid_type = IB_GID_TYPE_IB;
437 int hoplimit = 0xff;
438 union ib_gid dgid;
439 union ib_gid sgid;
350 440
351 memset(ah_attr, 0, sizeof *ah_attr); 441 memset(ah_attr, 0, sizeof *ah_attr);
352 if (rdma_cap_eth_ah(device, port_num)) { 442 if (rdma_cap_eth_ah(device, port_num)) {
443 if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
444 net_type = wc->network_hdr_type;
445 else
446 net_type = ib_get_net_type_by_grh(device, port_num, grh);
447 gid_type = ib_network_to_gid_type(net_type);
448 }
449 ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
450 &sgid, &dgid);
451 if (ret)
452 return ret;
453
454 if (rdma_protocol_roce(device, port_num)) {
455 int if_index = 0;
353 u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ? 456 u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
354 wc->vlan_id : 0xffff; 457 wc->vlan_id : 0xffff;
458 struct net_device *idev;
459 struct net_device *resolved_dev;
355 460
356 if (!(wc->wc_flags & IB_WC_GRH)) 461 if (!(wc->wc_flags & IB_WC_GRH))
357 return -EPROTOTYPE; 462 return -EPROTOTYPE;
358 463
359 if (!(wc->wc_flags & IB_WC_WITH_SMAC) || 464 if (!device->get_netdev)
360 !(wc->wc_flags & IB_WC_WITH_VLAN)) { 465 return -EOPNOTSUPP;
361 ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, 466
362 ah_attr->dmac, 467 idev = device->get_netdev(device, port_num);
363 wc->wc_flags & IB_WC_WITH_VLAN ? 468 if (!idev)
364 NULL : &vlan_id, 469 return -ENODEV;
365 0); 470
366 if (ret) 471 ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
367 return ret; 472 ah_attr->dmac,
473 wc->wc_flags & IB_WC_WITH_VLAN ?
474 NULL : &vlan_id,
475 &if_index, &hoplimit);
476 if (ret) {
477 dev_put(idev);
478 return ret;
368 } 479 }
369 480
370 ret = get_sgid_index_from_eth(device, port_num, vlan_id, 481 resolved_dev = dev_get_by_index(&init_net, if_index);
371 &grh->dgid, &gid_index); 482 if (resolved_dev->flags & IFF_LOOPBACK) {
483 dev_put(resolved_dev);
484 resolved_dev = idev;
485 dev_hold(resolved_dev);
486 }
487 rcu_read_lock();
488 if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
489 resolved_dev))
490 ret = -EHOSTUNREACH;
491 rcu_read_unlock();
492 dev_put(idev);
493 dev_put(resolved_dev);
372 if (ret) 494 if (ret)
373 return ret; 495 return ret;
374 496
375 if (wc->wc_flags & IB_WC_WITH_SMAC) 497 ret = get_sgid_index_from_eth(device, port_num, vlan_id,
376 memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); 498 &dgid, gid_type, &gid_index);
499 if (ret)
500 return ret;
377 } 501 }
378 502
379 ah_attr->dlid = wc->slid; 503 ah_attr->dlid = wc->slid;
@@ -383,10 +507,11 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
383 507
384 if (wc->wc_flags & IB_WC_GRH) { 508 if (wc->wc_flags & IB_WC_GRH) {
385 ah_attr->ah_flags = IB_AH_GRH; 509 ah_attr->ah_flags = IB_AH_GRH;
386 ah_attr->grh.dgid = grh->sgid; 510 ah_attr->grh.dgid = sgid;
387 511
388 if (!rdma_cap_eth_ah(device, port_num)) { 512 if (!rdma_cap_eth_ah(device, port_num)) {
389 ret = ib_find_cached_gid_by_port(device, &grh->dgid, 513 ret = ib_find_cached_gid_by_port(device, &dgid,
514 IB_GID_TYPE_IB,
390 port_num, NULL, 515 port_num, NULL,
391 &gid_index); 516 &gid_index);
392 if (ret) 517 if (ret)
@@ -396,7 +521,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
396 ah_attr->grh.sgid_index = (u8) gid_index; 521 ah_attr->grh.sgid_index = (u8) gid_index;
397 flow_class = be32_to_cpu(grh->version_tclass_flow); 522 flow_class = be32_to_cpu(grh->version_tclass_flow);
398 ah_attr->grh.flow_label = flow_class & 0xFFFFF; 523 ah_attr->grh.flow_label = flow_class & 0xFFFFF;
399 ah_attr->grh.hop_limit = 0xFF; 524 ah_attr->grh.hop_limit = hoplimit;
400 ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF; 525 ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
401 } 526 }
402 return 0; 527 return 0;
@@ -1014,6 +1139,7 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
1014 union ib_gid sgid; 1139 union ib_gid sgid;
1015 struct ib_gid_attr sgid_attr; 1140 struct ib_gid_attr sgid_attr;
1016 int ifindex; 1141 int ifindex;
1142 int hop_limit;
1017 1143
1018 ret = ib_query_gid(qp->device, 1144 ret = ib_query_gid(qp->device,
1019 qp_attr->ah_attr.port_num, 1145 qp_attr->ah_attr.port_num,
@@ -1028,12 +1154,14 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
1028 1154
1029 ifindex = sgid_attr.ndev->ifindex; 1155 ifindex = sgid_attr.ndev->ifindex;
1030 1156
1031 ret = rdma_addr_find_dmac_by_grh(&sgid, 1157 ret = rdma_addr_find_l2_eth_by_grh(&sgid,
1032 &qp_attr->ah_attr.grh.dgid, 1158 &qp_attr->ah_attr.grh.dgid,
1033 qp_attr->ah_attr.dmac, 1159 qp_attr->ah_attr.dmac,
1034 NULL, ifindex); 1160 NULL, &ifindex, &hop_limit);
1035 1161
1036 dev_put(sgid_attr.ndev); 1162 dev_put(sgid_attr.ndev);
1163
1164 qp_attr->ah_attr.grh.hop_limit = hop_limit;
1037 } 1165 }
1038 } 1166 }
1039out: 1167out:
@@ -1215,29 +1343,17 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
1215 mr->pd = pd; 1343 mr->pd = pd;
1216 mr->uobject = NULL; 1344 mr->uobject = NULL;
1217 atomic_inc(&pd->usecnt); 1345 atomic_inc(&pd->usecnt);
1218 atomic_set(&mr->usecnt, 0);
1219 } 1346 }
1220 1347
1221 return mr; 1348 return mr;
1222} 1349}
1223EXPORT_SYMBOL(ib_get_dma_mr); 1350EXPORT_SYMBOL(ib_get_dma_mr);
1224 1351
1225int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
1226{
1227 return mr->device->query_mr ?
1228 mr->device->query_mr(mr, mr_attr) : -ENOSYS;
1229}
1230EXPORT_SYMBOL(ib_query_mr);
1231
1232int ib_dereg_mr(struct ib_mr *mr) 1352int ib_dereg_mr(struct ib_mr *mr)
1233{ 1353{
1234 struct ib_pd *pd; 1354 struct ib_pd *pd = mr->pd;
1235 int ret; 1355 int ret;
1236 1356
1237 if (atomic_read(&mr->usecnt))
1238 return -EBUSY;
1239
1240 pd = mr->pd;
1241 ret = mr->device->dereg_mr(mr); 1357 ret = mr->device->dereg_mr(mr);
1242 if (!ret) 1358 if (!ret)
1243 atomic_dec(&pd->usecnt); 1359 atomic_dec(&pd->usecnt);
@@ -1273,49 +1389,12 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
1273 mr->pd = pd; 1389 mr->pd = pd;
1274 mr->uobject = NULL; 1390 mr->uobject = NULL;
1275 atomic_inc(&pd->usecnt); 1391 atomic_inc(&pd->usecnt);
1276 atomic_set(&mr->usecnt, 0);
1277 } 1392 }
1278 1393
1279 return mr; 1394 return mr;
1280} 1395}
1281EXPORT_SYMBOL(ib_alloc_mr); 1396EXPORT_SYMBOL(ib_alloc_mr);
1282 1397
1283/* Memory windows */
1284
1285struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
1286{
1287 struct ib_mw *mw;
1288
1289 if (!pd->device->alloc_mw)
1290 return ERR_PTR(-ENOSYS);
1291
1292 mw = pd->device->alloc_mw(pd, type);
1293 if (!IS_ERR(mw)) {
1294 mw->device = pd->device;
1295 mw->pd = pd;
1296 mw->uobject = NULL;
1297 mw->type = type;
1298 atomic_inc(&pd->usecnt);
1299 }
1300
1301 return mw;
1302}
1303EXPORT_SYMBOL(ib_alloc_mw);
1304
1305int ib_dealloc_mw(struct ib_mw *mw)
1306{
1307 struct ib_pd *pd;
1308 int ret;
1309
1310 pd = mw->pd;
1311 ret = mw->device->dealloc_mw(mw);
1312 if (!ret)
1313 atomic_dec(&pd->usecnt);
1314
1315 return ret;
1316}
1317EXPORT_SYMBOL(ib_dealloc_mw);
1318
1319/* "Fast" memory regions */ 1398/* "Fast" memory regions */
1320 1399
1321struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, 1400struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
@@ -1530,7 +1609,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
1530 int (*set_page)(struct ib_mr *, u64)) 1609 int (*set_page)(struct ib_mr *, u64))
1531{ 1610{
1532 struct scatterlist *sg; 1611 struct scatterlist *sg;
1533 u64 last_end_dma_addr = 0, last_page_addr = 0; 1612 u64 last_end_dma_addr = 0;
1534 unsigned int last_page_off = 0; 1613 unsigned int last_page_off = 0;
1535 u64 page_mask = ~((u64)mr->page_size - 1); 1614 u64 page_mask = ~((u64)mr->page_size - 1);
1536 int i, ret; 1615 int i, ret;
@@ -1572,7 +1651,6 @@ next_page:
1572 1651
1573 mr->length += dma_len; 1652 mr->length += dma_len;
1574 last_end_dma_addr = end_dma_addr; 1653 last_end_dma_addr = end_dma_addr;
1575 last_page_addr = end_dma_addr & page_mask;
1576 last_page_off = end_dma_addr & ~page_mask; 1654 last_page_off = end_dma_addr & ~page_mask;
1577 } 1655 }
1578 1656
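
get_gids_from_rdma_hdr() above relies on the RoCE v2 convention that an IPv4 endpoint is represented inside the 16-byte GID as an IPv4-mapped IPv6 address (::ffff:a.b.c.d); ipv6_addr_set_v4mapped() performs exactly that conversion. A small stand-alone illustration of the layout, independent of the kernel helpers:

/* Stand-alone illustration of the IPv4-mapped GID layout used for
 * RDMA_NETWORK_IPV4: bytes 10..11 are 0xff and the IPv4 address sits in
 * bytes 12..15, already in wire order. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void set_v4mapped_gid(const uint8_t addr[4], uint8_t gid[16])
{
	memset(gid, 0, 16);
	gid[10] = 0xff;
	gid[11] = 0xff;
	memcpy(&gid[12], addr, 4);
}

int main(void)
{
	const uint8_t addr[4] = { 192, 168, 0, 1 };
	uint8_t gid[16];

	set_v4mapped_gid(addr, gid);
	printf("gid = ::ffff:%u.%u.%u.%u\n", gid[12], gid[13], gid[14], gid[15]);
	return 0;
}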
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index cb78b1e9bcd9..f504ba73e5dc 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en
149 error = l2t_send(tdev, skb, l2e); 149 error = l2t_send(tdev, skb, l2e);
150 if (error < 0) 150 if (error < 0)
151 kfree_skb(skb); 151 kfree_skb(skb);
152 return error; 152 return error < 0 ? error : 0;
153} 153}
154 154
155int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) 155int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
@@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
165 error = cxgb3_ofld_send(tdev, skb); 165 error = cxgb3_ofld_send(tdev, skb);
166 if (error < 0) 166 if (error < 0)
167 kfree_skb(skb); 167 kfree_skb(skb);
168 return error; 168 return error < 0 ? error : 0;
169} 169}
170 170
171static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) 171static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index cfe404925a39..97fbfd2c298e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -115,10 +115,6 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
115 case T3_SEND_WITH_SE_INV: 115 case T3_SEND_WITH_SE_INV:
116 wc->opcode = IB_WC_SEND; 116 wc->opcode = IB_WC_SEND;
117 break; 117 break;
118 case T3_BIND_MW:
119 wc->opcode = IB_WC_BIND_MW;
120 break;
121
122 case T3_LOCAL_INV: 118 case T3_LOCAL_INV:
123 wc->opcode = IB_WC_LOCAL_INV; 119 wc->opcode = IB_WC_LOCAL_INV;
124 break; 120 break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 5c36ee2809ac..1d04c872c9d5 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -75,37 +75,6 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
75 return ret; 75 return ret;
76} 76}
77 77
78int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
79 struct iwch_mr *mhp,
80 int shift,
81 int npages)
82{
83 u32 stag;
84 int ret;
85
86 /* We could support this... */
87 if (npages > mhp->attr.pbl_size)
88 return -ENOMEM;
89
90 stag = mhp->attr.stag;
91 if (cxio_reregister_phys_mem(&rhp->rdev,
92 &stag, mhp->attr.pdid,
93 mhp->attr.perms,
94 mhp->attr.zbva,
95 mhp->attr.va_fbo,
96 mhp->attr.len,
97 shift - 12,
98 mhp->attr.pbl_size, mhp->attr.pbl_addr))
99 return -ENOMEM;
100
101 ret = iwch_finish_mem_reg(mhp, stag);
102 if (ret)
103 cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
104 mhp->attr.pbl_addr);
105
106 return ret;
107}
108
109int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) 78int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
110{ 79{
111 mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, 80 mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
@@ -130,74 +99,3 @@ int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
130 return cxio_write_pbl(&mhp->rhp->rdev, pages, 99 return cxio_write_pbl(&mhp->rhp->rdev, pages,
131 mhp->attr.pbl_addr + (offset << 3), npages); 100 mhp->attr.pbl_addr + (offset << 3), npages);
132} 101}
133
134int build_phys_page_list(struct ib_phys_buf *buffer_list,
135 int num_phys_buf,
136 u64 *iova_start,
137 u64 *total_size,
138 int *npages,
139 int *shift,
140 __be64 **page_list)
141{
142 u64 mask;
143 int i, j, n;
144
145 mask = 0;
146 *total_size = 0;
147 for (i = 0; i < num_phys_buf; ++i) {
148 if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
149 return -EINVAL;
150 if (i != 0 && i != num_phys_buf - 1 &&
151 (buffer_list[i].size & ~PAGE_MASK))
152 return -EINVAL;
153 *total_size += buffer_list[i].size;
154 if (i > 0)
155 mask |= buffer_list[i].addr;
156 else
157 mask |= buffer_list[i].addr & PAGE_MASK;
158 if (i != num_phys_buf - 1)
159 mask |= buffer_list[i].addr + buffer_list[i].size;
160 else
161 mask |= (buffer_list[i].addr + buffer_list[i].size +
162 PAGE_SIZE - 1) & PAGE_MASK;
163 }
164
165 if (*total_size > 0xFFFFFFFFULL)
166 return -ENOMEM;
167
168 /* Find largest page shift we can use to cover buffers */
169 for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
170 if ((1ULL << *shift) & mask)
171 break;
172
173 buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
174 buffer_list[0].addr &= ~0ull << *shift;
175
176 *npages = 0;
177 for (i = 0; i < num_phys_buf; ++i)
178 *npages += (buffer_list[i].size +
179 (1ULL << *shift) - 1) >> *shift;
180
181 if (!*npages)
182 return -EINVAL;
183
184 *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
185 if (!*page_list)
186 return -ENOMEM;
187
188 n = 0;
189 for (i = 0; i < num_phys_buf; ++i)
190 for (j = 0;
191 j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
192 ++j)
193 (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
194 ((u64) j << *shift));
195
196 PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
197 __func__, (unsigned long long) *iova_start,
198 (unsigned long long) mask, *shift, (unsigned long long) *total_size,
199 *npages);
200
201 return 0;
202
203}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index c34725ca0bb4..2734820d291b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -458,9 +458,6 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
458 u32 mmid; 458 u32 mmid;
459 459
460 PDBG("%s ib_mr %p\n", __func__, ib_mr); 460 PDBG("%s ib_mr %p\n", __func__, ib_mr);
461 /* There can be no memory windows */
462 if (atomic_read(&ib_mr->usecnt))
463 return -EINVAL;
464 461
465 mhp = to_iwch_mr(ib_mr); 462 mhp = to_iwch_mr(ib_mr);
466 kfree(mhp->pages); 463 kfree(mhp->pages);
@@ -479,24 +476,25 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
479 return 0; 476 return 0;
480} 477}
481 478
482static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, 479static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
483 struct ib_phys_buf *buffer_list,
484 int num_phys_buf,
485 int acc,
486 u64 *iova_start)
487{ 480{
488 __be64 *page_list; 481 const u64 total_size = 0xffffffff;
489 int shift; 482 const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
490 u64 total_size; 483 struct iwch_pd *php = to_iwch_pd(pd);
491 int npages; 484 struct iwch_dev *rhp = php->rhp;
492 struct iwch_dev *rhp;
493 struct iwch_pd *php;
494 struct iwch_mr *mhp; 485 struct iwch_mr *mhp;
495 int ret; 486 __be64 *page_list;
487 int shift = 26, npages, ret, i;
496 488
497 PDBG("%s ib_pd %p\n", __func__, pd); 489 PDBG("%s ib_pd %p\n", __func__, pd);
498 php = to_iwch_pd(pd); 490
499 rhp = php->rhp; 491 /*
492 * T3 only supports 32 bits of size.
493 */
494 if (sizeof(phys_addr_t) > 4) {
495 pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
496 return ERR_PTR(-ENOTSUPP);
497 }
500 498
501 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); 499 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
502 if (!mhp) 500 if (!mhp)
@@ -504,22 +502,23 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
504 502
505 mhp->rhp = rhp; 503 mhp->rhp = rhp;
506 504
507 /* First check that we have enough alignment */ 505 npages = (total_size + (1ULL << shift) - 1) >> shift;
508 if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { 506 if (!npages) {
509 ret = -EINVAL; 507 ret = -EINVAL;
510 goto err; 508 goto err;
511 } 509 }
512 510
513 if (num_phys_buf > 1 && 511 page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
514 ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) { 512 if (!page_list) {
515 ret = -EINVAL; 513 ret = -ENOMEM;
516 goto err; 514 goto err;
517 } 515 }
518 516
519 ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, 517 for (i = 0; i < npages; i++)
520 &total_size, &npages, &shift, &page_list); 518 page_list[i] = cpu_to_be64((u64)i << shift);
521 if (ret) 519
522 goto err; 520 PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
521 __func__, mask, shift, total_size, npages);
523 522
524 ret = iwch_alloc_pbl(mhp, npages); 523 ret = iwch_alloc_pbl(mhp, npages);
525 if (ret) { 524 if (ret) {
@@ -536,7 +535,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
536 mhp->attr.zbva = 0; 535 mhp->attr.zbva = 0;
537 536
538 mhp->attr.perms = iwch_ib_to_tpt_access(acc); 537 mhp->attr.perms = iwch_ib_to_tpt_access(acc);
539 mhp->attr.va_fbo = *iova_start; 538 mhp->attr.va_fbo = 0;
540 mhp->attr.page_size = shift - 12; 539 mhp->attr.page_size = shift - 12;
541 540
542 mhp->attr.len = (u32) total_size; 541 mhp->attr.len = (u32) total_size;
@@ -553,76 +552,8 @@ err_pbl:
553err: 552err:
554 kfree(mhp); 553 kfree(mhp);
555 return ERR_PTR(ret); 554 return ERR_PTR(ret);
556
557}
558
559static int iwch_reregister_phys_mem(struct ib_mr *mr,
560 int mr_rereg_mask,
561 struct ib_pd *pd,
562 struct ib_phys_buf *buffer_list,
563 int num_phys_buf,
564 int acc, u64 * iova_start)
565{
566
567 struct iwch_mr mh, *mhp;
568 struct iwch_pd *php;
569 struct iwch_dev *rhp;
570 __be64 *page_list = NULL;
571 int shift = 0;
572 u64 total_size;
573 int npages = 0;
574 int ret;
575
576 PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
577
578 /* There can be no memory windows */
579 if (atomic_read(&mr->usecnt))
580 return -EINVAL;
581
582 mhp = to_iwch_mr(mr);
583 rhp = mhp->rhp;
584 php = to_iwch_pd(mr->pd);
585
586 /* make sure we are on the same adapter */
587 if (rhp != php->rhp)
588 return -EINVAL;
589
590 memcpy(&mh, mhp, sizeof *mhp);
591
592 if (mr_rereg_mask & IB_MR_REREG_PD)
593 php = to_iwch_pd(pd);
594 if (mr_rereg_mask & IB_MR_REREG_ACCESS)
595 mh.attr.perms = iwch_ib_to_tpt_access(acc);
596 if (mr_rereg_mask & IB_MR_REREG_TRANS) {
597 ret = build_phys_page_list(buffer_list, num_phys_buf,
598 iova_start,
599 &total_size, &npages,
600 &shift, &page_list);
601 if (ret)
602 return ret;
603 }
604
605 ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
606 kfree(page_list);
607 if (ret) {
608 return ret;
609 }
610 if (mr_rereg_mask & IB_MR_REREG_PD)
611 mhp->attr.pdid = php->pdid;
612 if (mr_rereg_mask & IB_MR_REREG_ACCESS)
613 mhp->attr.perms = iwch_ib_to_tpt_access(acc);
614 if (mr_rereg_mask & IB_MR_REREG_TRANS) {
615 mhp->attr.zbva = 0;
616 mhp->attr.va_fbo = *iova_start;
617 mhp->attr.page_size = shift - 12;
618 mhp->attr.len = (u32) total_size;
619 mhp->attr.pbl_size = npages;
620 }
621
622 return 0;
623} 555}
624 556
625
626static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 557static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
627 u64 virt, int acc, struct ib_udata *udata) 558 u64 virt, int acc, struct ib_udata *udata)
628{ 559{
@@ -726,28 +657,6 @@ err:
726 return ERR_PTR(err); 657 return ERR_PTR(err);
727} 658}
728 659
729static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
730{
731 struct ib_phys_buf bl;
732 u64 kva;
733 struct ib_mr *ibmr;
734
735 PDBG("%s ib_pd %p\n", __func__, pd);
736
737 /*
738 * T3 only supports 32 bits of size.
739 */
740 if (sizeof(phys_addr_t) > 4) {
741 pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
742 return ERR_PTR(-ENOTSUPP);
743 }
744 bl.size = 0xffffffff;
745 bl.addr = 0;
746 kva = 0;
747 ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
748 return ibmr;
749}
750
751static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) 660static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
752{ 661{
753 struct iwch_dev *rhp; 662 struct iwch_dev *rhp;
@@ -1452,12 +1361,9 @@ int iwch_register_device(struct iwch_dev *dev)
1452 dev->ibdev.resize_cq = iwch_resize_cq; 1361 dev->ibdev.resize_cq = iwch_resize_cq;
1453 dev->ibdev.poll_cq = iwch_poll_cq; 1362 dev->ibdev.poll_cq = iwch_poll_cq;
1454 dev->ibdev.get_dma_mr = iwch_get_dma_mr; 1363 dev->ibdev.get_dma_mr = iwch_get_dma_mr;
1455 dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
1456 dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
1457 dev->ibdev.reg_user_mr = iwch_reg_user_mr; 1364 dev->ibdev.reg_user_mr = iwch_reg_user_mr;
1458 dev->ibdev.dereg_mr = iwch_dereg_mr; 1365 dev->ibdev.dereg_mr = iwch_dereg_mr;
1459 dev->ibdev.alloc_mw = iwch_alloc_mw; 1366 dev->ibdev.alloc_mw = iwch_alloc_mw;
1460 dev->ibdev.bind_mw = iwch_bind_mw;
1461 dev->ibdev.dealloc_mw = iwch_dealloc_mw; 1367 dev->ibdev.dealloc_mw = iwch_dealloc_mw;
1462 dev->ibdev.alloc_mr = iwch_alloc_mr; 1368 dev->ibdev.alloc_mr = iwch_alloc_mr;
1463 dev->ibdev.map_mr_sg = iwch_map_mr_sg; 1369 dev->ibdev.map_mr_sg = iwch_map_mr_sg;
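
The rewritten iwch_get_dma_mr() above covers the full 32-bit address range T3 can express with a single PBL built at shift 26, i.e. 64 entries of 64 MB each. A quick arithmetic check of that sizing; the constants come from the hunk, the program itself is only an illustration:

/* Sanity-check of the PBL sizing in the new iwch_get_dma_mr():
 * total_size = 0xffffffff, shift = 26  =>  64 pages of 64 MB. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t total_size = 0xffffffffULL;
	const int shift = 26;
	const uint64_t npages = (total_size + (1ULL << shift) - 1) >> shift;

	printf("npages = %llu, page size = %llu MB\n",
	       (unsigned long long)npages,
	       (unsigned long long)((1ULL << shift) >> 20));
	return 0;
}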
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 2ac85b86a680..252c464a09f6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -330,9 +330,6 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
330 struct ib_send_wr **bad_wr); 330 struct ib_send_wr **bad_wr);
331int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 331int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
332 struct ib_recv_wr **bad_wr); 332 struct ib_recv_wr **bad_wr);
333int iwch_bind_mw(struct ib_qp *qp,
334 struct ib_mw *mw,
335 struct ib_mw_bind *mw_bind);
336int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); 333int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
337int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); 334int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
338int iwch_post_zb_read(struct iwch_ep *ep); 335int iwch_post_zb_read(struct iwch_ep *ep);
@@ -341,21 +338,9 @@ void iwch_unregister_device(struct iwch_dev *dev);
341void stop_read_rep_timer(struct iwch_qp *qhp); 338void stop_read_rep_timer(struct iwch_qp *qhp);
342int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, 339int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
343 struct iwch_mr *mhp, int shift); 340 struct iwch_mr *mhp, int shift);
344int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
345 struct iwch_mr *mhp,
346 int shift,
347 int npages);
348int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); 341int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
349void iwch_free_pbl(struct iwch_mr *mhp); 342void iwch_free_pbl(struct iwch_mr *mhp);
350int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset); 343int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
351int build_phys_page_list(struct ib_phys_buf *buffer_list,
352 int num_phys_buf,
353 u64 *iova_start,
354 u64 *total_size,
355 int *npages,
356 int *shift,
357 __be64 **page_list);
358
359 344
360#define IWCH_NODE_DESC "cxgb3 Chelsio Communications" 345#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
361 346
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index d0548fc6395e..d939980a708f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -526,88 +526,6 @@ out:
526 return err; 526 return err;
527} 527}
528 528
529int iwch_bind_mw(struct ib_qp *qp,
530 struct ib_mw *mw,
531 struct ib_mw_bind *mw_bind)
532{
533 struct iwch_dev *rhp;
534 struct iwch_mw *mhp;
535 struct iwch_qp *qhp;
536 union t3_wr *wqe;
537 u32 pbl_addr;
538 u8 page_size;
539 u32 num_wrs;
540 unsigned long flag;
541 struct ib_sge sgl;
542 int err=0;
543 enum t3_wr_flags t3_wr_flags;
544 u32 idx;
545 struct t3_swsq *sqp;
546
547 qhp = to_iwch_qp(qp);
548 mhp = to_iwch_mw(mw);
549 rhp = qhp->rhp;
550
551 spin_lock_irqsave(&qhp->lock, flag);
552 if (qhp->attr.state > IWCH_QP_STATE_RTS) {
553 spin_unlock_irqrestore(&qhp->lock, flag);
554 return -EINVAL;
555 }
556 num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
557 qhp->wq.sq_size_log2);
558 if (num_wrs == 0) {
559 spin_unlock_irqrestore(&qhp->lock, flag);
560 return -ENOMEM;
561 }
562 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
563 PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
564 mw, mw_bind);
565 wqe = (union t3_wr *) (qhp->wq.queue + idx);
566
567 t3_wr_flags = 0;
568 if (mw_bind->send_flags & IB_SEND_SIGNALED)
569 t3_wr_flags = T3_COMPLETION_FLAG;
570
571 sgl.addr = mw_bind->bind_info.addr;
572 sgl.lkey = mw_bind->bind_info.mr->lkey;
573 sgl.length = mw_bind->bind_info.length;
574 wqe->bind.reserved = 0;
575 wqe->bind.type = TPT_VATO;
576
577 /* TBD: check perms */
578 wqe->bind.perms = iwch_ib_to_tpt_bind_access(
579 mw_bind->bind_info.mw_access_flags);
580 wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
581 wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
582 wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
583 wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
584 err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
585 if (err) {
586 spin_unlock_irqrestore(&qhp->lock, flag);
587 return err;
588 }
589 wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
590 sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
591 sqp->wr_id = mw_bind->wr_id;
592 sqp->opcode = T3_BIND_MW;
593 sqp->sq_wptr = qhp->wq.sq_wptr;
594 sqp->complete = 0;
595 sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
596 wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
597 wqe->bind.mr_pagesz = page_size;
598 build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
599 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
600 sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
601 ++(qhp->wq.wptr);
602 ++(qhp->wq.sq_wptr);
603 spin_unlock_irqrestore(&qhp->lock, flag);
604
605 if (cxio_wq_db_enabled(&qhp->wq))
606 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
607
608 return err;
609}
610
611static inline void build_term_codes(struct respQ_msg_t *rsp_msg, 529static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
612 u8 *layer_type, u8 *ecode) 530 u8 *layer_type, u8 *ecode)
613{ 531{
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 326d07d823a5..cd2ff5f9518a 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3271,6 +3271,12 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3271 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) 3271 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3272 &ep->com.mapped_local_addr; 3272 &ep->com.mapped_local_addr;
3273 3273
3274 if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
3275 err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
3276 (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3277 if (err)
3278 return err;
3279 }
3274 c4iw_init_wr_wait(&ep->com.wr_wait); 3280 c4iw_init_wr_wait(&ep->com.wr_wait);
3275 err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0], 3281 err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
3276 ep->stid, &sin6->sin6_addr, 3282 ep->stid, &sin6->sin6_addr,
@@ -3282,13 +3288,13 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3282 0, 0, __func__); 3288 0, 0, __func__);
3283 else if (err > 0) 3289 else if (err > 0)
3284 err = net_xmit_errno(err); 3290 err = net_xmit_errno(err);
3285 if (err) 3291 if (err) {
3292 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3293 (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3286 pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n", 3294 pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
3287 err, ep->stid, 3295 err, ep->stid,
3288 sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port)); 3296 sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
3289 else 3297 }
3290 cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
3291 (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3292 return err; 3298 return err;
3293} 3299}
3294 3300
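
The create_server6() hunk flips the CLIP reference handling from "acquire on success" to "acquire up front, release on failure", which keeps the local IPv6 address reference valid for the whole cxgb4_create_server6() call. The generic shape of that ordering, with placeholder names rather than the real cxgb4 entry points:

/* Acquire-before / release-on-failure ordering, as adopted by the hunk.
 * resource_get(), resource_put() and do_setup() are placeholders. */
int resource_get(void);
void resource_put(void);
int do_setup(void);

static int setup_with_resource(void)
{
	int err;

	err = resource_get();		/* hold the reference before setup */
	if (err)
		return err;

	err = do_setup();
	if (err)
		resource_put();		/* undo on any setup failure */

	return err;
}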
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index de9cd6901752..cf21df4a8bf5 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -744,9 +744,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
744 case FW_RI_SEND_WITH_SE: 744 case FW_RI_SEND_WITH_SE:
745 wc->opcode = IB_WC_SEND; 745 wc->opcode = IB_WC_SEND;
746 break; 746 break;
747 case FW_RI_BIND_MW:
748 wc->opcode = IB_WC_BIND_MW;
749 break;
750 747
751 case FW_RI_LOCAL_INV: 748 case FW_RI_LOCAL_INV:
752 wc->opcode = IB_WC_LOCAL_INV; 749 wc->opcode = IB_WC_LOCAL_INV;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 58fce1742b8d..8024ea4417b8 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -315,14 +315,12 @@ static int qp_release(struct inode *inode, struct file *file)
315static int qp_open(struct inode *inode, struct file *file) 315static int qp_open(struct inode *inode, struct file *file)
316{ 316{
317 struct c4iw_debugfs_data *qpd; 317 struct c4iw_debugfs_data *qpd;
318 int ret = 0;
319 int count = 1; 318 int count = 1;
320 319
321 qpd = kmalloc(sizeof *qpd, GFP_KERNEL); 320 qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
322 if (!qpd) { 321 if (!qpd)
323 ret = -ENOMEM; 322 return -ENOMEM;
324 goto out; 323
325 }
326 qpd->devp = inode->i_private; 324 qpd->devp = inode->i_private;
327 qpd->pos = 0; 325 qpd->pos = 0;
328 326
@@ -333,8 +331,8 @@ static int qp_open(struct inode *inode, struct file *file)
333 qpd->bufsize = count * 128; 331 qpd->bufsize = count * 128;
334 qpd->buf = vmalloc(qpd->bufsize); 332 qpd->buf = vmalloc(qpd->bufsize);
335 if (!qpd->buf) { 333 if (!qpd->buf) {
336 ret = -ENOMEM; 334 kfree(qpd);
337 goto err1; 335 return -ENOMEM;
338 } 336 }
339 337
340 spin_lock_irq(&qpd->devp->lock); 338 spin_lock_irq(&qpd->devp->lock);
@@ -343,11 +341,7 @@ static int qp_open(struct inode *inode, struct file *file)
343 341
344 qpd->buf[qpd->pos++] = 0; 342 qpd->buf[qpd->pos++] = 0;
345 file->private_data = qpd; 343 file->private_data = qpd;
346 goto out; 344 return 0;
347err1:
348 kfree(qpd);
349out:
350 return ret;
351} 345}
352 346
353static const struct file_operations qp_debugfs_fops = { 347static const struct file_operations qp_debugfs_fops = {
@@ -781,8 +775,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
781 pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n", 775 pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
782 pci_name(rdev->lldi.pdev), rdev->lldi.udb_density, 776 pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
783 rdev->lldi.ucq_density); 777 rdev->lldi.ucq_density);
784 err = -EINVAL; 778 return -EINVAL;
785 goto err1;
786 } 779 }
787 if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start || 780 if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
788 rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) { 781 rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
@@ -791,8 +784,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
791 pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start, 784 pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
792 rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size, 785 rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
793 rdev->lldi.vr->cq.size); 786 rdev->lldi.vr->cq.size);
794 err = -EINVAL; 787 return -EINVAL;
795 goto err1;
796 } 788 }
797 789
798 rdev->qpmask = rdev->lldi.udb_density - 1; 790 rdev->qpmask = rdev->lldi.udb_density - 1;
@@ -816,10 +808,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
816 rdev->lldi.db_reg, rdev->lldi.gts_reg, 808 rdev->lldi.db_reg, rdev->lldi.gts_reg,
817 rdev->qpmask, rdev->cqmask); 809 rdev->qpmask, rdev->cqmask);
818 810
819 if (c4iw_num_stags(rdev) == 0) { 811 if (c4iw_num_stags(rdev) == 0)
820 err = -EINVAL; 812 return -EINVAL;
821 goto err1;
822 }
823 813
824 rdev->stats.pd.total = T4_MAX_NUM_PD; 814 rdev->stats.pd.total = T4_MAX_NUM_PD;
825 rdev->stats.stag.total = rdev->lldi.vr->stag.size; 815 rdev->stats.stag.total = rdev->lldi.vr->stag.size;
@@ -831,29 +821,31 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
831 err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); 821 err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
832 if (err) { 822 if (err) {
833 printk(KERN_ERR MOD "error %d initializing resources\n", err); 823 printk(KERN_ERR MOD "error %d initializing resources\n", err);
834 goto err1; 824 return err;
835 } 825 }
836 err = c4iw_pblpool_create(rdev); 826 err = c4iw_pblpool_create(rdev);
837 if (err) { 827 if (err) {
838 printk(KERN_ERR MOD "error %d initializing pbl pool\n", err); 828 printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
839 goto err2; 829 goto destroy_resource;
840 } 830 }
841 err = c4iw_rqtpool_create(rdev); 831 err = c4iw_rqtpool_create(rdev);
842 if (err) { 832 if (err) {
843 printk(KERN_ERR MOD "error %d initializing rqt pool\n", err); 833 printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
844 goto err3; 834 goto destroy_pblpool;
845 } 835 }
846 err = c4iw_ocqp_pool_create(rdev); 836 err = c4iw_ocqp_pool_create(rdev);
847 if (err) { 837 if (err) {
848 printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err); 838 printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
849 goto err4; 839 goto destroy_rqtpool;
850 } 840 }
851 rdev->status_page = (struct t4_dev_status_page *) 841 rdev->status_page = (struct t4_dev_status_page *)
852 __get_free_page(GFP_KERNEL); 842 __get_free_page(GFP_KERNEL);
853 if (!rdev->status_page) { 843 if (!rdev->status_page)
854 pr_err(MOD "error allocating status page\n"); 844 goto destroy_ocqp_pool;
855 goto err4; 845 rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
856 } 846 rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
847 rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
848 rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
857 849
858 if (c4iw_wr_log) { 850 if (c4iw_wr_log) {
859 rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) * 851 rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
@@ -869,13 +861,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
869 rdev->status_page->db_off = 0; 861 rdev->status_page->db_off = 0;
870 862
871 return 0; 863 return 0;
872err4: 864destroy_ocqp_pool:
865 c4iw_ocqp_pool_destroy(rdev);
866destroy_rqtpool:
873 c4iw_rqtpool_destroy(rdev); 867 c4iw_rqtpool_destroy(rdev);
874err3: 868destroy_pblpool:
875 c4iw_pblpool_destroy(rdev); 869 c4iw_pblpool_destroy(rdev);
876err2: 870destroy_resource:
877 c4iw_destroy_resource(&rdev->resource); 871 c4iw_destroy_resource(&rdev->resource);
878err1:
879 return err; 872 return err;
880} 873}
881 874
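
The c4iw_rdev_open() cleanup above renames the numbered error labels to ones describing the undo action and drops err1 entirely in favour of direct returns before any resource is held. A minimal, hedged sketch of that unwind shape with placeholder steps:

/* Descriptive-label unwind, mirroring the device.c hunk; step_*() and
 * undo_*() are placeholders for the real create/destroy calls. */
int step_a(void);
int step_b(void);
int step_c(void);
void undo_step_a(void);
void undo_step_b(void);

static int open_everything(void)
{
	int err;

	err = step_a();
	if (err)
		return err;	/* nothing to unwind yet */

	err = step_b();
	if (err)
		goto undo_a;

	err = step_c();
	if (err)
		goto undo_b;

	return 0;

undo_b:
	undo_step_b();
undo_a:
	undo_step_a();
	return err;
}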
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 00e55faa086a..fb2de75a0392 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -947,8 +947,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
947 struct ib_send_wr **bad_wr); 947 struct ib_send_wr **bad_wr);
948int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 948int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
949 struct ib_recv_wr **bad_wr); 949 struct ib_recv_wr **bad_wr);
950int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
951 struct ib_mw_bind *mw_bind);
952int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); 950int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
953int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); 951int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
954int c4iw_destroy_listen(struct iw_cm_id *cm_id); 952int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -968,17 +966,6 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
968 u64 length, u64 virt, int acc, 966 u64 length, u64 virt, int acc,
969 struct ib_udata *udata); 967 struct ib_udata *udata);
970struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); 968struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
971struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
972 struct ib_phys_buf *buffer_list,
973 int num_phys_buf,
974 int acc,
975 u64 *iova_start);
976int c4iw_reregister_phys_mem(struct ib_mr *mr,
977 int mr_rereg_mask,
978 struct ib_pd *pd,
979 struct ib_phys_buf *buffer_list,
980 int num_phys_buf,
981 int acc, u64 *iova_start);
982int c4iw_dereg_mr(struct ib_mr *ib_mr); 969int c4iw_dereg_mr(struct ib_mr *ib_mr);
983int c4iw_destroy_cq(struct ib_cq *ib_cq); 970int c4iw_destroy_cq(struct ib_cq *ib_cq);
984struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, 971struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index e1629ab58db7..7849890c4781 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -392,32 +392,6 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
392 return ret; 392 return ret;
393} 393}
394 394
395static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
396 struct c4iw_mr *mhp, int shift, int npages)
397{
398 u32 stag;
399 int ret;
400
401 if (npages > mhp->attr.pbl_size)
402 return -ENOMEM;
403
404 stag = mhp->attr.stag;
405 ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
406 FW_RI_STAG_NSMR, mhp->attr.perms,
407 mhp->attr.mw_bind_enable, mhp->attr.zbva,
408 mhp->attr.va_fbo, mhp->attr.len, shift - 12,
409 mhp->attr.pbl_size, mhp->attr.pbl_addr);
410 if (ret)
411 return ret;
412
413 ret = finish_mem_reg(mhp, stag);
414 if (ret)
415 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
416 mhp->attr.pbl_addr);
417
418 return ret;
419}
420
421static int alloc_pbl(struct c4iw_mr *mhp, int npages) 395static int alloc_pbl(struct c4iw_mr *mhp, int npages)
422{ 396{
423 mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev, 397 mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev,
@@ -431,228 +405,6 @@ static int alloc_pbl(struct c4iw_mr *mhp, int npages)
431 return 0; 405 return 0;
432} 406}
433 407
434static int build_phys_page_list(struct ib_phys_buf *buffer_list,
435 int num_phys_buf, u64 *iova_start,
436 u64 *total_size, int *npages,
437 int *shift, __be64 **page_list)
438{
439 u64 mask;
440 int i, j, n;
441
442 mask = 0;
443 *total_size = 0;
444 for (i = 0; i < num_phys_buf; ++i) {
445 if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
446 return -EINVAL;
447 if (i != 0 && i != num_phys_buf - 1 &&
448 (buffer_list[i].size & ~PAGE_MASK))
449 return -EINVAL;
450 *total_size += buffer_list[i].size;
451 if (i > 0)
452 mask |= buffer_list[i].addr;
453 else
454 mask |= buffer_list[i].addr & PAGE_MASK;
455 if (i != num_phys_buf - 1)
456 mask |= buffer_list[i].addr + buffer_list[i].size;
457 else
458 mask |= (buffer_list[i].addr + buffer_list[i].size +
459 PAGE_SIZE - 1) & PAGE_MASK;
460 }
461
462 if (*total_size > 0xFFFFFFFFULL)
463 return -ENOMEM;
464
465 /* Find largest page shift we can use to cover buffers */
466 for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
467 if ((1ULL << *shift) & mask)
468 break;
469
470 buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
471 buffer_list[0].addr &= ~0ull << *shift;
472
473 *npages = 0;
474 for (i = 0; i < num_phys_buf; ++i)
475 *npages += (buffer_list[i].size +
476 (1ULL << *shift) - 1) >> *shift;
477
478 if (!*npages)
479 return -EINVAL;
480
481 *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
482 if (!*page_list)
483 return -ENOMEM;
484
485 n = 0;
486 for (i = 0; i < num_phys_buf; ++i)
487 for (j = 0;
488 j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
489 ++j)
490 (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
491 ((u64) j << *shift));
492
493 PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
494 __func__, (unsigned long long)*iova_start,
495 (unsigned long long)mask, *shift, (unsigned long long)*total_size,
496 *npages);
497
498 return 0;
499
500}
501
502int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
503 struct ib_pd *pd, struct ib_phys_buf *buffer_list,
504 int num_phys_buf, int acc, u64 *iova_start)
505{
506
507 struct c4iw_mr mh, *mhp;
508 struct c4iw_pd *php;
509 struct c4iw_dev *rhp;
510 __be64 *page_list = NULL;
511 int shift = 0;
512 u64 total_size;
513 int npages;
514 int ret;
515
516 PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
517
518 /* There can be no memory windows */
519 if (atomic_read(&mr->usecnt))
520 return -EINVAL;
521
522 mhp = to_c4iw_mr(mr);
523 rhp = mhp->rhp;
524 php = to_c4iw_pd(mr->pd);
525
526 /* make sure we are on the same adapter */
527 if (rhp != php->rhp)
528 return -EINVAL;
529
530 memcpy(&mh, mhp, sizeof *mhp);
531
532 if (mr_rereg_mask & IB_MR_REREG_PD)
533 php = to_c4iw_pd(pd);
534 if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
535 mh.attr.perms = c4iw_ib_to_tpt_access(acc);
536 mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
537 IB_ACCESS_MW_BIND;
538 }
539 if (mr_rereg_mask & IB_MR_REREG_TRANS) {
540 ret = build_phys_page_list(buffer_list, num_phys_buf,
541 iova_start,
542 &total_size, &npages,
543 &shift, &page_list);
544 if (ret)
545 return ret;
546 }
547
548 if (mr_exceeds_hw_limits(rhp, total_size)) {
549 kfree(page_list);
550 return -EINVAL;
551 }
552
553 ret = reregister_mem(rhp, php, &mh, shift, npages);
554 kfree(page_list);
555 if (ret)
556 return ret;
557 if (mr_rereg_mask & IB_MR_REREG_PD)
558 mhp->attr.pdid = php->pdid;
559 if (mr_rereg_mask & IB_MR_REREG_ACCESS)
560 mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
561 if (mr_rereg_mask & IB_MR_REREG_TRANS) {
562 mhp->attr.zbva = 0;
563 mhp->attr.va_fbo = *iova_start;
564 mhp->attr.page_size = shift - 12;
565 mhp->attr.len = (u32) total_size;
566 mhp->attr.pbl_size = npages;
567 }
568
569 return 0;
570}
571
572struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
573 struct ib_phys_buf *buffer_list,
574 int num_phys_buf, int acc, u64 *iova_start)
575{
576 __be64 *page_list;
577 int shift;
578 u64 total_size;
579 int npages;
580 struct c4iw_dev *rhp;
581 struct c4iw_pd *php;
582 struct c4iw_mr *mhp;
583 int ret;
584
585 PDBG("%s ib_pd %p\n", __func__, pd);
586 php = to_c4iw_pd(pd);
587 rhp = php->rhp;
588
589 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
590 if (!mhp)
591 return ERR_PTR(-ENOMEM);
592
593 mhp->rhp = rhp;
594
595 /* First check that we have enough alignment */
596 if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
597 ret = -EINVAL;
598 goto err;
599 }
600
601 if (num_phys_buf > 1 &&
602 ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
603 ret = -EINVAL;
604 goto err;
605 }
606
607 ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
608 &total_size, &npages, &shift,
609 &page_list);
610 if (ret)
611 goto err;
612
613 if (mr_exceeds_hw_limits(rhp, total_size)) {
614 kfree(page_list);
615 ret = -EINVAL;
616 goto err;
617 }
618
619 ret = alloc_pbl(mhp, npages);
620 if (ret) {
621 kfree(page_list);
622 goto err;
623 }
624
625 ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
626 npages);
627 kfree(page_list);
628 if (ret)
629 goto err_pbl;
630
631 mhp->attr.pdid = php->pdid;
632 mhp->attr.zbva = 0;
633
634 mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
635 mhp->attr.va_fbo = *iova_start;
636 mhp->attr.page_size = shift - 12;
637
638 mhp->attr.len = (u32) total_size;
639 mhp->attr.pbl_size = npages;
640 ret = register_mem(rhp, php, mhp, shift);
641 if (ret)
642 goto err_pbl;
643
644 return &mhp->ibmr;
645
646err_pbl:
647 c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
648 mhp->attr.pbl_size << 3);
649
650err:
651 kfree(mhp);
652 return ERR_PTR(ret);
653
654}
655
656struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) 408struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
657{ 409{
658 struct c4iw_dev *rhp; 410 struct c4iw_dev *rhp;
@@ -952,9 +704,6 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
952 u32 mmid; 704 u32 mmid;
953 705
954 PDBG("%s ib_mr %p\n", __func__, ib_mr); 706 PDBG("%s ib_mr %p\n", __func__, ib_mr);
955 /* There can be no memory windows */
956 if (atomic_read(&ib_mr->usecnt))
957 return -EINVAL;
958 707
959 mhp = to_c4iw_mr(ib_mr); 708 mhp = to_c4iw_mr(ib_mr);
960 rhp = mhp->rhp; 709 rhp = mhp->rhp;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 0a7d99818b17..ec04272fbdc2 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -549,12 +549,9 @@ int c4iw_register_device(struct c4iw_dev *dev)
 	dev->ibdev.resize_cq = c4iw_resize_cq;
 	dev->ibdev.poll_cq = c4iw_poll_cq;
 	dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
-	dev->ibdev.reg_phys_mr = c4iw_register_phys_mem;
-	dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem;
 	dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
 	dev->ibdev.dereg_mr = c4iw_dereg_mr;
 	dev->ibdev.alloc_mw = c4iw_alloc_mw;
-	dev->ibdev.bind_mw = c4iw_bind_mw;
 	dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
 	dev->ibdev.alloc_mr = c4iw_alloc_mr;
 	dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index aa515afee724..e99345eb875a 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -933,11 +933,6 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	return err;
 }
 
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
-{
-	return -ENOSYS;
-}
-
 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
 				    u8 *ecode)
 {
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 1092a2d1f607..6126bbe36095 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -699,4 +699,11 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq)
 
 struct t4_dev_status_page {
 	u8 db_off;
+	u8 pad1;
+	u16 pad2;
+	u32 pad3;
+	u64 qp_start;
+	u64 qp_size;
+	u64 cq_start;
+	u64 cq_size;
 };
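
The seven new fields turn the 8-byte status page into a 40-byte structure that user space maps directly, which is why C4IW_UVERBS_ABI_VERSION is bumped to 3 in the next hunk. A small stand-alone layout check of the struct as shown above; the mirror type and the check itself are illustrative only, not part of the driver:

/* user-space mirror of the extended status page; field names follow the
 * diff above, the mirror struct itself is not the driver's header */
#include <stdint.h>
#include <stddef.h>
#include <assert.h>

struct t4_dev_status_page_mirror {
	uint8_t  db_off;
	uint8_t  pad1;
	uint16_t pad2;
	uint32_t pad3;	/* pads the header out to 8 bytes */
	uint64_t qp_start;
	uint64_t qp_size;
	uint64_t cq_start;
	uint64_t cq_size;
};

int main(void)
{
	/* the u8/u16/u32 padding keeps every u64 naturally aligned */
	static_assert(offsetof(struct t4_dev_status_page_mirror, qp_start) == 8,
		      "qp_start must start at byte 8");
	static_assert(sizeof(struct t4_dev_status_page_mirror) == 40,
		      "status page grows from 8 to 40 bytes");
	return 0;
}
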
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index cbd0ce170728..295f422b9a3a 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -32,7 +32,7 @@
 #ifndef __C4IW_USER_H__
 #define __C4IW_USER_H__
 
-#define C4IW_UVERBS_ABI_VERSION 2
+#define C4IW_UVERBS_ABI_VERSION 3
 
 /*
  * Make sure that all structs defined in this file remain laid out so
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 86af71351d9a..105246fba2e7 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -92,7 +92,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
 			       ah_attr->grh.sgid_index, &sgid, &gid_attr);
 	if (ret)
 		return ERR_PTR(ret);
-	memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+	eth_zero_addr(ah->av.eth.s_mac);
 	if (gid_attr.ndev) {
 		if (is_vlan_dev(gid_attr.ndev))
 			vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
@@ -104,6 +104,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
 	ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
 	ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
 	ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+	ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
 	if (ah_attr->static_rate) {
 		ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
 		while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index b88fc8f5ab18..9f8b516eb2b0 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -811,9 +811,6 @@ repoll:
 			wc->opcode = IB_WC_MASKED_FETCH_ADD;
 			wc->byte_len = 8;
 			break;
-		case MLX4_OPCODE_BIND_MW:
-			wc->opcode = IB_WC_BIND_MW;
-			break;
 		case MLX4_OPCODE_LSO:
 			wc->opcode = IB_WC_LSO;
 			break;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 97d6878f9938..1c7ab6cabbb8 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
154 return dev; 154 return dev;
155} 155}
156 156
157static int mlx4_ib_update_gids(struct gid_entry *gids, 157static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
158 struct mlx4_ib_dev *ibdev, 158 struct mlx4_ib_dev *ibdev,
159 u8 port_num) 159 u8 port_num)
160{ 160{
161 struct mlx4_cmd_mailbox *mailbox; 161 struct mlx4_cmd_mailbox *mailbox;
162 int err; 162 int err;
@@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
187 return err; 187 return err;
188} 188}
189 189
190static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
191 struct mlx4_ib_dev *ibdev,
192 u8 port_num)
193{
194 struct mlx4_cmd_mailbox *mailbox;
195 int err;
196 struct mlx4_dev *dev = ibdev->dev;
197 int i;
198 struct {
199 union ib_gid gid;
200 __be32 rsrvd1[2];
201 __be16 rsrvd2;
202 u8 type;
203 u8 version;
204 __be32 rsrvd3;
205 } *gid_tbl;
206
207 mailbox = mlx4_alloc_cmd_mailbox(dev);
208 if (IS_ERR(mailbox))
209 return -ENOMEM;
210
211 gid_tbl = mailbox->buf;
212 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
213 memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
214 if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
215 gid_tbl[i].version = 2;
216 if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
217 gid_tbl[i].type = 1;
218 else
219 memset(&gid_tbl[i].gid, 0, 12);
220 }
221 }
222
223 err = mlx4_cmd(dev, mailbox->dma,
224 MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
225 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
226 MLX4_CMD_WRAPPED);
227 if (mlx4_is_bonded(dev))
228 err += mlx4_cmd(dev, mailbox->dma,
229 MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
230 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
231 MLX4_CMD_WRAPPED);
232
233 mlx4_free_cmd_mailbox(dev, mailbox);
234 return err;
235}
236
237static int mlx4_ib_update_gids(struct gid_entry *gids,
238 struct mlx4_ib_dev *ibdev,
239 u8 port_num)
240{
241 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
242 return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
243
244 return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
245}
246
190static int mlx4_ib_add_gid(struct ib_device *device, 247static int mlx4_ib_add_gid(struct ib_device *device,
191 u8 port_num, 248 u8 port_num,
192 unsigned int index, 249 unsigned int index,
@@ -215,7 +272,8 @@ static int mlx4_ib_add_gid(struct ib_device *device,
215 port_gid_table = &iboe->gids[port_num - 1]; 272 port_gid_table = &iboe->gids[port_num - 1];
216 spin_lock_bh(&iboe->lock); 273 spin_lock_bh(&iboe->lock);
217 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { 274 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
218 if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) { 275 if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
276 (port_gid_table->gids[i].gid_type == attr->gid_type)) {
219 found = i; 277 found = i;
220 break; 278 break;
221 } 279 }
@@ -233,6 +291,7 @@ static int mlx4_ib_add_gid(struct ib_device *device,
233 } else { 291 } else {
234 *context = port_gid_table->gids[free].ctx; 292 *context = port_gid_table->gids[free].ctx;
235 memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid)); 293 memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
294 port_gid_table->gids[free].gid_type = attr->gid_type;
236 port_gid_table->gids[free].ctx->real_index = free; 295 port_gid_table->gids[free].ctx->real_index = free;
237 port_gid_table->gids[free].ctx->refcount = 1; 296 port_gid_table->gids[free].ctx->refcount = 1;
238 hw_update = 1; 297 hw_update = 1;
@@ -248,8 +307,10 @@ static int mlx4_ib_add_gid(struct ib_device *device,
248 if (!gids) { 307 if (!gids) {
249 ret = -ENOMEM; 308 ret = -ENOMEM;
250 } else { 309 } else {
251 for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) 310 for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
252 memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid)); 311 memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
312 gids[i].gid_type = port_gid_table->gids[i].gid_type;
313 }
253 } 314 }
254 } 315 }
255 spin_unlock_bh(&iboe->lock); 316 spin_unlock_bh(&iboe->lock);
@@ -325,6 +386,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
325 int i; 386 int i;
326 int ret; 387 int ret;
327 unsigned long flags; 388 unsigned long flags;
389 struct ib_gid_attr attr;
328 390
329 if (port_num > MLX4_MAX_PORTS) 391 if (port_num > MLX4_MAX_PORTS)
330 return -EINVAL; 392 return -EINVAL;
@@ -335,10 +397,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
335 if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) 397 if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
336 return index; 398 return index;
337 399
338 ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL); 400 ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
339 if (ret) 401 if (ret)
340 return ret; 402 return ret;
341 403
404 if (attr.ndev)
405 dev_put(attr.ndev);
406
342 if (!memcmp(&gid, &zgid, sizeof(gid))) 407 if (!memcmp(&gid, &zgid, sizeof(gid)))
343 return -EINVAL; 408 return -EINVAL;
344 409
@@ -346,7 +411,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
346 port_gid_table = &iboe->gids[port_num - 1]; 411 port_gid_table = &iboe->gids[port_num - 1];
347 412
348 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) 413 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
349 if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) { 414 if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
415 attr.gid_type == port_gid_table->gids[i].gid_type) {
350 ctx = port_gid_table->gids[i].ctx; 416 ctx = port_gid_table->gids[i].ctx;
351 break; 417 break;
352 } 418 }
@@ -2119,6 +2185,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
2119 struct ib_port_immutable *immutable) 2185 struct ib_port_immutable *immutable)
2120{ 2186{
2121 struct ib_port_attr attr; 2187 struct ib_port_attr attr;
2188 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
2122 int err; 2189 int err;
2123 2190
2124 err = mlx4_ib_query_port(ibdev, port_num, &attr); 2191 err = mlx4_ib_query_port(ibdev, port_num, &attr);
@@ -2128,10 +2195,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
2128 immutable->pkey_tbl_len = attr.pkey_tbl_len; 2195 immutable->pkey_tbl_len = attr.pkey_tbl_len;
2129 immutable->gid_tbl_len = attr.gid_tbl_len; 2196 immutable->gid_tbl_len = attr.gid_tbl_len;
2130 2197
2131 if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) 2198 if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
2132 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; 2199 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2133 else 2200 } else {
2134 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; 2201 if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
2202 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
2203 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
2204 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
2205 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
2206 }
2135 2207
2136 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 2208 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2137 2209
@@ -2283,7 +2355,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2283 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || 2355 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2284 dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { 2356 dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
2285 ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; 2357 ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
2286 ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
2287 ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; 2358 ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
2288 2359
2289 ibdev->ib_dev.uverbs_cmd_mask |= 2360 ibdev->ib_dev.uverbs_cmd_mask |=
@@ -2423,7 +2494,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2423 if (mlx4_ib_init_sriov(ibdev)) 2494 if (mlx4_ib_init_sriov(ibdev))
2424 goto err_mad; 2495 goto err_mad;
2425 2496
2426 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) { 2497 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
2498 dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
2427 if (!iboe->nb.notifier_call) { 2499 if (!iboe->nb.notifier_call) {
2428 iboe->nb.notifier_call = mlx4_ib_netdev_event; 2500 iboe->nb.notifier_call = mlx4_ib_netdev_event;
2429 err = register_netdevice_notifier(&iboe->nb); 2501 err = register_netdevice_notifier(&iboe->nb);
@@ -2432,6 +2504,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2432 goto err_notif; 2504 goto err_notif;
2433 } 2505 }
2434 } 2506 }
2507 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
2508 err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
2509 if (err) {
2510 goto err_notif;
2511 }
2512 }
2435 } 2513 }
2436 2514
2437 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { 2515 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
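
mlx4_ib_update_gids_v1_v2() above tags RoCE v2 entries with version 2 and, when the GID is an IPv4-mapped address, keeps only the low four address bytes; otherwise it marks the entry as IPv6. A stand-alone rendition of the classification this relies on, for illustration only (the kernel path uses ipv6_addr_v4mapped(); the helper name below is made up):

/* an IPv4-mapped GID has the RFC 4291 form ::ffff:a.b.c.d,
 * i.e. bytes 0-9 zero and bytes 10-11 set to 0xff */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static bool gid_is_v4_mapped(const uint8_t gid[16])
{
	static const uint8_t prefix[12] = {
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff
	};

	return memcmp(gid, prefix, sizeof(prefix)) == 0;
}

int main(void)
{
	uint8_t v4gid[16] = { [10] = 0xff, [11] = 0xff,
			      [12] = 192, [13] = 168, [14] = 0, [15] = 1 };
	uint8_t v6gid[16] = { [0] = 0xfe, [1] = 0x80, [15] = 1 };

	/* v4-mapped GID -> RoCE v2 over IPv4, otherwise IPv6 (or RoCE v1) */
	printf("192.168.0.1 GID v4-mapped: %d\n", gid_is_v4_mapped(v4gid));
	printf("link-local GID v4-mapped: %d\n", gid_is_v4_mapped(v6gid));
	return 0;
}
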
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1caa11edac03..52ce7b000044 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -177,11 +177,18 @@ struct mlx4_ib_wq {
 	unsigned		tail;
 };
 
+enum {
+	MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
 enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
 	MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
 	MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+	/* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+	MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
 	MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
 	MLX4_IB_SRIOV_SQP = 1 << 31,
 };
@@ -478,6 +485,7 @@ struct gid_cache_context {
 
 struct gid_entry {
 	union ib_gid	gid;
+	enum ib_gid_type gid_type;
 	struct gid_cache_context *ctx;
 };
 
@@ -704,8 +712,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
 struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-		    struct ib_mw_bind *mw_bind);
 int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
 			       enum ib_mr_type mr_type,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 4d1e1c632603..242b94ec105b 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -366,28 +366,6 @@ err_free:
 	return ERR_PTR(err);
 }
 
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
-		    struct ib_mw_bind *mw_bind)
-{
-	struct ib_bind_mw_wr wr;
-	struct ib_send_wr *bad_wr;
-	int ret;
-
-	memset(&wr, 0, sizeof(wr));
-	wr.wr.opcode = IB_WR_BIND_MW;
-	wr.wr.wr_id = mw_bind->wr_id;
-	wr.wr.send_flags = mw_bind->send_flags;
-	wr.mw = mw;
-	wr.bind_info = mw_bind->bind_info;
-	wr.rkey = ib_inc_rkey(mw->rkey);
-
-	ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
-	if (!ret)
-		mw->rkey = wr.rkey;
-
-	return ret;
-}
-
 int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
 {
 	struct mlx4_ib_mw *mw = to_mmw(ibmw);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 13eaaf45288f..bc5536f00b6c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -32,6 +32,8 @@
32 */ 32 */
33 33
34#include <linux/log2.h> 34#include <linux/log2.h>
35#include <linux/etherdevice.h>
36#include <net/ip.h>
35#include <linux/slab.h> 37#include <linux/slab.h>
36#include <linux/netdevice.h> 38#include <linux/netdevice.h>
37#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
@@ -85,6 +87,7 @@ struct mlx4_ib_sqp {
85 u32 send_psn; 87 u32 send_psn;
86 struct ib_ud_header ud_header; 88 struct ib_ud_header ud_header;
87 u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; 89 u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
90 struct ib_qp *roce_v2_gsi;
88}; 91};
89 92
90enum { 93enum {
@@ -115,7 +118,6 @@ static const __be32 mlx4_ib_opcode[] = {
115 [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), 118 [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
116 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), 119 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
117 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), 120 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
118 [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
119}; 121};
120 122
121static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) 123static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -154,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
154 } 156 }
155 } 157 }
156 } 158 }
157 return proxy_sqp; 159 if (proxy_sqp)
160 return 1;
161
162 return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
158} 163}
159 164
160/* used for INIT/CLOSE port logic */ 165/* used for INIT/CLOSE port logic */
@@ -796,11 +801,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
796 if (err) 801 if (err)
797 goto err_mtt; 802 goto err_mtt;
798 803
799 qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp); 804 qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
805 gfp | __GFP_NOWARN);
800 if (!qp->sq.wrid) 806 if (!qp->sq.wrid)
801 qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64), 807 qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
802 gfp, PAGE_KERNEL); 808 gfp, PAGE_KERNEL);
803 qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp); 809 qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
810 gfp | __GFP_NOWARN);
804 if (!qp->rq.wrid) 811 if (!qp->rq.wrid)
805 qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64), 812 qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
806 gfp, PAGE_KERNEL); 813 gfp, PAGE_KERNEL);
@@ -1099,9 +1106,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
1099 return dev->dev->caps.qp1_proxy[attr->port_num - 1]; 1106 return dev->dev->caps.qp1_proxy[attr->port_num - 1];
1100} 1107}
1101 1108
1102struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, 1109static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
1103 struct ib_qp_init_attr *init_attr, 1110 struct ib_qp_init_attr *init_attr,
1104 struct ib_udata *udata) 1111 struct ib_udata *udata)
1105{ 1112{
1106 struct mlx4_ib_qp *qp = NULL; 1113 struct mlx4_ib_qp *qp = NULL;
1107 int err; 1114 int err;
@@ -1120,6 +1127,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
1120 MLX4_IB_SRIOV_TUNNEL_QP | 1127 MLX4_IB_SRIOV_TUNNEL_QP |
1121 MLX4_IB_SRIOV_SQP | 1128 MLX4_IB_SRIOV_SQP |
1122 MLX4_IB_QP_NETIF | 1129 MLX4_IB_QP_NETIF |
1130 MLX4_IB_QP_CREATE_ROCE_V2_GSI |
1123 MLX4_IB_QP_CREATE_USE_GFP_NOIO)) 1131 MLX4_IB_QP_CREATE_USE_GFP_NOIO))
1124 return ERR_PTR(-EINVAL); 1132 return ERR_PTR(-EINVAL);
1125 1133
@@ -1128,15 +1136,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
1128 return ERR_PTR(-EINVAL); 1136 return ERR_PTR(-EINVAL);
1129 } 1137 }
1130 1138
1131 if (init_attr->create_flags && 1139 if (init_attr->create_flags) {
1132 ((udata && init_attr->create_flags & ~(sup_u_create_flags)) || 1140 if (udata && init_attr->create_flags & ~(sup_u_create_flags))
1133 ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | 1141 return ERR_PTR(-EINVAL);
1134 MLX4_IB_QP_CREATE_USE_GFP_NOIO | 1142
1135 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) && 1143 if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
1136 init_attr->qp_type != IB_QPT_UD) || 1144 MLX4_IB_QP_CREATE_USE_GFP_NOIO |
1137 ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && 1145 MLX4_IB_QP_CREATE_ROCE_V2_GSI |
1138 init_attr->qp_type > IB_QPT_GSI))) 1146 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
1139 return ERR_PTR(-EINVAL); 1147 init_attr->qp_type != IB_QPT_UD) ||
1148 (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
1149 init_attr->qp_type > IB_QPT_GSI) ||
1150 (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
1151 init_attr->qp_type != IB_QPT_GSI))
1152 return ERR_PTR(-EINVAL);
1153 }
1140 1154
1141 switch (init_attr->qp_type) { 1155 switch (init_attr->qp_type) {
1142 case IB_QPT_XRC_TGT: 1156 case IB_QPT_XRC_TGT:
@@ -1173,19 +1187,29 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
1173 case IB_QPT_SMI: 1187 case IB_QPT_SMI:
1174 case IB_QPT_GSI: 1188 case IB_QPT_GSI:
1175 { 1189 {
1190 int sqpn;
1191
1176 /* Userspace is not allowed to create special QPs: */ 1192 /* Userspace is not allowed to create special QPs: */
1177 if (udata) 1193 if (udata)
1178 return ERR_PTR(-EINVAL); 1194 return ERR_PTR(-EINVAL);
1195 if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
1196 int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
1197
1198 if (res)
1199 return ERR_PTR(res);
1200 } else {
1201 sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
1202 }
1179 1203
1180 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 1204 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
1181 get_sqp_num(to_mdev(pd->device), init_attr), 1205 sqpn,
1182 &qp, gfp); 1206 &qp, gfp);
1183 if (err) 1207 if (err)
1184 return ERR_PTR(err); 1208 return ERR_PTR(err);
1185 1209
1186 qp->port = init_attr->port_num; 1210 qp->port = init_attr->port_num;
1187 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; 1211 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
1188 1212 init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
1189 break; 1213 break;
1190 } 1214 }
1191 default: 1215 default:
@@ -1196,7 +1220,41 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
1196 return &qp->ibqp; 1220 return &qp->ibqp;
1197} 1221}
1198 1222
1199int mlx4_ib_destroy_qp(struct ib_qp *qp) 1223struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
1224 struct ib_qp_init_attr *init_attr,
1225 struct ib_udata *udata) {
1226 struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
1227 struct ib_qp *ibqp;
1228 struct mlx4_ib_dev *dev = to_mdev(device);
1229
1230 ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
1231
1232 if (!IS_ERR(ibqp) &&
1233 (init_attr->qp_type == IB_QPT_GSI) &&
1234 !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
1235 struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
1236 int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
1237
1238 if (is_eth &&
1239 dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
1240 init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
1241 sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
1242
1243 if (IS_ERR(sqp->roce_v2_gsi)) {
1244 pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
1245 sqp->roce_v2_gsi = NULL;
1246 } else {
1247 sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
1248 sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
1249 }
1250
1251 init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
1252 }
1253 }
1254 return ibqp;
1255}
1256
1257static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
1200{ 1258{
1201 struct mlx4_ib_dev *dev = to_mdev(qp->device); 1259 struct mlx4_ib_dev *dev = to_mdev(qp->device);
1202 struct mlx4_ib_qp *mqp = to_mqp(qp); 1260 struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1225,6 +1283,20 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
1225 return 0; 1283 return 0;
1226} 1284}
1227 1285
1286int mlx4_ib_destroy_qp(struct ib_qp *qp)
1287{
1288 struct mlx4_ib_qp *mqp = to_mqp(qp);
1289
1290 if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
1291 struct mlx4_ib_sqp *sqp = to_msqp(mqp);
1292
1293 if (sqp->roce_v2_gsi)
1294 ib_destroy_qp(sqp->roce_v2_gsi);
1295 }
1296
1297 return _mlx4_ib_destroy_qp(qp);
1298}
1299
1228static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) 1300static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
1229{ 1301{
1230 switch (type) { 1302 switch (type) {
@@ -1507,6 +1579,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
1507 return 0; 1579 return 0;
1508} 1580}
1509 1581
1582enum {
1583 MLX4_QPC_ROCE_MODE_1 = 0,
1584 MLX4_QPC_ROCE_MODE_2 = 2,
1585 MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
1586};
1587
1588static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
1589{
1590 switch (gid_type) {
1591 case IB_GID_TYPE_ROCE:
1592 return MLX4_QPC_ROCE_MODE_1;
1593 case IB_GID_TYPE_ROCE_UDP_ENCAP:
1594 return MLX4_QPC_ROCE_MODE_2;
1595 default:
1596 return MLX4_QPC_ROCE_MODE_UNDEFINED;
1597 }
1598}
1599
1510static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, 1600static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1511 const struct ib_qp_attr *attr, int attr_mask, 1601 const struct ib_qp_attr *attr, int attr_mask,
1512 enum ib_qp_state cur_state, enum ib_qp_state new_state) 1602 enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1633,6 +1723,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1633 mlx4_ib_steer_qp_reg(dev, qp, 1); 1723 mlx4_ib_steer_qp_reg(dev, qp, 1);
1634 steer_qp = 1; 1724 steer_qp = 1;
1635 } 1725 }
1726
1727 if (ibqp->qp_type == IB_QPT_GSI) {
1728 enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
1729 IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
1730 u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
1731
1732 context->rlkey_roce_mode |= (qpc_roce_mode << 6);
1733 }
1636 } 1734 }
1637 1735
1638 if (attr_mask & IB_QP_PKEY_INDEX) { 1736 if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1650,9 +1748,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1650 u16 vlan = 0xffff; 1748 u16 vlan = 0xffff;
1651 u8 smac[ETH_ALEN]; 1749 u8 smac[ETH_ALEN];
1652 int status = 0; 1750 int status = 0;
1751 int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
1752 attr->ah_attr.ah_flags & IB_AH_GRH;
1653 1753
1654 if (rdma_cap_eth_ah(&dev->ib_dev, port_num) && 1754 if (is_eth) {
1655 attr->ah_attr.ah_flags & IB_AH_GRH) {
1656 int index = attr->ah_attr.grh.sgid_index; 1755 int index = attr->ah_attr.grh.sgid_index;
1657 1756
1658 status = ib_get_cached_gid(ibqp->device, port_num, 1757 status = ib_get_cached_gid(ibqp->device, port_num,
@@ -1674,6 +1773,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1674 1773
1675 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | 1774 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
1676 MLX4_QP_OPTPAR_SCHED_QUEUE); 1775 MLX4_QP_OPTPAR_SCHED_QUEUE);
1776
1777 if (is_eth &&
1778 (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
1779 u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
1780
1781 if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
1782 err = -EINVAL;
1783 goto out;
1784 }
1785 context->rlkey_roce_mode |= (qpc_roce_mode << 6);
1786 }
1787
1677 } 1788 }
1678 1789
1679 if (attr_mask & IB_QP_TIMEOUT) { 1790 if (attr_mask & IB_QP_TIMEOUT) {
@@ -1845,7 +1956,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1845 sqd_event = 0; 1956 sqd_event = 0;
1846 1957
1847 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 1958 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1848 context->rlkey |= (1 << 4); 1959 context->rlkey_roce_mode |= (1 << 4);
1849 1960
1850 /* 1961 /*
1851 * Before passing a kernel QP to the HW, make sure that the 1962 * Before passing a kernel QP to the HW, make sure that the
@@ -2022,8 +2133,8 @@ out:
2022 return err; 2133 return err;
2023} 2134}
2024 2135
2025int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 2136static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2026 int attr_mask, struct ib_udata *udata) 2137 int attr_mask, struct ib_udata *udata)
2027{ 2138{
2028 struct mlx4_ib_dev *dev = to_mdev(ibqp->device); 2139 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
2029 struct mlx4_ib_qp *qp = to_mqp(ibqp); 2140 struct mlx4_ib_qp *qp = to_mqp(ibqp);
@@ -2126,6 +2237,27 @@ out:
2126 return err; 2237 return err;
2127} 2238}
2128 2239
2240int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2241 int attr_mask, struct ib_udata *udata)
2242{
2243 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
2244 int ret;
2245
2246 ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
2247
2248 if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
2249 struct mlx4_ib_sqp *sqp = to_msqp(mqp);
2250 int err = 0;
2251
2252 if (sqp->roce_v2_gsi)
2253 err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask);
2254 if (err)
2255 pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n",
2256 err);
2257 }
2258 return ret;
2259}
2260
2129static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) 2261static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
2130{ 2262{
2131 int i; 2263 int i;
@@ -2168,7 +2300,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
2168 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) 2300 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
2169 send_size += sizeof (struct mlx4_ib_tunnel_header); 2301 send_size += sizeof (struct mlx4_ib_tunnel_header);
2170 2302
2171 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header); 2303 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
2172 2304
2173 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { 2305 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
2174 sqp->ud_header.lrh.service_level = 2306 sqp->ud_header.lrh.service_level =
@@ -2252,16 +2384,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
2252 return 0; 2384 return 0;
2253} 2385}
2254 2386
2255static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac) 2387#define MLX4_ROCEV2_QP1_SPORT 0xC000
2256{
2257 int i;
2258
2259 for (i = ETH_ALEN; i; i--) {
2260 dst_mac[i - 1] = src_mac & 0xff;
2261 src_mac >>= 8;
2262 }
2263}
2264
2265static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, 2388static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2266 void *wqe, unsigned *mlx_seg_len) 2389 void *wqe, unsigned *mlx_seg_len)
2267{ 2390{
@@ -2281,6 +2404,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2281 bool is_eth; 2404 bool is_eth;
2282 bool is_vlan = false; 2405 bool is_vlan = false;
2283 bool is_grh; 2406 bool is_grh;
2407 bool is_udp = false;
2408 int ip_version = 0;
2284 2409
2285 send_size = 0; 2410 send_size = 0;
2286 for (i = 0; i < wr->wr.num_sge; ++i) 2411 for (i = 0; i < wr->wr.num_sge; ++i)
@@ -2289,6 +2414,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2289 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; 2414 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
2290 is_grh = mlx4_ib_ah_grh_present(ah); 2415 is_grh = mlx4_ib_ah_grh_present(ah);
2291 if (is_eth) { 2416 if (is_eth) {
2417 struct ib_gid_attr gid_attr;
2418
2292 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { 2419 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
2293 /* When multi-function is enabled, the ib_core gid 2420 /* When multi-function is enabled, the ib_core gid
2294 * indexes don't necessarily match the hw ones, so 2421 * indexes don't necessarily match the hw ones, so
@@ -2302,19 +2429,35 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2302 err = ib_get_cached_gid(ib_dev, 2429 err = ib_get_cached_gid(ib_dev,
2303 be32_to_cpu(ah->av.ib.port_pd) >> 24, 2430 be32_to_cpu(ah->av.ib.port_pd) >> 24,
2304 ah->av.ib.gid_index, &sgid, 2431 ah->av.ib.gid_index, &sgid,
2305 NULL); 2432 &gid_attr);
2306 if (!err && !memcmp(&sgid, &zgid, sizeof(sgid))) 2433 if (!err) {
2307 err = -ENOENT; 2434 if (gid_attr.ndev)
2308 if (err) 2435 dev_put(gid_attr.ndev);
2436 if (!memcmp(&sgid, &zgid, sizeof(sgid)))
2437 err = -ENOENT;
2438 }
2439 if (!err) {
2440 is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
2441 if (is_udp) {
2442 if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
2443 ip_version = 4;
2444 else
2445 ip_version = 6;
2446 is_grh = false;
2447 }
2448 } else {
2309 return err; 2449 return err;
2450 }
2310 } 2451 }
2311
2312 if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { 2452 if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
2313 vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; 2453 vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
2314 is_vlan = 1; 2454 is_vlan = 1;
2315 } 2455 }
2316 } 2456 }
2317 ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); 2457 err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
2458 ip_version, is_udp, 0, &sqp->ud_header);
2459 if (err)
2460 return err;
2318 2461
2319 if (!is_eth) { 2462 if (!is_eth) {
2320 sqp->ud_header.lrh.service_level = 2463 sqp->ud_header.lrh.service_level =
@@ -2323,7 +2466,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2323 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); 2466 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
2324 } 2467 }
2325 2468
2326 if (is_grh) { 2469 if (is_grh || (ip_version == 6)) {
2327 sqp->ud_header.grh.traffic_class = 2470 sqp->ud_header.grh.traffic_class =
2328 (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; 2471 (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
2329 sqp->ud_header.grh.flow_label = 2472 sqp->ud_header.grh.flow_label =
@@ -2352,6 +2495,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2352 ah->av.ib.dgid, 16); 2495 ah->av.ib.dgid, 16);
2353 } 2496 }
2354 2497
2498 if (ip_version == 4) {
2499 sqp->ud_header.ip4.tos =
2500 (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
2501 sqp->ud_header.ip4.id = 0;
2502 sqp->ud_header.ip4.frag_off = htons(IP_DF);
2503 sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
2504
2505 memcpy(&sqp->ud_header.ip4.saddr,
2506 sgid.raw + 12, 4);
2507 memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
2508 sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
2509 }
2510
2511 if (is_udp) {
2512 sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
2513 sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
2514 sqp->ud_header.udp.csum = 0;
2515 }
2516
2355 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); 2517 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
2356 2518
2357 if (!is_eth) { 2519 if (!is_eth) {
@@ -2380,34 +2542,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2380 2542
2381 if (is_eth) { 2543 if (is_eth) {
2382 struct in6_addr in6; 2544 struct in6_addr in6;
2383 2545 u16 ether_type;
2384 u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; 2546 u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
2385 2547
2548 ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
2549 (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
2550
2386 mlx->sched_prio = cpu_to_be16(pcp); 2551 mlx->sched_prio = cpu_to_be16(pcp);
2387 2552
2553 ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
2388 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); 2554 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
2389 /* FIXME: cache smac value? */
2390 memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); 2555 memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
2391 memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); 2556 memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
2392 memcpy(&in6, sgid.raw, sizeof(in6)); 2557 memcpy(&in6, sgid.raw, sizeof(in6));
2393 2558
2394 if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
2395 u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
2396 u8 smac[ETH_ALEN];
2397
2398 mlx4_u64_to_smac(smac, mac);
2399 memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
2400 } else {
2401 /* use the src mac of the tunnel */
2402 memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
2403 }
2404 2559
2405 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) 2560 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
2406 mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); 2561 mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
2407 if (!is_vlan) { 2562 if (!is_vlan) {
2408 sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); 2563 sqp->ud_header.eth.type = cpu_to_be16(ether_type);
2409 } else { 2564 } else {
2410 sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); 2565 sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
2411 sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); 2566 sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
2412 } 2567 }
2413 } else { 2568 } else {
@@ -2528,25 +2683,6 @@ static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
2528 fseg->reserved[1] = 0; 2683 fseg->reserved[1] = 0;
2529} 2684}
2530 2685
2531static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
2532 struct ib_bind_mw_wr *wr)
2533{
2534 bseg->flags1 =
2535 convert_access(wr->bind_info.mw_access_flags) &
2536 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
2537 MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
2538 MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
2539 bseg->flags2 = 0;
2540 if (wr->mw->type == IB_MW_TYPE_2)
2541 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
2542 if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
2543 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
2544 bseg->new_rkey = cpu_to_be32(wr->rkey);
2545 bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
2546 bseg->addr = cpu_to_be64(wr->bind_info.addr);
2547 bseg->length = cpu_to_be64(wr->bind_info.length);
2548}
2549
2550static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) 2686static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
2551{ 2687{
2552 memset(iseg, 0, sizeof(*iseg)); 2688 memset(iseg, 0, sizeof(*iseg));
@@ -2766,6 +2902,29 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2766 int i; 2902 int i;
2767 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); 2903 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
2768 2904
2905 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
2906 struct mlx4_ib_sqp *sqp = to_msqp(qp);
2907
2908 if (sqp->roce_v2_gsi) {
2909 struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
2910 struct ib_gid_attr gid_attr;
2911 union ib_gid gid;
2912
2913 if (!ib_get_cached_gid(ibqp->device,
2914 be32_to_cpu(ah->av.ib.port_pd) >> 24,
2915 ah->av.ib.gid_index, &gid,
2916 &gid_attr)) {
2917 if (gid_attr.ndev)
2918 dev_put(gid_attr.ndev);
2919 qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
2920 to_mqp(sqp->roce_v2_gsi) : qp;
2921 } else {
2922 pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
2923 ah->av.ib.gid_index);
2924 }
2925 }
2926 }
2927
2769 spin_lock_irqsave(&qp->sq.lock, flags); 2928 spin_lock_irqsave(&qp->sq.lock, flags);
2770 if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { 2929 if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
2771 err = -EIO; 2930 err = -EIO;
@@ -2867,13 +3026,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2867 size += sizeof(struct mlx4_wqe_fmr_seg) / 16; 3026 size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
2868 break; 3027 break;
2869 3028
2870 case IB_WR_BIND_MW:
2871 ctrl->srcrb_flags |=
2872 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2873 set_bind_seg(wqe, bind_mw_wr(wr));
2874 wqe += sizeof(struct mlx4_wqe_bind_seg);
2875 size += sizeof(struct mlx4_wqe_bind_seg) / 16;
2876 break;
2877 default: 3029 default:
2878 /* No extra segments required for sends */ 3030 /* No extra segments required for sends */
2879 break; 3031 break;
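
For the RoCE v2 GSI QP above, QP1 traffic is carried over IP/UDP instead of a bare GRH: the UDP destination is ROCE_V2_UDP_DPORT, the source port is the fixed MLX4_ROCEV2_QP1_SPORT (0xC000), and ud_header.ip4.check is filled in via ib_ud_ip4_csum(). As an illustration only, a stand-alone sketch of the standard RFC 1071 one's-complement sum that such an IPv4 header checksum amounts to (the helper name below is not from the kernel):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t ip4_header_csum(const uint8_t *hdr, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	/* sum 16-bit words, then fold the carries back in */
	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)hdr[i] << 8 | hdr[i + 1];
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}

int main(void)
{
	/* 20-byte header: version/IHL, TOS, total length, id, frag/DF,
	 * TTL, protocol 17 (UDP), zeroed checksum field, saddr, daddr */
	uint8_t hdr[20] = {
		0x45, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x40, 0x00,
		0x40, 0x11, 0x00, 0x00, 192, 168, 0, 1, 192, 168, 0, 2
	};

	printf("ip checksum: 0x%04x\n", ip4_header_csum(hdr, sizeof(hdr)));
	return 0;
}
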
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index c394376ebe06..0597f3eef5d0 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -171,7 +171,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 	if (err)
 		goto err_mtt;
 
-	srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+	srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
+				  GFP_KERNEL | __GFP_NOWARN);
 	if (!srq->wrid) {
 		srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
 				      GFP_KERNEL, PAGE_KERNEL);
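
The kmalloc_array()-then-__vmalloc() sequence above, with __GFP_NOWARN on the first try, is the usual pattern for a work-request ID array that may be too large for physically contiguous pages: attempt the fast contiguous allocation quietly, then fall back to vmalloc space. A generic kernel-style sketch of that pattern, for illustration only (the helper name is made up and is not part of this patch):

#include <linux/slab.h>
#include <linux/vmalloc.h>

/* illustrative helper: contiguous fast path, vmalloc fallback */
static u64 *alloc_wrid_array(size_t nelem, gfp_t gfp)
{
	u64 *wrid;

	wrid = kmalloc_array(nelem, sizeof(*wrid), gfp | __GFP_NOWARN);
	if (!wrid)
		wrid = __vmalloc(nelem * sizeof(*wrid), gfp, PAGE_KERNEL);

	return wrid;	/* callers release it with kvfree() */
}
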
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 66080580e24d..745efa4cfc71 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -32,8 +32,10 @@
32 32
33#include "mlx5_ib.h" 33#include "mlx5_ib.h"
34 34
35struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, 35static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
36 struct mlx5_ib_ah *ah) 36 struct mlx5_ib_ah *ah,
37 struct ib_ah_attr *ah_attr,
38 enum rdma_link_layer ll)
37{ 39{
38 if (ah_attr->ah_flags & IB_AH_GRH) { 40 if (ah_attr->ah_flags & IB_AH_GRH) {
39 memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16); 41 memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
@@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
44 ah->av.tclass = ah_attr->grh.traffic_class; 46 ah->av.tclass = ah_attr->grh.traffic_class;
45 } 47 }
46 48
47 ah->av.rlid = cpu_to_be16(ah_attr->dlid); 49 ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
48 ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f; 50
49 ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf); 51 if (ll == IB_LINK_LAYER_ETHERNET) {
52 memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
53 ah->av.udp_sport =
54 mlx5_get_roce_udp_sport(dev,
55 ah_attr->port_num,
56 ah_attr->grh.sgid_index);
57 ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
58 } else {
59 ah->av.rlid = cpu_to_be16(ah_attr->dlid);
60 ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
61 ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
62 }
50 63
51 return &ah->ibah; 64 return &ah->ibah;
52} 65}
@@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
54struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) 67struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
55{ 68{
56 struct mlx5_ib_ah *ah; 69 struct mlx5_ib_ah *ah;
70 struct mlx5_ib_dev *dev = to_mdev(pd->device);
71 enum rdma_link_layer ll;
72
73 ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
74
75 if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
76 return ERR_PTR(-EINVAL);
57 77
58 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 78 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
59 if (!ah) 79 if (!ah)
60 return ERR_PTR(-ENOMEM); 80 return ERR_PTR(-ENOMEM);
61 81
62 return create_ib_ah(ah_attr, ah); /* never fails */ 82 return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
63} 83}
64 84
65int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) 85int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 92ddae101ecc..fd1de31e0611 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -154,9 +154,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 		wc->opcode = IB_WC_MASKED_FETCH_ADD;
 		wc->byte_len = 8;
 		break;
-	case MLX5_OPCODE_BIND_MW:
-		wc->opcode = IB_WC_BIND_MW;
-		break;
 	case MLX5_OPCODE_UMR:
 		wc->opcode = get_umr_comp(wq, idx);
 		break;
@@ -171,6 +168,7 @@ enum {
171static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, 168static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
172 struct mlx5_ib_qp *qp) 169 struct mlx5_ib_qp *qp)
173{ 170{
171 enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
174 struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); 172 struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
175 struct mlx5_ib_srq *srq; 173 struct mlx5_ib_srq *srq;
176 struct mlx5_ib_wq *wq; 174 struct mlx5_ib_wq *wq;
@@ -236,6 +234,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
236 } else { 234 } else {
237 wc->pkey_index = 0; 235 wc->pkey_index = 0;
238 } 236 }
237
238 if (ll != IB_LINK_LAYER_ETHERNET)
239 return;
240
241 switch (wc->sl & 0x3) {
242 case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
243 wc->network_hdr_type = RDMA_NETWORK_IB;
244 break;
245 case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
246 wc->network_hdr_type = RDMA_NETWORK_IPV6;
247 break;
248 case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
249 wc->network_hdr_type = RDMA_NETWORK_IPV4;
250 break;
251 }
252 wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
239} 253}
240 254
241static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) 255static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
@@ -760,12 +774,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
760 int eqn; 774 int eqn;
761 int err; 775 int err;
762 776
763 if (attr->flags)
764 return ERR_PTR(-EINVAL);
765
766 if (entries < 0) 777 if (entries < 0)
767 return ERR_PTR(-EINVAL); 778 return ERR_PTR(-EINVAL);
768 779
780 if (check_cq_create_flags(attr->flags))
781 return ERR_PTR(-EOPNOTSUPP);
782
769 entries = roundup_pow_of_two(entries + 1); 783 entries = roundup_pow_of_two(entries + 1);
770 if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) 784 if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
771 return ERR_PTR(-EINVAL); 785 return ERR_PTR(-EINVAL);
@@ -779,6 +793,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
779 spin_lock_init(&cq->lock); 793 spin_lock_init(&cq->lock);
780 cq->resize_buf = NULL; 794 cq->resize_buf = NULL;
781 cq->resize_umem = NULL; 795 cq->resize_umem = NULL;
796 cq->create_flags = attr->flags;
782 797
783 if (context) { 798 if (context) {
784 err = create_cq_user(dev, udata, context, cq, entries, 799 err = create_cq_user(dev, udata, context, cq, entries,
@@ -796,6 +811,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
796 811
797 cq->cqe_size = cqe_size; 812 cq->cqe_size = cqe_size;
798 cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; 813 cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
814
815 if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
816 cqb->ctx.cqe_sz_flags |= (1 << 1);
817
799 cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); 818 cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
800 err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); 819 err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
801 if (err) 820 if (err)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b0ec175cc6ba..ec737e2287fe 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -40,6 +40,8 @@
40#include <linux/io-mapping.h> 40#include <linux/io-mapping.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <rdma/ib_user_verbs.h> 42#include <rdma/ib_user_verbs.h>
43#include <rdma/ib_addr.h>
44#include <rdma/ib_cache.h>
43#include <linux/mlx5/vport.h> 45#include <linux/mlx5/vport.h>
44#include <rdma/ib_smi.h> 46#include <rdma/ib_smi.h>
45#include <rdma/ib_umem.h> 47#include <rdma/ib_umem.h>
@@ -66,12 +68,14 @@ static char mlx5_version[] =
66 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" 68 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
67 DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; 69 DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
68 70
71enum {
72 MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
73};
74
69static enum rdma_link_layer 75static enum rdma_link_layer
70mlx5_ib_port_link_layer(struct ib_device *device) 76mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
71{ 77{
72 struct mlx5_ib_dev *dev = to_mdev(device); 78 switch (port_type_cap) {
73
74 switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
75 case MLX5_CAP_PORT_TYPE_IB: 79 case MLX5_CAP_PORT_TYPE_IB:
76 return IB_LINK_LAYER_INFINIBAND; 80 return IB_LINK_LAYER_INFINIBAND;
77 case MLX5_CAP_PORT_TYPE_ETH: 81 case MLX5_CAP_PORT_TYPE_ETH:
@@ -81,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device)
81 } 85 }
82} 86}
83 87
88static enum rdma_link_layer
89mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
90{
91 struct mlx5_ib_dev *dev = to_mdev(device);
92 int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
93
94 return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
95}
96
97static int mlx5_netdev_event(struct notifier_block *this,
98 unsigned long event, void *ptr)
99{
100 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
101 struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
102 roce.nb);
103
104 if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
105 return NOTIFY_DONE;
106
107 write_lock(&ibdev->roce.netdev_lock);
108 if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
109 ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
110 write_unlock(&ibdev->roce.netdev_lock);
111
112 return NOTIFY_DONE;
113}
114
115static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
116 u8 port_num)
117{
118 struct mlx5_ib_dev *ibdev = to_mdev(device);
119 struct net_device *ndev;
120
121 /* Ensure ndev does not disappear before we invoke dev_hold()
122 */
123 read_lock(&ibdev->roce.netdev_lock);
124 ndev = ibdev->roce.netdev;
125 if (ndev)
126 dev_hold(ndev);
127 read_unlock(&ibdev->roce.netdev_lock);
128
129 return ndev;
130}
131
132static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
133 struct ib_port_attr *props)
134{
135 struct mlx5_ib_dev *dev = to_mdev(device);
136 struct net_device *ndev;
137 enum ib_mtu ndev_ib_mtu;
138 u16 qkey_viol_cntr;
139
140 memset(props, 0, sizeof(*props));
141
142 props->port_cap_flags |= IB_PORT_CM_SUP;
143 props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
144
145 props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
146 roce_address_table_size);
147 props->max_mtu = IB_MTU_4096;
148 props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
149 props->pkey_tbl_len = 1;
150 props->state = IB_PORT_DOWN;
151 props->phys_state = 3;
152
153 mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
154 props->qkey_viol_cntr = qkey_viol_cntr;
155
156 ndev = mlx5_ib_get_netdev(device, port_num);
157 if (!ndev)
158 return 0;
159
160 if (netif_running(ndev) && netif_carrier_ok(ndev)) {
161 props->state = IB_PORT_ACTIVE;
162 props->phys_state = 5;
163 }
164
165 ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
166
167 dev_put(ndev);
168
169 props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
170
171 props->active_width = IB_WIDTH_4X; /* TODO */
172 props->active_speed = IB_SPEED_QDR; /* TODO */
173
174 return 0;
175}
176
177static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
178 const struct ib_gid_attr *attr,
179 void *mlx5_addr)
180{
181#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
182 char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
183 source_l3_address);
184 void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
185 source_mac_47_32);
186
187 if (!gid)
188 return;
189
190 ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
191
192 if (is_vlan_dev(attr->ndev)) {
193 MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
194 MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
195 }
196
197 switch (attr->gid_type) {
198 case IB_GID_TYPE_IB:
199 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
200 break;
201 case IB_GID_TYPE_ROCE_UDP_ENCAP:
202 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
203 break;
204
205 default:
206 WARN_ON(true);
207 }
208
209 if (attr->gid_type != IB_GID_TYPE_IB) {
210 if (ipv6_addr_v4mapped((void *)gid))
211 MLX5_SET_RA(mlx5_addr, roce_l3_type,
212 MLX5_ROCE_L3_TYPE_IPV4);
213 else
214 MLX5_SET_RA(mlx5_addr, roce_l3_type,
215 MLX5_ROCE_L3_TYPE_IPV6);
216 }
217
218 if ((attr->gid_type == IB_GID_TYPE_IB) ||
219 !ipv6_addr_v4mapped((void *)gid))
220 memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
221 else
222 memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
223}
224
225static int set_roce_addr(struct ib_device *device, u8 port_num,
226 unsigned int index,
227 const union ib_gid *gid,
228 const struct ib_gid_attr *attr)
229{
230 struct mlx5_ib_dev *dev = to_mdev(device);
231 u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
232 u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
233 void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
234 enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
235
236 if (ll != IB_LINK_LAYER_ETHERNET)
237 return -EINVAL;
238
239 memset(in, 0, sizeof(in));
240
241 ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
242
243 MLX5_SET(set_roce_address_in, in, roce_address_index, index);
244 MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
245
246 memset(out, 0, sizeof(out));
247 return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
248}
249
250static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
251 unsigned int index, const union ib_gid *gid,
252 const struct ib_gid_attr *attr,
253 __always_unused void **context)
254{
255 return set_roce_addr(device, port_num, index, gid, attr);
256}
257
258static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
259 unsigned int index, __always_unused void **context)
260{
261 return set_roce_addr(device, port_num, index, NULL, NULL);
262}
263
264__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
265 int index)
266{
267 struct ib_gid_attr attr;
268 union ib_gid gid;
269
270 if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
271 return 0;
272
273 if (!attr.ndev)
274 return 0;
275
276 dev_put(attr.ndev);
277
278 if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
279 return 0;
280
281 return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
282}
283
84static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) 284static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
85{ 285{
86 return !dev->mdev->issi; 286 return !dev->mdev->issi;
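ib_gid_to_mlx5_roce_addr() above picks MLX5_ROCE_L3_TYPE_IPV4 only when the 128-bit GID is an IPv4-mapped IPv6 address (::ffff:a.b.c.d), in which case just the last four GID bytes are programmed into the l3 address field; every other RoCEv2 GID is treated as IPv6. A minimal userspace sketch of that classification, with IN6_IS_ADDR_V4MAPPED() standing in for the kernel's ipv6_addr_v4mapped() -- nothing below is driver code:

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>

/* Classify a 16-byte RoCE GID the way set_roce_addr() programs roce_l3_type:
 * IPv4-mapped GIDs use only gid[12..15]; anything else is treated as IPv6.
 */
static const char *classify_gid(const unsigned char gid[16])
{
	struct in6_addr a;

	memcpy(&a, gid, sizeof(a));
	return IN6_IS_ADDR_V4MAPPED(&a) ? "ROCE_L3_TYPE_IPV4" : "ROCE_L3_TYPE_IPV6";
}

int main(void)
{
	unsigned char v4mapped[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
				       0, 0, 0xff, 0xff, 192, 168, 1, 10 };
	unsigned char v6[16]       = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
				       0, 0, 0, 0, 0, 0, 0, 1 };

	printf("v4-mapped GID  -> %s\n", classify_gid(v4mapped)); /* IPV4 */
	printf("link-local GID -> %s\n", classify_gid(v6));       /* IPV6 */
	return 0;
}
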
@@ -97,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
97 if (mlx5_use_mad_ifc(to_mdev(ibdev))) 297 if (mlx5_use_mad_ifc(to_mdev(ibdev)))
98 return MLX5_VPORT_ACCESS_METHOD_MAD; 298 return MLX5_VPORT_ACCESS_METHOD_MAD;
99 299
100 if (mlx5_ib_port_link_layer(ibdev) == 300 if (mlx5_ib_port_link_layer(ibdev, 1) ==
101 IB_LINK_LAYER_ETHERNET) 301 IB_LINK_LAYER_ETHERNET)
102 return MLX5_VPORT_ACCESS_METHOD_NIC; 302 return MLX5_VPORT_ACCESS_METHOD_NIC;
103 303
104 return MLX5_VPORT_ACCESS_METHOD_HCA; 304 return MLX5_VPORT_ACCESS_METHOD_HCA;
105} 305}
106 306
307static void get_atomic_caps(struct mlx5_ib_dev *dev,
308 struct ib_device_attr *props)
309{
310 u8 tmp;
311 u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
312 u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
313 u8 atomic_req_8B_endianness_mode =
314 MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
315
316	/* Check whether the HW supports 8-byte standard atomic operations
317	 * and can respond in host endianness
318 */
319 tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
320 if (((atomic_operations & tmp) == tmp) &&
321 (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
322 (atomic_req_8B_endianness_mode)) {
323 props->atomic_cap = IB_ATOMIC_HCA;
324 } else {
325 props->atomic_cap = IB_ATOMIC_NONE;
326 }
327}
328
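get_atomic_caps() above reports IB_ATOMIC_HCA only when all three conditions hold: compare-and-swap and fetch-and-add are both advertised, the 8-byte operand size is supported on QPs, and the device can respond in host endianness. A standalone sketch of that check with made-up capability bit values (the real encodings live in mlx5_ifc.h and may differ):

#include <stdio.h>

/* Stand-ins for the capability bits consumed by get_atomic_caps();
 * the numeric values are purely illustrative.
 */
#define OPS_CMP_SWAP   (1u << 0)
#define OPS_FETCH_ADD  (1u << 1)
#define SIZE_QP_8BYTES (1u << 3)

static int hca_atomics_usable(unsigned int ops, unsigned int size_qp,
			      unsigned int endian_mode)
{
	unsigned int need = OPS_CMP_SWAP | OPS_FETCH_ADD;

	return ((ops & need) == need) && (size_qp & SIZE_QP_8BYTES) && endian_mode;
}

int main(void)
{
	/* All three conditions met -> IB_ATOMIC_HCA */
	printf("%d\n", hca_atomics_usable(OPS_CMP_SWAP | OPS_FETCH_ADD, SIZE_QP_8BYTES, 1));
	/* Missing fetch-and-add -> IB_ATOMIC_NONE */
	printf("%d\n", hca_atomics_usable(OPS_CMP_SWAP, SIZE_QP_8BYTES, 1));
	return 0;
}
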
107static int mlx5_query_system_image_guid(struct ib_device *ibdev, 329static int mlx5_query_system_image_guid(struct ib_device *ibdev,
108 __be64 *sys_image_guid) 330 __be64 *sys_image_guid)
109{ 331{
@@ -119,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev,
119 341
120 case MLX5_VPORT_ACCESS_METHOD_HCA: 342 case MLX5_VPORT_ACCESS_METHOD_HCA:
121 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); 343 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
122 if (!err) 344 break;
123 *sys_image_guid = cpu_to_be64(tmp); 345
124 return err; 346 case MLX5_VPORT_ACCESS_METHOD_NIC:
347 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
348 break;
125 349
126 default: 350 default:
127 return -EINVAL; 351 return -EINVAL;
128 } 352 }
353
354 if (!err)
355 *sys_image_guid = cpu_to_be64(tmp);
356
357 return err;
358
129} 359}
130 360
131static int mlx5_query_max_pkeys(struct ib_device *ibdev, 361static int mlx5_query_max_pkeys(struct ib_device *ibdev,
@@ -179,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
179 409
180 case MLX5_VPORT_ACCESS_METHOD_HCA: 410 case MLX5_VPORT_ACCESS_METHOD_HCA:
181 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); 411 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
182 if (!err) 412 break;
183 *node_guid = cpu_to_be64(tmp); 413
184 return err; 414 case MLX5_VPORT_ACCESS_METHOD_NIC:
415 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
416 break;
185 417
186 default: 418 default:
187 return -EINVAL; 419 return -EINVAL;
188 } 420 }
421
422 if (!err)
423 *node_guid = cpu_to_be64(tmp);
424
425 return err;
189} 426}
190 427
191struct mlx5_reg_node_desc { 428struct mlx5_reg_node_desc {
@@ -263,6 +500,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
263 if (MLX5_CAP_GEN(mdev, block_lb_mc)) 500 if (MLX5_CAP_GEN(mdev, block_lb_mc))
264 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 501 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
265 502
503 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
504 (MLX5_CAP_ETH(dev->mdev, csum_cap)))
505 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
506
266 props->vendor_part_id = mdev->pdev->device; 507 props->vendor_part_id = mdev->pdev->device;
267 props->hw_ver = mdev->pdev->revision; 508 props->hw_ver = mdev->pdev->revision;
268 509
@@ -278,7 +519,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
278 props->max_sge = min(max_rq_sg, max_sq_sg); 519 props->max_sge = min(max_rq_sg, max_sq_sg);
279 props->max_sge_rd = props->max_sge; 520 props->max_sge_rd = props->max_sge;
280 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); 521 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
281 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1; 522 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
282 props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 523 props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
283 props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); 524 props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
284 props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); 525 props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
@@ -289,13 +530,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
289 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; 530 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
290 props->max_srq_sge = max_rq_sg - 1; 531 props->max_srq_sge = max_rq_sg - 1;
291 props->max_fast_reg_page_list_len = (unsigned int)-1; 532 props->max_fast_reg_page_list_len = (unsigned int)-1;
292 props->atomic_cap = IB_ATOMIC_NONE; 533 get_atomic_caps(dev, props);
293 props->masked_atomic_cap = IB_ATOMIC_NONE; 534 props->masked_atomic_cap = IB_ATOMIC_NONE;
294 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); 535 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
295 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); 536 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
296 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 537 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
297 props->max_mcast_grp; 538 props->max_mcast_grp;
298 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ 539 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
540 props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
541 props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
299 542
300#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 543#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
301 if (MLX5_CAP_GEN(mdev, pg)) 544 if (MLX5_CAP_GEN(mdev, pg))
@@ -303,6 +546,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
303 props->odp_caps = dev->odp_caps; 546 props->odp_caps = dev->odp_caps;
304#endif 547#endif
305 548
549 if (MLX5_CAP_GEN(mdev, cd))
550 props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
551
306 return 0; 552 return 0;
307} 553}
308 554
@@ -483,6 +729,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
483 case MLX5_VPORT_ACCESS_METHOD_HCA: 729 case MLX5_VPORT_ACCESS_METHOD_HCA:
484 return mlx5_query_hca_port(ibdev, port, props); 730 return mlx5_query_hca_port(ibdev, port, props);
485 731
732 case MLX5_VPORT_ACCESS_METHOD_NIC:
733 return mlx5_query_port_roce(ibdev, port, props);
734
486 default: 735 default:
487 return -EINVAL; 736 return -EINVAL;
488 } 737 }
@@ -583,8 +832,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
583 struct ib_udata *udata) 832 struct ib_udata *udata)
584{ 833{
585 struct mlx5_ib_dev *dev = to_mdev(ibdev); 834 struct mlx5_ib_dev *dev = to_mdev(ibdev);
586 struct mlx5_ib_alloc_ucontext_req_v2 req; 835 struct mlx5_ib_alloc_ucontext_req_v2 req = {};
587 struct mlx5_ib_alloc_ucontext_resp resp; 836 struct mlx5_ib_alloc_ucontext_resp resp = {};
588 struct mlx5_ib_ucontext *context; 837 struct mlx5_ib_ucontext *context;
589 struct mlx5_uuar_info *uuari; 838 struct mlx5_uuar_info *uuari;
590 struct mlx5_uar *uars; 839 struct mlx5_uar *uars;
@@ -599,20 +848,22 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
599 if (!dev->ib_active) 848 if (!dev->ib_active)
600 return ERR_PTR(-EAGAIN); 849 return ERR_PTR(-EAGAIN);
601 850
602 memset(&req, 0, sizeof(req)); 851 if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
852 return ERR_PTR(-EINVAL);
853
603 reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); 854 reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
604 if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) 855 if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
605 ver = 0; 856 ver = 0;
606 else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) 857 else if (reqlen >= sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
607 ver = 2; 858 ver = 2;
608 else 859 else
609 return ERR_PTR(-EINVAL); 860 return ERR_PTR(-EINVAL);
610 861
611 err = ib_copy_from_udata(&req, udata, reqlen); 862 err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
612 if (err) 863 if (err)
613 return ERR_PTR(err); 864 return ERR_PTR(err);
614 865
615 if (req.flags || req.reserved) 866 if (req.flags)
616 return ERR_PTR(-EINVAL); 867 return ERR_PTR(-EINVAL);
617 868
618 if (req.total_num_uuars > MLX5_MAX_UUARS) 869 if (req.total_num_uuars > MLX5_MAX_UUARS)
@@ -621,6 +872,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
621 if (req.total_num_uuars == 0) 872 if (req.total_num_uuars == 0)
622 return ERR_PTR(-EINVAL); 873 return ERR_PTR(-EINVAL);
623 874
875 if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
876 return ERR_PTR(-EOPNOTSUPP);
877
878 if (reqlen > sizeof(req) &&
879 !ib_is_udata_cleared(udata, sizeof(req),
880 reqlen - sizeof(req)))
881 return ERR_PTR(-EOPNOTSUPP);
882
624 req.total_num_uuars = ALIGN(req.total_num_uuars, 883 req.total_num_uuars = ALIGN(req.total_num_uuars,
625 MLX5_NON_FP_BF_REGS_PER_PAGE); 884 MLX5_NON_FP_BF_REGS_PER_PAGE);
626 if (req.num_low_latency_uuars > req.total_num_uuars - 1) 885 if (req.num_low_latency_uuars > req.total_num_uuars - 1)
@@ -636,6 +895,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
636 resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 895 resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
637 resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 896 resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
638 resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); 897 resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
898 resp.cqe_version = min_t(__u8,
899 (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
900 req.max_cqe_version);
901 resp.response_length = min(offsetof(typeof(resp), response_length) +
902 sizeof(resp.response_length), udata->outlen);
639 903
640 context = kzalloc(sizeof(*context), GFP_KERNEL); 904 context = kzalloc(sizeof(*context), GFP_KERNEL);
641 if (!context) 905 if (!context)
@@ -681,22 +945,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
681 context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; 945 context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
682#endif 946#endif
683 947
948 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
949 err = mlx5_core_alloc_transport_domain(dev->mdev,
950 &context->tdn);
951 if (err)
952 goto out_uars;
953 }
954
684 INIT_LIST_HEAD(&context->db_page_list); 955 INIT_LIST_HEAD(&context->db_page_list);
685 mutex_init(&context->db_page_mutex); 956 mutex_init(&context->db_page_mutex);
686 957
687 resp.tot_uuars = req.total_num_uuars; 958 resp.tot_uuars = req.total_num_uuars;
688 resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); 959 resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
689 err = ib_copy_to_udata(udata, &resp, 960
690 sizeof(resp) - sizeof(resp.reserved)); 961 if (field_avail(typeof(resp), cqe_version, udata->outlen))
962 resp.response_length += sizeof(resp.cqe_version);
963
964 if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
965 resp.comp_mask |=
966 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
967 resp.hca_core_clock_offset =
968 offsetof(struct mlx5_init_seg, internal_timer_h) %
969 PAGE_SIZE;
970 resp.response_length += sizeof(resp.hca_core_clock_offset) +
971 sizeof(resp.reserved2) +
972 sizeof(resp.reserved3);
973 }
974
975 err = ib_copy_to_udata(udata, &resp, resp.response_length);
691 if (err) 976 if (err)
692 goto out_uars; 977 goto out_td;
693 978
694 uuari->ver = ver; 979 uuari->ver = ver;
695 uuari->num_low_latency_uuars = req.num_low_latency_uuars; 980 uuari->num_low_latency_uuars = req.num_low_latency_uuars;
696 uuari->uars = uars; 981 uuari->uars = uars;
697 uuari->num_uars = num_uars; 982 uuari->num_uars = num_uars;
983 context->cqe_version = resp.cqe_version;
984
698 return &context->ibucontext; 985 return &context->ibucontext;
699 986
987out_td:
988 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
989 mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
990
700out_uars: 991out_uars:
701 for (i--; i >= 0; i--) 992 for (i--; i >= 0; i--)
702 mlx5_cmd_free_uar(dev->mdev, uars[i].index); 993 mlx5_cmd_free_uar(dev->mdev, uars[i].index);
@@ -721,6 +1012,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
721 struct mlx5_uuar_info *uuari = &context->uuari; 1012 struct mlx5_uuar_info *uuari = &context->uuari;
722 int i; 1013 int i;
723 1014
1015 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
1016 mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
1017
724 for (i = 0; i < uuari->num_uars; i++) { 1018 for (i = 0; i < uuari->num_uars; i++) {
725 if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index)) 1019 if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
726 mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); 1020 mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
@@ -790,6 +1084,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
790 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: 1084 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
791 return -ENOSYS; 1085 return -ENOSYS;
792 1086
1087 case MLX5_IB_MMAP_CORE_CLOCK:
1088 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1089 return -EINVAL;
1090
1091 if (vma->vm_flags & (VM_WRITE | VM_EXEC))
1092 return -EPERM;
1093
1094		/* Don't expose information to user space that it shouldn't have */
1095 if (PAGE_SIZE > 4096)
1096 return -EOPNOTSUPP;
1097
1098 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1099 pfn = (dev->mdev->iseg_base +
1100 offsetof(struct mlx5_init_seg, internal_timer_h)) >>
1101 PAGE_SHIFT;
1102 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1103 PAGE_SIZE, vma->vm_page_prot))
1104 return -EAGAIN;
1105
1106 mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
1107 vma->vm_start,
1108 (unsigned long long)pfn << PAGE_SHIFT);
1109 break;
1110
793 default: 1111 default:
794 return -EINVAL; 1112 return -EINVAL;
795 } 1113 }
@@ -1758,6 +2076,32 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1758 mlx5_ib_dealloc_pd(devr->p0); 2076 mlx5_ib_dealloc_pd(devr->p0);
1759} 2077}
1760 2078
2079static u32 get_core_cap_flags(struct ib_device *ibdev)
2080{
2081 struct mlx5_ib_dev *dev = to_mdev(ibdev);
2082 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
2083 u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
2084 u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
2085 u32 ret = 0;
2086
2087 if (ll == IB_LINK_LAYER_INFINIBAND)
2088 return RDMA_CORE_PORT_IBA_IB;
2089
2090 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
2091 return 0;
2092
2093 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
2094 return 0;
2095
2096 if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
2097 ret |= RDMA_CORE_PORT_IBA_ROCE;
2098
2099 if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
2100 ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
2101
2102 return ret;
2103}
2104
1761static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, 2105static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
1762 struct ib_port_immutable *immutable) 2106 struct ib_port_immutable *immutable)
1763{ 2107{
@@ -1770,20 +2114,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
1770 2114
1771 immutable->pkey_tbl_len = attr.pkey_tbl_len; 2115 immutable->pkey_tbl_len = attr.pkey_tbl_len;
1772 immutable->gid_tbl_len = attr.gid_tbl_len; 2116 immutable->gid_tbl_len = attr.gid_tbl_len;
1773 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; 2117 immutable->core_cap_flags = get_core_cap_flags(ibdev);
1774 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 2118 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
1775 2119
1776 return 0; 2120 return 0;
1777} 2121}
1778 2122
2123static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
2124{
2125 int err;
2126
2127 dev->roce.nb.notifier_call = mlx5_netdev_event;
2128 err = register_netdevice_notifier(&dev->roce.nb);
2129 if (err)
2130 return err;
2131
2132 err = mlx5_nic_vport_enable_roce(dev->mdev);
2133 if (err)
2134 goto err_unregister_netdevice_notifier;
2135
2136 return 0;
2137
2138err_unregister_netdevice_notifier:
2139 unregister_netdevice_notifier(&dev->roce.nb);
2140 return err;
2141}
2142
2143static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
2144{
2145 mlx5_nic_vport_disable_roce(dev->mdev);
2146 unregister_netdevice_notifier(&dev->roce.nb);
2147}
2148
1779static void *mlx5_ib_add(struct mlx5_core_dev *mdev) 2149static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
1780{ 2150{
1781 struct mlx5_ib_dev *dev; 2151 struct mlx5_ib_dev *dev;
2152 enum rdma_link_layer ll;
2153 int port_type_cap;
1782 int err; 2154 int err;
1783 int i; 2155 int i;
1784 2156
1785 /* don't create IB instance over Eth ports, no RoCE yet! */ 2157 port_type_cap = MLX5_CAP_GEN(mdev, port_type);
1786 if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) 2158 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
2159
2160 if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
1787 return NULL; 2161 return NULL;
1788 2162
1789 printk_once(KERN_INFO "%s", mlx5_version); 2163 printk_once(KERN_INFO "%s", mlx5_version);
@@ -1794,6 +2168,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
1794 2168
1795 dev->mdev = mdev; 2169 dev->mdev = mdev;
1796 2170
2171 rwlock_init(&dev->roce.netdev_lock);
1797 err = get_port_caps(dev); 2172 err = get_port_caps(dev);
1798 if (err) 2173 if (err)
1799 goto err_dealloc; 2174 goto err_dealloc;
@@ -1843,7 +2218,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
1843 2218
1844 dev->ib_dev.query_device = mlx5_ib_query_device; 2219 dev->ib_dev.query_device = mlx5_ib_query_device;
1845 dev->ib_dev.query_port = mlx5_ib_query_port; 2220 dev->ib_dev.query_port = mlx5_ib_query_port;
2221 dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
2222 if (ll == IB_LINK_LAYER_ETHERNET)
2223 dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
1846 dev->ib_dev.query_gid = mlx5_ib_query_gid; 2224 dev->ib_dev.query_gid = mlx5_ib_query_gid;
2225 dev->ib_dev.add_gid = mlx5_ib_add_gid;
2226 dev->ib_dev.del_gid = mlx5_ib_del_gid;
1847 dev->ib_dev.query_pkey = mlx5_ib_query_pkey; 2227 dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
1848 dev->ib_dev.modify_device = mlx5_ib_modify_device; 2228 dev->ib_dev.modify_device = mlx5_ib_modify_device;
1849 dev->ib_dev.modify_port = mlx5_ib_modify_port; 2229 dev->ib_dev.modify_port = mlx5_ib_modify_port;
@@ -1893,7 +2273,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
1893 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); 2273 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
1894 } 2274 }
1895 2275
1896 if (mlx5_ib_port_link_layer(&dev->ib_dev) == 2276 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
1897 IB_LINK_LAYER_ETHERNET) { 2277 IB_LINK_LAYER_ETHERNET) {
1898 dev->ib_dev.create_flow = mlx5_ib_create_flow; 2278 dev->ib_dev.create_flow = mlx5_ib_create_flow;
1899 dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; 2279 dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
@@ -1908,9 +2288,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
1908 mutex_init(&dev->flow_db.lock); 2288 mutex_init(&dev->flow_db.lock);
1909 mutex_init(&dev->cap_mask_mutex); 2289 mutex_init(&dev->cap_mask_mutex);
1910 2290
2291 if (ll == IB_LINK_LAYER_ETHERNET) {
2292 err = mlx5_enable_roce(dev);
2293 if (err)
2294 goto err_dealloc;
2295 }
2296
1911 err = create_dev_resources(&dev->devr); 2297 err = create_dev_resources(&dev->devr);
1912 if (err) 2298 if (err)
1913 goto err_dealloc; 2299 goto err_disable_roce;
1914 2300
1915 err = mlx5_ib_odp_init_one(dev); 2301 err = mlx5_ib_odp_init_one(dev);
1916 if (err) 2302 if (err)
@@ -1947,6 +2333,10 @@ err_odp:
1947err_rsrc: 2333err_rsrc:
1948 destroy_dev_resources(&dev->devr); 2334 destroy_dev_resources(&dev->devr);
1949 2335
2336err_disable_roce:
2337 if (ll == IB_LINK_LAYER_ETHERNET)
2338 mlx5_disable_roce(dev);
2339
1950err_dealloc: 2340err_dealloc:
1951 ib_dealloc_device((struct ib_device *)dev); 2341 ib_dealloc_device((struct ib_device *)dev);
1952 2342
@@ -1956,11 +2346,14 @@ err_dealloc:
1956static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) 2346static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
1957{ 2347{
1958 struct mlx5_ib_dev *dev = context; 2348 struct mlx5_ib_dev *dev = context;
2349 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
1959 2350
1960 ib_unregister_device(&dev->ib_dev); 2351 ib_unregister_device(&dev->ib_dev);
1961 destroy_umrc_res(dev); 2352 destroy_umrc_res(dev);
1962 mlx5_ib_odp_remove_one(dev); 2353 mlx5_ib_odp_remove_one(dev);
1963 destroy_dev_resources(&dev->devr); 2354 destroy_dev_resources(&dev->devr);
2355 if (ll == IB_LINK_LAYER_ETHERNET)
2356 mlx5_disable_roce(dev);
1964 ib_dealloc_device(&dev->ib_dev); 2357 ib_dealloc_device(&dev->ib_dev);
1965} 2358}
1966 2359
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 1474cccd1e0f..d2b9737baa36 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -42,6 +42,7 @@
42#include <linux/mlx5/qp.h> 42#include <linux/mlx5/qp.h>
43#include <linux/mlx5/srq.h> 43#include <linux/mlx5/srq.h>
44#include <linux/types.h> 44#include <linux/types.h>
45#include <linux/mlx5/transobj.h>
45 46
46#define mlx5_ib_dbg(dev, format, arg...) \ 47#define mlx5_ib_dbg(dev, format, arg...) \
47pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 48pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -55,6 +56,11 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
55pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 56pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
56 __LINE__, current->pid, ##arg) 57 __LINE__, current->pid, ##arg)
57 58
59#define field_avail(type, fld, sz) (offsetof(type, fld) + \
60 sizeof(((type *)0)->fld) <= (sz))
61#define MLX5_IB_DEFAULT_UIDX 0xffffff
62#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
63
58enum { 64enum {
59 MLX5_IB_MMAP_CMD_SHIFT = 8, 65 MLX5_IB_MMAP_CMD_SHIFT = 8,
60 MLX5_IB_MMAP_CMD_MASK = 0xff, 66 MLX5_IB_MMAP_CMD_MASK = 0xff,
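The field_avail() macro added in the hunk above underpins the extensible-ABI pattern used by mlx5_ib_alloc_ucontext(): a response field is copied out only if the caller's output buffer is large enough to contain it, and response_length reports how much was actually written, so old userspace keeps working unchanged. A self-contained illustration of the same idea; the struct and field names below are invented for the example, not part of the driver ABI:

#include <stddef.h>
#include <stdio.h>

#define field_avail(type, fld, sz) (offsetof(type, fld) + \
				    sizeof(((type *)0)->fld) <= (sz))

/* A made-up, extensible response: v1 userspace only knows the first two
 * fields, v2 userspace also knows cqe_version.
 */
struct demo_resp {
	unsigned int tot_uuars;
	unsigned int response_length;
	unsigned char cqe_version;
};

static void fill_resp(struct demo_resp *resp, size_t outlen)
{
	resp->tot_uuars = 16;
	resp->response_length = offsetof(struct demo_resp, response_length) +
				sizeof(resp->response_length);

	/* Only report cqe_version if the caller's buffer can hold it. */
	if (field_avail(struct demo_resp, cqe_version, outlen)) {
		resp->cqe_version = 1;
		resp->response_length += sizeof(resp->cqe_version);
	}
}

int main(void)
{
	struct demo_resp resp = { 0 };

	fill_resp(&resp, 8);                 /* old userspace: 8-byte buffer */
	printf("old: len=%u\n", resp.response_length); /* 8, cqe_version skipped */

	fill_resp(&resp, sizeof(resp));      /* new userspace: full struct */
	printf("new: len=%u\n", resp.response_length); /* 9 */
	return 0;
}
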
@@ -62,7 +68,9 @@ enum {
62 68
63enum mlx5_ib_mmap_cmd { 69enum mlx5_ib_mmap_cmd {
64 MLX5_IB_MMAP_REGULAR_PAGE = 0, 70 MLX5_IB_MMAP_REGULAR_PAGE = 0,
65 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */ 71 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
72 /* 5 is chosen in order to be compatible with old versions of libmlx5 */
73 MLX5_IB_MMAP_CORE_CLOCK = 5,
66}; 74};
67 75
68enum { 76enum {
@@ -85,6 +93,15 @@ enum mlx5_ib_mad_ifc_flags {
85 MLX5_MAD_IFC_NET_VIEW = 4, 93 MLX5_MAD_IFC_NET_VIEW = 4,
86}; 94};
87 95
96enum {
97 MLX5_CROSS_CHANNEL_UUAR = 0,
98};
99
100enum {
101 MLX5_CQE_VERSION_V0,
102 MLX5_CQE_VERSION_V1,
103};
104
88struct mlx5_ib_ucontext { 105struct mlx5_ib_ucontext {
89 struct ib_ucontext ibucontext; 106 struct ib_ucontext ibucontext;
90 struct list_head db_page_list; 107 struct list_head db_page_list;
@@ -93,6 +110,9 @@ struct mlx5_ib_ucontext {
93 */ 110 */
94 struct mutex db_page_mutex; 111 struct mutex db_page_mutex;
95 struct mlx5_uuar_info uuari; 112 struct mlx5_uuar_info uuari;
113 u8 cqe_version;
114 /* Transport Domain number */
115 u32 tdn;
96}; 116};
97 117
98static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) 118static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -201,47 +221,70 @@ struct mlx5_ib_pfault {
201 struct mlx5_pagefault mpfault; 221 struct mlx5_pagefault mpfault;
202}; 222};
203 223
224struct mlx5_ib_ubuffer {
225 struct ib_umem *umem;
226 int buf_size;
227 u64 buf_addr;
228};
229
230struct mlx5_ib_qp_base {
231 struct mlx5_ib_qp *container_mibqp;
232 struct mlx5_core_qp mqp;
233 struct mlx5_ib_ubuffer ubuffer;
234};
235
236struct mlx5_ib_qp_trans {
237 struct mlx5_ib_qp_base base;
238 u16 xrcdn;
239 u8 alt_port;
240 u8 atomic_rd_en;
241 u8 resp_depth;
242};
243
204struct mlx5_ib_rq { 244struct mlx5_ib_rq {
245 struct mlx5_ib_qp_base base;
246 struct mlx5_ib_wq *rq;
247 struct mlx5_ib_ubuffer ubuffer;
248 struct mlx5_db *doorbell;
205 u32 tirn; 249 u32 tirn;
250 u8 state;
251};
252
253struct mlx5_ib_sq {
254 struct mlx5_ib_qp_base base;
255 struct mlx5_ib_wq *sq;
256 struct mlx5_ib_ubuffer ubuffer;
257 struct mlx5_db *doorbell;
258 u32 tisn;
259 u8 state;
206}; 260};
207 261
208struct mlx5_ib_raw_packet_qp { 262struct mlx5_ib_raw_packet_qp {
263 struct mlx5_ib_sq sq;
209 struct mlx5_ib_rq rq; 264 struct mlx5_ib_rq rq;
210}; 265};
211 266
212struct mlx5_ib_qp { 267struct mlx5_ib_qp {
213 struct ib_qp ibqp; 268 struct ib_qp ibqp;
214 union { 269 union {
215 struct mlx5_core_qp mqp; 270 struct mlx5_ib_qp_trans trans_qp;
216 struct mlx5_ib_raw_packet_qp raw_packet_qp; 271 struct mlx5_ib_raw_packet_qp raw_packet_qp;
217 }; 272 };
218
219 struct mlx5_buf buf; 273 struct mlx5_buf buf;
220 274
221 struct mlx5_db db; 275 struct mlx5_db db;
222 struct mlx5_ib_wq rq; 276 struct mlx5_ib_wq rq;
223 277
224 u32 doorbell_qpn;
225 u8 sq_signal_bits; 278 u8 sq_signal_bits;
226 u8 fm_cache; 279 u8 fm_cache;
227 int sq_max_wqes_per_wr;
228 int sq_spare_wqes;
229 struct mlx5_ib_wq sq; 280 struct mlx5_ib_wq sq;
230 281
231 struct ib_umem *umem;
232 int buf_size;
233
234 /* serialize qp state modifications 282 /* serialize qp state modifications
235 */ 283 */
236 struct mutex mutex; 284 struct mutex mutex;
237 u16 xrcdn;
238 u32 flags; 285 u32 flags;
239 u8 port; 286 u8 port;
240 u8 alt_port;
241 u8 atomic_rd_en;
242 u8 resp_depth;
243 u8 state; 287 u8 state;
244 int mlx_type;
245 int wq_sig; 288 int wq_sig;
246 int scat_cqe; 289 int scat_cqe;
247 int max_inline_data; 290 int max_inline_data;
@@ -284,6 +327,9 @@ struct mlx5_ib_cq_buf {
284enum mlx5_ib_qp_flags { 327enum mlx5_ib_qp_flags {
285 MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, 328 MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
286 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, 329 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
330 MLX5_IB_QP_CROSS_CHANNEL = 1 << 2,
331 MLX5_IB_QP_MANAGED_SEND = 1 << 3,
332 MLX5_IB_QP_MANAGED_RECV = 1 << 4,
287}; 333};
288 334
289struct mlx5_umr_wr { 335struct mlx5_umr_wr {
@@ -326,6 +372,7 @@ struct mlx5_ib_cq {
326 struct mlx5_ib_cq_buf *resize_buf; 372 struct mlx5_ib_cq_buf *resize_buf;
327 struct ib_umem *resize_umem; 373 struct ib_umem *resize_umem;
328 int cqe_size; 374 int cqe_size;
375 u32 create_flags;
329}; 376};
330 377
331struct mlx5_ib_srq { 378struct mlx5_ib_srq {
@@ -449,9 +496,19 @@ struct mlx5_ib_resources {
449 struct ib_srq *s1; 496 struct ib_srq *s1;
450}; 497};
451 498
499struct mlx5_roce {
500 /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
501 * netdev pointer
502 */
503 rwlock_t netdev_lock;
504 struct net_device *netdev;
505 struct notifier_block nb;
506};
507
452struct mlx5_ib_dev { 508struct mlx5_ib_dev {
453 struct ib_device ib_dev; 509 struct ib_device ib_dev;
454 struct mlx5_core_dev *mdev; 510 struct mlx5_core_dev *mdev;
511 struct mlx5_roce roce;
455 MLX5_DECLARE_DOORBELL_LOCK(uar_lock); 512 MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
456 int num_ports; 513 int num_ports;
457 /* serialize update of capability mask 514 /* serialize update of capability mask
@@ -498,7 +555,7 @@ static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
498 555
499static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) 556static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
500{ 557{
501 return container_of(mqp, struct mlx5_ib_qp, mqp); 558 return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
502} 559}
503 560
504static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) 561static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
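The new to_mibqp() above reflects the reworked layout: mlx5_core_qp is now embedded in a mlx5_ib_qp_base (one per SQ and RQ for raw packet QPs, one for ordinary transport QPs), and the base's container_mibqp back-pointer resolves any core-QP event to its owning mlx5_ib_qp. A minimal standalone illustration of that two-step container_of lookup; container_of is open-coded and all names below are stand-ins:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct core_qp { unsigned int qpn; };

struct qp_base {
	struct qp *container_qp;	/* back-pointer to the owning QP */
	struct core_qp mqp;
};

struct qp {
	struct qp_base sq_base;
	struct qp_base rq_base;
	const char *name;
};

/* Same shape as to_mibqp(): core QP -> enclosing base -> owning QP. */
static struct qp *to_qp(struct core_qp *mqp)
{
	return container_of(mqp, struct qp_base, mqp)->container_qp;
}

int main(void)
{
	struct qp qp = { .name = "raw-packet-qp" };

	qp.sq_base.container_qp = &qp;
	qp.rq_base.container_qp = &qp;
	qp.sq_base.mqp.qpn = 0x10;
	qp.rq_base.mqp.qpn = 0x11;

	printf("%s\n", to_qp(&qp.rq_base.mqp)->name); /* raw-packet-qp */
	return 0;
}
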
@@ -550,8 +607,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
550int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, 607int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
551 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, 608 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
552 const void *in_mad, void *response_mad); 609 const void *in_mad, void *response_mad);
553struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
554 struct mlx5_ib_ah *ah);
555struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); 610struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
556int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); 611int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
557int mlx5_ib_destroy_ah(struct ib_ah *ah); 612int mlx5_ib_destroy_ah(struct ib_ah *ah);
@@ -578,7 +633,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
578 struct ib_recv_wr **bad_wr); 633 struct ib_recv_wr **bad_wr);
579void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); 634void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
580int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, 635int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
581 void *buffer, u32 length); 636 void *buffer, u32 length,
637 struct mlx5_ib_qp_base *base);
582struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, 638struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
583 const struct ib_cq_init_attr *attr, 639 const struct ib_cq_init_attr *attr,
584 struct ib_ucontext *context, 640 struct ib_ucontext *context,
@@ -680,6 +736,9 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
680 736
681#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 737#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
682 738
739__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
740 int index);
741
683static inline void init_query_mad(struct ib_smp *mad) 742static inline void init_query_mad(struct ib_smp *mad)
684{ 743{
685 mad->base_version = 1; 744 mad->base_version = 1;
@@ -705,4 +764,28 @@ static inline int is_qp1(enum ib_qp_type qp_type)
705#define MLX5_MAX_UMR_SHIFT 16 764#define MLX5_MAX_UMR_SHIFT 16
706#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) 765#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
707 766
767static inline u32 check_cq_create_flags(u32 flags)
768{
769 /*
770	 * Return a non-zero value if any unsupported CQ create flag is set;
771	 * otherwise return zero.
772 */
773 return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
774 IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
775}
776
777static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
778 u32 *user_index)
779{
780 if (cqe_version) {
781 if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) ||
782 (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK))
783 return -EINVAL;
784 *user_index = cmd_uidx;
785 } else {
786 *user_index = MLX5_IB_DEFAULT_UIDX;
787 }
788
789 return 0;
790}
708#endif /* MLX5_IB_H */ 791#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index aa8391e75385..b8d76361a48d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -153,14 +153,16 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
153 153
154static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, 154static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
155 struct mlx5_ib_pfault *pfault, 155 struct mlx5_ib_pfault *pfault,
156 int error) { 156 int error)
157{
157 struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device); 158 struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
158 int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn, 159 u32 qpn = qp->trans_qp.base.mqp.qpn;
160 int ret = mlx5_core_page_fault_resume(dev->mdev,
161 qpn,
159 pfault->mpfault.flags, 162 pfault->mpfault.flags,
160 error); 163 error);
161 if (ret) 164 if (ret)
162 pr_err("Failed to resolve the page fault on QP 0x%x\n", 165 pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
163 qp->mqp.qpn);
164} 166}
165 167
166/* 168/*
@@ -391,6 +393,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
391#if defined(DEBUG) 393#if defined(DEBUG)
392 u32 ctrl_wqe_index, ctrl_qpn; 394 u32 ctrl_wqe_index, ctrl_qpn;
393#endif 395#endif
396 u32 qpn = qp->trans_qp.base.mqp.qpn;
394 397
395 ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; 398 ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
396 if (ds * MLX5_WQE_DS_UNITS > wqe_length) { 399 if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
@@ -401,7 +404,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
401 404
402 if (ds == 0) { 405 if (ds == 0) {
403 mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n", 406 mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
404 wqe_index, qp->mqp.qpn); 407 wqe_index, qpn);
405 return -EFAULT; 408 return -EFAULT;
406 } 409 }
407 410
@@ -411,16 +414,16 @@ static int mlx5_ib_mr_initiator_pfault_handler(
411 MLX5_WQE_CTRL_WQE_INDEX_SHIFT; 414 MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
412 if (wqe_index != ctrl_wqe_index) { 415 if (wqe_index != ctrl_wqe_index) {
413 mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n", 416 mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
414 wqe_index, qp->mqp.qpn, 417 wqe_index, qpn,
415 ctrl_wqe_index); 418 ctrl_wqe_index);
416 return -EFAULT; 419 return -EFAULT;
417 } 420 }
418 421
419 ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >> 422 ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
420 MLX5_WQE_CTRL_QPN_SHIFT; 423 MLX5_WQE_CTRL_QPN_SHIFT;
421 if (qp->mqp.qpn != ctrl_qpn) { 424 if (qpn != ctrl_qpn) {
422 mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n", 425 mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
423 wqe_index, qp->mqp.qpn, 426 wqe_index, qpn,
424 ctrl_qpn); 427 ctrl_qpn);
425 return -EFAULT; 428 return -EFAULT;
426 } 429 }
@@ -537,6 +540,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
537 int resume_with_error = 0; 540 int resume_with_error = 0;
538 u16 wqe_index = pfault->mpfault.wqe.wqe_index; 541 u16 wqe_index = pfault->mpfault.wqe.wqe_index;
539 int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR; 542 int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
543 u32 qpn = qp->trans_qp.base.mqp.qpn;
540 544
541 buffer = (char *)__get_free_page(GFP_KERNEL); 545 buffer = (char *)__get_free_page(GFP_KERNEL);
542 if (!buffer) { 546 if (!buffer) {
@@ -546,10 +550,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
546 } 550 }
547 551
548 ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer, 552 ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
549 PAGE_SIZE); 553 PAGE_SIZE, &qp->trans_qp.base);
550 if (ret < 0) { 554 if (ret < 0) {
551 mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n", 555 mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
552 -ret, wqe_index, qp->mqp.qpn); 556 -ret, wqe_index, qpn);
553 resume_with_error = 1; 557 resume_with_error = 1;
554 goto resolve_page_fault; 558 goto resolve_page_fault;
555 } 559 }
@@ -586,7 +590,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
586resolve_page_fault: 590resolve_page_fault:
587 mlx5_ib_page_fault_resume(qp, pfault, resume_with_error); 591 mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
588 mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n", 592 mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
589 qp->mqp.qpn, resume_with_error, pfault->mpfault.flags); 593 qpn, resume_with_error,
594 pfault->mpfault.flags);
590 595
591 free_page((unsigned long)buffer); 596 free_page((unsigned long)buffer);
592} 597}
@@ -753,7 +758,7 @@ void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
753 qp->disable_page_faults = 1; 758 qp->disable_page_faults = 1;
754 spin_lock_init(&qp->disable_page_faults_lock); 759 spin_lock_init(&qp->disable_page_faults_lock);
755 760
756 qp->mqp.pfault_handler = mlx5_ib_pfault_handler; 761 qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
757 762
758 for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i) 763 for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
759 INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action); 764 INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 307bdbca8938..8fb9c27485e1 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -32,6 +32,8 @@
32 32
33#include <linux/module.h> 33#include <linux/module.h>
34#include <rdma/ib_umem.h> 34#include <rdma/ib_umem.h>
35#include <rdma/ib_cache.h>
36#include <rdma/ib_user_verbs.h>
35#include "mlx5_ib.h" 37#include "mlx5_ib.h"
36#include "user.h" 38#include "user.h"
37 39
@@ -114,14 +116,15 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
114 * Return: the number of bytes copied, or an error code. 116 * Return: the number of bytes copied, or an error code.
115 */ 117 */
116int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, 118int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
117 void *buffer, u32 length) 119 void *buffer, u32 length,
120 struct mlx5_ib_qp_base *base)
118{ 121{
119 struct ib_device *ibdev = qp->ibqp.device; 122 struct ib_device *ibdev = qp->ibqp.device;
120 struct mlx5_ib_dev *dev = to_mdev(ibdev); 123 struct mlx5_ib_dev *dev = to_mdev(ibdev);
121 struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq; 124 struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
122 size_t offset; 125 size_t offset;
123 size_t wq_end; 126 size_t wq_end;
124 struct ib_umem *umem = qp->umem; 127 struct ib_umem *umem = base->ubuffer.umem;
125 u32 first_copy_length; 128 u32 first_copy_length;
126 int wqe_length; 129 int wqe_length;
127 int ret; 130 int ret;
@@ -172,8 +175,10 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
172 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; 175 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
173 struct ib_event event; 176 struct ib_event event;
174 177
175 if (type == MLX5_EVENT_TYPE_PATH_MIG) 178 if (type == MLX5_EVENT_TYPE_PATH_MIG) {
176 to_mibqp(qp)->port = to_mibqp(qp)->alt_port; 179 /* This event is only valid for trans_qps */
180 to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
181 }
177 182
178 if (ibqp->event_handler) { 183 if (ibqp->event_handler) {
179 event.device = ibqp->device; 184 event.device = ibqp->device;
@@ -366,7 +371,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
366 371
367static int set_user_buf_size(struct mlx5_ib_dev *dev, 372static int set_user_buf_size(struct mlx5_ib_dev *dev,
368 struct mlx5_ib_qp *qp, 373 struct mlx5_ib_qp *qp,
369 struct mlx5_ib_create_qp *ucmd) 374 struct mlx5_ib_create_qp *ucmd,
375 struct mlx5_ib_qp_base *base,
376 struct ib_qp_init_attr *attr)
370{ 377{
371 int desc_sz = 1 << qp->sq.wqe_shift; 378 int desc_sz = 1 << qp->sq.wqe_shift;
372 379
@@ -391,8 +398,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
391 return -EINVAL; 398 return -EINVAL;
392 } 399 }
393 400
394 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + 401 if (attr->qp_type == IB_QPT_RAW_PACKET) {
395 (qp->sq.wqe_cnt << 6); 402 base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
403 qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
404 } else {
405 base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
406 (qp->sq.wqe_cnt << 6);
407 }
396 408
397 return 0; 409 return 0;
398} 410}
@@ -578,8 +590,8 @@ static int to_mlx5_st(enum ib_qp_type type)
578 case IB_QPT_SMI: return MLX5_QP_ST_QP0; 590 case IB_QPT_SMI: return MLX5_QP_ST_QP0;
579 case IB_QPT_GSI: return MLX5_QP_ST_QP1; 591 case IB_QPT_GSI: return MLX5_QP_ST_QP1;
580 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; 592 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
581 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
582 case IB_QPT_RAW_PACKET: 593 case IB_QPT_RAW_PACKET:
594 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
583 case IB_QPT_MAX: 595 case IB_QPT_MAX:
584 default: return -EINVAL; 596 default: return -EINVAL;
585 } 597 }
@@ -590,13 +602,51 @@ static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
590 return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; 602 return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
591} 603}
592 604
605static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
606 struct ib_pd *pd,
607 unsigned long addr, size_t size,
608 struct ib_umem **umem,
609 int *npages, int *page_shift, int *ncont,
610 u32 *offset)
611{
612 int err;
613
614 *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
615 if (IS_ERR(*umem)) {
616 mlx5_ib_dbg(dev, "umem_get failed\n");
617 return PTR_ERR(*umem);
618 }
619
620 mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
621
622 err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
623 if (err) {
624 mlx5_ib_warn(dev, "bad offset\n");
625 goto err_umem;
626 }
627
628 mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
629 addr, size, *npages, *page_shift, *ncont, *offset);
630
631 return 0;
632
633err_umem:
634 ib_umem_release(*umem);
635 *umem = NULL;
636
637 return err;
638}
639
593static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, 640static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
594 struct mlx5_ib_qp *qp, struct ib_udata *udata, 641 struct mlx5_ib_qp *qp, struct ib_udata *udata,
642 struct ib_qp_init_attr *attr,
595 struct mlx5_create_qp_mbox_in **in, 643 struct mlx5_create_qp_mbox_in **in,
596 struct mlx5_ib_create_qp_resp *resp, int *inlen) 644 struct mlx5_ib_create_qp_resp *resp, int *inlen,
645 struct mlx5_ib_qp_base *base)
597{ 646{
598 struct mlx5_ib_ucontext *context; 647 struct mlx5_ib_ucontext *context;
599 struct mlx5_ib_create_qp ucmd; 648 struct mlx5_ib_create_qp ucmd;
649 struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
600 int page_shift = 0; 650 int page_shift = 0;
601 int uar_index; 651 int uar_index;
602 int npages; 652 int npages;
@@ -615,18 +665,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
615 /* 665 /*
616 * TBD: should come from the verbs when we have the API 666 * TBD: should come from the verbs when we have the API
617 */ 667 */
618 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); 668 if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
619 if (uuarn < 0) { 669 /* In CROSS_CHANNEL CQ and QP must use the same UAR */
620 mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); 670 uuarn = MLX5_CROSS_CHANNEL_UUAR;
621 mlx5_ib_dbg(dev, "reverting to medium latency\n"); 671 else {
622 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); 672 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
623 if (uuarn < 0) { 673 if (uuarn < 0) {
624 mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); 674 mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
625 mlx5_ib_dbg(dev, "reverting to high latency\n"); 675 mlx5_ib_dbg(dev, "reverting to medium latency\n");
626 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); 676 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
627 if (uuarn < 0) { 677 if (uuarn < 0) {
628 mlx5_ib_warn(dev, "uuar allocation failed\n"); 678 mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
629 return uuarn; 679 mlx5_ib_dbg(dev, "reverting to high latency\n");
680 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
681 if (uuarn < 0) {
682 mlx5_ib_warn(dev, "uuar allocation failed\n");
683 return uuarn;
684 }
630 } 685 }
631 } 686 }
632 } 687 }
@@ -638,32 +693,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
638 qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); 693 qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
639 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; 694 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
640 695
641 err = set_user_buf_size(dev, qp, &ucmd); 696 err = set_user_buf_size(dev, qp, &ucmd, base, attr);
642 if (err) 697 if (err)
643 goto err_uuar; 698 goto err_uuar;
644 699
645 if (ucmd.buf_addr && qp->buf_size) { 700 if (ucmd.buf_addr && ubuffer->buf_size) {
646 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, 701 ubuffer->buf_addr = ucmd.buf_addr;
647 qp->buf_size, 0, 0); 702 err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
648 if (IS_ERR(qp->umem)) { 703 ubuffer->buf_size,
649 mlx5_ib_dbg(dev, "umem_get failed\n"); 704 &ubuffer->umem, &npages, &page_shift,
650 err = PTR_ERR(qp->umem); 705 &ncont, &offset);
706 if (err)
651 goto err_uuar; 707 goto err_uuar;
652 }
653 } else { 708 } else {
654 qp->umem = NULL; 709 ubuffer->umem = NULL;
655 }
656
657 if (qp->umem) {
658 mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
659 &ncont, NULL);
660 err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
661 if (err) {
662 mlx5_ib_warn(dev, "bad offset\n");
663 goto err_umem;
664 }
665 mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
666 ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
667 } 710 }
668 711
669 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; 712 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
@@ -672,8 +715,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
672 err = -ENOMEM; 715 err = -ENOMEM;
673 goto err_umem; 716 goto err_umem;
674 } 717 }
675 if (qp->umem) 718 if (ubuffer->umem)
676 mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); 719 mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
720 (*in)->pas, 0);
677 (*in)->ctx.log_pg_sz_remote_qpn = 721 (*in)->ctx.log_pg_sz_remote_qpn =
678 cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); 722 cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
679 (*in)->ctx.params2 = cpu_to_be32(offset << 6); 723 (*in)->ctx.params2 = cpu_to_be32(offset << 6);
@@ -704,29 +748,31 @@ err_free:
704 kvfree(*in); 748 kvfree(*in);
705 749
706err_umem: 750err_umem:
707 if (qp->umem) 751 if (ubuffer->umem)
708 ib_umem_release(qp->umem); 752 ib_umem_release(ubuffer->umem);
709 753
710err_uuar: 754err_uuar:
711 free_uuar(&context->uuari, uuarn); 755 free_uuar(&context->uuari, uuarn);
712 return err; 756 return err;
713} 757}
714 758
715static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp) 759static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
760 struct mlx5_ib_qp_base *base)
716{ 761{
717 struct mlx5_ib_ucontext *context; 762 struct mlx5_ib_ucontext *context;
718 763
719 context = to_mucontext(pd->uobject->context); 764 context = to_mucontext(pd->uobject->context);
720 mlx5_ib_db_unmap_user(context, &qp->db); 765 mlx5_ib_db_unmap_user(context, &qp->db);
721 if (qp->umem) 766 if (base->ubuffer.umem)
722 ib_umem_release(qp->umem); 767 ib_umem_release(base->ubuffer.umem);
723 free_uuar(&context->uuari, qp->uuarn); 768 free_uuar(&context->uuari, qp->uuarn);
724} 769}
725 770
726static int create_kernel_qp(struct mlx5_ib_dev *dev, 771static int create_kernel_qp(struct mlx5_ib_dev *dev,
727 struct ib_qp_init_attr *init_attr, 772 struct ib_qp_init_attr *init_attr,
728 struct mlx5_ib_qp *qp, 773 struct mlx5_ib_qp *qp,
729 struct mlx5_create_qp_mbox_in **in, int *inlen) 774 struct mlx5_create_qp_mbox_in **in, int *inlen,
775 struct mlx5_ib_qp_base *base)
730{ 776{
731 enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW; 777 enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
732 struct mlx5_uuar_info *uuari; 778 struct mlx5_uuar_info *uuari;
@@ -758,9 +804,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
758 804
759 qp->rq.offset = 0; 805 qp->rq.offset = 0;
760 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; 806 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
761 qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); 807 base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
762 808
763 err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf); 809 err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
764 if (err) { 810 if (err) {
765 mlx5_ib_dbg(dev, "err %d\n", err); 811 mlx5_ib_dbg(dev, "err %d\n", err);
766 goto err_uuar; 812 goto err_uuar;
@@ -853,19 +899,304 @@ static int is_connected(enum ib_qp_type qp_type)
853 return 0; 899 return 0;
854} 900}
855 901
902static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
903 struct mlx5_ib_sq *sq, u32 tdn)
904{
905 u32 in[MLX5_ST_SZ_DW(create_tis_in)];
906 void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
907
908 memset(in, 0, sizeof(in));
909
910 MLX5_SET(tisc, tisc, transport_domain, tdn);
911
912 return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
913}
914
915static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
916 struct mlx5_ib_sq *sq)
917{
918 mlx5_core_destroy_tis(dev->mdev, sq->tisn);
919}
920
921static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
922 struct mlx5_ib_sq *sq, void *qpin,
923 struct ib_pd *pd)
924{
925 struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
926 __be64 *pas;
927 void *in;
928 void *sqc;
929 void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
930 void *wq;
931 int inlen;
932 int err;
933 int page_shift = 0;
934 int npages;
935 int ncont = 0;
936 u32 offset = 0;
937
938 err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
939 &sq->ubuffer.umem, &npages, &page_shift,
940 &ncont, &offset);
941 if (err)
942 return err;
943
944 inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
945 in = mlx5_vzalloc(inlen);
946 if (!in) {
947 err = -ENOMEM;
948 goto err_umem;
949 }
950
951 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
952 MLX5_SET(sqc, sqc, flush_in_error_en, 1);
953 MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
954 MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
955 MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
956 MLX5_SET(sqc, sqc, tis_lst_sz, 1);
957 MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
958
959 wq = MLX5_ADDR_OF(sqc, sqc, wq);
960 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
961 MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
962 MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
963 MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
964 MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
965 MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
966 MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
967 MLX5_SET(wq, wq, page_offset, offset);
968
969 pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
970 mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
971
972 err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
973
974 kvfree(in);
975
976 if (err)
977 goto err_umem;
978
979 return 0;
980
981err_umem:
982 ib_umem_release(sq->ubuffer.umem);
983 sq->ubuffer.umem = NULL;
984
985 return err;
986}
987
988static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
989 struct mlx5_ib_sq *sq)
990{
991 mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
992 ib_umem_release(sq->ubuffer.umem);
993}
994
995static int get_rq_pas_size(void *qpc)
996{
997 u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
998 u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
999 u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
1000 u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
1001 u32 po_quanta = 1 << (log_page_size - 6);
1002 u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
1003 u32 page_size = 1 << log_page_size;
1004 u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
1005 u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
1006
1007 return rq_num_pas * sizeof(u64);
1008}
1009
1010static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1011 struct mlx5_ib_rq *rq, void *qpin)
1012{
1013 __be64 *pas;
1014 __be64 *qp_pas;
1015 void *in;
1016 void *rqc;
1017 void *wq;
1018 void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
1019 int inlen;
1020 int err;
1021 u32 rq_pas_size = get_rq_pas_size(qpc);
1022
1023 inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
1024 in = mlx5_vzalloc(inlen);
1025 if (!in)
1026 return -ENOMEM;
1027
1028 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
1029 MLX5_SET(rqc, rqc, vsd, 1);
1030 MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
1031 MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
1032 MLX5_SET(rqc, rqc, flush_in_error_en, 1);
1033 MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
1034 MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
1035
1036 wq = MLX5_ADDR_OF(rqc, rqc, wq);
1037 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1038 MLX5_SET(wq, wq, end_padding_mode,
1039 MLX5_GET64(qpc, qpc, end_padding_mode));
1040 MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
1041 MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
1042 MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
1043 MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
1044 MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
1045 MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
1046
1047 pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
1048 qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
1049 memcpy(pas, qp_pas, rq_pas_size);
1050
1051 err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
1052
1053 kvfree(in);
1054
1055 return err;
1056}
1057
1058static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1059 struct mlx5_ib_rq *rq)
1060{
1061 mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
1062}
1063
1064static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1065 struct mlx5_ib_rq *rq, u32 tdn)
1066{
1067 u32 *in;
1068 void *tirc;
1069 int inlen;
1070 int err;
1071
1072 inlen = MLX5_ST_SZ_BYTES(create_tir_in);
1073 in = mlx5_vzalloc(inlen);
1074 if (!in)
1075 return -ENOMEM;
1076
1077 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1078 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
1079 MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
1080 MLX5_SET(tirc, tirc, transport_domain, tdn);
1081
1082 err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
1083
1084 kvfree(in);
1085
1086 return err;
1087}
1088
1089static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1090 struct mlx5_ib_rq *rq)
1091{
1092 mlx5_core_destroy_tir(dev->mdev, rq->tirn);
1093}
1094
1095static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1096 struct mlx5_create_qp_mbox_in *in,
1097 struct ib_pd *pd)
1098{
1099 struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
1100 struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1101 struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1102 struct ib_uobject *uobj = pd->uobject;
1103 struct ib_ucontext *ucontext = uobj->context;
1104 struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
1105 int err;
1106 u32 tdn = mucontext->tdn;
1107
1108 if (qp->sq.wqe_cnt) {
1109 err = create_raw_packet_qp_tis(dev, sq, tdn);
1110 if (err)
1111 return err;
1112
1113 err = create_raw_packet_qp_sq(dev, sq, in, pd);
1114 if (err)
1115 goto err_destroy_tis;
1116
1117 sq->base.container_mibqp = qp;
1118 }
1119
1120 if (qp->rq.wqe_cnt) {
1121 err = create_raw_packet_qp_rq(dev, rq, in);
1122 if (err)
1123 goto err_destroy_sq;
1124
1125 rq->base.container_mibqp = qp;
1126
1127 err = create_raw_packet_qp_tir(dev, rq, tdn);
1128 if (err)
1129 goto err_destroy_rq;
1130 }
1131
1132 qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
1133 rq->base.mqp.qpn;
1134
1135 return 0;
1136
1137err_destroy_rq:
1138 destroy_raw_packet_qp_rq(dev, rq);
1139err_destroy_sq:
1140 if (!qp->sq.wqe_cnt)
1141 return err;
1142 destroy_raw_packet_qp_sq(dev, sq);
1143err_destroy_tis:
1144 destroy_raw_packet_qp_tis(dev, sq);
1145
1146 return err;
1147}
1148
1149static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
1150 struct mlx5_ib_qp *qp)
1151{
1152 struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
1153 struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1154 struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1155
1156 if (qp->rq.wqe_cnt) {
1157 destroy_raw_packet_qp_tir(dev, rq);
1158 destroy_raw_packet_qp_rq(dev, rq);
1159 }
1160
1161 if (qp->sq.wqe_cnt) {
1162 destroy_raw_packet_qp_sq(dev, sq);
1163 destroy_raw_packet_qp_tis(dev, sq);
1164 }
1165}
1166
1167static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
1168 struct mlx5_ib_raw_packet_qp *raw_packet_qp)
1169{
1170 struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1171 struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1172
1173 sq->sq = &qp->sq;
1174 rq->rq = &qp->rq;
1175 sq->doorbell = &qp->db;
1176 rq->doorbell = &qp->db;
1177}
1178
856static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, 1179static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
857 struct ib_qp_init_attr *init_attr, 1180 struct ib_qp_init_attr *init_attr,
858 struct ib_udata *udata, struct mlx5_ib_qp *qp) 1181 struct ib_udata *udata, struct mlx5_ib_qp *qp)
859{ 1182{
860 struct mlx5_ib_resources *devr = &dev->devr; 1183 struct mlx5_ib_resources *devr = &dev->devr;
861 struct mlx5_core_dev *mdev = dev->mdev; 1184 struct mlx5_core_dev *mdev = dev->mdev;
1185 struct mlx5_ib_qp_base *base;
862 struct mlx5_ib_create_qp_resp resp; 1186 struct mlx5_ib_create_qp_resp resp;
863 struct mlx5_create_qp_mbox_in *in; 1187 struct mlx5_create_qp_mbox_in *in;
864 struct mlx5_ib_create_qp ucmd; 1188 struct mlx5_ib_create_qp ucmd;
865 int inlen = sizeof(*in); 1189 int inlen = sizeof(*in);
866 int err; 1190 int err;
1191 u32 uidx = MLX5_IB_DEFAULT_UIDX;
1192 void *qpc;
1193
1194 base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
1195 &qp->raw_packet_qp.rq.base :
1196 &qp->trans_qp.base;
867 1197
868 mlx5_ib_odp_create_qp(qp); 1198 if (init_attr->qp_type != IB_QPT_RAW_PACKET)
1199 mlx5_ib_odp_create_qp(qp);
869 1200
870 mutex_init(&qp->mutex); 1201 mutex_init(&qp->mutex);
871 spin_lock_init(&qp->sq.lock); 1202 spin_lock_init(&qp->sq.lock);
@@ -880,6 +1211,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
880 } 1211 }
881 } 1212 }
882 1213
1214 if (init_attr->create_flags &
1215 (IB_QP_CREATE_CROSS_CHANNEL |
1216 IB_QP_CREATE_MANAGED_SEND |
1217 IB_QP_CREATE_MANAGED_RECV)) {
1218 if (!MLX5_CAP_GEN(mdev, cd)) {
1219 mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
1220 return -EINVAL;
1221 }
1222 if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
1223 qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
1224 if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
1225 qp->flags |= MLX5_IB_QP_MANAGED_SEND;
1226 if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
1227 qp->flags |= MLX5_IB_QP_MANAGED_RECV;
1228 }
883 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 1229 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
884 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; 1230 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
885 1231
@@ -889,6 +1235,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
889 return -EFAULT; 1235 return -EFAULT;
890 } 1236 }
891 1237
1238 err = get_qp_user_index(to_mucontext(pd->uobject->context),
1239 &ucmd, udata->inlen, &uidx);
1240 if (err)
1241 return err;
1242
892 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); 1243 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
893 qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); 1244 qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
894 } else { 1245 } else {
@@ -918,11 +1269,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
918 ucmd.sq_wqe_count, max_wqes); 1269 ucmd.sq_wqe_count, max_wqes);
919 return -EINVAL; 1270 return -EINVAL;
920 } 1271 }
921 err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); 1272 err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
1273 &resp, &inlen, base);
922 if (err) 1274 if (err)
923 mlx5_ib_dbg(dev, "err %d\n", err); 1275 mlx5_ib_dbg(dev, "err %d\n", err);
924 } else { 1276 } else {
925 err = create_kernel_qp(dev, init_attr, qp, &in, &inlen); 1277 err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
1278 base);
926 if (err) 1279 if (err)
927 mlx5_ib_dbg(dev, "err %d\n", err); 1280 mlx5_ib_dbg(dev, "err %d\n", err);
928 } 1281 }
@@ -954,6 +1307,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
954 if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) 1307 if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
955 in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); 1308 in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
956 1309
1310 if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
1311 in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
1312 if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
1313 in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
1314 if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
1315 in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
1316
957 if (qp->scat_cqe && is_connected(init_attr->qp_type)) { 1317 if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
958 int rcqe_sz; 1318 int rcqe_sz;
959 int scqe_sz; 1319 int scqe_sz;
@@ -1018,26 +1378,35 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1018 1378
1019 in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma); 1379 in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
1020 1380
1021 err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen); 1381 if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
1382 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
1383 /* 0xffffff means we ask to work with cqe version 0 */
1384 MLX5_SET(qpc, qpc, user_index, uidx);
1385 }
1386
1387 if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
1388 qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
1389 raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
1390 err = create_raw_packet_qp(dev, qp, in, pd);
1391 } else {
1392 err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
1393 }
1394
1022 if (err) { 1395 if (err) {
1023 mlx5_ib_dbg(dev, "create qp failed\n"); 1396 mlx5_ib_dbg(dev, "create qp failed\n");
1024 goto err_create; 1397 goto err_create;
1025 } 1398 }
1026 1399
1027 kvfree(in); 1400 kvfree(in);
1028 /* Hardware wants QPN written in big-endian order (after
1029 * shifting) for send doorbell. Precompute this value to save
1030 * a little bit when posting sends.
1031 */
1032 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
1033 1401
1034 qp->mqp.event = mlx5_ib_qp_event; 1402 base->container_mibqp = qp;
1403 base->mqp.event = mlx5_ib_qp_event;
1035 1404
1036 return 0; 1405 return 0;
1037 1406
1038err_create: 1407err_create:
1039 if (qp->create_type == MLX5_QP_USER) 1408 if (qp->create_type == MLX5_QP_USER)
1040 destroy_qp_user(pd, qp); 1409 destroy_qp_user(pd, qp, base);
1041 else if (qp->create_type == MLX5_QP_KERNEL) 1410 else if (qp->create_type == MLX5_QP_KERNEL)
1042 destroy_qp_kernel(dev, qp); 1411 destroy_qp_kernel(dev, qp);
1043 1412
@@ -1129,11 +1498,11 @@ static void get_cqs(struct mlx5_ib_qp *qp,
1129 case IB_QPT_UD: 1498 case IB_QPT_UD:
1130 case IB_QPT_RAW_IPV6: 1499 case IB_QPT_RAW_IPV6:
1131 case IB_QPT_RAW_ETHERTYPE: 1500 case IB_QPT_RAW_ETHERTYPE:
1501 case IB_QPT_RAW_PACKET:
1132 *send_cq = to_mcq(qp->ibqp.send_cq); 1502 *send_cq = to_mcq(qp->ibqp.send_cq);
1133 *recv_cq = to_mcq(qp->ibqp.recv_cq); 1503 *recv_cq = to_mcq(qp->ibqp.recv_cq);
1134 break; 1504 break;
1135 1505
1136 case IB_QPT_RAW_PACKET:
1137 case IB_QPT_MAX: 1506 case IB_QPT_MAX:
1138 default: 1507 default:
1139 *send_cq = NULL; 1508 *send_cq = NULL;
@@ -1142,45 +1511,66 @@ static void get_cqs(struct mlx5_ib_qp *qp,
1142 } 1511 }
1143} 1512}
1144 1513
1514static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1515 u16 operation);
1516
1145static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) 1517static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1146{ 1518{
1147 struct mlx5_ib_cq *send_cq, *recv_cq; 1519 struct mlx5_ib_cq *send_cq, *recv_cq;
1520 struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
1148 struct mlx5_modify_qp_mbox_in *in; 1521 struct mlx5_modify_qp_mbox_in *in;
1149 int err; 1522 int err;
1150 1523
1524 base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
1525 &qp->raw_packet_qp.rq.base :
1526 &qp->trans_qp.base;
1527
1151 in = kzalloc(sizeof(*in), GFP_KERNEL); 1528 in = kzalloc(sizeof(*in), GFP_KERNEL);
1152 if (!in) 1529 if (!in)
1153 return; 1530 return;
1154 1531
1155 if (qp->state != IB_QPS_RESET) { 1532 if (qp->state != IB_QPS_RESET) {
1156 mlx5_ib_qp_disable_pagefaults(qp); 1533 if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
1157 if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state), 1534 mlx5_ib_qp_disable_pagefaults(qp);
1158 MLX5_QP_STATE_RST, in, 0, &qp->mqp)) 1535 err = mlx5_core_qp_modify(dev->mdev,
1159 mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", 1536 MLX5_CMD_OP_2RST_QP, in, 0,
1160 qp->mqp.qpn); 1537 &base->mqp);
1538 } else {
1539 err = modify_raw_packet_qp(dev, qp,
1540 MLX5_CMD_OP_2RST_QP);
1541 }
1542 if (err)
1543 mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
1544 base->mqp.qpn);
1161 } 1545 }
1162 1546
1163 get_cqs(qp, &send_cq, &recv_cq); 1547 get_cqs(qp, &send_cq, &recv_cq);
1164 1548
1165 if (qp->create_type == MLX5_QP_KERNEL) { 1549 if (qp->create_type == MLX5_QP_KERNEL) {
1166 mlx5_ib_lock_cqs(send_cq, recv_cq); 1550 mlx5_ib_lock_cqs(send_cq, recv_cq);
1167 __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, 1551 __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
1168 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); 1552 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1169 if (send_cq != recv_cq) 1553 if (send_cq != recv_cq)
1170 __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); 1554 __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
1555 NULL);
1171 mlx5_ib_unlock_cqs(send_cq, recv_cq); 1556 mlx5_ib_unlock_cqs(send_cq, recv_cq);
1172 } 1557 }
1173 1558
1174 err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp); 1559 if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
1175 if (err) 1560 destroy_raw_packet_qp(dev, qp);
1176 mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn); 1561 } else {
1177 kfree(in); 1562 err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
1563 if (err)
1564 mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
1565 base->mqp.qpn);
1566 }
1178 1567
1568 kfree(in);
1179 1569
1180 if (qp->create_type == MLX5_QP_KERNEL) 1570 if (qp->create_type == MLX5_QP_KERNEL)
1181 destroy_qp_kernel(dev, qp); 1571 destroy_qp_kernel(dev, qp);
1182 else if (qp->create_type == MLX5_QP_USER) 1572 else if (qp->create_type == MLX5_QP_USER)
1183 destroy_qp_user(&get_pd(qp)->ibpd, qp); 1573 destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
1184} 1574}
1185 1575
1186static const char *ib_qp_type_str(enum ib_qp_type type) 1576static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -1234,6 +1624,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1234 return ERR_PTR(-EINVAL); 1624 return ERR_PTR(-EINVAL);
1235 } 1625 }
1236 dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); 1626 dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
1627
1628 if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
1629 if (!pd->uobject) {
1630 mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
1631 return ERR_PTR(-EINVAL);
1632 } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
1633 mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
1634 return ERR_PTR(-EINVAL);
1635 }
1636 }
1237 } 1637 }
1238 1638
1239 switch (init_attr->qp_type) { 1639 switch (init_attr->qp_type) {
@@ -1250,6 +1650,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1250 } 1650 }
1251 1651
1252 /* fall through */ 1652 /* fall through */
1653 case IB_QPT_RAW_PACKET:
1253 case IB_QPT_RC: 1654 case IB_QPT_RC:
1254 case IB_QPT_UC: 1655 case IB_QPT_UC:
1255 case IB_QPT_UD: 1656 case IB_QPT_UD:
@@ -1272,19 +1673,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1272 else if (is_qp1(init_attr->qp_type)) 1673 else if (is_qp1(init_attr->qp_type))
1273 qp->ibqp.qp_num = 1; 1674 qp->ibqp.qp_num = 1;
1274 else 1675 else
1275 qp->ibqp.qp_num = qp->mqp.qpn; 1676 qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
1276 1677
1277 mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", 1678 mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
1278 qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn, 1679 qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
1680 to_mcq(init_attr->recv_cq)->mcq.cqn,
1279 to_mcq(init_attr->send_cq)->mcq.cqn); 1681 to_mcq(init_attr->send_cq)->mcq.cqn);
1280 1682
1281 qp->xrcdn = xrcdn; 1683 qp->trans_qp.xrcdn = xrcdn;
1282 1684
1283 break; 1685 break;
1284 1686
1285 case IB_QPT_RAW_IPV6: 1687 case IB_QPT_RAW_IPV6:
1286 case IB_QPT_RAW_ETHERTYPE: 1688 case IB_QPT_RAW_ETHERTYPE:
1287 case IB_QPT_RAW_PACKET:
1288 case IB_QPT_MAX: 1689 case IB_QPT_MAX:
1289 default: 1690 default:
1290 mlx5_ib_dbg(dev, "unsupported qp type %d\n", 1691 mlx5_ib_dbg(dev, "unsupported qp type %d\n",
@@ -1318,12 +1719,12 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
1318 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 1719 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1319 dest_rd_atomic = attr->max_dest_rd_atomic; 1720 dest_rd_atomic = attr->max_dest_rd_atomic;
1320 else 1721 else
1321 dest_rd_atomic = qp->resp_depth; 1722 dest_rd_atomic = qp->trans_qp.resp_depth;
1322 1723
1323 if (attr_mask & IB_QP_ACCESS_FLAGS) 1724 if (attr_mask & IB_QP_ACCESS_FLAGS)
1324 access_flags = attr->qp_access_flags; 1725 access_flags = attr->qp_access_flags;
1325 else 1726 else
1326 access_flags = qp->atomic_rd_en; 1727 access_flags = qp->trans_qp.atomic_rd_en;
1327 1728
1328 if (!dest_rd_atomic) 1729 if (!dest_rd_atomic)
1329 access_flags &= IB_ACCESS_REMOTE_WRITE; 1730 access_flags &= IB_ACCESS_REMOTE_WRITE;
@@ -1360,21 +1761,42 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
1360 return rate + MLX5_STAT_RATE_OFFSET; 1761 return rate + MLX5_STAT_RATE_OFFSET;
1361} 1762}
1362 1763
1363static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, 1764static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
1765 struct mlx5_ib_sq *sq, u8 sl)
1766{
1767 void *in;
1768 void *tisc;
1769 int inlen;
1770 int err;
1771
1772 inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
1773 in = mlx5_vzalloc(inlen);
1774 if (!in)
1775 return -ENOMEM;
1776
1777 MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
1778
1779 tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
1780 MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
1781
1782 err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
1783
1784 kvfree(in);
1785
1786 return err;
1787}
1788
1789static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1790 const struct ib_ah_attr *ah,
1364 struct mlx5_qp_path *path, u8 port, int attr_mask, 1791 struct mlx5_qp_path *path, u8 port, int attr_mask,
1365 u32 path_flags, const struct ib_qp_attr *attr) 1792 u32 path_flags, const struct ib_qp_attr *attr)
1366{ 1793{
1794 enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
1367 int err; 1795 int err;
1368 1796
1369 path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
1370 path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
1371
1372 if (attr_mask & IB_QP_PKEY_INDEX) 1797 if (attr_mask & IB_QP_PKEY_INDEX)
1373 path->pkey_index = attr->pkey_index; 1798 path->pkey_index = attr->pkey_index;
1374 1799
1375 path->grh_mlid = ah->src_path_bits & 0x7f;
1376 path->rlid = cpu_to_be16(ah->dlid);
1377
1378 if (ah->ah_flags & IB_AH_GRH) { 1800 if (ah->ah_flags & IB_AH_GRH) {
1379 if (ah->grh.sgid_index >= 1801 if (ah->grh.sgid_index >=
1380 dev->mdev->port_caps[port - 1].gid_table_len) { 1802 dev->mdev->port_caps[port - 1].gid_table_len) {
@@ -1383,7 +1805,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
1383 dev->mdev->port_caps[port - 1].gid_table_len); 1805 dev->mdev->port_caps[port - 1].gid_table_len);
1384 return -EINVAL; 1806 return -EINVAL;
1385 } 1807 }
1386 path->grh_mlid |= 1 << 7; 1808 }
1809
1810 if (ll == IB_LINK_LAYER_ETHERNET) {
1811 if (!(ah->ah_flags & IB_AH_GRH))
1812 return -EINVAL;
1813 memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
1814 path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
1815 ah->grh.sgid_index);
1816 path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
1817 } else {
1818 path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
1819 path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
1820 0;
1821 path->rlid = cpu_to_be16(ah->dlid);
1822 path->grh_mlid = ah->src_path_bits & 0x7f;
1823 if (ah->ah_flags & IB_AH_GRH)
1824 path->grh_mlid |= 1 << 7;
1825 path->dci_cfi_prio_sl = ah->sl & 0xf;
1826 }
1827
1828 if (ah->ah_flags & IB_AH_GRH) {
1387 path->mgid_index = ah->grh.sgid_index; 1829 path->mgid_index = ah->grh.sgid_index;
1388 path->hop_limit = ah->grh.hop_limit; 1830 path->hop_limit = ah->grh.hop_limit;
1389 path->tclass_flowlabel = 1831 path->tclass_flowlabel =
@@ -1401,7 +1843,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
1401 if (attr_mask & IB_QP_TIMEOUT) 1843 if (attr_mask & IB_QP_TIMEOUT)
1402 path->ackto_lt = attr->timeout << 3; 1844 path->ackto_lt = attr->timeout << 3;
1403 1845
1404 path->sl = ah->sl & 0xf; 1846 if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
1847 return modify_raw_packet_eth_prio(dev->mdev,
1848 &qp->raw_packet_qp.sq,
1849 ah->sl & 0xf);
1405 1850
1406 return 0; 1851 return 0;
1407} 1852}
@@ -1549,12 +1994,154 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
1549 return result; 1994 return result;
1550} 1995}
1551 1996
1997static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
1998 struct mlx5_ib_rq *rq, int new_state)
1999{
2000 void *in;
2001 void *rqc;
2002 int inlen;
2003 int err;
2004
2005 inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
2006 in = mlx5_vzalloc(inlen);
2007 if (!in)
2008 return -ENOMEM;
2009
2010 MLX5_SET(modify_rq_in, in, rq_state, rq->state);
2011
2012 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
2013 MLX5_SET(rqc, rqc, state, new_state);
2014
2015 err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
2016 if (err)
2017 goto out;
2018
2019 rq->state = new_state;
2020
2021out:
2022 kvfree(in);
2023 return err;
2024}
2025
2026static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
2027 struct mlx5_ib_sq *sq, int new_state)
2028{
2029 void *in;
2030 void *sqc;
2031 int inlen;
2032 int err;
2033
2034 inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
2035 in = mlx5_vzalloc(inlen);
2036 if (!in)
2037 return -ENOMEM;
2038
2039 MLX5_SET(modify_sq_in, in, sq_state, sq->state);
2040
2041 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
2042 MLX5_SET(sqc, sqc, state, new_state);
2043
2044 err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
2045 if (err)
2046 goto out;
2047
2048 sq->state = new_state;
2049
2050out:
2051 kvfree(in);
2052 return err;
2053}
2054
2055static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2056 u16 operation)
2057{
2058 struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
2059 struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
2060 struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
2061 int rq_state;
2062 int sq_state;
2063 int err;
2064
2065 switch (operation) {
2066 case MLX5_CMD_OP_RST2INIT_QP:
2067 rq_state = MLX5_RQC_STATE_RDY;
2068 sq_state = MLX5_SQC_STATE_RDY;
2069 break;
2070 case MLX5_CMD_OP_2ERR_QP:
2071 rq_state = MLX5_RQC_STATE_ERR;
2072 sq_state = MLX5_SQC_STATE_ERR;
2073 break;
2074 case MLX5_CMD_OP_2RST_QP:
2075 rq_state = MLX5_RQC_STATE_RST;
2076 sq_state = MLX5_SQC_STATE_RST;
2077 break;
2078 case MLX5_CMD_OP_INIT2INIT_QP:
2079 case MLX5_CMD_OP_INIT2RTR_QP:
2080 case MLX5_CMD_OP_RTR2RTS_QP:
2081 case MLX5_CMD_OP_RTS2RTS_QP:
2082 /* Nothing to do here... */
2083 return 0;
2084 default:
2085 WARN_ON(1);
2086 return -EINVAL;
2087 }
2088
2089 if (qp->rq.wqe_cnt) {
2090 err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
2091 if (err)
2092 return err;
2093 }
2094
2095 if (qp->sq.wqe_cnt)
2096 return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
2097
2098 return 0;
2099}
2100
1552static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, 2101static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1553 const struct ib_qp_attr *attr, int attr_mask, 2102 const struct ib_qp_attr *attr, int attr_mask,
1554 enum ib_qp_state cur_state, enum ib_qp_state new_state) 2103 enum ib_qp_state cur_state, enum ib_qp_state new_state)
1555{ 2104{
2105 static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
2106 [MLX5_QP_STATE_RST] = {
2107 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
2108 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
2109 [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
2110 },
2111 [MLX5_QP_STATE_INIT] = {
2112 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
2113 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
2114 [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
2115 [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
2116 },
2117 [MLX5_QP_STATE_RTR] = {
2118 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
2119 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
2120 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
2121 },
2122 [MLX5_QP_STATE_RTS] = {
2123 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
2124 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
2125 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
2126 },
2127 [MLX5_QP_STATE_SQD] = {
2128 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
2129 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
2130 },
2131 [MLX5_QP_STATE_SQER] = {
2132 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
2133 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
2134 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
2135 },
2136 [MLX5_QP_STATE_ERR] = {
2137 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
2138 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
2139 }
2140 };
2141
1556 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 2142 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1557 struct mlx5_ib_qp *qp = to_mqp(ibqp); 2143 struct mlx5_ib_qp *qp = to_mqp(ibqp);
2144 struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
1558 struct mlx5_ib_cq *send_cq, *recv_cq; 2145 struct mlx5_ib_cq *send_cq, *recv_cq;
1559 struct mlx5_qp_context *context; 2146 struct mlx5_qp_context *context;
1560 struct mlx5_modify_qp_mbox_in *in; 2147 struct mlx5_modify_qp_mbox_in *in;
@@ -1564,6 +2151,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1564 int sqd_event; 2151 int sqd_event;
1565 int mlx5_st; 2152 int mlx5_st;
1566 int err; 2153 int err;
2154 u16 op;
1567 2155
1568 in = kzalloc(sizeof(*in), GFP_KERNEL); 2156 in = kzalloc(sizeof(*in), GFP_KERNEL);
1569 if (!in) 2157 if (!in)
@@ -1623,7 +2211,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1623 context->pri_path.port = attr->port_num; 2211 context->pri_path.port = attr->port_num;
1624 2212
1625 if (attr_mask & IB_QP_AV) { 2213 if (attr_mask & IB_QP_AV) {
1626 err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path, 2214 err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
1627 attr_mask & IB_QP_PORT ? attr->port_num : qp->port, 2215 attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
1628 attr_mask, 0, attr); 2216 attr_mask, 0, attr);
1629 if (err) 2217 if (err)
@@ -1634,7 +2222,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1634 context->pri_path.ackto_lt |= attr->timeout << 3; 2222 context->pri_path.ackto_lt |= attr->timeout << 3;
1635 2223
1636 if (attr_mask & IB_QP_ALT_PATH) { 2224 if (attr_mask & IB_QP_ALT_PATH) {
1637 err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path, 2225 err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
2226 &context->alt_path,
1638 attr->alt_port_num, attr_mask, 0, attr); 2227 attr->alt_port_num, attr_mask, 0, attr);
1639 if (err) 2228 if (err)
1640 goto out; 2229 goto out;
@@ -1706,41 +2295,51 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1706 * again to RTS, and may cause the driver and the device to get out of 2295 * again to RTS, and may cause the driver and the device to get out of
1707 * sync. */ 2296 * sync. */
1708 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && 2297 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
1709 (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) 2298 (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
2299 (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
1710 mlx5_ib_qp_disable_pagefaults(qp); 2300 mlx5_ib_qp_disable_pagefaults(qp);
1711 2301
2302 if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
2303 !optab[mlx5_cur][mlx5_new])
2304 goto out;
2305
2306 op = optab[mlx5_cur][mlx5_new];
1712 optpar = ib_mask_to_mlx5_opt(attr_mask); 2307 optpar = ib_mask_to_mlx5_opt(attr_mask);
1713 optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; 2308 optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
1714 in->optparam = cpu_to_be32(optpar); 2309 in->optparam = cpu_to_be32(optpar);
1715 err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state), 2310
1716 to_mlx5_state(new_state), in, sqd_event, 2311 if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
1717 &qp->mqp); 2312 err = modify_raw_packet_qp(dev, qp, op);
2313 else
2314 err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
2315 &base->mqp);
1718 if (err) 2316 if (err)
1719 goto out; 2317 goto out;
1720 2318
1721 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 2319 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
2320 (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
1722 mlx5_ib_qp_enable_pagefaults(qp); 2321 mlx5_ib_qp_enable_pagefaults(qp);
1723 2322
1724 qp->state = new_state; 2323 qp->state = new_state;
1725 2324
1726 if (attr_mask & IB_QP_ACCESS_FLAGS) 2325 if (attr_mask & IB_QP_ACCESS_FLAGS)
1727 qp->atomic_rd_en = attr->qp_access_flags; 2326 qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
1728 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 2327 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1729 qp->resp_depth = attr->max_dest_rd_atomic; 2328 qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
1730 if (attr_mask & IB_QP_PORT) 2329 if (attr_mask & IB_QP_PORT)
1731 qp->port = attr->port_num; 2330 qp->port = attr->port_num;
1732 if (attr_mask & IB_QP_ALT_PATH) 2331 if (attr_mask & IB_QP_ALT_PATH)
1733 qp->alt_port = attr->alt_port_num; 2332 qp->trans_qp.alt_port = attr->alt_port_num;
1734 2333
1735 /* 2334 /*
1736 * If we moved a kernel QP to RESET, clean up all old CQ 2335 * If we moved a kernel QP to RESET, clean up all old CQ
1737 * entries and reinitialize the QP. 2336 * entries and reinitialize the QP.
1738 */ 2337 */
1739 if (new_state == IB_QPS_RESET && !ibqp->uobject) { 2338 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
1740 mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, 2339 mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
1741 ibqp->srq ? to_msrq(ibqp->srq) : NULL); 2340 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
1742 if (send_cq != recv_cq) 2341 if (send_cq != recv_cq)
1743 mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); 2342 mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
1744 2343
1745 qp->rq.head = 0; 2344 qp->rq.head = 0;
1746 qp->rq.tail = 0; 2345 qp->rq.tail = 0;
@@ -1765,15 +2364,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1765 enum ib_qp_state cur_state, new_state; 2364 enum ib_qp_state cur_state, new_state;
1766 int err = -EINVAL; 2365 int err = -EINVAL;
1767 int port; 2366 int port;
2367 enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
1768 2368
1769 mutex_lock(&qp->mutex); 2369 mutex_lock(&qp->mutex);
1770 2370
1771 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; 2371 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
1772 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 2372 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1773 2373
2374 if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
2375 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
2376 ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
2377 }
2378
1774 if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && 2379 if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
1775 !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 2380 !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
1776 IB_LINK_LAYER_UNSPECIFIED)) 2381 ll))
1777 goto out; 2382 goto out;
1778 2383
1779 if ((attr_mask & IB_QP_PORT) && 2384 if ((attr_mask & IB_QP_PORT) &&
@@ -2570,7 +3175,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
2570 3175
2571 ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | 3176 ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
2572 mlx5_opcode | ((u32)opmod << 24)); 3177 mlx5_opcode | ((u32)opmod << 24));
2573 ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); 3178 ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
2574 ctrl->fm_ce_se |= fence; 3179 ctrl->fm_ce_se |= fence;
2575 qp->fm_cache = next_fence; 3180 qp->fm_cache = next_fence;
2576 if (unlikely(qp->wq_sig)) 3181 if (unlikely(qp->wq_sig))
@@ -3003,7 +3608,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
3003 ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports)) 3608 ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
3004 return; 3609 return;
3005 3610
3006 ib_ah_attr->sl = path->sl & 0xf; 3611 ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
3007 3612
3008 ib_ah_attr->dlid = be16_to_cpu(path->rlid); 3613 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
3009 ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f; 3614 ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
@@ -3021,39 +3626,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
3021 } 3626 }
3022} 3627}
3023 3628
3024int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, 3629static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
3025 struct ib_qp_init_attr *qp_init_attr) 3630 struct mlx5_ib_sq *sq,
3631 u8 *sq_state)
3632{
3633 void *out;
3634 void *sqc;
3635 int inlen;
3636 int err;
3637
3638 inlen = MLX5_ST_SZ_BYTES(query_sq_out);
3639 out = mlx5_vzalloc(inlen);
3640 if (!out)
3641 return -ENOMEM;
3642
3643 err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
3644 if (err)
3645 goto out;
3646
3647 sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
3648 *sq_state = MLX5_GET(sqc, sqc, state);
3649 sq->state = *sq_state;
3650
3651out:
3652 kvfree(out);
3653 return err;
3654}
3655
3656static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
3657 struct mlx5_ib_rq *rq,
3658 u8 *rq_state)
3659{
3660 void *out;
3661 void *rqc;
3662 int inlen;
3663 int err;
3664
3665 inlen = MLX5_ST_SZ_BYTES(query_rq_out);
3666 out = mlx5_vzalloc(inlen);
3667 if (!out)
3668 return -ENOMEM;
3669
3670 err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
3671 if (err)
3672 goto out;
3673
3674 rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
3675 *rq_state = MLX5_GET(rqc, rqc, state);
3676 rq->state = *rq_state;
3677
3678out:
3679 kvfree(out);
3680 return err;
3681}
3682
3683static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
3684 struct mlx5_ib_qp *qp, u8 *qp_state)
3685{
3686 static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
3687 [MLX5_RQC_STATE_RST] = {
3688 [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
3689 [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
3690 [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE_BAD,
3691 [MLX5_SQ_STATE_NA] = IB_QPS_RESET,
3692 },
3693 [MLX5_RQC_STATE_RDY] = {
3694 [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
3695 [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
3696 [MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
3697 [MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
3698 },
3699 [MLX5_RQC_STATE_ERR] = {
3700 [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
3701 [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
3702 [MLX5_SQC_STATE_ERR] = IB_QPS_ERR,
3703 [MLX5_SQ_STATE_NA] = IB_QPS_ERR,
3704 },
3705 [MLX5_RQ_STATE_NA] = {
3706 [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
3707 [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
3708 [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
3709 [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
3710 },
3711 };
3712
3713 *qp_state = sqrq_trans[rq_state][sq_state];
3714
3715 if (*qp_state == MLX5_QP_STATE_BAD) {
3716 WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
3717 qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
3718 qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
3719 return -EINVAL;
3720 }
3721
3722 if (*qp_state == MLX5_QP_STATE)
3723 *qp_state = qp->state;
3724
3725 return 0;
3726}
3727
3728static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
3729 struct mlx5_ib_qp *qp,
3730 u8 *raw_packet_qp_state)
3731{
3732 struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
3733 struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
3734 struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
3735 int err;
3736 u8 sq_state = MLX5_SQ_STATE_NA;
3737 u8 rq_state = MLX5_RQ_STATE_NA;
3738
3739 if (qp->sq.wqe_cnt) {
3740 err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
3741 if (err)
3742 return err;
3743 }
3744
3745 if (qp->rq.wqe_cnt) {
3746 err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
3747 if (err)
3748 return err;
3749 }
3750
3751 return sqrq_state_to_qp_state(sq_state, rq_state, qp,
3752 raw_packet_qp_state);
3753}
3754
3755static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
3756 struct ib_qp_attr *qp_attr)
3026{ 3757{
3027 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
3028 struct mlx5_ib_qp *qp = to_mqp(ibqp);
3029 struct mlx5_query_qp_mbox_out *outb; 3758 struct mlx5_query_qp_mbox_out *outb;
3030 struct mlx5_qp_context *context; 3759 struct mlx5_qp_context *context;
3031 int mlx5_state; 3760 int mlx5_state;
3032 int err = 0; 3761 int err = 0;
3033 3762
3034#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
3035 /*
3036 * Wait for any outstanding page faults, in case the user frees memory
3037 * based upon this query's result.
3038 */
3039 flush_workqueue(mlx5_ib_page_fault_wq);
3040#endif
3041
3042 mutex_lock(&qp->mutex);
3043 outb = kzalloc(sizeof(*outb), GFP_KERNEL); 3763 outb = kzalloc(sizeof(*outb), GFP_KERNEL);
3044 if (!outb) { 3764 if (!outb)
3045 err = -ENOMEM; 3765 return -ENOMEM;
3046 goto out; 3766
3047 }
3048 context = &outb->ctx; 3767 context = &outb->ctx;
3049 err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb)); 3768 err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
3769 sizeof(*outb));
3050 if (err) 3770 if (err)
3051 goto out_free; 3771 goto out;
3052 3772
3053 mlx5_state = be32_to_cpu(context->flags) >> 28; 3773 mlx5_state = be32_to_cpu(context->flags) >> 28;
3054 3774
3055 qp->state = to_ib_qp_state(mlx5_state); 3775 qp->state = to_ib_qp_state(mlx5_state);
3056 qp_attr->qp_state = qp->state;
3057 qp_attr->path_mtu = context->mtu_msgmax >> 5; 3776 qp_attr->path_mtu = context->mtu_msgmax >> 5;
3058 qp_attr->path_mig_state = 3777 qp_attr->path_mig_state =
3059 to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); 3778 to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -3087,6 +3806,43 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
3087 qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; 3806 qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
3088 qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7; 3807 qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
3089 qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3; 3808 qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
3809
3810out:
3811 kfree(outb);
3812 return err;
3813}
3814
3815int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
3816 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
3817{
3818 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
3819 struct mlx5_ib_qp *qp = to_mqp(ibqp);
3820 int err = 0;
3821 u8 raw_packet_qp_state;
3822
3823#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
3824 /*
3825 * Wait for any outstanding page faults, in case the user frees memory
3826 * based upon this query's result.
3827 */
3828 flush_workqueue(mlx5_ib_page_fault_wq);
3829#endif
3830
3831 mutex_lock(&qp->mutex);
3832
3833 if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
3834 err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
3835 if (err)
3836 goto out;
3837 qp->state = raw_packet_qp_state;
3838 qp_attr->port_num = 1;
3839 } else {
3840 err = query_qp_attr(dev, qp, qp_attr);
3841 if (err)
3842 goto out;
3843 }
3844
3845 qp_attr->qp_state = qp->state;
3090 qp_attr->cur_qp_state = qp_attr->qp_state; 3846 qp_attr->cur_qp_state = qp_attr->qp_state;
3091 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; 3847 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
3092 qp_attr->cap.max_recv_sge = qp->rq.max_gs; 3848 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
@@ -3110,12 +3866,16 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
3110 if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) 3866 if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
3111 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; 3867 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
3112 3868
3869 if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
3870 qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
3871 if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
3872 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
3873 if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
3874 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
3875
3113 qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? 3876 qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
3114 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; 3877 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
3115 3878
3116out_free:
3117 kfree(outb);
3118
3119out: 3879out:
3120 mutex_unlock(&qp->mutex); 3880 mutex_unlock(&qp->mutex);
3121 return err; 3881 return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index e008505e96e9..4659256cd95e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -78,28 +78,41 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
78 struct ib_udata *udata, int buf_size, int *inlen) 78 struct ib_udata *udata, int buf_size, int *inlen)
79{ 79{
80 struct mlx5_ib_dev *dev = to_mdev(pd->device); 80 struct mlx5_ib_dev *dev = to_mdev(pd->device);
81 struct mlx5_ib_create_srq ucmd; 81 struct mlx5_ib_create_srq ucmd = {};
82 size_t ucmdlen; 82 size_t ucmdlen;
83 void *xsrqc;
83 int err; 84 int err;
84 int npages; 85 int npages;
85 int page_shift; 86 int page_shift;
86 int ncont; 87 int ncont;
87 u32 offset; 88 u32 offset;
89 u32 uidx = MLX5_IB_DEFAULT_UIDX;
90 int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
88 91
89 ucmdlen = 92 if (drv_data < 0)
90 (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < 93 return -EINVAL;
91 sizeof(ucmd)) ? (sizeof(ucmd) - 94
92 sizeof(ucmd.reserved)) : sizeof(ucmd); 95 ucmdlen = (drv_data < sizeof(ucmd)) ?
96 drv_data : sizeof(ucmd);
93 97
94 if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) { 98 if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
95 mlx5_ib_dbg(dev, "failed copy udata\n"); 99 mlx5_ib_dbg(dev, "failed copy udata\n");
96 return -EFAULT; 100 return -EFAULT;
97 } 101 }
98 102
99 if (ucmdlen == sizeof(ucmd) && 103 if (ucmd.reserved0 || ucmd.reserved1)
100 ucmd.reserved != 0)
101 return -EINVAL; 104 return -EINVAL;
102 105
106 if (drv_data > sizeof(ucmd) &&
107 !ib_is_udata_cleared(udata, sizeof(ucmd),
108 drv_data - sizeof(ucmd)))
109 return -EINVAL;
110
111 err = get_srq_user_index(to_mucontext(pd->uobject->context),
112 &ucmd, udata->inlen, &uidx);
113 if (err)
114 return err;
115
103 srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); 116 srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
104 117
105 srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, 118 srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -138,6 +151,12 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
138 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; 151 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
139 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); 152 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
140 153
154 if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
155 xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
156 xrc_srq_context_entry);
157 MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
158 }
159
141 return 0; 160 return 0;
142 161
143err_in: 162err_in:
@@ -158,6 +177,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
158 struct mlx5_wqe_srq_next_seg *next; 177 struct mlx5_wqe_srq_next_seg *next;
159 int page_shift; 178 int page_shift;
160 int npages; 179 int npages;
180 void *xsrqc;
161 181
162 err = mlx5_db_alloc(dev->mdev, &srq->db); 182 err = mlx5_db_alloc(dev->mdev, &srq->db);
163 if (err) { 183 if (err) {
@@ -204,6 +224,13 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
204 224
205 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; 225 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
206 226
227 if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
228 xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
229 xrc_srq_context_entry);
230 /* 0xffffff means we ask to work with cqe version 0 */
231 MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
232 }
233
207 return 0; 234 return 0;
208 235
209err_in: 236err_in:
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index 76fb7b927d37..b94a55404a59 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -35,6 +35,8 @@
35 35
36#include <linux/types.h> 36#include <linux/types.h>
37 37
38#include "mlx5_ib.h"
39
38enum { 40enum {
39 MLX5_QP_FLAG_SIGNATURE = 1 << 0, 41 MLX5_QP_FLAG_SIGNATURE = 1 << 0,
40 MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, 42 MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
@@ -66,7 +68,15 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
66 __u32 total_num_uuars; 68 __u32 total_num_uuars;
67 __u32 num_low_latency_uuars; 69 __u32 num_low_latency_uuars;
68 __u32 flags; 70 __u32 flags;
69 __u32 reserved; 71 __u32 comp_mask;
72 __u8 max_cqe_version;
73 __u8 reserved0;
74 __u16 reserved1;
75 __u32 reserved2;
76};
77
78enum mlx5_ib_alloc_ucontext_resp_mask {
79 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
70}; 80};
71 81
72struct mlx5_ib_alloc_ucontext_resp { 82struct mlx5_ib_alloc_ucontext_resp {
@@ -80,7 +90,13 @@ struct mlx5_ib_alloc_ucontext_resp {
80 __u32 max_recv_wr; 90 __u32 max_recv_wr;
81 __u32 max_srq_recv_wr; 91 __u32 max_srq_recv_wr;
82 __u16 num_ports; 92 __u16 num_ports;
83 __u16 reserved; 93 __u16 reserved1;
94 __u32 comp_mask;
95 __u32 response_length;
96 __u8 cqe_version;
97 __u8 reserved2;
98 __u16 reserved3;
99 __u64 hca_core_clock_offset;
84}; 100};
85 101
86struct mlx5_ib_alloc_pd_resp { 102struct mlx5_ib_alloc_pd_resp {
@@ -110,7 +126,9 @@ struct mlx5_ib_create_srq {
110 __u64 buf_addr; 126 __u64 buf_addr;
111 __u64 db_addr; 127 __u64 db_addr;
112 __u32 flags; 128 __u32 flags;
113 __u32 reserved; /* explicit padding (optional on i386) */ 129 __u32 reserved0; /* explicit padding (optional on i386) */
130 __u32 uidx;
131 __u32 reserved1;
114}; 132};
115 133
116struct mlx5_ib_create_srq_resp { 134struct mlx5_ib_create_srq_resp {
@@ -125,9 +143,48 @@ struct mlx5_ib_create_qp {
125 __u32 rq_wqe_count; 143 __u32 rq_wqe_count;
126 __u32 rq_wqe_shift; 144 __u32 rq_wqe_shift;
127 __u32 flags; 145 __u32 flags;
146 __u32 uidx;
147 __u32 reserved0;
148 __u64 sq_buf_addr;
128}; 149};
129 150
130struct mlx5_ib_create_qp_resp { 151struct mlx5_ib_create_qp_resp {
131 __u32 uuar_index; 152 __u32 uuar_index;
132}; 153};
154
155static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
156 struct mlx5_ib_create_qp *ucmd,
157 int inlen,
158 u32 *user_index)
159{
160 u8 cqe_version = ucontext->cqe_version;
161
162 if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
163 !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
164 return 0;
165
166 if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
167 !!cqe_version))
168 return -EINVAL;
169
170 return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
171}
172
173static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
174 struct mlx5_ib_create_srq *ucmd,
175 int inlen,
176 u32 *user_index)
177{
178 u8 cqe_version = ucontext->cqe_version;
179
180 if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
181 !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
182 return 0;
183
184 if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
185 !!cqe_version))
186 return -EINVAL;
187
188 return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
189}
133#endif /* MLX5_IB_USER_H */ 190#endif /* MLX5_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 40ba83338155..a6531ffe29a6 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -608,9 +608,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
608 entry->opcode = IB_WC_FETCH_ADD; 608 entry->opcode = IB_WC_FETCH_ADD;
609 entry->byte_len = MTHCA_ATOMIC_BYTE_LEN; 609 entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
610 break; 610 break;
611 case MTHCA_OPCODE_BIND_MW:
612 entry->opcode = IB_WC_BIND_MW;
613 break;
614 default: 611 default:
615 entry->opcode = MTHCA_OPCODE_INVALID; 612 entry->opcode = MTHCA_OPCODE_INVALID;
616 break; 613 break;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index dc2d48c59e62..9866c35cc977 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -898,89 +898,6 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
898 return &mr->ibmr; 898 return &mr->ibmr;
899} 899}
900 900
901static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
902 struct ib_phys_buf *buffer_list,
903 int num_phys_buf,
904 int acc,
905 u64 *iova_start)
906{
907 struct mthca_mr *mr;
908 u64 *page_list;
909 u64 total_size;
910 unsigned long mask;
911 int shift;
912 int npages;
913 int err;
914 int i, j, n;
915
916 mask = buffer_list[0].addr ^ *iova_start;
917 total_size = 0;
918 for (i = 0; i < num_phys_buf; ++i) {
919 if (i != 0)
920 mask |= buffer_list[i].addr;
921 if (i != num_phys_buf - 1)
922 mask |= buffer_list[i].addr + buffer_list[i].size;
923
924 total_size += buffer_list[i].size;
925 }
926
927 if (mask & ~PAGE_MASK)
928 return ERR_PTR(-EINVAL);
929
930 shift = __ffs(mask | 1 << 31);
931
932 buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
933 buffer_list[0].addr &= ~0ull << shift;
934
935 mr = kmalloc(sizeof *mr, GFP_KERNEL);
936 if (!mr)
937 return ERR_PTR(-ENOMEM);
938
939 npages = 0;
940 for (i = 0; i < num_phys_buf; ++i)
941 npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
942
943 if (!npages)
944 return &mr->ibmr;
945
946 page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
947 if (!page_list) {
948 kfree(mr);
949 return ERR_PTR(-ENOMEM);
950 }
951
952 n = 0;
953 for (i = 0; i < num_phys_buf; ++i)
954 for (j = 0;
955 j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
956 ++j)
957 page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
958
959 mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
960 "in PD %x; shift %d, npages %d.\n",
961 (unsigned long long) buffer_list[0].addr,
962 (unsigned long long) *iova_start,
963 to_mpd(pd)->pd_num,
964 shift, npages);
965
966 err = mthca_mr_alloc_phys(to_mdev(pd->device),
967 to_mpd(pd)->pd_num,
968 page_list, shift, npages,
969 *iova_start, total_size,
970 convert_access(acc), mr);
971
972 if (err) {
973 kfree(page_list);
974 kfree(mr);
975 return ERR_PTR(err);
976 }
977
978 kfree(page_list);
979 mr->umem = NULL;
980
981 return &mr->ibmr;
982}
983
984static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 901static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
985 u64 virt, int acc, struct ib_udata *udata) 902 u64 virt, int acc, struct ib_udata *udata)
986{ 903{
@@ -1346,7 +1263,6 @@ int mthca_register_device(struct mthca_dev *dev)
1346 dev->ib_dev.destroy_cq = mthca_destroy_cq; 1263 dev->ib_dev.destroy_cq = mthca_destroy_cq;
1347 dev->ib_dev.poll_cq = mthca_poll_cq; 1264 dev->ib_dev.poll_cq = mthca_poll_cq;
1348 dev->ib_dev.get_dma_mr = mthca_get_dma_mr; 1265 dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
1349 dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
1350 dev->ib_dev.reg_user_mr = mthca_reg_user_mr; 1266 dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
1351 dev->ib_dev.dereg_mr = mthca_dereg_mr; 1267 dev->ib_dev.dereg_mr = mthca_dereg_mr;
1352 dev->ib_dev.get_port_immutable = mthca_port_immutable; 1268 dev->ib_dev.get_port_immutable = mthca_port_immutable;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 35fe506e2cfa..96e5fb91fb48 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1485,7 +1485,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1485 u16 pkey; 1485 u16 pkey;
1486 1486
1487 ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, 1487 ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
1488 mthca_ah_grh_present(to_mah(wr->ah)), 0, 1488 mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0,
1489 &sqp->ud_header); 1489 &sqp->ud_header);
1490 1490
1491 err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header); 1491 err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 8a3ad170d790..cb9f0f27308d 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -134,7 +134,7 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16);
134/* External CM API Interface */ 134/* External CM API Interface */
135/* instance of function pointers for client API */ 135/* instance of function pointers for client API */
136/* set address of this instance to cm_core->cm_ops at cm_core alloc */ 136/* set address of this instance to cm_core->cm_ops at cm_core alloc */
137static struct nes_cm_ops nes_cm_api = { 137static const struct nes_cm_ops nes_cm_api = {
138 mini_cm_accelerated, 138 mini_cm_accelerated,
139 mini_cm_listen, 139 mini_cm_listen,
140 mini_cm_del_listen, 140 mini_cm_del_listen,
@@ -3232,7 +3232,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3232 int passive_state; 3232 int passive_state;
3233 struct nes_ib_device *nesibdev; 3233 struct nes_ib_device *nesibdev;
3234 struct ib_mr *ibmr = NULL; 3234 struct ib_mr *ibmr = NULL;
3235 struct ib_phys_buf ibphysbuf;
3236 struct nes_pd *nespd; 3235 struct nes_pd *nespd;
3237 u64 tagged_offset; 3236 u64 tagged_offset;
3238 u8 mpa_frame_offset = 0; 3237 u8 mpa_frame_offset = 0;
@@ -3316,21 +3315,19 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3316 u64temp = (unsigned long)nesqp; 3315 u64temp = (unsigned long)nesqp;
3317 nesibdev = nesvnic->nesibdev; 3316 nesibdev = nesvnic->nesibdev;
3318 nespd = nesqp->nespd; 3317 nespd = nesqp->nespd;
3319 ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
3320 ibphysbuf.size = buff_len;
3321 tagged_offset = (u64)(unsigned long)*start_buff; 3318 tagged_offset = (u64)(unsigned long)*start_buff;
3322 ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd, 3319 ibmr = nes_reg_phys_mr(&nespd->ibpd,
3323 &ibphysbuf, 1, 3320 nesqp->ietf_frame_pbase + mpa_frame_offset,
3324 IB_ACCESS_LOCAL_WRITE, 3321 buff_len, IB_ACCESS_LOCAL_WRITE,
3325 &tagged_offset); 3322 &tagged_offset);
3326 if (!ibmr) { 3323 if (IS_ERR(ibmr)) {
3327 nes_debug(NES_DBG_CM, "Unable to register memory region" 3324 nes_debug(NES_DBG_CM, "Unable to register memory region"
3328 "for lSMM for cm_node = %p \n", 3325 "for lSMM for cm_node = %p \n",
3329 cm_node); 3326 cm_node);
3330 pci_free_consistent(nesdev->pcidev, 3327 pci_free_consistent(nesdev->pcidev,
3331 nesqp->private_data_len + nesqp->ietf_frame_size, 3328 nesqp->private_data_len + nesqp->ietf_frame_size,
3332 nesqp->ietf_frame, nesqp->ietf_frame_pbase); 3329 nesqp->ietf_frame, nesqp->ietf_frame_pbase);
3333 return -ENOMEM; 3330 return PTR_ERR(ibmr);
3334 } 3331 }
3335 3332
3336 ibmr->pd = &nespd->ibpd; 3333 ibmr->pd = &nespd->ibpd;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 32a6420c2940..147c2c884227 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -423,7 +423,7 @@ struct nes_cm_core {
423 423
424 struct timer_list tcp_timer; 424 struct timer_list tcp_timer;
425 425
426 struct nes_cm_ops *api; 426 const struct nes_cm_ops *api;
427 427
428 int (*post_event)(struct nes_cm_event *event); 428 int (*post_event)(struct nes_cm_event *event);
429 atomic_t events_posted; 429 atomic_t events_posted;
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 2042c0f29759..6d3a169c049b 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -727,7 +727,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
727 if (action == NES_ARP_DELETE) { 727 if (action == NES_ARP_DELETE) {
728 nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index); 728 nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
729 nesadapter->arp_table[arp_index].ip_addr = 0; 729 nesadapter->arp_table[arp_index].ip_addr = 0;
730 memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN); 730 eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
731 nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index); 731 nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
732 return arp_index; 732 return arp_index;
733 } 733 }
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 137880a19ebe..8c4daf7f22ec 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -206,80 +206,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
206} 206}
207 207
208 208
209/**
210 * nes_bind_mw
211 */
212static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
213 struct ib_mw_bind *ibmw_bind)
214{
215 u64 u64temp;
216 struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
217 struct nes_device *nesdev = nesvnic->nesdev;
218 /* struct nes_mr *nesmr = to_nesmw(ibmw); */
219 struct nes_qp *nesqp = to_nesqp(ibqp);
220 struct nes_hw_qp_wqe *wqe;
221 unsigned long flags = 0;
222 u32 head;
223 u32 wqe_misc = 0;
224 u32 qsize;
225
226 if (nesqp->ibqp_state > IB_QPS_RTS)
227 return -EINVAL;
228
229 spin_lock_irqsave(&nesqp->lock, flags);
230
231 head = nesqp->hwqp.sq_head;
232 qsize = nesqp->hwqp.sq_tail;
233
234 /* Check for SQ overflow */
235 if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
236 spin_unlock_irqrestore(&nesqp->lock, flags);
237 return -ENOMEM;
238 }
239
240 wqe = &nesqp->hwqp.sq_vbase[head];
241 /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */
242 nes_fill_init_qp_wqe(wqe, nesqp, head);
243 u64temp = ibmw_bind->wr_id;
244 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
245 wqe_misc = NES_IWARP_SQ_OP_BIND;
246
247 wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
248
249 if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
250 wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
251
252 if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
253 wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
254 if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
255 wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
256
257 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
258 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
259 ibmw_bind->bind_info.mr->lkey);
260 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
261 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
262 ibmw_bind->bind_info.length);
263 wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
264 u64temp = (u64)ibmw_bind->bind_info.addr;
265 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
266
267 head++;
268 if (head >= qsize)
269 head = 0;
270
271 nesqp->hwqp.sq_head = head;
272 barrier();
273
274 nes_write32(nesdev->regs+NES_WQE_ALLOC,
275 (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
276
277 spin_unlock_irqrestore(&nesqp->lock, flags);
278
279 return 0;
280}
281
282
283/* 209/*
284 * nes_alloc_fast_mr 210 * nes_alloc_fast_mr
285 */ 211 */
@@ -2074,9 +2000,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
2074/** 2000/**
2075 * nes_reg_phys_mr 2001 * nes_reg_phys_mr
2076 */ 2002 */
2077static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, 2003struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
2078 struct ib_phys_buf *buffer_list, int num_phys_buf, int acc, 2004 int acc, u64 *iova_start)
2079 u64 * iova_start)
2080{ 2005{
2081 u64 region_length; 2006 u64 region_length;
2082 struct nes_pd *nespd = to_nespd(ib_pd); 2007 struct nes_pd *nespd = to_nespd(ib_pd);
@@ -2088,13 +2013,10 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
2088 struct nes_vpbl vpbl; 2013 struct nes_vpbl vpbl;
2089 struct nes_root_vpbl root_vpbl; 2014 struct nes_root_vpbl root_vpbl;
2090 u32 stag; 2015 u32 stag;
2091 u32 i;
2092 unsigned long mask; 2016 unsigned long mask;
2093 u32 stag_index = 0; 2017 u32 stag_index = 0;
2094 u32 next_stag_index = 0; 2018 u32 next_stag_index = 0;
2095 u32 driver_key = 0; 2019 u32 driver_key = 0;
2096 u32 root_pbl_index = 0;
2097 u32 cur_pbl_index = 0;
2098 int err = 0; 2020 int err = 0;
2099 int ret = 0; 2021 int ret = 0;
2100 u16 pbl_count = 0; 2022 u16 pbl_count = 0;
@@ -2113,11 +2035,8 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
2113 2035
2114 next_stag_index >>= 8; 2036 next_stag_index >>= 8;
2115 next_stag_index %= nesadapter->max_mr; 2037 next_stag_index %= nesadapter->max_mr;
2116 if (num_phys_buf > (1024*512)) {
2117 return ERR_PTR(-E2BIG);
2118 }
2119 2038
2120 if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK) 2039 if ((addr ^ *iova_start) & ~PAGE_MASK)
2121 return ERR_PTR(-EINVAL); 2040 return ERR_PTR(-EINVAL);
2122 2041
2123 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr, 2042 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
@@ -2132,84 +2051,33 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
2132 return ERR_PTR(-ENOMEM); 2051 return ERR_PTR(-ENOMEM);
2133 } 2052 }
2134 2053
2135 for (i = 0; i < num_phys_buf; i++) { 2054 /* Allocate a 4K buffer for the PBL */
2055 vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
2056 &vpbl.pbl_pbase);
2057 nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
2058 vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
2059 if (!vpbl.pbl_vbase) {
2060 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
2061 ibmr = ERR_PTR(-ENOMEM);
2062 kfree(nesmr);
2063 goto reg_phys_err;
2064 }
2136 2065
2137 if ((i & 0x01FF) == 0) {
2138 if (root_pbl_index == 1) {
2139 /* Allocate the root PBL */
2140 root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
2141 &root_vpbl.pbl_pbase);
2142 nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
2143 root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
2144 if (!root_vpbl.pbl_vbase) {
2145 pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
2146 vpbl.pbl_pbase);
2147 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
2148 kfree(nesmr);
2149 return ERR_PTR(-ENOMEM);
2150 }
2151 root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
2152 if (!root_vpbl.leaf_vpbl) {
2153 pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
2154 root_vpbl.pbl_pbase);
2155 pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
2156 vpbl.pbl_pbase);
2157 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
2158 kfree(nesmr);
2159 return ERR_PTR(-ENOMEM);
2160 }
2161 root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
2162 root_vpbl.pbl_vbase[0].pa_high =
2163 cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
2164 root_vpbl.leaf_vpbl[0] = vpbl;
2165 }
2166 /* Allocate a 4K buffer for the PBL */
2167 vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
2168 &vpbl.pbl_pbase);
2169 nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
2170 vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
2171 if (!vpbl.pbl_vbase) {
2172 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
2173 ibmr = ERR_PTR(-ENOMEM);
2174 kfree(nesmr);
2175 goto reg_phys_err;
2176 }
2177 /* Fill in the root table */
2178 if (1 <= root_pbl_index) {
2179 root_vpbl.pbl_vbase[root_pbl_index].pa_low =
2180 cpu_to_le32((u32)vpbl.pbl_pbase);
2181 root_vpbl.pbl_vbase[root_pbl_index].pa_high =
2182 cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
2183 root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
2184 }
2185 root_pbl_index++;
2186 cur_pbl_index = 0;
2187 }
2188 2066
2189 mask = !buffer_list[i].size; 2067 mask = !size;
2190 if (i != 0)
2191 mask |= buffer_list[i].addr;
2192 if (i != num_phys_buf - 1)
2193 mask |= buffer_list[i].addr + buffer_list[i].size;
2194
2195 if (mask & ~PAGE_MASK) {
2196 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
2197 nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
2198 ibmr = ERR_PTR(-EINVAL);
2199 kfree(nesmr);
2200 goto reg_phys_err;
2201 }
2202 2068
2203 region_length += buffer_list[i].size; 2069 if (mask & ~PAGE_MASK) {
2204 if ((i != 0) && (single_page)) { 2070 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
2205 if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr) 2071 nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
2206 single_page = 0; 2072 ibmr = ERR_PTR(-EINVAL);
2207 } 2073 kfree(nesmr);
2208 vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK); 2074 goto reg_phys_err;
2209 vpbl.pbl_vbase[cur_pbl_index++].pa_high =
2210 cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
2211 } 2075 }
2212 2076
2077 region_length += size;
2078 vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
2079 vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
2080
2213 stag = stag_index << 8; 2081 stag = stag_index << 8;
2214 stag |= driver_key; 2082 stag |= driver_key;
2215 stag += (u32)stag_key; 2083 stag += (u32)stag_key;
@@ -2219,17 +2087,15 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
2219 stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index); 2087 stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
2220 2088
2221 /* Make the leaf PBL the root if only one PBL */ 2089 /* Make the leaf PBL the root if only one PBL */
2222 if (root_pbl_index == 1) { 2090 root_vpbl.pbl_pbase = vpbl.pbl_pbase;
2223 root_vpbl.pbl_pbase = vpbl.pbl_pbase;
2224 }
2225 2091
2226 if (single_page) { 2092 if (single_page) {
2227 pbl_count = 0; 2093 pbl_count = 0;
2228 } else { 2094 } else {
2229 pbl_count = root_pbl_index; 2095 pbl_count = 1;
2230 } 2096 }
2231 ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl, 2097 ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
2232 buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start, 2098 addr, pbl_count, 1, acc, iova_start,
2233 &nesmr->pbls_used, &nesmr->pbl_4k); 2099 &nesmr->pbls_used, &nesmr->pbl_4k);
2234 2100
2235 if (ret == 0) { 2101 if (ret == 0) {
@@ -2242,21 +2108,9 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
2242 ibmr = ERR_PTR(-ENOMEM); 2108 ibmr = ERR_PTR(-ENOMEM);
2243 } 2109 }
2244 2110
2245 reg_phys_err: 2111reg_phys_err:
2246 /* free the resources */ 2112 /* single PBL case */
2247 if (root_pbl_index == 1) { 2113 pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
2248 /* single PBL case */
2249 pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
2250 } else {
2251 for (i=0; i<root_pbl_index; i++) {
2252 pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
2253 root_vpbl.leaf_vpbl[i].pbl_pbase);
2254 }
2255 kfree(root_vpbl.leaf_vpbl);
2256 pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
2257 root_vpbl.pbl_pbase);
2258 }
2259
2260 return ibmr; 2114 return ibmr;
2261} 2115}
2262 2116
@@ -2266,17 +2120,13 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
2266 */ 2120 */
2267static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc) 2121static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
2268{ 2122{
2269 struct ib_phys_buf bl;
2270 u64 kva = 0; 2123 u64 kva = 0;
2271 2124
2272 nes_debug(NES_DBG_MR, "\n"); 2125 nes_debug(NES_DBG_MR, "\n");
2273 2126
2274 bl.size = (u64)0xffffffffffULL; 2127 return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
2275 bl.addr = 0;
2276 return nes_reg_phys_mr(pd, &bl, 1, acc, &kva);
2277} 2128}
2278 2129
2279
2280/** 2130/**
2281 * nes_reg_user_mr 2131 * nes_reg_user_mr
2282 */ 2132 */
@@ -3888,12 +3738,10 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
3888 nesibdev->ibdev.destroy_cq = nes_destroy_cq; 3738 nesibdev->ibdev.destroy_cq = nes_destroy_cq;
3889 nesibdev->ibdev.poll_cq = nes_poll_cq; 3739 nesibdev->ibdev.poll_cq = nes_poll_cq;
3890 nesibdev->ibdev.get_dma_mr = nes_get_dma_mr; 3740 nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
3891 nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr;
3892 nesibdev->ibdev.reg_user_mr = nes_reg_user_mr; 3741 nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
3893 nesibdev->ibdev.dereg_mr = nes_dereg_mr; 3742 nesibdev->ibdev.dereg_mr = nes_dereg_mr;
3894 nesibdev->ibdev.alloc_mw = nes_alloc_mw; 3743 nesibdev->ibdev.alloc_mw = nes_alloc_mw;
3895 nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; 3744 nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
3896 nesibdev->ibdev.bind_mw = nes_bind_mw;
3897 3745
3898 nesibdev->ibdev.alloc_mr = nes_alloc_mr; 3746 nesibdev->ibdev.alloc_mr = nes_alloc_mr;
3899 nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; 3747 nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
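With the ib_phys_buf list removed, nes_reg_phys_mr() now describes exactly one physically contiguous, page-aligned region. The full-address-space registration done by nes_get_dma_mr() above doubles as a minimal usage sketch of the new signature:

	u64 iova = 0;
	struct ib_mr *mr;

	mr = nes_reg_phys_mr(pd, 0, 0xffffffffffULL,
			     IB_ACCESS_LOCAL_WRITE, &iova);
	if (IS_ERR(mr))
		return PTR_ERR(mr);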
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index a204b677af22..70290883d067 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -190,4 +190,8 @@ struct nes_qp {
190 u8 pau_state; 190 u8 pau_state;
191 __u64 nesuqp_addr; 191 __u64 nesuqp_addr;
192}; 192};
193
194struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
195 u64 addr, u64 size, int acc, u64 *iova_start);
196
193#endif /* NES_VERBS_H */ 197#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 9820074be59d..3790771f2baa 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -152,9 +152,10 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
152 if ((pd->uctx) && 152 if ((pd->uctx) &&
153 (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && 153 (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
154 (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) { 154 (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
155 status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid, 155 status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
156 attr->dmac, &vlan_tag, 156 attr->dmac, &vlan_tag,
157 sgid_attr.ndev->ifindex); 157 &sgid_attr.ndev->ifindex,
158 NULL);
158 if (status) { 159 if (status) {
159 pr_err("%s(): Failed to resolve dmac from gid." 160 pr_err("%s(): Failed to resolve dmac from gid."
160 "status = %d\n", __func__, status); 161 "status = %d\n", __func__, status);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 3afb40b85159..573849354cb9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -175,7 +175,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
175 dev->ibdev.req_notify_cq = ocrdma_arm_cq; 175 dev->ibdev.req_notify_cq = ocrdma_arm_cq;
176 176
177 dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; 177 dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
178 dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
179 dev->ibdev.dereg_mr = ocrdma_dereg_mr; 178 dev->ibdev.dereg_mr = ocrdma_dereg_mr;
180 dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; 179 dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
181 180
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 76e96f97b3f6..d4c687b548d8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -3066,169 +3066,6 @@ pl_err:
3066 return ERR_PTR(-ENOMEM); 3066 return ERR_PTR(-ENOMEM);
3067} 3067}
3068 3068
3069#define MAX_KERNEL_PBE_SIZE 65536
3070static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
3071 int buf_cnt, u32 *pbe_size)
3072{
3073 u64 total_size = 0;
3074 u64 buf_size = 0;
3075 int i;
3076 *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
3077 *pbe_size = roundup_pow_of_two(*pbe_size);
3078
3079 /* find the smallest PBE size that we can have */
3080 for (i = 0; i < buf_cnt; i++) {
3081 /* first addr may not be page aligned, so ignore checking */
3082 if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
3083 (buf_list[i].size & ~PAGE_MASK))) {
3084 return 0;
3085 }
3086
3087 /* if configured PBE size is greater then the chosen one,
3088 * reduce the PBE size.
3089 */
3090 buf_size = roundup(buf_list[i].size, PAGE_SIZE);
3091 /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
3092 buf_size = roundup_pow_of_two(buf_size);
3093 if (*pbe_size > buf_size)
3094 *pbe_size = buf_size;
3095
3096 total_size += buf_size;
3097 }
3098 *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
3099 (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
3100
3101 /* num_pbes = total_size / (*pbe_size); this is implemented below. */
3102
3103 return total_size >> ilog2(*pbe_size);
3104}
3105
3106static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
3107 u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
3108 struct ocrdma_hw_mr *hwmr)
3109{
3110 int i;
3111 int idx;
3112 int pbes_per_buf = 0;
3113 u64 buf_addr = 0;
3114 int num_pbes;
3115 struct ocrdma_pbe *pbe;
3116 int total_num_pbes = 0;
3117
3118 if (!hwmr->num_pbes)
3119 return;
3120
3121 pbe = (struct ocrdma_pbe *)pbl_tbl->va;
3122 num_pbes = 0;
3123
3124 /* go through the OS phy regions & fill hw pbe entries into pbls. */
3125 for (i = 0; i < ib_buf_cnt; i++) {
3126 buf_addr = buf_list[i].addr;
3127 pbes_per_buf =
3128 roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
3129 pbe_size;
3130 hwmr->len += buf_list[i].size;
3131 /* number of pbes can be more for one OS buf, when
3132 * buffers are of different sizes.
3133 * split the ib_buf to one or more pbes.
3134 */
3135 for (idx = 0; idx < pbes_per_buf; idx++) {
3136 /* we program always page aligned addresses,
3137 * first unaligned address is taken care by fbo.
3138 */
3139 if (i == 0) {
3140 /* for non zero fbo, assign the
3141 * start of the page.
3142 */
3143 pbe->pa_lo =
3144 cpu_to_le32((u32) (buf_addr & PAGE_MASK));
3145 pbe->pa_hi =
3146 cpu_to_le32((u32) upper_32_bits(buf_addr));
3147 } else {
3148 pbe->pa_lo =
3149 cpu_to_le32((u32) (buf_addr & 0xffffffff));
3150 pbe->pa_hi =
3151 cpu_to_le32((u32) upper_32_bits(buf_addr));
3152 }
3153 buf_addr += pbe_size;
3154 num_pbes += 1;
3155 total_num_pbes += 1;
3156 pbe++;
3157
3158 if (total_num_pbes == hwmr->num_pbes)
3159 goto mr_tbl_done;
3160 /* if the pbl is full storing the pbes,
3161 * move to next pbl.
3162 */
3163 if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
3164 pbl_tbl++;
3165 pbe = (struct ocrdma_pbe *)pbl_tbl->va;
3166 num_pbes = 0;
3167 }
3168 }
3169 }
3170mr_tbl_done:
3171 return;
3172}
3173
3174struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
3175 struct ib_phys_buf *buf_list,
3176 int buf_cnt, int acc, u64 *iova_start)
3177{
3178 int status = -ENOMEM;
3179 struct ocrdma_mr *mr;
3180 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
3181 struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
3182 u32 num_pbes;
3183 u32 pbe_size = 0;
3184
3185 if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
3186 return ERR_PTR(-EINVAL);
3187
3188 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3189 if (!mr)
3190 return ERR_PTR(status);
3191
3192 num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
3193 if (num_pbes == 0) {
3194 status = -EINVAL;
3195 goto pbl_err;
3196 }
3197 status = ocrdma_get_pbl_info(dev, mr, num_pbes);
3198 if (status)
3199 goto pbl_err;
3200
3201 mr->hwmr.pbe_size = pbe_size;
3202 mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
3203 mr->hwmr.va = *iova_start;
3204 mr->hwmr.local_rd = 1;
3205 mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
3206 mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
3207 mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
3208 mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
3209 mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
3210
3211 status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
3212 if (status)
3213 goto pbl_err;
3214 build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
3215 &mr->hwmr);
3216 status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
3217 if (status)
3218 goto mbx_err;
3219
3220 mr->ibmr.lkey = mr->hwmr.lkey;
3221 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
3222 mr->ibmr.rkey = mr->hwmr.lkey;
3223 return &mr->ibmr;
3224
3225mbx_err:
3226 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
3227pbl_err:
3228 kfree(mr);
3229 return ERR_PTR(status);
3230}
3231
3232static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr) 3069static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
3233{ 3070{
3234 struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); 3071 struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index a2f3b4dc20b0..8b517fd36779 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -117,9 +117,6 @@ int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
117 117
118int ocrdma_dereg_mr(struct ib_mr *); 118int ocrdma_dereg_mr(struct ib_mr *);
119struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); 119struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
120struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
121 struct ib_phys_buf *buffer_list,
122 int num_phys_buf, int acc, u64 *iova_start);
123struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, 120struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
124 u64 virt, int acc, struct ib_udata *); 121 u64 virt, int acc, struct ib_udata *);
125struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, 122struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 294f5c706be9..5f53304e8a9b 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -150,10 +150,7 @@ static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
150 rval = init_qib_mregion(&mr->mr, pd, count); 150 rval = init_qib_mregion(&mr->mr, pd, count);
151 if (rval) 151 if (rval)
152 goto bail; 152 goto bail;
153 /* 153
154 * ib_reg_phys_mr() will initialize mr->ibmr except for
155 * lkey and rkey.
156 */
157 rval = qib_alloc_lkey(&mr->mr, 0); 154 rval = qib_alloc_lkey(&mr->mr, 0);
158 if (rval) 155 if (rval)
159 goto bail_mregion; 156 goto bail_mregion;
@@ -171,52 +168,6 @@ bail:
171} 168}
172 169
173/** 170/**
174 * qib_reg_phys_mr - register a physical memory region
175 * @pd: protection domain for this memory region
176 * @buffer_list: pointer to the list of physical buffers to register
177 * @num_phys_buf: the number of physical buffers to register
178 * @iova_start: the starting address passed over IB which maps to this MR
179 *
180 * Returns the memory region on success, otherwise returns an errno.
181 */
182struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
183 struct ib_phys_buf *buffer_list,
184 int num_phys_buf, int acc, u64 *iova_start)
185{
186 struct qib_mr *mr;
187 int n, m, i;
188 struct ib_mr *ret;
189
190 mr = alloc_mr(num_phys_buf, pd);
191 if (IS_ERR(mr)) {
192 ret = (struct ib_mr *)mr;
193 goto bail;
194 }
195
196 mr->mr.user_base = *iova_start;
197 mr->mr.iova = *iova_start;
198 mr->mr.access_flags = acc;
199
200 m = 0;
201 n = 0;
202 for (i = 0; i < num_phys_buf; i++) {
203 mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
204 mr->mr.map[m]->segs[n].length = buffer_list[i].size;
205 mr->mr.length += buffer_list[i].size;
206 n++;
207 if (n == QIB_SEGSZ) {
208 m++;
209 n = 0;
210 }
211 }
212
213 ret = &mr->ibmr;
214
215bail:
216 return ret;
217}
218
219/**
220 * qib_reg_user_mr - register a userspace memory region 171 * qib_reg_user_mr - register a userspace memory region
221 * @pd: protection domain for this memory region 172 * @pd: protection domain for this memory region
222 * @start: starting userspace address 173 * @start: starting userspace address
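This removal is part of retiring the reg_phys_mr verb across the drivers touched above (nes, ocrdma, qib). Kernel consumers that only need an MR covering DMA addresses use the existing DMA MR verb instead; a minimal sketch, assuming a local-write-only registration:

	struct ib_mr *mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);

	if (IS_ERR(mr))
		return PTR_ERR(mr);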
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 40f85bb3e0d3..3eff35c2d453 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -100,9 +100,10 @@ static u32 credit_table[31] = {
100 32768 /* 1E */ 100 32768 /* 1E */
101}; 101};
102 102
103static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) 103static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
104 gfp_t gfp)
104{ 105{
105 unsigned long page = get_zeroed_page(GFP_KERNEL); 106 unsigned long page = get_zeroed_page(gfp);
106 107
107 /* 108 /*
108 * Free the page if someone raced with us installing it. 109 * Free the page if someone raced with us installing it.
@@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
121 * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. 122 * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
122 */ 123 */
123static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, 124static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
124 enum ib_qp_type type, u8 port) 125 enum ib_qp_type type, u8 port, gfp_t gfp)
125{ 126{
126 u32 i, offset, max_scan, qpn; 127 u32 i, offset, max_scan, qpn;
127 struct qpn_map *map; 128 struct qpn_map *map;
@@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
151 max_scan = qpt->nmaps - !offset; 152 max_scan = qpt->nmaps - !offset;
152 for (i = 0;;) { 153 for (i = 0;;) {
153 if (unlikely(!map->page)) { 154 if (unlikely(!map->page)) {
154 get_map_page(qpt, map); 155 get_map_page(qpt, map, gfp);
155 if (unlikely(!map->page)) 156 if (unlikely(!map->page))
156 break; 157 break;
157 } 158 }
@@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
983 size_t sz; 984 size_t sz;
984 size_t sg_list_sz; 985 size_t sg_list_sz;
985 struct ib_qp *ret; 986 struct ib_qp *ret;
987 gfp_t gfp;
988
986 989
987 if (init_attr->cap.max_send_sge > ib_qib_max_sges || 990 if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
988 init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || 991 init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
989 init_attr->create_flags) { 992 init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
990 ret = ERR_PTR(-EINVAL); 993 return ERR_PTR(-EINVAL);
991 goto bail; 994
992 } 995 /* GFP_NOIO is applicable in RC QPs only */
996 if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
997 init_attr->qp_type != IB_QPT_RC)
998 return ERR_PTR(-EINVAL);
999
1000 gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
1001 GFP_NOIO : GFP_KERNEL;
993 1002
994 /* Check receive queue parameters if no SRQ is specified. */ 1003 /* Check receive queue parameters if no SRQ is specified. */
995 if (!init_attr->srq) { 1004 if (!init_attr->srq) {
@@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
1021 sz = sizeof(struct qib_sge) * 1030 sz = sizeof(struct qib_sge) *
1022 init_attr->cap.max_send_sge + 1031 init_attr->cap.max_send_sge +
1023 sizeof(struct qib_swqe); 1032 sizeof(struct qib_swqe);
1024 swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); 1033 swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
1034 gfp, PAGE_KERNEL);
1025 if (swq == NULL) { 1035 if (swq == NULL) {
1026 ret = ERR_PTR(-ENOMEM); 1036 ret = ERR_PTR(-ENOMEM);
1027 goto bail; 1037 goto bail;
@@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
1037 } else if (init_attr->cap.max_recv_sge > 1) 1047 } else if (init_attr->cap.max_recv_sge > 1)
1038 sg_list_sz = sizeof(*qp->r_sg_list) * 1048 sg_list_sz = sizeof(*qp->r_sg_list) *
1039 (init_attr->cap.max_recv_sge - 1); 1049 (init_attr->cap.max_recv_sge - 1);
1040 qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); 1050 qp = kzalloc(sz + sg_list_sz, gfp);
1041 if (!qp) { 1051 if (!qp) {
1042 ret = ERR_PTR(-ENOMEM); 1052 ret = ERR_PTR(-ENOMEM);
1043 goto bail_swq; 1053 goto bail_swq;
1044 } 1054 }
1045 RCU_INIT_POINTER(qp->next, NULL); 1055 RCU_INIT_POINTER(qp->next, NULL);
1046 qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); 1056 qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
1047 if (!qp->s_hdr) { 1057 if (!qp->s_hdr) {
1048 ret = ERR_PTR(-ENOMEM); 1058 ret = ERR_PTR(-ENOMEM);
1049 goto bail_qp; 1059 goto bail_qp;
@@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
1058 qp->r_rq.max_sge = init_attr->cap.max_recv_sge; 1068 qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
1059 sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + 1069 sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
1060 sizeof(struct qib_rwqe); 1070 sizeof(struct qib_rwqe);
1061 qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) + 1071 if (gfp != GFP_NOIO)
1062 qp->r_rq.size * sz); 1072 qp->r_rq.wq = vmalloc_user(
1073 sizeof(struct qib_rwq) +
1074 qp->r_rq.size * sz);
1075 else
1076 qp->r_rq.wq = __vmalloc(
1077 sizeof(struct qib_rwq) +
1078 qp->r_rq.size * sz,
1079 gfp, PAGE_KERNEL);
1080
1063 if (!qp->r_rq.wq) { 1081 if (!qp->r_rq.wq) {
1064 ret = ERR_PTR(-ENOMEM); 1082 ret = ERR_PTR(-ENOMEM);
1065 goto bail_qp; 1083 goto bail_qp;
@@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
1090 dev = to_idev(ibpd->device); 1108 dev = to_idev(ibpd->device);
1091 dd = dd_from_dev(dev); 1109 dd = dd_from_dev(dev);
1092 err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, 1110 err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
1093 init_attr->port_num); 1111 init_attr->port_num, gfp);
1094 if (err < 0) { 1112 if (err < 0) {
1095 ret = ERR_PTR(err); 1113 ret = ERR_PTR(err);
1096 vfree(qp->r_rq.wq); 1114 vfree(qp->r_rq.wq);
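IB_QP_CREATE_USE_GFP_NOIO lets storage ULPs sitting under the block layer avoid allocations that could recurse into I/O while a QP is being created; qib honours it only for RC QPs. An illustrative caller, not taken from this patch (the capacity numbers are arbitrary):

	struct ib_qp_init_attr init_attr = {
		.qp_type      = IB_QPT_RC,
		.create_flags = IB_QP_CREATE_USE_GFP_NOIO,
		.send_cq      = send_cq,
		.recv_cq      = recv_cq,
		.cap = {
			.max_send_wr  = 64,
			.max_recv_wr  = 64,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
	};
	struct ib_qp *qp = ib_create_qp(pd, &init_attr);

	if (IS_ERR(qp))
		return PTR_ERR(qp);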
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index de6cb6fcda8d..baf1e42b6896 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -346,6 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
346 unsigned long flags; 346 unsigned long flags;
347 struct qib_lkey_table *rkt; 347 struct qib_lkey_table *rkt;
348 struct qib_pd *pd; 348 struct qib_pd *pd;
349 int avoid_schedule = 0;
349 350
350 spin_lock_irqsave(&qp->s_lock, flags); 351 spin_lock_irqsave(&qp->s_lock, flags);
351 352
@@ -438,11 +439,15 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
438 qp->ibqp.qp_type == IB_QPT_RC) { 439 qp->ibqp.qp_type == IB_QPT_RC) {
439 if (wqe->length > 0x80000000U) 440 if (wqe->length > 0x80000000U)
440 goto bail_inval_free; 441 goto bail_inval_free;
442 if (wqe->length <= qp->pmtu)
443 avoid_schedule = 1;
441 } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport + 444 } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
442 qp->port_num - 1)->ibmtu) 445 qp->port_num - 1)->ibmtu) {
443 goto bail_inval_free; 446 goto bail_inval_free;
444 else 447 } else {
445 atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount); 448 atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
449 avoid_schedule = 1;
450 }
446 wqe->ssn = qp->s_ssn++; 451 wqe->ssn = qp->s_ssn++;
447 qp->s_head = next; 452 qp->s_head = next;
448 453
@@ -458,7 +463,7 @@ bail_inval_free:
458bail_inval: 463bail_inval:
459 ret = -EINVAL; 464 ret = -EINVAL;
460bail: 465bail:
461 if (!ret && !wr->next && 466 if (!ret && !wr->next && !avoid_schedule &&
462 !qib_sdma_empty( 467 !qib_sdma_empty(
463 dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) { 468 dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
464 qib_schedule_send(qp); 469 qib_schedule_send(qp);
@@ -2256,7 +2261,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
2256 ibdev->poll_cq = qib_poll_cq; 2261 ibdev->poll_cq = qib_poll_cq;
2257 ibdev->req_notify_cq = qib_req_notify_cq; 2262 ibdev->req_notify_cq = qib_req_notify_cq;
2258 ibdev->get_dma_mr = qib_get_dma_mr; 2263 ibdev->get_dma_mr = qib_get_dma_mr;
2259 ibdev->reg_phys_mr = qib_reg_phys_mr;
2260 ibdev->reg_user_mr = qib_reg_user_mr; 2264 ibdev->reg_user_mr = qib_reg_user_mr;
2261 ibdev->dereg_mr = qib_dereg_mr; 2265 ibdev->dereg_mr = qib_dereg_mr;
2262 ibdev->alloc_mr = qib_alloc_mr; 2266 ibdev->alloc_mr = qib_alloc_mr;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index bc803f33d5f6..6c5e77753d85 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1032,10 +1032,6 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
1032 1032
1033struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc); 1033struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
1034 1034
1035struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
1036 struct ib_phys_buf *buffer_list,
1037 int num_phys_buf, int acc, u64 *iova_start);
1038
1039struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1035struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1040 u64 virt_addr, int mr_access_flags, 1036 u64 virt_addr, int mr_access_flags,
1041 struct ib_udata *udata); 1037 struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
index f8ea069a3eaf..b2fb5286dbd9 100644
--- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c
+++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
@@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
286 struct qib_ibdev *dev = to_idev(ibqp->device); 286 struct qib_ibdev *dev = to_idev(ibqp->device);
287 struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); 287 struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
288 struct qib_mcast *mcast = NULL; 288 struct qib_mcast *mcast = NULL;
289 struct qib_mcast_qp *p, *tmp; 289 struct qib_mcast_qp *p, *tmp, *delp = NULL;
290 struct rb_node *n; 290 struct rb_node *n;
291 int last = 0; 291 int last = 0;
292 int ret; 292 int ret;
293 293
294 if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { 294 if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
295 ret = -EINVAL; 295 return -EINVAL;
296 goto bail;
297 }
298 296
299 spin_lock_irq(&ibp->lock); 297 spin_lock_irq(&ibp->lock);
300 298
@@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
303 while (1) { 301 while (1) {
304 if (n == NULL) { 302 if (n == NULL) {
305 spin_unlock_irq(&ibp->lock); 303 spin_unlock_irq(&ibp->lock);
306 ret = -EINVAL; 304 return -EINVAL;
307 goto bail;
308 } 305 }
309 306
310 mcast = rb_entry(n, struct qib_mcast, rb_node); 307 mcast = rb_entry(n, struct qib_mcast, rb_node);
@@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
328 */ 325 */
329 list_del_rcu(&p->list); 326 list_del_rcu(&p->list);
330 mcast->n_attached--; 327 mcast->n_attached--;
328 delp = p;
331 329
332 /* If this was the last attached QP, remove the GID too. */ 330 /* If this was the last attached QP, remove the GID too. */
333 if (list_empty(&mcast->qp_list)) { 331 if (list_empty(&mcast->qp_list)) {
@@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
338 } 336 }
339 337
340 spin_unlock_irq(&ibp->lock); 338 spin_unlock_irq(&ibp->lock);
339 /* QP not attached */
340 if (!delp)
341 return -EINVAL;
342 /*
343 * Wait for any list walkers to finish before freeing the
344 * list element.
345 */
346 wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
347 qib_mcast_qp_free(delp);
341 348
342 if (p) {
343 /*
344 * Wait for any list walkers to finish before freeing the
345 * list element.
346 */
347 wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
348 qib_mcast_qp_free(p);
349 }
350 if (last) { 349 if (last) {
351 atomic_dec(&mcast->refcount); 350 atomic_dec(&mcast->refcount);
352 wait_event(mcast->wait, !atomic_read(&mcast->refcount)); 351 wait_event(mcast->wait, !atomic_read(&mcast->refcount));
@@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
355 dev->n_mcast_grps_allocated--; 354 dev->n_mcast_grps_allocated--;
356 spin_unlock_irq(&dev->n_mcast_grps_lock); 355 spin_unlock_irq(&dev->n_mcast_grps_lock);
357 } 356 }
358 357 return 0;
359 ret = 0;
360
361bail:
362 return ret;
363} 358}
364 359
365int qib_mcast_tree_empty(struct qib_ibport *ibp) 360int qib_mcast_tree_empty(struct qib_ibport *ibp)
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
index 5e55b8bc6fe4..92dc66cc2d50 100644
--- a/drivers/infiniband/hw/usnic/usnic_debugfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -157,8 +157,9 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
157 qp_flow, 157 qp_flow,
158 &flowinfo_ops); 158 &flowinfo_ops);
159 if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) { 159 if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
160 usnic_err("Failed to create dbg fs entry for flow %u\n", 160 usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n",
161 qp_flow->flow->flow_id); 161 qp_flow->flow->flow_id,
162 PTR_ERR(qp_flow->dbgfs_dentry));
162 } 163 }
163} 164}
164 165
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index fcea3a24d3eb..5f44b66ccb86 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -521,7 +521,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
521 521
522 if (!status) { 522 if (!status) {
523 qp_grp->state = new_state; 523 qp_grp->state = new_state;
524 usnic_info("Transistioned %u from %s to %s", 524 usnic_info("Transitioned %u from %s to %s",
525 qp_grp->grp_id, 525 qp_grp->grp_id,
526 usnic_ib_qp_grp_state_to_string(old_state), 526 usnic_ib_qp_grp_state_to_string(old_state),
527 usnic_ib_qp_grp_state_to_string(new_state)); 527 usnic_ib_qp_grp_state_to_string(new_state));
@@ -575,7 +575,7 @@ alloc_res_chunk_list(struct usnic_vnic *vnic,
575 return res_chunk_list; 575 return res_chunk_list;
576 576
577out_free_res: 577out_free_res:
578 for (i--; i > 0; i--) 578 for (i--; i >= 0; i--)
579 usnic_vnic_put_resources(res_chunk_list[i]); 579 usnic_vnic_put_resources(res_chunk_list[i]);
580 kfree(res_chunk_list); 580 kfree(res_chunk_list);
581 return ERR_PTR(err); 581 return ERR_PTR(err);
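The one-character change fixes an unwind off-by-one: if allocating entry i fails, entries 0..i-1 were claimed and every one of them, including index 0, must be released. A generic sketch of the idiom (struct demo_res, claim_one() and release_one() are hypothetical):

	struct demo_res;					/* hypothetical */
	static struct demo_res *claim_one(int i);		/* hypothetical */
	static void release_one(struct demo_res *res);		/* hypothetical */

	static int claim_all(struct demo_res **res, int cnt)
	{
		int i;

		for (i = 0; i < cnt; i++) {
			res[i] = claim_one(i);
			if (IS_ERR(res[i]))
				goto unwind;
		}
		return 0;

	unwind:
		for (i--; i >= 0; i--)		/* ">= 0": slot 0 is freed too */
			release_one(res[i]);
		return -ENOMEM;
	}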
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index f8e3211689a3..6cdb4d23f78f 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -51,7 +51,7 @@
51 51
52static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver) 52static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
53{ 53{
54 *fw_ver = (u64) *fw_ver_str; 54 *fw_ver = *((u64 *)fw_ver_str);
55} 55}
56 56
57static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, 57static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
@@ -571,20 +571,20 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
571 571
572 qp_grp = to_uqp_grp(ibqp); 572 qp_grp = to_uqp_grp(ibqp);
573 573
574 /* TODO: Future Support All States */
575 mutex_lock(&qp_grp->vf->pf->usdev_lock); 574 mutex_lock(&qp_grp->vf->pf->usdev_lock);
576 if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) { 575 if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
577 status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL); 576 /* usnic devices only have one port */
578 } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) { 577 status = -EINVAL;
579 status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL); 578 goto out_unlock;
580 } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) { 579 }
581 status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL); 580 if (attr_mask & IB_QP_STATE) {
581 status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
582 } else { 582 } else {
583 usnic_err("Unexpected combination mask: %u state: %u\n", 583 usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
584 attr_mask & IB_QP_STATE, attr->qp_state);
585 status = -EINVAL; 584 status = -EINVAL;
586 } 585 }
587 586
587out_unlock:
588 mutex_unlock(&qp_grp->vf->pf->usdev_lock); 588 mutex_unlock(&qp_grp->vf->pf->usdev_lock);
589 return status; 589 return status;
590} 590}
@@ -625,8 +625,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
625 virt_addr, length); 625 virt_addr, length);
626 626
627 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 627 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
628 if (IS_ERR_OR_NULL(mr)) 628 if (!mr)
629 return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM); 629 return ERR_PTR(-ENOMEM);
630 630
631 mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length, 631 mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
632 access_flags, 0); 632 access_flags, 0);
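The MR allocation hunk is a plain correctness fix: kzalloc() reports failure with NULL, never with an ERR_PTR, so the IS_ERR_OR_NULL() test was misleading. The conventional shape, as the patch adopts it:

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);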
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 414eaa566bd9..0d9d2e6a14d5 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -43,8 +43,6 @@ int usnic_ib_query_device(struct ib_device *ibdev,
43 struct ib_udata *uhw); 43 struct ib_udata *uhw);
44int usnic_ib_query_port(struct ib_device *ibdev, u8 port, 44int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
45 struct ib_port_attr *props); 45 struct ib_port_attr *props);
46enum rdma_protocol_type
47usnic_ib_query_protocol(struct ib_device *device, u8 port_num);
48int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, 46int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
49 int qp_attr_mask, 47 int qp_attr_mask,
50 struct ib_qp_init_attr *qp_init_attr); 48 struct ib_qp_init_attr *qp_init_attr);
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
index 66de93fb8ea9..887510718690 100644
--- a/drivers/infiniband/hw/usnic/usnic_vnic.c
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -237,7 +237,7 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
237 struct usnic_vnic_res *res; 237 struct usnic_vnic_res *res;
238 int i; 238 int i;
239 239
240 if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner) 240 if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner)
241 return ERR_PTR(-EINVAL); 241 return ERR_PTR(-EINVAL);
242 242
243 ret = kzalloc(sizeof(*ret), GFP_ATOMIC); 243 ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
@@ -247,26 +247,28 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
247 return ERR_PTR(-ENOMEM); 247 return ERR_PTR(-ENOMEM);
248 } 248 }
249 249
250 ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC); 250 if (cnt > 0) {
251 if (!ret->res) { 251 ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
252 usnic_err("Failed to allocate resources for %s. Out of memory\n", 252 if (!ret->res) {
253 usnic_vnic_pci_name(vnic)); 253 usnic_err("Failed to allocate resources for %s. Out of memory\n",
254 kfree(ret); 254 usnic_vnic_pci_name(vnic));
255 return ERR_PTR(-ENOMEM); 255 kfree(ret);
256 } 256 return ERR_PTR(-ENOMEM);
257 }
257 258
258 spin_lock(&vnic->res_lock); 259 spin_lock(&vnic->res_lock);
259 src = &vnic->chunks[type]; 260 src = &vnic->chunks[type];
260 for (i = 0; i < src->cnt && ret->cnt < cnt; i++) { 261 for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
261 res = src->res[i]; 262 res = src->res[i];
262 if (!res->owner) { 263 if (!res->owner) {
263 src->free_cnt--; 264 src->free_cnt--;
264 res->owner = owner; 265 res->owner = owner;
265 ret->res[ret->cnt++] = res; 266 ret->res[ret->cnt++] = res;
267 }
266 } 268 }
267 }
268 269
269 spin_unlock(&vnic->res_lock); 270 spin_unlock(&vnic->res_lock);
271 }
270 ret->type = type; 272 ret->type = type;
271 ret->vnic = vnic; 273 ret->vnic = vnic;
272 WARN_ON(ret->cnt != cnt); 274 WARN_ON(ret->cnt != cnt);
@@ -281,14 +283,16 @@ void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
281 int i; 283 int i;
282 struct usnic_vnic *vnic = chunk->vnic; 284 struct usnic_vnic *vnic = chunk->vnic;
283 285
284 spin_lock(&vnic->res_lock); 286 if (chunk->cnt > 0) {
285 while ((i = --chunk->cnt) >= 0) { 287 spin_lock(&vnic->res_lock);
286 res = chunk->res[i]; 288 while ((i = --chunk->cnt) >= 0) {
287 chunk->res[i] = NULL; 289 res = chunk->res[i];
288 res->owner = NULL; 290 chunk->res[i] = NULL;
289 vnic->chunks[res->type].free_cnt++; 291 res->owner = NULL;
292 vnic->chunks[res->type].free_cnt++;
293 }
294 spin_unlock(&vnic->res_lock);
290 } 295 }
291 spin_unlock(&vnic->res_lock);
292 296
293 kfree(chunk->res); 297 kfree(chunk->res);
294 kfree(chunk); 298 kfree(chunk);
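Handling cnt == 0 also switches the array allocation to kcalloc(); when the count is zero the allocation is skipped and chunk->res stays NULL, which the later kfree() accepts as a no-op. The kcalloc() form is preferred for sized arrays because it rejects n * size overflow:

	/* equivalent results, but kcalloc() checks the multiplication: */
	buf = kzalloc(cnt * sizeof(*buf), GFP_ATOMIC);
	buf = kcalloc(cnt, sizeof(*buf), GFP_ATOMIC);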
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 3ede10309754..a6f3eab0f350 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -495,7 +495,6 @@ void ipoib_dev_cleanup(struct net_device *dev);
495void ipoib_mcast_join_task(struct work_struct *work); 495void ipoib_mcast_join_task(struct work_struct *work);
496void ipoib_mcast_carrier_on_task(struct work_struct *work); 496void ipoib_mcast_carrier_on_task(struct work_struct *work);
497void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); 497void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
498void ipoib_mcast_free(struct ipoib_mcast *mc);
499 498
500void ipoib_mcast_restart_task(struct work_struct *work); 499void ipoib_mcast_restart_task(struct work_struct *work);
501int ipoib_mcast_start_thread(struct net_device *dev); 500int ipoib_mcast_start_thread(struct net_device *dev);
@@ -549,8 +548,9 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
549 548
550int ipoib_mcast_attach(struct net_device *dev, u16 mlid, 549int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
551 union ib_gid *mgid, int set_qkey); 550 union ib_gid *mgid, int set_qkey);
552int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast); 551void ipoib_mcast_remove_list(struct list_head *remove_list);
553struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid); 552void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
553 struct list_head *remove_list);
554 554
555int ipoib_init_qp(struct net_device *dev); 555int ipoib_init_qp(struct net_device *dev);
556int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); 556int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3ae9726efb98..917e46ea3bf6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -70,7 +70,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
70#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff 70#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
71 71
72static struct ib_send_wr ipoib_cm_rx_drain_wr = { 72static struct ib_send_wr ipoib_cm_rx_drain_wr = {
73 .wr_id = IPOIB_CM_RX_DRAIN_WRID,
74 .opcode = IB_WR_SEND, 73 .opcode = IB_WR_SEND,
75}; 74};
76 75
@@ -223,6 +222,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
223 * error" WC will be immediately generated for each WR we post. 222 * error" WC will be immediately generated for each WR we post.
224 */ 223 */
225 p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); 224 p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
225 ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
226 if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) 226 if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
227 ipoib_warn(priv, "failed to post drain wr\n"); 227 ipoib_warn(priv, "failed to post drain wr\n");
228 228
@@ -1522,8 +1522,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
1522int ipoib_cm_dev_init(struct net_device *dev) 1522int ipoib_cm_dev_init(struct net_device *dev)
1523{ 1523{
1524 struct ipoib_dev_priv *priv = netdev_priv(dev); 1524 struct ipoib_dev_priv *priv = netdev_priv(dev);
1525 int i, ret; 1525 int max_srq_sge, i;
1526 struct ib_device_attr attr;
1527 1526
1528 INIT_LIST_HEAD(&priv->cm.passive_ids); 1527 INIT_LIST_HEAD(&priv->cm.passive_ids);
1529 INIT_LIST_HEAD(&priv->cm.reap_list); 1528 INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1540,19 +1539,13 @@ int ipoib_cm_dev_init(struct net_device *dev)
1540 1539
1541 skb_queue_head_init(&priv->cm.skb_queue); 1540 skb_queue_head_init(&priv->cm.skb_queue);
1542 1541
1543 ret = ib_query_device(priv->ca, &attr); 1542 ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
1544 if (ret) {
1545 printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
1546 return ret;
1547 }
1548
1549 ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
1550 1543
1551 attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge); 1544 max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
1552 ipoib_cm_create_srq(dev, attr.max_srq_sge); 1545 ipoib_cm_create_srq(dev, max_srq_sge);
1553 if (ipoib_cm_has_srq(dev)) { 1546 if (ipoib_cm_has_srq(dev)) {
1554 priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10; 1547 priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
1555 priv->cm.num_frags = attr.max_srq_sge; 1548 priv->cm.num_frags = max_srq_sge;
1556 ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n", 1549 ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
1557 priv->cm.max_cm_mtu, priv->cm.num_frags); 1550 priv->cm.max_cm_mtu, priv->cm.num_frags);
1558 } else { 1551 } else {
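The conversion relies on the core caching the device attributes in ib_device->attrs when the device is registered, so ULPs read limits directly and the query-failure path disappears. Minimal sketch of the pattern that the remaining ipoib, iser and ethtool hunks below repeat:

	/* before: kmalloc an ib_device_attr, ib_query_device(), handle
	 * failure, kfree; after: read the cached value directly. */
	int max_srq_sge = min_t(int, IPOIB_CM_RX_SG,
				priv->ca->attrs.max_srq_sge);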
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 078cadd6c797..a53fa5fc0dec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -40,15 +40,11 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
40 struct ethtool_drvinfo *drvinfo) 40 struct ethtool_drvinfo *drvinfo)
41{ 41{
42 struct ipoib_dev_priv *priv = netdev_priv(netdev); 42 struct ipoib_dev_priv *priv = netdev_priv(netdev);
43 struct ib_device_attr *attr; 43
44 44 snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
45 attr = kmalloc(sizeof(*attr), GFP_KERNEL); 45 "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
46 if (attr && !ib_query_device(priv->ca, attr)) 46 (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
47 snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), 47 (int)priv->ca->attrs.fw_ver & 0xffff);
48 "%d.%d.%d", (int)(attr->fw_ver >> 32),
49 (int)(attr->fw_ver >> 16) & 0xffff,
50 (int)attr->fw_ver & 0xffff);
51 kfree(attr);
52 48
53 strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device), 49 strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
54 sizeof(drvinfo->bus_info)); 50 sizeof(drvinfo->bus_info));
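fw_ver packs major/minor/sub-minor into a single u64 (major in bits 63:32, minor in 31:16, sub-minor in 15:0), which is what the shifts above decode. Worked example with an arbitrary sample value:

	u64 fw_ver = (2ULL << 32) | (9 << 16) | 13;	/* sample: "2.9.13" */
	int major  = (int)(fw_ver >> 32);		/* 2  */
	int minor  = (int)(fw_ver >> 16) & 0xffff;	/* 9  */
	int sub    = (int)fw_ver & 0xffff;		/* 13 */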
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7d3281866ffc..25509bbd4a05 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1150,8 +1150,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
1150 unsigned long flags; 1150 unsigned long flags;
1151 int i; 1151 int i;
1152 LIST_HEAD(remove_list); 1152 LIST_HEAD(remove_list);
1153 struct ipoib_mcast *mcast, *tmcast;
1154 struct net_device *dev = priv->dev;
1155 1153
1156 if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) 1154 if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
1157 return; 1155 return;
@@ -1179,18 +1177,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
1179 lockdep_is_held(&priv->lock))) != NULL) { 1177 lockdep_is_held(&priv->lock))) != NULL) {
1180 /* was the neigh idle for two GC periods */ 1178 /* was the neigh idle for two GC periods */
1181 if (time_after(neigh_obsolete, neigh->alive)) { 1179 if (time_after(neigh_obsolete, neigh->alive)) {
1182 u8 *mgid = neigh->daddr + 4;
1183 1180
1184 /* Is this multicast ? */ 1181 ipoib_check_and_add_mcast_sendonly(priv, neigh->daddr + 4, &remove_list);
1185 if (*mgid == 0xff) {
1186 mcast = __ipoib_mcast_find(dev, mgid);
1187
1188 if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
1189 list_del(&mcast->list);
1190 rb_erase(&mcast->rb_node, &priv->multicast_tree);
1191 list_add_tail(&mcast->list, &remove_list);
1192 }
1193 }
1194 1182
1195 rcu_assign_pointer(*np, 1183 rcu_assign_pointer(*np,
1196 rcu_dereference_protected(neigh->hnext, 1184 rcu_dereference_protected(neigh->hnext,
@@ -1207,10 +1195,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
1207 1195
1208out_unlock: 1196out_unlock:
1209 spin_unlock_irqrestore(&priv->lock, flags); 1197 spin_unlock_irqrestore(&priv->lock, flags);
1210 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 1198 ipoib_mcast_remove_list(&remove_list);
1211 ipoib_mcast_leave(dev, mcast);
1212 ipoib_mcast_free(mcast);
1213 }
1214} 1199}
1215 1200
1216static void ipoib_reap_neigh(struct work_struct *work) 1201static void ipoib_reap_neigh(struct work_struct *work)
@@ -1777,26 +1762,7 @@ int ipoib_add_pkey_attr(struct net_device *dev)
1777 1762
1778int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) 1763int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
1779{ 1764{
1780 struct ib_device_attr *device_attr; 1765 priv->hca_caps = hca->attrs.device_cap_flags;
1781 int result = -ENOMEM;
1782
1783 device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
1784 if (!device_attr) {
1785 printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
1786 hca->name, sizeof *device_attr);
1787 return result;
1788 }
1789
1790 result = ib_query_device(hca, device_attr);
1791 if (result) {
1792 printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
1793 hca->name, result);
1794 kfree(device_attr);
1795 return result;
1796 }
1797 priv->hca_caps = device_attr->device_cap_flags;
1798
1799 kfree(device_attr);
1800 1766
1801 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { 1767 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
1802 priv->dev->hw_features = NETIF_F_SG | 1768 priv->dev->hw_features = NETIF_F_SG |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index f357ca67a41c..050dfa175d16 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -106,7 +106,7 @@ static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
106 queue_delayed_work(priv->wq, &priv->mcast_task, 0); 106 queue_delayed_work(priv->wq, &priv->mcast_task, 0);
107} 107}
108 108
109void ipoib_mcast_free(struct ipoib_mcast *mcast) 109static void ipoib_mcast_free(struct ipoib_mcast *mcast)
110{ 110{
111 struct net_device *dev = mcast->dev; 111 struct net_device *dev = mcast->dev;
112 int tx_dropped = 0; 112 int tx_dropped = 0;
@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
153 return mcast; 153 return mcast;
154} 154}
155 155
156struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) 156static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
157{ 157{
158 struct ipoib_dev_priv *priv = netdev_priv(dev); 158 struct ipoib_dev_priv *priv = netdev_priv(dev);
159 struct rb_node *n = priv->multicast_tree.rb_node; 159 struct rb_node *n = priv->multicast_tree.rb_node;
@@ -677,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
677 return 0; 677 return 0;
678} 678}
679 679
680int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) 680static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
681{ 681{
682 struct ipoib_dev_priv *priv = netdev_priv(dev); 682 struct ipoib_dev_priv *priv = netdev_priv(dev);
683 int ret = 0; 683 int ret = 0;
@@ -704,6 +704,35 @@ int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
704 return 0; 704 return 0;
705} 705}
706 706
707/*
708 * Check if the multicast group is sendonly. If so remove it from the maps
709 * and add to the remove list
710 */
711void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
712 struct list_head *remove_list)
713{
714 /* Is this multicast ? */
715 if (*mgid == 0xff) {
716 struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid);
717
718 if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
719 list_del(&mcast->list);
720 rb_erase(&mcast->rb_node, &priv->multicast_tree);
721 list_add_tail(&mcast->list, remove_list);
722 }
723 }
724}
725
726void ipoib_mcast_remove_list(struct list_head *remove_list)
727{
728 struct ipoib_mcast *mcast, *tmcast;
729
730 list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
731 ipoib_mcast_leave(mcast->dev, mcast);
732 ipoib_mcast_free(mcast);
733 }
734}
735
707void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) 736void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
708{ 737{
709 struct ipoib_dev_priv *priv = netdev_priv(dev); 738 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -810,10 +839,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
810 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) 839 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
811 wait_for_completion(&mcast->done); 840 wait_for_completion(&mcast->done);
812 841
813 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 842 ipoib_mcast_remove_list(&remove_list);
814 ipoib_mcast_leave(dev, mcast);
815 ipoib_mcast_free(mcast);
816 }
817} 843}
818 844
819static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast) 845static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
@@ -939,10 +965,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
939 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) 965 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
940 wait_for_completion(&mcast->done); 966 wait_for_completion(&mcast->done);
941 967
942 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 968 ipoib_mcast_remove_list(&remove_list);
943 ipoib_mcast_leave(mcast->dev, mcast);
944 ipoib_mcast_free(mcast);
945 }
946 969
947 /* 970 /*
948 * Double check that we are still up 971 * Double check that we are still up
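The two open-coded leave-and-free loops above are now funneled through ipoib_mcast_remove_list(). A minimal caller sketch, assuming the usual ipoib_dev_priv fields (lock, multicast_list, multicast_tree) and "ipoib.h" for the declarations; example_flush_sendonly is an illustrative name, not part of the driver:

#include "ipoib.h"

static void example_flush_sendonly(struct ipoib_dev_priv *priv)
{
	struct ipoib_mcast *mcast, *tmcast;
	LIST_HEAD(remove_list);
	unsigned long flags;

	/* detach the doomed entries while holding the lock ... */
	spin_lock_irqsave(&priv->lock, flags);
	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			list_del(&mcast->list);
			rb_erase(&mcast->rb_node, &priv->multicast_tree);
			list_add_tail(&mcast->list, &remove_list);
		}
	}
	spin_unlock_irqrestore(&priv->lock, flags);

	/* ... then leave and free them through the single shared helper */
	ipoib_mcast_remove_list(&remove_list);
}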
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9080161e01af..c827c93f46c5 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -644,7 +644,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
644 644
645 ib_conn = &iser_conn->ib_conn; 645 ib_conn = &iser_conn->ib_conn;
646 if (ib_conn->pi_support) { 646 if (ib_conn->pi_support) {
647 u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap; 647 u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
648 648
649 scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); 649 scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
650 scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | 650 scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
@@ -656,7 +656,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
656 * max fastreg page list length. 656 * max fastreg page list length.
657 */ 657 */
658 shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize, 658 shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
659 ib_conn->device->dev_attr.max_fast_reg_page_list_len); 659 ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
660 shost->max_sectors = min_t(unsigned int, 660 shost->max_sectors = min_t(unsigned int,
661 1024, (shost->sg_tablesize * PAGE_SIZE) >> 9); 661 1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);
662 662
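Both hunks above read the attributes straight from ib_device->attrs, which the core caches at device registration, instead of a per-driver copy filled by ib_query_device(). A sketch of the access pattern (example_show_caps is hypothetical):

#include <rdma/ib_verbs.h>

static void example_show_caps(struct ib_device *ib_dev)
{
	struct ib_device_attr *attrs = &ib_dev->attrs;

	/* no ib_query_device() call needed; the core filled attrs already */
	pr_info("%s: max_qp_wr=%d max_fast_reg_page_list_len=%u sig_prot_cap=0x%x\n",
		ib_dev->name, attrs->max_qp_wr,
		attrs->max_fast_reg_page_list_len, attrs->sig_prot_cap);
}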
@@ -1059,7 +1059,8 @@ static int __init iser_init(void)
1059 release_wq = alloc_workqueue("release workqueue", 0, 0); 1059 release_wq = alloc_workqueue("release workqueue", 0, 0);
1060 if (!release_wq) { 1060 if (!release_wq) {
1061 iser_err("failed to allocate release workqueue\n"); 1061 iser_err("failed to allocate release workqueue\n");
1062 return -ENOMEM; 1062 err = -ENOMEM;
1063 goto err_alloc_wq;
1063 } 1064 }
1064 1065
1065 iscsi_iser_scsi_transport = iscsi_register_transport( 1066 iscsi_iser_scsi_transport = iscsi_register_transport(
@@ -1067,12 +1068,14 @@ static int __init iser_init(void)
1067 if (!iscsi_iser_scsi_transport) { 1068 if (!iscsi_iser_scsi_transport) {
1068 iser_err("iscsi_register_transport failed\n"); 1069 iser_err("iscsi_register_transport failed\n");
1069 err = -EINVAL; 1070 err = -EINVAL;
1070 goto register_transport_failure; 1071 goto err_reg;
1071 } 1072 }
1072 1073
1073 return 0; 1074 return 0;
1074 1075
1075register_transport_failure: 1076err_reg:
1077 destroy_workqueue(release_wq);
1078err_alloc_wq:
1076 kmem_cache_destroy(ig.desc_cache); 1079 kmem_cache_destroy(ig.desc_cache);
1077 1080
1078 return err; 1081 return err;
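The init path now unwinds in reverse order of setup, so a failed transport registration also destroys the workqueue allocated just before it. A condensed sketch of the resulting flow, assuming the file-scope release_wq, ig and iscsi_iser_transport objects defined elsewhere in iscsi_iser.c:

static int __init example_init(void)
{
	int err;

	release_wq = alloc_workqueue("release workqueue", 0, 0);
	if (!release_wq) {
		err = -ENOMEM;
		goto err_alloc_wq;
	}

	iscsi_iser_scsi_transport = iscsi_register_transport(&iscsi_iser_transport);
	if (!iscsi_iser_scsi_transport) {
		err = -EINVAL;
		goto err_reg;
	}

	return 0;

err_reg:
	destroy_workqueue(release_wq);		/* undo alloc_workqueue() */
err_alloc_wq:
	kmem_cache_destroy(ig.desc_cache);	/* undo the earlier cache creation */
	return err;
}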
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 8a5998e6a407..95f0a64e076b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -48,6 +48,7 @@
48#include <scsi/scsi_transport_iscsi.h> 48#include <scsi/scsi_transport_iscsi.h>
49#include <scsi/scsi_cmnd.h> 49#include <scsi/scsi_cmnd.h>
50#include <scsi/scsi_device.h> 50#include <scsi/scsi_device.h>
51#include <scsi/iser.h>
51 52
52#include <linux/interrupt.h> 53#include <linux/interrupt.h>
53#include <linux/wait.h> 54#include <linux/wait.h>
@@ -151,46 +152,10 @@
151 - ISER_MAX_RX_MISC_PDUS) / \ 152 - ISER_MAX_RX_MISC_PDUS) / \
152 (1 + ISER_INFLIGHT_DATAOUTS)) 153 (1 + ISER_INFLIGHT_DATAOUTS))
153 154
154#define ISER_WC_BATCH_COUNT 16
155#define ISER_SIGNAL_CMD_COUNT 32 155#define ISER_SIGNAL_CMD_COUNT 32
156 156
157#define ISER_VER 0x10
158#define ISER_WSV 0x08
159#define ISER_RSV 0x04
160
161#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
162#define ISER_BEACON_WRID 0xfffffffffffffffeULL
163
164/**
165 * struct iser_hdr - iSER header
166 *
167 * @flags: flags support (zbva, remote_inv)
168 * @rsvd: reserved
169 * @write_stag: write rkey
170 * @write_va: write virtual address
171 * @reaf_stag: read rkey
172 * @read_va: read virtual address
173 */
174struct iser_hdr {
175 u8 flags;
176 u8 rsvd[3];
177 __be32 write_stag;
178 __be64 write_va;
179 __be32 read_stag;
180 __be64 read_va;
181} __attribute__((packed));
182
183
184#define ISER_ZBVA_NOT_SUPPORTED 0x80
185#define ISER_SEND_W_INV_NOT_SUPPORTED 0x40
186
187struct iser_cm_hdr {
188 u8 flags;
189 u8 rsvd[3];
190} __packed;
191
192/* Constant PDU lengths calculations */ 157/* Constant PDU lengths calculations */
193#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr)) 158#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
194 159
195#define ISER_RECV_DATA_SEG_LEN 128 160#define ISER_RECV_DATA_SEG_LEN 128
196#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN) 161#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
@@ -269,7 +234,7 @@ enum iser_desc_type {
269#define ISER_MAX_WRS 7 234#define ISER_MAX_WRS 7
270 235
271/** 236/**
272 * struct iser_tx_desc - iSER TX descriptor (for send wr_id) 237 * struct iser_tx_desc - iSER TX descriptor
273 * 238 *
274 * @iser_header: iser header 239 * @iser_header: iser header
275 * @iscsi_header: iscsi header 240 * @iscsi_header: iscsi header
@@ -287,12 +252,13 @@ enum iser_desc_type {
287 * @sig_attrs: Signature attributes 252 * @sig_attrs: Signature attributes
288 */ 253 */
289struct iser_tx_desc { 254struct iser_tx_desc {
290 struct iser_hdr iser_header; 255 struct iser_ctrl iser_header;
291 struct iscsi_hdr iscsi_header; 256 struct iscsi_hdr iscsi_header;
292 enum iser_desc_type type; 257 enum iser_desc_type type;
293 u64 dma_addr; 258 u64 dma_addr;
294 struct ib_sge tx_sg[2]; 259 struct ib_sge tx_sg[2];
295 int num_sge; 260 int num_sge;
261 struct ib_cqe cqe;
296 bool mapped; 262 bool mapped;
297 u8 wr_idx; 263 u8 wr_idx;
298 union iser_wr { 264 union iser_wr {
@@ -306,9 +272,10 @@ struct iser_tx_desc {
306}; 272};
307 273
308#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ 274#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
309 sizeof(u64) + sizeof(struct ib_sge))) 275 sizeof(u64) + sizeof(struct ib_sge) + \
276 sizeof(struct ib_cqe)))
310/** 277/**
311 * struct iser_rx_desc - iSER RX descriptor (for recv wr_id) 278 * struct iser_rx_desc - iSER RX descriptor
312 * 279 *
313 * @iser_header: iser header 280 * @iser_header: iser header
314 * @iscsi_header: iscsi header 281 * @iscsi_header: iscsi header
@@ -318,12 +285,32 @@ struct iser_tx_desc {
318 * @pad: for sense data TODO: Modify to maximum sense length supported 285 * @pad: for sense data TODO: Modify to maximum sense length supported
319 */ 286 */
320struct iser_rx_desc { 287struct iser_rx_desc {
321 struct iser_hdr iser_header; 288 struct iser_ctrl iser_header;
322 struct iscsi_hdr iscsi_header; 289 struct iscsi_hdr iscsi_header;
323 char data[ISER_RECV_DATA_SEG_LEN]; 290 char data[ISER_RECV_DATA_SEG_LEN];
324 u64 dma_addr; 291 u64 dma_addr;
325 struct ib_sge rx_sg; 292 struct ib_sge rx_sg;
293 struct ib_cqe cqe;
326 char pad[ISER_RX_PAD_SIZE]; 294 char pad[ISER_RX_PAD_SIZE];
295} __packed;
296
297/**
298 * struct iser_login_desc - iSER login descriptor
299 *
300 * @req: pointer to login request buffer
 301 * @rsp: pointer to login response buffer
302 * @req_dma: DMA address of login request buffer
303 * @rsp_dma: DMA address of login response buffer
304 * @sge: IB sge for login post recv
305 * @cqe: completion handler
306 */
307struct iser_login_desc {
308 void *req;
309 void *rsp;
310 u64 req_dma;
311 u64 rsp_dma;
312 struct ib_sge sge;
313 struct ib_cqe cqe;
327} __attribute__((packed)); 314} __attribute__((packed));
328 315
329struct iser_conn; 316struct iser_conn;
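The descriptors above now embed a struct ib_cqe instead of being identified through wr_id; the completion handler recovers its descriptor with container_of(), which is exactly what the iser_rx()/iser_tx()/iser_login() helpers further down do. A minimal sketch of the wiring, with hypothetical example_* names:

#include <rdma/ib_verbs.h>

static void example_rx_done(struct ib_cq *cq, struct ib_wc *wc)
{
	/* wr_cqe points back into the posted descriptor */
	struct iser_rx_desc *desc = container_of(wc->wr_cqe,
						 struct iser_rx_desc, cqe);

	if (unlikely(wc->status != IB_WC_SUCCESS))
		return;			/* error completions are handled separately */

	pr_debug("rx desc %p, %u bytes\n", desc, wc->byte_len);
}

static void example_arm_rx(struct iser_rx_desc *desc, struct ib_recv_wr *wr)
{
	desc->cqe.done = example_rx_done;
	wr->wr_cqe = &desc->cqe;	/* replaces encoding desc in wr->wr_id */
}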
@@ -333,18 +320,12 @@ struct iscsi_iser_task;
333/** 320/**
334 * struct iser_comp - iSER completion context 321 * struct iser_comp - iSER completion context
335 * 322 *
336 * @device: pointer to device handle
337 * @cq: completion queue 323 * @cq: completion queue
338 * @wcs: work completion array
339 * @tasklet: Tasklet handle
340 * @active_qps: Number of active QPs attached 324 * @active_qps: Number of active QPs attached
341 * to completion context 325 * to completion context
342 */ 326 */
343struct iser_comp { 327struct iser_comp {
344 struct iser_device *device;
345 struct ib_cq *cq; 328 struct ib_cq *cq;
346 struct ib_wc wcs[ISER_WC_BATCH_COUNT];
347 struct tasklet_struct tasklet;
348 int active_qps; 329 int active_qps;
349}; 330};
350 331
@@ -380,7 +361,6 @@ struct iser_reg_ops {
380 * 361 *
381 * @ib_device: RDMA device 362 * @ib_device: RDMA device
382 * @pd: Protection Domain for this device 363 * @pd: Protection Domain for this device
383 * @dev_attr: Device attributes container
384 * @mr: Global DMA memory region 364 * @mr: Global DMA memory region
385 * @event_handler: IB events handle routine 365 * @event_handler: IB events handle routine
386 * @ig_list: entry in devices list 366 * @ig_list: entry in devices list
@@ -389,18 +369,19 @@ struct iser_reg_ops {
389 * cpus and device max completion vectors 369 * cpus and device max completion vectors
 390 * @comps: Dynamically allocated array of completion handlers 370 * @comps: Dynamically allocated array of completion handlers
391 * @reg_ops: Registration ops 371 * @reg_ops: Registration ops
372 * @remote_inv_sup: Remote invalidate is supported on this device
392 */ 373 */
393struct iser_device { 374struct iser_device {
394 struct ib_device *ib_device; 375 struct ib_device *ib_device;
395 struct ib_pd *pd; 376 struct ib_pd *pd;
396 struct ib_device_attr dev_attr;
397 struct ib_mr *mr; 377 struct ib_mr *mr;
398 struct ib_event_handler event_handler; 378 struct ib_event_handler event_handler;
399 struct list_head ig_list; 379 struct list_head ig_list;
400 int refcount; 380 int refcount;
401 int comps_used; 381 int comps_used;
402 struct iser_comp *comps; 382 struct iser_comp *comps;
403 struct iser_reg_ops *reg_ops; 383 const struct iser_reg_ops *reg_ops;
384 bool remote_inv_sup;
404}; 385};
405 386
406#define ISER_CHECK_GUARD 0xc0 387#define ISER_CHECK_GUARD 0xc0
@@ -475,10 +456,11 @@ struct iser_fr_pool {
475 * @rx_wr: receive work request for batch posts 456 * @rx_wr: receive work request for batch posts
476 * @device: reference to iser device 457 * @device: reference to iser device
477 * @comp: iser completion context 458 * @comp: iser completion context
478 * @pi_support: Indicate device T10-PI support
479 * @beacon: beacon send wr to signal all flush errors were drained
480 * @flush_comp: completes when all connection completions consumed
 481 * @fr_pool: connection fast registration pool 459 * @fr_pool: connection fast registration pool
460 * @pi_support: Indicate device T10-PI support
461 * @last: last send wr to signal all flush errors were drained
462 * @last_cqe: cqe handler for last wr
463 * @last_comp: completes when all connection completions consumed
482 */ 464 */
483struct ib_conn { 465struct ib_conn {
484 struct rdma_cm_id *cma_id; 466 struct rdma_cm_id *cma_id;
@@ -488,10 +470,12 @@ struct ib_conn {
488 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; 470 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
489 struct iser_device *device; 471 struct iser_device *device;
490 struct iser_comp *comp; 472 struct iser_comp *comp;
491 bool pi_support;
492 struct ib_send_wr beacon;
493 struct completion flush_comp;
494 struct iser_fr_pool fr_pool; 473 struct iser_fr_pool fr_pool;
474 bool pi_support;
475 struct ib_send_wr last;
476 struct ib_cqe last_cqe;
477 struct ib_cqe reg_cqe;
478 struct completion last_comp;
495}; 479};
496 480
497/** 481/**
@@ -514,11 +498,7 @@ struct ib_conn {
514 * @up_completion: connection establishment completed 498 * @up_completion: connection establishment completed
515 * (state is ISER_CONN_UP) 499 * (state is ISER_CONN_UP)
516 * @conn_list: entry in ig conn list 500 * @conn_list: entry in ig conn list
517 * @login_buf: login data buffer (stores login parameters) 501 * @login_desc: login descriptor
518 * @login_req_buf: login request buffer
519 * @login_req_dma: login request buffer dma address
520 * @login_resp_buf: login response buffer
521 * @login_resp_dma: login response buffer dma address
522 * @rx_desc_head: head of rx_descs cyclic buffer 502 * @rx_desc_head: head of rx_descs cyclic buffer
523 * @rx_descs: rx buffers array (cyclic buffer) 503 * @rx_descs: rx buffers array (cyclic buffer)
524 * @num_rx_descs: number of rx descriptors 504 * @num_rx_descs: number of rx descriptors
@@ -541,15 +521,13 @@ struct iser_conn {
541 struct completion ib_completion; 521 struct completion ib_completion;
542 struct completion up_completion; 522 struct completion up_completion;
543 struct list_head conn_list; 523 struct list_head conn_list;
544 524 struct iser_login_desc login_desc;
545 char *login_buf;
546 char *login_req_buf, *login_resp_buf;
547 u64 login_req_dma, login_resp_dma;
548 unsigned int rx_desc_head; 525 unsigned int rx_desc_head;
549 struct iser_rx_desc *rx_descs; 526 struct iser_rx_desc *rx_descs;
550 u32 num_rx_descs; 527 u32 num_rx_descs;
551 unsigned short scsi_sg_tablesize; 528 unsigned short scsi_sg_tablesize;
552 unsigned int scsi_max_sectors; 529 unsigned int scsi_max_sectors;
530 bool snd_w_inv;
553}; 531};
554 532
555/** 533/**
@@ -579,9 +557,8 @@ struct iscsi_iser_task {
579 557
580struct iser_page_vec { 558struct iser_page_vec {
581 u64 *pages; 559 u64 *pages;
582 int length; 560 int npages;
583 int offset; 561 struct ib_mr fake_mr;
584 int data_size;
585}; 562};
586 563
587/** 564/**
@@ -633,12 +610,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn);
633 610
634void iser_release_work(struct work_struct *work); 611void iser_release_work(struct work_struct *work);
635 612
636void iser_rcv_completion(struct iser_rx_desc *desc, 613void iser_err_comp(struct ib_wc *wc, const char *type);
637 unsigned long dto_xfer_len, 614void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
638 struct ib_conn *ib_conn); 615void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
639 616void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
640void iser_snd_completion(struct iser_tx_desc *desc, 617void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
641 struct ib_conn *ib_conn); 618void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
619void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
620void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
642 621
643void iser_task_rdma_init(struct iscsi_iser_task *task); 622void iser_task_rdma_init(struct iscsi_iser_task *task);
644 623
@@ -651,7 +630,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
651 enum iser_data_dir cmd_dir); 630 enum iser_data_dir cmd_dir);
652 631
653int iser_reg_rdma_mem(struct iscsi_iser_task *task, 632int iser_reg_rdma_mem(struct iscsi_iser_task *task,
654 enum iser_data_dir dir); 633 enum iser_data_dir dir,
634 bool all_imm);
655void iser_unreg_rdma_mem(struct iscsi_iser_task *task, 635void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
656 enum iser_data_dir dir); 636 enum iser_data_dir dir);
657 637
@@ -719,4 +699,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc)
719 return cur_wr; 699 return cur_wr;
720} 700}
721 701
702static inline struct iser_conn *
703to_iser_conn(struct ib_conn *ib_conn)
704{
705 return container_of(ib_conn, struct iser_conn, ib_conn);
706}
707
708static inline struct iser_rx_desc *
709iser_rx(struct ib_cqe *cqe)
710{
711 return container_of(cqe, struct iser_rx_desc, cqe);
712}
713
714static inline struct iser_tx_desc *
715iser_tx(struct ib_cqe *cqe)
716{
717 return container_of(cqe, struct iser_tx_desc, cqe);
718}
719
720static inline struct iser_login_desc *
721iser_login(struct ib_cqe *cqe)
722{
723 return container_of(cqe, struct iser_login_desc, cqe);
724}
725
722#endif 726#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index ffd00c420729..ed54b388e7ad 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -51,7 +51,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
51 struct iscsi_iser_task *iser_task = task->dd_data; 51 struct iscsi_iser_task *iser_task = task->dd_data;
52 struct iser_mem_reg *mem_reg; 52 struct iser_mem_reg *mem_reg;
53 int err; 53 int err;
54 struct iser_hdr *hdr = &iser_task->desc.iser_header; 54 struct iser_ctrl *hdr = &iser_task->desc.iser_header;
55 struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; 55 struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
56 56
57 err = iser_dma_map_task_data(iser_task, 57 err = iser_dma_map_task_data(iser_task,
@@ -72,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
72 return err; 72 return err;
73 } 73 }
74 74
75 err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN); 75 err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false);
76 if (err) { 76 if (err) {
77 iser_err("Failed to set up Data-IN RDMA\n"); 77 iser_err("Failed to set up Data-IN RDMA\n");
78 return err; 78 return err;
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
104 struct iscsi_iser_task *iser_task = task->dd_data; 104 struct iscsi_iser_task *iser_task = task->dd_data;
105 struct iser_mem_reg *mem_reg; 105 struct iser_mem_reg *mem_reg;
106 int err; 106 int err;
107 struct iser_hdr *hdr = &iser_task->desc.iser_header; 107 struct iser_ctrl *hdr = &iser_task->desc.iser_header;
108 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; 108 struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
109 struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1]; 109 struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
110 110
@@ -126,7 +126,8 @@ iser_prepare_write_cmd(struct iscsi_task *task,
126 return err; 126 return err;
127 } 127 }
128 128
129 err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); 129 err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT,
130 buf_out->data_len == imm_sz);
130 if (err != 0) { 131 if (err != 0) {
131 iser_err("Failed to register write cmd RDMA mem\n"); 132 iser_err("Failed to register write cmd RDMA mem\n");
132 return err; 133 return err;
@@ -166,7 +167,7 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
166 ib_dma_sync_single_for_cpu(device->ib_device, 167 ib_dma_sync_single_for_cpu(device->ib_device,
167 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); 168 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
168 169
169 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); 170 memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
170 tx_desc->iser_header.flags = ISER_VER; 171 tx_desc->iser_header.flags = ISER_VER;
171 tx_desc->num_sge = 1; 172 tx_desc->num_sge = 1;
172} 173}
@@ -174,73 +175,63 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
174static void iser_free_login_buf(struct iser_conn *iser_conn) 175static void iser_free_login_buf(struct iser_conn *iser_conn)
175{ 176{
176 struct iser_device *device = iser_conn->ib_conn.device; 177 struct iser_device *device = iser_conn->ib_conn.device;
178 struct iser_login_desc *desc = &iser_conn->login_desc;
177 179
178 if (!iser_conn->login_buf) 180 if (!desc->req)
179 return; 181 return;
180 182
181 if (iser_conn->login_req_dma) 183 ib_dma_unmap_single(device->ib_device, desc->req_dma,
182 ib_dma_unmap_single(device->ib_device, 184 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
183 iser_conn->login_req_dma,
184 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
185 185
186 if (iser_conn->login_resp_dma) 186 ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
187 ib_dma_unmap_single(device->ib_device, 187 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
188 iser_conn->login_resp_dma,
189 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
190 188
191 kfree(iser_conn->login_buf); 189 kfree(desc->req);
190 kfree(desc->rsp);
192 191
193 /* make sure we never redo any unmapping */ 192 /* make sure we never redo any unmapping */
194 iser_conn->login_req_dma = 0; 193 desc->req = NULL;
195 iser_conn->login_resp_dma = 0; 194 desc->rsp = NULL;
196 iser_conn->login_buf = NULL;
197} 195}
198 196
199static int iser_alloc_login_buf(struct iser_conn *iser_conn) 197static int iser_alloc_login_buf(struct iser_conn *iser_conn)
200{ 198{
201 struct iser_device *device = iser_conn->ib_conn.device; 199 struct iser_device *device = iser_conn->ib_conn.device;
202 int req_err, resp_err; 200 struct iser_login_desc *desc = &iser_conn->login_desc;
203 201
204 BUG_ON(device == NULL); 202 desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
205 203 if (!desc->req)
206 iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + 204 return -ENOMEM;
207 ISER_RX_LOGIN_SIZE, GFP_KERNEL); 205
208 if (!iser_conn->login_buf) 206 desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
209 goto out_err; 207 ISCSI_DEF_MAX_RECV_SEG_LEN,
210 208 DMA_TO_DEVICE);
211 iser_conn->login_req_buf = iser_conn->login_buf; 209 if (ib_dma_mapping_error(device->ib_device,
212 iser_conn->login_resp_buf = iser_conn->login_buf + 210 desc->req_dma))
213 ISCSI_DEF_MAX_RECV_SEG_LEN; 211 goto free_req;
214 212
215 iser_conn->login_req_dma = ib_dma_map_single(device->ib_device, 213 desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
216 iser_conn->login_req_buf, 214 if (!desc->rsp)
217 ISCSI_DEF_MAX_RECV_SEG_LEN, 215 goto unmap_req;
218 DMA_TO_DEVICE); 216
219 217 desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
220 iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device, 218 ISER_RX_LOGIN_SIZE,
221 iser_conn->login_resp_buf, 219 DMA_FROM_DEVICE);
222 ISER_RX_LOGIN_SIZE, 220 if (ib_dma_mapping_error(device->ib_device,
223 DMA_FROM_DEVICE); 221 desc->rsp_dma))
224 222 goto free_rsp;
225 req_err = ib_dma_mapping_error(device->ib_device, 223
226 iser_conn->login_req_dma);
227 resp_err = ib_dma_mapping_error(device->ib_device,
228 iser_conn->login_resp_dma);
229
230 if (req_err || resp_err) {
231 if (req_err)
232 iser_conn->login_req_dma = 0;
233 if (resp_err)
234 iser_conn->login_resp_dma = 0;
235 goto free_login_buf;
236 }
237 return 0; 224 return 0;
238 225
239free_login_buf: 226free_rsp:
240 iser_free_login_buf(iser_conn); 227 kfree(desc->rsp);
228unmap_req:
229 ib_dma_unmap_single(device->ib_device, desc->req_dma,
230 ISCSI_DEF_MAX_RECV_SEG_LEN,
231 DMA_TO_DEVICE);
232free_req:
233 kfree(desc->req);
241 234
242out_err:
243 iser_err("unable to alloc or map login buf\n");
244 return -ENOMEM; 235 return -ENOMEM;
245} 236}
246 237
@@ -280,11 +271,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
280 goto rx_desc_dma_map_failed; 271 goto rx_desc_dma_map_failed;
281 272
282 rx_desc->dma_addr = dma_addr; 273 rx_desc->dma_addr = dma_addr;
283 274 rx_desc->cqe.done = iser_task_rsp;
284 rx_sg = &rx_desc->rx_sg; 275 rx_sg = &rx_desc->rx_sg;
285 rx_sg->addr = rx_desc->dma_addr; 276 rx_sg->addr = rx_desc->dma_addr;
286 rx_sg->length = ISER_RX_PAYLOAD_SIZE; 277 rx_sg->length = ISER_RX_PAYLOAD_SIZE;
287 rx_sg->lkey = device->pd->local_dma_lkey; 278 rx_sg->lkey = device->pd->local_dma_lkey;
288 } 279 }
289 280
290 iser_conn->rx_desc_head = 0; 281 iser_conn->rx_desc_head = 0;
@@ -383,6 +374,7 @@ int iser_send_command(struct iscsi_conn *conn,
383 374
384 /* build the tx desc regd header and add it to the tx desc dto */ 375 /* build the tx desc regd header and add it to the tx desc dto */
385 tx_desc->type = ISCSI_TX_SCSI_COMMAND; 376 tx_desc->type = ISCSI_TX_SCSI_COMMAND;
377 tx_desc->cqe.done = iser_cmd_comp;
386 iser_create_send_desc(iser_conn, tx_desc); 378 iser_create_send_desc(iser_conn, tx_desc);
387 379
388 if (hdr->flags & ISCSI_FLAG_CMD_READ) { 380 if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -464,6 +456,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
464 } 456 }
465 457
466 tx_desc->type = ISCSI_TX_DATAOUT; 458 tx_desc->type = ISCSI_TX_DATAOUT;
459 tx_desc->cqe.done = iser_dataout_comp;
467 tx_desc->iser_header.flags = ISER_VER; 460 tx_desc->iser_header.flags = ISER_VER;
468 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); 461 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
469 462
@@ -513,6 +506,7 @@ int iser_send_control(struct iscsi_conn *conn,
513 506
514 /* build the tx desc regd header and add it to the tx desc dto */ 507 /* build the tx desc regd header and add it to the tx desc dto */
515 mdesc->type = ISCSI_TX_CONTROL; 508 mdesc->type = ISCSI_TX_CONTROL;
509 mdesc->cqe.done = iser_ctrl_comp;
516 iser_create_send_desc(iser_conn, mdesc); 510 iser_create_send_desc(iser_conn, mdesc);
517 511
518 device = iser_conn->ib_conn.device; 512 device = iser_conn->ib_conn.device;
@@ -520,25 +514,25 @@ int iser_send_control(struct iscsi_conn *conn,
520 data_seg_len = ntoh24(task->hdr->dlength); 514 data_seg_len = ntoh24(task->hdr->dlength);
521 515
522 if (data_seg_len > 0) { 516 if (data_seg_len > 0) {
517 struct iser_login_desc *desc = &iser_conn->login_desc;
523 struct ib_sge *tx_dsg = &mdesc->tx_sg[1]; 518 struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
519
524 if (task != conn->login_task) { 520 if (task != conn->login_task) {
525 iser_err("data present on non login task!!!\n"); 521 iser_err("data present on non login task!!!\n");
526 goto send_control_error; 522 goto send_control_error;
527 } 523 }
528 524
529 ib_dma_sync_single_for_cpu(device->ib_device, 525 ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
530 iser_conn->login_req_dma, task->data_count, 526 task->data_count, DMA_TO_DEVICE);
531 DMA_TO_DEVICE);
532 527
533 memcpy(iser_conn->login_req_buf, task->data, task->data_count); 528 memcpy(desc->req, task->data, task->data_count);
534 529
535 ib_dma_sync_single_for_device(device->ib_device, 530 ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
536 iser_conn->login_req_dma, task->data_count, 531 task->data_count, DMA_TO_DEVICE);
537 DMA_TO_DEVICE);
538 532
539 tx_dsg->addr = iser_conn->login_req_dma; 533 tx_dsg->addr = desc->req_dma;
540 tx_dsg->length = task->data_count; 534 tx_dsg->length = task->data_count;
541 tx_dsg->lkey = device->pd->local_dma_lkey; 535 tx_dsg->lkey = device->pd->local_dma_lkey;
542 mdesc->num_sge = 2; 536 mdesc->num_sge = 2;
543 } 537 }
544 538
@@ -562,41 +556,126 @@ send_control_error:
562 return err; 556 return err;
563} 557}
564 558
565/** 559void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
566 * iser_rcv_dto_completion - recv DTO completion
567 */
568void iser_rcv_completion(struct iser_rx_desc *rx_desc,
569 unsigned long rx_xfer_len,
570 struct ib_conn *ib_conn)
571{ 560{
572 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 561 struct ib_conn *ib_conn = wc->qp->qp_context;
573 ib_conn); 562 struct iser_conn *iser_conn = to_iser_conn(ib_conn);
563 struct iser_login_desc *desc = iser_login(wc->wr_cqe);
574 struct iscsi_hdr *hdr; 564 struct iscsi_hdr *hdr;
575 u64 rx_dma; 565 char *data;
576 int rx_buflen, outstanding, count, err; 566 int length;
577 567
578 /* differentiate between login to all other PDUs */ 568 if (unlikely(wc->status != IB_WC_SUCCESS)) {
579 if ((char *)rx_desc == iser_conn->login_resp_buf) { 569 iser_err_comp(wc, "login_rsp");
580 rx_dma = iser_conn->login_resp_dma; 570 return;
581 rx_buflen = ISER_RX_LOGIN_SIZE; 571 }
582 } else { 572
583 rx_dma = rx_desc->dma_addr; 573 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
584 rx_buflen = ISER_RX_PAYLOAD_SIZE; 574 desc->rsp_dma, ISER_RX_LOGIN_SIZE,
575 DMA_FROM_DEVICE);
576
577 hdr = desc->rsp + sizeof(struct iser_ctrl);
578 data = desc->rsp + ISER_HEADERS_LEN;
579 length = wc->byte_len - ISER_HEADERS_LEN;
580
581 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
582 hdr->itt, length);
583
584 iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);
585
586 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
587 desc->rsp_dma, ISER_RX_LOGIN_SIZE,
588 DMA_FROM_DEVICE);
589
590 ib_conn->post_recv_buf_count--;
591}
592
593static inline void
594iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
595{
596 if (likely(rkey == desc->rsc.mr->rkey))
597 desc->rsc.mr_valid = 0;
598 else if (likely(rkey == desc->pi_ctx->sig_mr->rkey))
599 desc->pi_ctx->sig_mr_valid = 0;
600}
601
602static int
603iser_check_remote_inv(struct iser_conn *iser_conn,
604 struct ib_wc *wc,
605 struct iscsi_hdr *hdr)
606{
607 if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
608 struct iscsi_task *task;
609 u32 rkey = wc->ex.invalidate_rkey;
610
611 iser_dbg("conn %p: remote invalidation for rkey %#x\n",
612 iser_conn, rkey);
613
614 if (unlikely(!iser_conn->snd_w_inv)) {
615 iser_err("conn %p: unexepected remote invalidation, "
616 "terminating connection\n", iser_conn);
617 return -EPROTO;
618 }
619
620 task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
621 if (likely(task)) {
622 struct iscsi_iser_task *iser_task = task->dd_data;
623 struct iser_fr_desc *desc;
624
625 if (iser_task->dir[ISER_DIR_IN]) {
626 desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
627 iser_inv_desc(desc, rkey);
628 }
629
630 if (iser_task->dir[ISER_DIR_OUT]) {
631 desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
632 iser_inv_desc(desc, rkey);
633 }
634 } else {
635 iser_err("failed to get task for itt=%d\n", hdr->itt);
636 return -EINVAL;
637 }
585 } 638 }
586 639
587 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, 640 return 0;
588 rx_buflen, DMA_FROM_DEVICE); 641}
589 642
590 hdr = &rx_desc->iscsi_header; 643
644void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
645{
646 struct ib_conn *ib_conn = wc->qp->qp_context;
647 struct iser_conn *iser_conn = to_iser_conn(ib_conn);
648 struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
649 struct iscsi_hdr *hdr;
650 int length;
651 int outstanding, count, err;
652
653 if (unlikely(wc->status != IB_WC_SUCCESS)) {
654 iser_err_comp(wc, "task_rsp");
655 return;
656 }
657
658 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
659 desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
660 DMA_FROM_DEVICE);
661
662 hdr = &desc->iscsi_header;
663 length = wc->byte_len - ISER_HEADERS_LEN;
591 664
592 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, 665 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
593 hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); 666 hdr->itt, length);
667
668 if (iser_check_remote_inv(iser_conn, wc, hdr)) {
669 iscsi_conn_failure(iser_conn->iscsi_conn,
670 ISCSI_ERR_CONN_FAILED);
671 return;
672 }
594 673
595 iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data, 674 iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);
596 rx_xfer_len - ISER_HEADERS_LEN);
597 675
598 ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, 676 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
599 rx_buflen, DMA_FROM_DEVICE); 677 desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
678 DMA_FROM_DEVICE);
600 679
601 /* decrementing conn->post_recv_buf_count only --after-- freeing the * 680 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
602 * task eliminates the need to worry on tasks which are completed in * 681 * task eliminates the need to worry on tasks which are completed in *
@@ -604,9 +683,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
604 * for the posted rx bufs refcount to become zero handles everything */ 683 * for the posted rx bufs refcount to become zero handles everything */
605 ib_conn->post_recv_buf_count--; 684 ib_conn->post_recv_buf_count--;
606 685
607 if (rx_dma == iser_conn->login_resp_dma)
608 return;
609
610 outstanding = ib_conn->post_recv_buf_count; 686 outstanding = ib_conn->post_recv_buf_count;
611 if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { 687 if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
612 count = min(iser_conn->qp_max_recv_dtos - outstanding, 688 count = min(iser_conn->qp_max_recv_dtos - outstanding,
@@ -617,26 +693,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
617 } 693 }
618} 694}
619 695
620void iser_snd_completion(struct iser_tx_desc *tx_desc, 696void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
621 struct ib_conn *ib_conn)
622{ 697{
698 if (unlikely(wc->status != IB_WC_SUCCESS))
699 iser_err_comp(wc, "command");
700}
701
702void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc)
703{
704 struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
623 struct iscsi_task *task; 705 struct iscsi_task *task;
624 struct iser_device *device = ib_conn->device;
625 706
626 if (tx_desc->type == ISCSI_TX_DATAOUT) { 707 if (unlikely(wc->status != IB_WC_SUCCESS)) {
627 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, 708 iser_err_comp(wc, "control");
628 ISER_HEADERS_LEN, DMA_TO_DEVICE); 709 return;
629 kmem_cache_free(ig.desc_cache, tx_desc);
630 tx_desc = NULL;
631 } 710 }
632 711
633 if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { 712 /* this arithmetic is legal by libiscsi dd_data allocation */
634 /* this arithmetic is legal by libiscsi dd_data allocation */ 713 task = (void *)desc - sizeof(struct iscsi_task);
635 task = (void *) ((long)(void *)tx_desc - 714 if (task->hdr->itt == RESERVED_ITT)
636 sizeof(struct iscsi_task)); 715 iscsi_put_task(task);
637 if (task->hdr->itt == RESERVED_ITT) 716}
638 iscsi_put_task(task); 717
639 } 718void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
719{
720 struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
721 struct ib_conn *ib_conn = wc->qp->qp_context;
722 struct iser_device *device = ib_conn->device;
723
724 if (unlikely(wc->status != IB_WC_SUCCESS))
725 iser_err_comp(wc, "dataout");
726
727 ib_dma_unmap_single(device->ib_device, desc->dma_addr,
728 ISER_HEADERS_LEN, DMA_TO_DEVICE);
729 kmem_cache_free(ig.desc_cache, desc);
730}
731
732void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
733{
734 struct ib_conn *ib_conn = wc->qp->qp_context;
735
736 complete(&ib_conn->last_comp);
640} 737}
641 738
642void iser_task_rdma_init(struct iscsi_iser_task *iser_task) 739void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
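Every new done handler follows the same shape: bail out on a non-success wc->status, and honor remote invalidation when the target sets IB_WC_WITH_INVALIDATE so the local IB_WR_LOCAL_INV on MR reuse can be skipped. A compressed sketch (example_task_done is illustrative; the real dispatch is in the handlers above):

static void example_task_done(struct ib_cq *cq, struct ib_wc *wc)
{
	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		/* flush or protocol error; the driver routes this to iser_err_comp() */
		return;
	}

	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
		/* the target already invalidated this rkey, so mark the matching
		 * MR as not registered and skip the local invalidate on reuse */
		pr_debug("remote invalidation of rkey %#x\n",
			 wc->ex.invalidate_rkey);
	}
}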
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index ea765fb9664d..9a391cc5b9b3 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -49,7 +49,7 @@ int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
49 struct iser_reg_resources *rsc, 49 struct iser_reg_resources *rsc,
50 struct iser_mem_reg *mem_reg); 50 struct iser_mem_reg *mem_reg);
51 51
52static struct iser_reg_ops fastreg_ops = { 52static const struct iser_reg_ops fastreg_ops = {
53 .alloc_reg_res = iser_alloc_fastreg_pool, 53 .alloc_reg_res = iser_alloc_fastreg_pool,
54 .free_reg_res = iser_free_fastreg_pool, 54 .free_reg_res = iser_free_fastreg_pool,
55 .reg_mem = iser_fast_reg_mr, 55 .reg_mem = iser_fast_reg_mr,
@@ -58,7 +58,7 @@ static struct iser_reg_ops fastreg_ops = {
58 .reg_desc_put = iser_reg_desc_put_fr, 58 .reg_desc_put = iser_reg_desc_put_fr,
59}; 59};
60 60
61static struct iser_reg_ops fmr_ops = { 61static const struct iser_reg_ops fmr_ops = {
62 .alloc_reg_res = iser_alloc_fmr_pool, 62 .alloc_reg_res = iser_alloc_fmr_pool,
63 .free_reg_res = iser_free_fmr_pool, 63 .free_reg_res = iser_free_fmr_pool,
64 .reg_mem = iser_fast_reg_fmr, 64 .reg_mem = iser_fast_reg_fmr,
@@ -67,19 +67,24 @@ static struct iser_reg_ops fmr_ops = {
67 .reg_desc_put = iser_reg_desc_put_fmr, 67 .reg_desc_put = iser_reg_desc_put_fmr,
68}; 68};
69 69
70void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
71{
72 iser_err_comp(wc, "memreg");
73}
74
70int iser_assign_reg_ops(struct iser_device *device) 75int iser_assign_reg_ops(struct iser_device *device)
71{ 76{
72 struct ib_device_attr *dev_attr = &device->dev_attr; 77 struct ib_device *ib_dev = device->ib_device;
73 78
74 /* Assign function handles - based on FMR support */ 79 /* Assign function handles - based on FMR support */
75 if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr && 80 if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
76 device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) { 81 ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
77 iser_info("FMR supported, using FMR for registration\n"); 82 iser_info("FMR supported, using FMR for registration\n");
78 device->reg_ops = &fmr_ops; 83 device->reg_ops = &fmr_ops;
79 } else 84 } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
80 if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
81 iser_info("FastReg supported, using FastReg for registration\n"); 85 iser_info("FastReg supported, using FastReg for registration\n");
82 device->reg_ops = &fastreg_ops; 86 device->reg_ops = &fastreg_ops;
87 device->remote_inv_sup = iser_always_reg;
83 } else { 88 } else {
84 iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); 89 iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
85 return -1; 90 return -1;
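Registration-ops selection is now driven entirely by what the ib_device advertises: the FMR verbs if all four are present, otherwise fast registration when IB_DEVICE_MEM_MGT_EXTENSIONS is set. The two checks in isolation (example_* names are hypothetical):

#include <rdma/ib_verbs.h>

static bool example_has_fmr(struct ib_device *dev)
{
	return dev->alloc_fmr && dev->dealloc_fmr &&
	       dev->map_phys_fmr && dev->unmap_fmr;
}

static bool example_has_fastreg(struct ib_device *dev)
{
	return dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS;
}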
@@ -131,67 +136,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
131{ 136{
132} 137}
133 138
134#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
135
136/**
137 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
138 * and returns the length of resulting physical address array (may be less than
139 * the original due to possible compaction).
140 *
141 * we build a "page vec" under the assumption that the SG meets the RDMA
142 * alignment requirements. Other then the first and last SG elements, all
143 * the "internal" elements can be compacted into a list whose elements are
144 * dma addresses of physical pages. The code supports also the weird case
145 * where --few fragments of the same page-- are present in the SG as
146 * consecutive elements. Also, it handles one entry SG.
147 */
148
149static int iser_sg_to_page_vec(struct iser_data_buf *data,
150 struct ib_device *ibdev, u64 *pages,
151 int *offset, int *data_size)
152{
153 struct scatterlist *sg, *sgl = data->sg;
154 u64 start_addr, end_addr, page, chunk_start = 0;
155 unsigned long total_sz = 0;
156 unsigned int dma_len;
157 int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
158
159 /* compute the offset of first element */
160 *offset = (u64) sgl[0].offset & ~MASK_4K;
161
162 new_chunk = 1;
163 cur_page = 0;
164 for_each_sg(sgl, sg, data->dma_nents, i) {
165 start_addr = ib_sg_dma_address(ibdev, sg);
166 if (new_chunk)
167 chunk_start = start_addr;
168 dma_len = ib_sg_dma_len(ibdev, sg);
169 end_addr = start_addr + dma_len;
170 total_sz += dma_len;
171
172 /* collect page fragments until aligned or end of SG list */
173 if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
174 new_chunk = 0;
175 continue;
176 }
177 new_chunk = 1;
178
179 /* address of the first page in the contiguous chunk;
180 masking relevant for the very first SG entry,
181 which might be unaligned */
182 page = chunk_start & MASK_4K;
183 do {
184 pages[cur_page++] = page;
185 page += SIZE_4K;
186 } while (page < end_addr);
187 }
188
189 *data_size = total_sz;
190 iser_dbg("page_vec->data_size:%d cur_page %d\n",
191 *data_size, cur_page);
192 return cur_page;
193}
194
195static void iser_data_buf_dump(struct iser_data_buf *data, 139static void iser_data_buf_dump(struct iser_data_buf *data,
196 struct ib_device *ibdev) 140 struct ib_device *ibdev)
197{ 141{
@@ -210,10 +154,10 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
210{ 154{
211 int i; 155 int i;
212 156
213 iser_err("page vec length %d data size %d\n", 157 iser_err("page vec npages %d data length %d\n",
214 page_vec->length, page_vec->data_size); 158 page_vec->npages, page_vec->fake_mr.length);
215 for (i = 0; i < page_vec->length; i++) 159 for (i = 0; i < page_vec->npages; i++)
216 iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]); 160 iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
217} 161}
218 162
219int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, 163int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
@@ -251,7 +195,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
251 struct scatterlist *sg = mem->sg; 195 struct scatterlist *sg = mem->sg;
252 196
253 reg->sge.lkey = device->pd->local_dma_lkey; 197 reg->sge.lkey = device->pd->local_dma_lkey;
254 reg->rkey = device->mr->rkey; 198 /*
199 * FIXME: rework the registration code path to differentiate
200 * rkey/lkey use cases
201 */
202 reg->rkey = device->mr ? device->mr->rkey : 0;
255 reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]); 203 reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
256 reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]); 204 reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
257 205
@@ -262,11 +210,16 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
262 return 0; 210 return 0;
263} 211}
264 212
265/** 213static int iser_set_page(struct ib_mr *mr, u64 addr)
266 * iser_reg_page_vec - Register physical memory 214{
267 * 215 struct iser_page_vec *page_vec =
268 * returns: 0 on success, errno code on failure 216 container_of(mr, struct iser_page_vec, fake_mr);
269 */ 217
218 page_vec->pages[page_vec->npages++] = addr;
219
220 return 0;
221}
222
270static 223static
271int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, 224int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
272 struct iser_data_buf *mem, 225 struct iser_data_buf *mem,
@@ -280,22 +233,19 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
280 struct ib_pool_fmr *fmr; 233 struct ib_pool_fmr *fmr;
281 int ret, plen; 234 int ret, plen;
282 235
283 plen = iser_sg_to_page_vec(mem, device->ib_device, 236 page_vec->npages = 0;
284 page_vec->pages, 237 page_vec->fake_mr.page_size = SIZE_4K;
285 &page_vec->offset, 238 plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
286 &page_vec->data_size); 239 mem->size, iser_set_page);
287 page_vec->length = plen; 240 if (unlikely(plen < mem->size)) {
288 if (plen * SIZE_4K < page_vec->data_size) {
289 iser_err("page vec too short to hold this SG\n"); 241 iser_err("page vec too short to hold this SG\n");
290 iser_data_buf_dump(mem, device->ib_device); 242 iser_data_buf_dump(mem, device->ib_device);
291 iser_dump_page_vec(page_vec); 243 iser_dump_page_vec(page_vec);
292 return -EINVAL; 244 return -EINVAL;
293 } 245 }
294 246
295 fmr = ib_fmr_pool_map_phys(fmr_pool, 247 fmr = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
296 page_vec->pages, 248 page_vec->npages, page_vec->pages[0]);
297 page_vec->length,
298 page_vec->pages[0]);
299 if (IS_ERR(fmr)) { 249 if (IS_ERR(fmr)) {
300 ret = PTR_ERR(fmr); 250 ret = PTR_ERR(fmr);
301 iser_err("ib_fmr_pool_map_phys failed: %d\n", ret); 251 iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
@@ -304,8 +254,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
304 254
305 reg->sge.lkey = fmr->fmr->lkey; 255 reg->sge.lkey = fmr->fmr->lkey;
306 reg->rkey = fmr->fmr->rkey; 256 reg->rkey = fmr->fmr->rkey;
307 reg->sge.addr = page_vec->pages[0] + page_vec->offset; 257 reg->sge.addr = page_vec->fake_mr.iova;
308 reg->sge.length = page_vec->data_size; 258 reg->sge.length = page_vec->fake_mr.length;
309 reg->mem_h = fmr; 259 reg->mem_h = fmr;
310 260
311 iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx," 261 iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
@@ -413,19 +363,16 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
413 *mask |= ISER_CHECK_GUARD; 363 *mask |= ISER_CHECK_GUARD;
414} 364}
415 365
416static void 366static inline void
417iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) 367iser_inv_rkey(struct ib_send_wr *inv_wr,
368 struct ib_mr *mr,
369 struct ib_cqe *cqe)
418{ 370{
419 u32 rkey;
420
421 inv_wr->opcode = IB_WR_LOCAL_INV; 371 inv_wr->opcode = IB_WR_LOCAL_INV;
422 inv_wr->wr_id = ISER_FASTREG_LI_WRID; 372 inv_wr->wr_cqe = cqe;
423 inv_wr->ex.invalidate_rkey = mr->rkey; 373 inv_wr->ex.invalidate_rkey = mr->rkey;
424 inv_wr->send_flags = 0; 374 inv_wr->send_flags = 0;
425 inv_wr->num_sge = 0; 375 inv_wr->num_sge = 0;
426
427 rkey = ib_inc_rkey(mr->rkey);
428 ib_update_fast_reg_key(mr, rkey);
429} 376}
430 377
431static int 378static int
@@ -437,7 +384,9 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
437{ 384{
438 struct iser_tx_desc *tx_desc = &iser_task->desc; 385 struct iser_tx_desc *tx_desc = &iser_task->desc;
439 struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs; 386 struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
387 struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
440 struct ib_sig_handover_wr *wr; 388 struct ib_sig_handover_wr *wr;
389 struct ib_mr *mr = pi_ctx->sig_mr;
441 int ret; 390 int ret;
442 391
443 memset(sig_attrs, 0, sizeof(*sig_attrs)); 392 memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -447,17 +396,19 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
447 396
448 iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); 397 iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
449 398
450 if (!pi_ctx->sig_mr_valid) 399 if (pi_ctx->sig_mr_valid)
451 iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr); 400 iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
401
402 ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
452 403
453 wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); 404 wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
454 wr->wr.opcode = IB_WR_REG_SIG_MR; 405 wr->wr.opcode = IB_WR_REG_SIG_MR;
455 wr->wr.wr_id = ISER_FASTREG_LI_WRID; 406 wr->wr.wr_cqe = cqe;
456 wr->wr.sg_list = &data_reg->sge; 407 wr->wr.sg_list = &data_reg->sge;
457 wr->wr.num_sge = 1; 408 wr->wr.num_sge = 1;
458 wr->wr.send_flags = 0; 409 wr->wr.send_flags = 0;
459 wr->sig_attrs = sig_attrs; 410 wr->sig_attrs = sig_attrs;
460 wr->sig_mr = pi_ctx->sig_mr; 411 wr->sig_mr = mr;
461 if (scsi_prot_sg_count(iser_task->sc)) 412 if (scsi_prot_sg_count(iser_task->sc))
462 wr->prot = &prot_reg->sge; 413 wr->prot = &prot_reg->sge;
463 else 414 else
@@ -465,10 +416,10 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
465 wr->access_flags = IB_ACCESS_LOCAL_WRITE | 416 wr->access_flags = IB_ACCESS_LOCAL_WRITE |
466 IB_ACCESS_REMOTE_READ | 417 IB_ACCESS_REMOTE_READ |
467 IB_ACCESS_REMOTE_WRITE; 418 IB_ACCESS_REMOTE_WRITE;
468 pi_ctx->sig_mr_valid = 0; 419 pi_ctx->sig_mr_valid = 1;
469 420
470 sig_reg->sge.lkey = pi_ctx->sig_mr->lkey; 421 sig_reg->sge.lkey = mr->lkey;
471 sig_reg->rkey = pi_ctx->sig_mr->rkey; 422 sig_reg->rkey = mr->rkey;
472 sig_reg->sge.addr = 0; 423 sig_reg->sge.addr = 0;
473 sig_reg->sge.length = scsi_transfer_length(iser_task->sc); 424 sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
474 425
@@ -485,12 +436,15 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
485 struct iser_mem_reg *reg) 436 struct iser_mem_reg *reg)
486{ 437{
487 struct iser_tx_desc *tx_desc = &iser_task->desc; 438 struct iser_tx_desc *tx_desc = &iser_task->desc;
439 struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
488 struct ib_mr *mr = rsc->mr; 440 struct ib_mr *mr = rsc->mr;
489 struct ib_reg_wr *wr; 441 struct ib_reg_wr *wr;
490 int n; 442 int n;
491 443
492 if (!rsc->mr_valid) 444 if (rsc->mr_valid)
493 iser_inv_rkey(iser_tx_next_wr(tx_desc), mr); 445 iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
446
447 ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
494 448
495 n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K); 449 n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
496 if (unlikely(n != mem->size)) { 450 if (unlikely(n != mem->size)) {
@@ -501,7 +455,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
501 455
502 wr = reg_wr(iser_tx_next_wr(tx_desc)); 456 wr = reg_wr(iser_tx_next_wr(tx_desc));
503 wr->wr.opcode = IB_WR_REG_MR; 457 wr->wr.opcode = IB_WR_REG_MR;
504 wr->wr.wr_id = ISER_FASTREG_LI_WRID; 458 wr->wr.wr_cqe = cqe;
505 wr->wr.send_flags = 0; 459 wr->wr.send_flags = 0;
506 wr->wr.num_sge = 0; 460 wr->wr.num_sge = 0;
507 wr->mr = mr; 461 wr->mr = mr;
@@ -510,7 +464,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
510 IB_ACCESS_REMOTE_WRITE | 464 IB_ACCESS_REMOTE_WRITE |
511 IB_ACCESS_REMOTE_READ; 465 IB_ACCESS_REMOTE_READ;
512 466
513 rsc->mr_valid = 0; 467 rsc->mr_valid = 1;
514 468
515 reg->sge.lkey = mr->lkey; 469 reg->sge.lkey = mr->lkey;
516 reg->rkey = mr->rkey; 470 reg->rkey = mr->rkey;
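Put together, fast registration now runs: invalidate the previous mapping only if it is still live (mr_valid), bump the key, map the SG list and post IB_WR_REG_MR, then mark the MR as registered. A condensed sketch of iser_fast_reg_mr() above, keeping the driver's helpers and types:

static int example_fastreg(struct iser_tx_desc *tx_desc,
			   struct iser_reg_resources *rsc,
			   struct scatterlist *sg, int sg_nents,
			   struct ib_cqe *cqe)
{
	struct ib_mr *mr = rsc->mr;
	struct ib_reg_wr *wr;
	int n;

	if (rsc->mr_valid)	/* previous registration not yet invalidated */
		iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);

	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));

	n = ib_map_mr_sg(mr, sg, sg_nents, SIZE_4K);
	if (unlikely(n != sg_nents))
		return n < 0 ? n : -EINVAL;

	wr = reg_wr(iser_tx_next_wr(tx_desc));
	wr->wr.opcode = IB_WR_REG_MR;
	wr->wr.wr_cqe = cqe;
	wr->wr.send_flags = 0;
	wr->wr.num_sge = 0;
	wr->mr = mr;
	wr->key = mr->rkey;
	wr->access = IB_ACCESS_LOCAL_WRITE |
		     IB_ACCESS_REMOTE_READ |
		     IB_ACCESS_REMOTE_WRITE;

	rsc->mr_valid = 1;	/* must be invalidated before the next reuse */
	return 0;
}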
@@ -554,7 +508,8 @@ iser_reg_data_sg(struct iscsi_iser_task *task,
554} 508}
555 509
556int iser_reg_rdma_mem(struct iscsi_iser_task *task, 510int iser_reg_rdma_mem(struct iscsi_iser_task *task,
557 enum iser_data_dir dir) 511 enum iser_data_dir dir,
512 bool all_imm)
558{ 513{
559 struct ib_conn *ib_conn = &task->iser_conn->ib_conn; 514 struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
560 struct iser_device *device = ib_conn->device; 515 struct iser_device *device = ib_conn->device;
@@ -565,8 +520,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
565 bool use_dma_key; 520 bool use_dma_key;
566 int err; 521 int err;
567 522
568 use_dma_key = (mem->dma_nents == 1 && !iser_always_reg && 523 use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
569 scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL); 524 scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
570 525
571 if (!use_dma_key) { 526 if (!use_dma_key) {
572 desc = device->reg_ops->reg_desc_get(ib_conn); 527 desc = device->reg_ops->reg_desc_get(ib_conn);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 42f4da620f2e..40c0f4978e2f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -44,17 +44,6 @@
44#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \ 44#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
45 ISCSI_ISER_MAX_CONN) 45 ISCSI_ISER_MAX_CONN)
46 46
47static int iser_cq_poll_limit = 512;
48
49static void iser_cq_tasklet_fn(unsigned long data);
50static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
51
52static void iser_cq_event_callback(struct ib_event *cause, void *context)
53{
54 iser_err("cq event %s (%d)\n",
55 ib_event_msg(cause->event), cause->event);
56}
57
58static void iser_qp_event_callback(struct ib_event *cause, void *context) 47static void iser_qp_event_callback(struct ib_event *cause, void *context)
59{ 48{
60 iser_err("qp event %s (%d)\n", 49 iser_err("qp event %s (%d)\n",
@@ -78,59 +67,40 @@ static void iser_event_handler(struct ib_event_handler *handler,
78 */ 67 */
79static int iser_create_device_ib_res(struct iser_device *device) 68static int iser_create_device_ib_res(struct iser_device *device)
80{ 69{
81 struct ib_device_attr *dev_attr = &device->dev_attr; 70 struct ib_device *ib_dev = device->ib_device;
82 int ret, i, max_cqe; 71 int ret, i, max_cqe;
83 72
84 ret = ib_query_device(device->ib_device, dev_attr);
85 if (ret) {
86 pr_warn("Query device failed for %s\n", device->ib_device->name);
87 return ret;
88 }
89
90 ret = iser_assign_reg_ops(device); 73 ret = iser_assign_reg_ops(device);
91 if (ret) 74 if (ret)
92 return ret; 75 return ret;
93 76
94 device->comps_used = min_t(int, num_online_cpus(), 77 device->comps_used = min_t(int, num_online_cpus(),
95 device->ib_device->num_comp_vectors); 78 ib_dev->num_comp_vectors);
96 79
97 device->comps = kcalloc(device->comps_used, sizeof(*device->comps), 80 device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
98 GFP_KERNEL); 81 GFP_KERNEL);
99 if (!device->comps) 82 if (!device->comps)
100 goto comps_err; 83 goto comps_err;
101 84
102 max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe); 85 max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
103 86
104 iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n", 87 iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
105 device->comps_used, device->ib_device->name, 88 device->comps_used, ib_dev->name,
106 device->ib_device->num_comp_vectors, max_cqe); 89 ib_dev->num_comp_vectors, max_cqe);
107 90
108 device->pd = ib_alloc_pd(device->ib_device); 91 device->pd = ib_alloc_pd(ib_dev);
109 if (IS_ERR(device->pd)) 92 if (IS_ERR(device->pd))
110 goto pd_err; 93 goto pd_err;
111 94
112 for (i = 0; i < device->comps_used; i++) { 95 for (i = 0; i < device->comps_used; i++) {
113 struct ib_cq_init_attr cq_attr = {};
114 struct iser_comp *comp = &device->comps[i]; 96 struct iser_comp *comp = &device->comps[i];
115 97
116 comp->device = device; 98 comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
117 cq_attr.cqe = max_cqe; 99 IB_POLL_SOFTIRQ);
118 cq_attr.comp_vector = i;
119 comp->cq = ib_create_cq(device->ib_device,
120 iser_cq_callback,
121 iser_cq_event_callback,
122 (void *)comp,
123 &cq_attr);
124 if (IS_ERR(comp->cq)) { 100 if (IS_ERR(comp->cq)) {
125 comp->cq = NULL; 101 comp->cq = NULL;
126 goto cq_err; 102 goto cq_err;
127 } 103 }
128
129 if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
130 goto cq_err;
131
132 tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
133 (unsigned long)comp);
134 } 104 }
135 105
136 if (!iser_always_reg) { 106 if (!iser_always_reg) {
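The private completion machinery (tasklet, wc batch array, CQ callbacks) is replaced by ib_alloc_cq() with IB_POLL_SOFTIRQ, which polls and re-arms the CQ in the core. The allocation loop in isolation (example_alloc_cqs is hypothetical):

#include <linux/err.h>
#include <rdma/ib_verbs.h>

static int example_alloc_cqs(struct iser_device *device, int max_cqe)
{
	struct ib_device *ib_dev = device->ib_device;
	int i;

	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
				       IB_POLL_SOFTIRQ);
		if (IS_ERR(comp->cq)) {
			int ret = PTR_ERR(comp->cq);

			comp->cq = NULL;
			while (--i >= 0)
				ib_free_cq(device->comps[i].cq);
			return ret;
		}
	}

	return 0;
}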
@@ -140,11 +110,11 @@ static int iser_create_device_ib_res(struct iser_device *device)
140 110
141 device->mr = ib_get_dma_mr(device->pd, access); 111 device->mr = ib_get_dma_mr(device->pd, access);
142 if (IS_ERR(device->mr)) 112 if (IS_ERR(device->mr))
143 goto dma_mr_err; 113 goto cq_err;
144 } 114 }
145 115
146 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, 116 INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
147 iser_event_handler); 117 iser_event_handler);
148 if (ib_register_event_handler(&device->event_handler)) 118 if (ib_register_event_handler(&device->event_handler))
149 goto handler_err; 119 goto handler_err;
150 120
@@ -153,15 +123,12 @@ static int iser_create_device_ib_res(struct iser_device *device)
153handler_err: 123handler_err:
154 if (device->mr) 124 if (device->mr)
155 ib_dereg_mr(device->mr); 125 ib_dereg_mr(device->mr);
156dma_mr_err:
157 for (i = 0; i < device->comps_used; i++)
158 tasklet_kill(&device->comps[i].tasklet);
159cq_err: 126cq_err:
160 for (i = 0; i < device->comps_used; i++) { 127 for (i = 0; i < device->comps_used; i++) {
161 struct iser_comp *comp = &device->comps[i]; 128 struct iser_comp *comp = &device->comps[i];
162 129
163 if (comp->cq) 130 if (comp->cq)
164 ib_destroy_cq(comp->cq); 131 ib_free_cq(comp->cq);
165 } 132 }
166 ib_dealloc_pd(device->pd); 133 ib_dealloc_pd(device->pd);
167pd_err: 134pd_err:
@@ -182,8 +149,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
182 for (i = 0; i < device->comps_used; i++) { 149 for (i = 0; i < device->comps_used; i++) {
183 struct iser_comp *comp = &device->comps[i]; 150 struct iser_comp *comp = &device->comps[i];
184 151
185 tasklet_kill(&comp->tasklet); 152 ib_free_cq(comp->cq);
186 ib_destroy_cq(comp->cq);
187 comp->cq = NULL; 153 comp->cq = NULL;
188 } 154 }
189 155
@@ -299,7 +265,7 @@ iser_alloc_reg_res(struct ib_device *ib_device,
299 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); 265 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
300 return ret; 266 return ret;
301 } 267 }
302 res->mr_valid = 1; 268 res->mr_valid = 0;
303 269
304 return 0; 270 return 0;
305} 271}
@@ -336,7 +302,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device,
336 ret = PTR_ERR(pi_ctx->sig_mr); 302 ret = PTR_ERR(pi_ctx->sig_mr);
337 goto sig_mr_failure; 303 goto sig_mr_failure;
338 } 304 }
339 pi_ctx->sig_mr_valid = 1; 305 pi_ctx->sig_mr_valid = 0;
340 desc->pi_ctx->sig_protected = 0; 306 desc->pi_ctx->sig_protected = 0;
341 307
342 return 0; 308 return 0;
@@ -461,10 +427,9 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
461 */ 427 */
462static int iser_create_ib_conn_res(struct ib_conn *ib_conn) 428static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
463{ 429{
464 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 430 struct iser_conn *iser_conn = to_iser_conn(ib_conn);
465 ib_conn);
466 struct iser_device *device; 431 struct iser_device *device;
467 struct ib_device_attr *dev_attr; 432 struct ib_device *ib_dev;
468 struct ib_qp_init_attr init_attr; 433 struct ib_qp_init_attr init_attr;
469 int ret = -ENOMEM; 434 int ret = -ENOMEM;
470 int index, min_index = 0; 435 int index, min_index = 0;
@@ -472,7 +437,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
472 BUG_ON(ib_conn->device == NULL); 437 BUG_ON(ib_conn->device == NULL);
473 438
474 device = ib_conn->device; 439 device = ib_conn->device;
475 dev_attr = &device->dev_attr; 440 ib_dev = device->ib_device;
476 441
477 memset(&init_attr, 0, sizeof init_attr); 442 memset(&init_attr, 0, sizeof init_attr);
478 443
@@ -503,16 +468,16 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
503 iser_conn->max_cmds = 468 iser_conn->max_cmds =
504 ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS); 469 ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
505 } else { 470 } else {
506 if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) { 471 if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
507 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1; 472 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
508 iser_conn->max_cmds = 473 iser_conn->max_cmds =
509 ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS); 474 ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
510 } else { 475 } else {
511 init_attr.cap.max_send_wr = dev_attr->max_qp_wr; 476 init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
512 iser_conn->max_cmds = 477 iser_conn->max_cmds =
513 ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr); 478 ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
514 iser_dbg("device %s supports max_send_wr %d\n", 479 iser_dbg("device %s supports max_send_wr %d\n",
515 device->ib_device->name, dev_attr->max_qp_wr); 480 device->ib_device->name, ib_dev->attrs.max_qp_wr);
516 } 481 }
517 } 482 }
518 483
@@ -724,13 +689,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
724 iser_conn, err); 689 iser_conn, err);
725 690
726 /* post an indication that all flush errors were consumed */ 691 /* post an indication that all flush errors were consumed */
727 err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr); 692 err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
728 if (err) { 693 if (err) {
729 iser_err("conn %p failed to post beacon", ib_conn); 694 iser_err("conn %p failed to post last wr", ib_conn);
730 return 1; 695 return 1;
731 } 696 }
732 697
733 wait_for_completion(&ib_conn->flush_comp); 698 wait_for_completion(&ib_conn->last_comp);
734 } 699 }
735 700
736 return 1; 701 return 1;
@@ -756,7 +721,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
756 721
757 sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K); 722 sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
758 sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE, 723 sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
759 device->dev_attr.max_fast_reg_page_list_len); 724 device->ib_device->attrs.max_fast_reg_page_list_len);
760 725
761 if (sg_tablesize > sup_sg_tablesize) { 726 if (sg_tablesize > sup_sg_tablesize) {
762 sg_tablesize = sup_sg_tablesize; 727 sg_tablesize = sup_sg_tablesize;
@@ -799,7 +764,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
799 764
800 /* connection T10-PI support */ 765 /* connection T10-PI support */
801 if (iser_pi_enable) { 766 if (iser_pi_enable) {
802 if (!(device->dev_attr.device_cap_flags & 767 if (!(device->ib_device->attrs.device_cap_flags &
803 IB_DEVICE_SIGNATURE_HANDOVER)) { 768 IB_DEVICE_SIGNATURE_HANDOVER)) {
804 iser_warn("T10-PI requested but not supported on %s, " 769 iser_warn("T10-PI requested but not supported on %s, "
805 "continue without T10-PI\n", 770 "continue without T10-PI\n",
@@ -841,16 +806,17 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
841 goto failure; 806 goto failure;
842 807
843 memset(&conn_param, 0, sizeof conn_param); 808 memset(&conn_param, 0, sizeof conn_param);
844 conn_param.responder_resources = device->dev_attr.max_qp_rd_atom; 809 conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom;
845 conn_param.initiator_depth = 1; 810 conn_param.initiator_depth = 1;
846 conn_param.retry_count = 7; 811 conn_param.retry_count = 7;
847 conn_param.rnr_retry_count = 6; 812 conn_param.rnr_retry_count = 6;
848 813
849 memset(&req_hdr, 0, sizeof(req_hdr)); 814 memset(&req_hdr, 0, sizeof(req_hdr));
850 req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED | 815 req_hdr.flags = ISER_ZBVA_NOT_SUP;
851 ISER_SEND_W_INV_NOT_SUPPORTED); 816 if (!device->remote_inv_sup)
852 conn_param.private_data = (void *)&req_hdr; 817 req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP;
853 conn_param.private_data_len = sizeof(struct iser_cm_hdr); 818 conn_param.private_data = (void *)&req_hdr;
819 conn_param.private_data_len = sizeof(struct iser_cm_hdr);
854 820
855 ret = rdma_connect(cma_id, &conn_param); 821 ret = rdma_connect(cma_id, &conn_param);
856 if (ret) { 822 if (ret) {
@@ -863,7 +829,8 @@ failure:
863 iser_connect_error(cma_id); 829 iser_connect_error(cma_id);
864} 830}
865 831
866static void iser_connected_handler(struct rdma_cm_id *cma_id) 832static void iser_connected_handler(struct rdma_cm_id *cma_id,
833 const void *private_data)
867{ 834{
868 struct iser_conn *iser_conn; 835 struct iser_conn *iser_conn;
869 struct ib_qp_attr attr; 836 struct ib_qp_attr attr;
@@ -877,6 +844,15 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
877 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); 844 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
878 iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); 845 iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
879 846
847 if (private_data) {
848 u8 flags = *(u8 *)private_data;
849
850 iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
851 }
852
853 iser_info("conn %p: negotiated %s invalidation\n",
854 iser_conn, iser_conn->snd_w_inv ? "remote" : "local");
855
880 iser_conn->state = ISER_CONN_UP; 856 iser_conn->state = ISER_CONN_UP;
881 complete(&iser_conn->up_completion); 857 complete(&iser_conn->up_completion);
882} 858}
@@ -928,7 +904,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
928 iser_route_handler(cma_id); 904 iser_route_handler(cma_id);
929 break; 905 break;
930 case RDMA_CM_EVENT_ESTABLISHED: 906 case RDMA_CM_EVENT_ESTABLISHED:
931 iser_connected_handler(cma_id); 907 iser_connected_handler(cma_id, event->param.conn.private_data);
932 break; 908 break;
933 case RDMA_CM_EVENT_ADDR_ERROR: 909 case RDMA_CM_EVENT_ADDR_ERROR:
934 case RDMA_CM_EVENT_ROUTE_ERROR: 910 case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -967,14 +943,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
967 943
968void iser_conn_init(struct iser_conn *iser_conn) 944void iser_conn_init(struct iser_conn *iser_conn)
969{ 945{
946 struct ib_conn *ib_conn = &iser_conn->ib_conn;
947
970 iser_conn->state = ISER_CONN_INIT; 948 iser_conn->state = ISER_CONN_INIT;
971 iser_conn->ib_conn.post_recv_buf_count = 0;
972 init_completion(&iser_conn->ib_conn.flush_comp);
973 init_completion(&iser_conn->stop_completion); 949 init_completion(&iser_conn->stop_completion);
974 init_completion(&iser_conn->ib_completion); 950 init_completion(&iser_conn->ib_completion);
975 init_completion(&iser_conn->up_completion); 951 init_completion(&iser_conn->up_completion);
976 INIT_LIST_HEAD(&iser_conn->conn_list); 952 INIT_LIST_HEAD(&iser_conn->conn_list);
977 mutex_init(&iser_conn->state_mutex); 953 mutex_init(&iser_conn->state_mutex);
954
955 ib_conn->post_recv_buf_count = 0;
956 ib_conn->reg_cqe.done = iser_reg_comp;
957 ib_conn->last_cqe.done = iser_last_comp;
958 ib_conn->last.wr_cqe = &ib_conn->last_cqe;
959 ib_conn->last.opcode = IB_WR_SEND;
960 init_completion(&ib_conn->last_comp);
978} 961}
979 962
980 /** 963 /**
@@ -1000,9 +983,6 @@ int iser_connect(struct iser_conn *iser_conn,
1000 983
1001 iser_conn->state = ISER_CONN_PENDING; 984 iser_conn->state = ISER_CONN_PENDING;
1002 985
1003 ib_conn->beacon.wr_id = ISER_BEACON_WRID;
1004 ib_conn->beacon.opcode = IB_WR_SEND;
1005
1006 ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, 986 ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
1007 (void *)iser_conn, 987 (void *)iser_conn,
1008 RDMA_PS_TCP, IB_QPT_RC); 988 RDMA_PS_TCP, IB_QPT_RC);
@@ -1045,56 +1025,60 @@ connect_failure:
1045 1025
1046int iser_post_recvl(struct iser_conn *iser_conn) 1026int iser_post_recvl(struct iser_conn *iser_conn)
1047{ 1027{
1048 struct ib_recv_wr rx_wr, *rx_wr_failed;
1049 struct ib_conn *ib_conn = &iser_conn->ib_conn; 1028 struct ib_conn *ib_conn = &iser_conn->ib_conn;
1050 struct ib_sge sge; 1029 struct iser_login_desc *desc = &iser_conn->login_desc;
1030 struct ib_recv_wr wr, *wr_failed;
1051 int ib_ret; 1031 int ib_ret;
1052 1032
1053 sge.addr = iser_conn->login_resp_dma; 1033 desc->sge.addr = desc->rsp_dma;
1054 sge.length = ISER_RX_LOGIN_SIZE; 1034 desc->sge.length = ISER_RX_LOGIN_SIZE;
1055 sge.lkey = ib_conn->device->pd->local_dma_lkey; 1035 desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;
1056 1036
1057 rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf; 1037 desc->cqe.done = iser_login_rsp;
1058 rx_wr.sg_list = &sge; 1038 wr.wr_cqe = &desc->cqe;
1059 rx_wr.num_sge = 1; 1039 wr.sg_list = &desc->sge;
1060 rx_wr.next = NULL; 1040 wr.num_sge = 1;
1041 wr.next = NULL;
1061 1042
1062 ib_conn->post_recv_buf_count++; 1043 ib_conn->post_recv_buf_count++;
1063 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); 1044 ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
1064 if (ib_ret) { 1045 if (ib_ret) {
1065 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 1046 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
1066 ib_conn->post_recv_buf_count--; 1047 ib_conn->post_recv_buf_count--;
1067 } 1048 }
1049
1068 return ib_ret; 1050 return ib_ret;
1069} 1051}
1070 1052
1071int iser_post_recvm(struct iser_conn *iser_conn, int count) 1053int iser_post_recvm(struct iser_conn *iser_conn, int count)
1072{ 1054{
1073 struct ib_recv_wr *rx_wr, *rx_wr_failed;
1074 int i, ib_ret;
1075 struct ib_conn *ib_conn = &iser_conn->ib_conn; 1055 struct ib_conn *ib_conn = &iser_conn->ib_conn;
1076 unsigned int my_rx_head = iser_conn->rx_desc_head; 1056 unsigned int my_rx_head = iser_conn->rx_desc_head;
1077 struct iser_rx_desc *rx_desc; 1057 struct iser_rx_desc *rx_desc;
1058 struct ib_recv_wr *wr, *wr_failed;
1059 int i, ib_ret;
1078 1060
1079 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { 1061 for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
1080 rx_desc = &iser_conn->rx_descs[my_rx_head]; 1062 rx_desc = &iser_conn->rx_descs[my_rx_head];
1081 rx_wr->wr_id = (uintptr_t)rx_desc; 1063 rx_desc->cqe.done = iser_task_rsp;
1082 rx_wr->sg_list = &rx_desc->rx_sg; 1064 wr->wr_cqe = &rx_desc->cqe;
1083 rx_wr->num_sge = 1; 1065 wr->sg_list = &rx_desc->rx_sg;
1084 rx_wr->next = rx_wr + 1; 1066 wr->num_sge = 1;
1067 wr->next = wr + 1;
1085 my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask; 1068 my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
1086 } 1069 }
1087 1070
1088 rx_wr--; 1071 wr--;
1089 rx_wr->next = NULL; /* mark end of work requests list */ 1072 wr->next = NULL; /* mark end of work requests list */
1090 1073
1091 ib_conn->post_recv_buf_count += count; 1074 ib_conn->post_recv_buf_count += count;
1092 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); 1075 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
1093 if (ib_ret) { 1076 if (ib_ret) {
1094 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 1077 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
1095 ib_conn->post_recv_buf_count -= count; 1078 ib_conn->post_recv_buf_count -= count;
1096 } else 1079 } else
1097 iser_conn->rx_desc_head = my_rx_head; 1080 iser_conn->rx_desc_head = my_rx_head;
1081
1098 return ib_ret; 1082 return ib_ret;
1099} 1083}
1100 1084
@@ -1115,7 +1099,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
1115 DMA_TO_DEVICE); 1099 DMA_TO_DEVICE);
1116 1100
1117 wr->next = NULL; 1101 wr->next = NULL;
1118 wr->wr_id = (uintptr_t)tx_desc; 1102 wr->wr_cqe = &tx_desc->cqe;
1119 wr->sg_list = tx_desc->tx_sg; 1103 wr->sg_list = tx_desc->tx_sg;
1120 wr->num_sge = tx_desc->num_sge; 1104 wr->num_sge = tx_desc->num_sge;
1121 wr->opcode = IB_WR_SEND; 1105 wr->opcode = IB_WR_SEND;
@@ -1129,149 +1113,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
1129 return ib_ret; 1113 return ib_ret;
1130} 1114}
1131 1115
1132/**
1133 * is_iser_tx_desc - Indicate if the completion wr_id
1134 * is a TX descriptor or not.
1135 * @iser_conn: iser connection
1136 * @wr_id: completion WR identifier
1137 *
1138 * Since we cannot rely on wc opcode in FLUSH errors
1139 * we must work around it by checking if the wr_id address
1140 * falls in the iser connection rx_descs buffer. If so
1141 * it is an RX descriptor, otherwize it is a TX.
1142 */
1143static inline bool
1144is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
1145{
1146 void *start = iser_conn->rx_descs;
1147 int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
1148
1149 if (wr_id >= start && wr_id < start + len)
1150 return false;
1151
1152 return true;
1153}
1154
1155/**
1156 * iser_handle_comp_error() - Handle error completion
1157 * @ib_conn: connection RDMA resources
1158 * @wc: work completion
1159 *
1160 * Notes: We may handle a FLUSH error completion and in this case
1161 * we only cleanup in case TX type was DATAOUT. For non-FLUSH
1162 * error completion we should also notify iscsi layer that
1163 * connection is failed (in case we passed bind stage).
1164 */
1165static void
1166iser_handle_comp_error(struct ib_conn *ib_conn,
1167 struct ib_wc *wc)
1168{
1169 void *wr_id = (void *)(uintptr_t)wc->wr_id;
1170 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
1171 ib_conn);
1172
1173 if (wc->status != IB_WC_WR_FLUSH_ERR)
1174 if (iser_conn->iscsi_conn)
1175 iscsi_conn_failure(iser_conn->iscsi_conn,
1176 ISCSI_ERR_CONN_FAILED);
1177
1178 if (wc->wr_id == ISER_FASTREG_LI_WRID)
1179 return;
1180
1181 if (is_iser_tx_desc(iser_conn, wr_id)) {
1182 struct iser_tx_desc *desc = wr_id;
1183
1184 if (desc->type == ISCSI_TX_DATAOUT)
1185 kmem_cache_free(ig.desc_cache, desc);
1186 } else {
1187 ib_conn->post_recv_buf_count--;
1188 }
1189}
1190
1191/**
1192 * iser_handle_wc - handle a single work completion
1193 * @wc: work completion
1194 *
1195 * Soft-IRQ context, work completion can be either
1196 * SEND or RECV, and can turn out successful or
1197 * with error (or flush error).
1198 */
1199static void iser_handle_wc(struct ib_wc *wc)
1200{
1201 struct ib_conn *ib_conn;
1202 struct iser_tx_desc *tx_desc;
1203 struct iser_rx_desc *rx_desc;
1204
1205 ib_conn = wc->qp->qp_context;
1206 if (likely(wc->status == IB_WC_SUCCESS)) {
1207 if (wc->opcode == IB_WC_RECV) {
1208 rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
1209 iser_rcv_completion(rx_desc, wc->byte_len,
1210 ib_conn);
1211 } else
1212 if (wc->opcode == IB_WC_SEND) {
1213 tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
1214 iser_snd_completion(tx_desc, ib_conn);
1215 } else {
1216 iser_err("Unknown wc opcode %d\n", wc->opcode);
1217 }
1218 } else {
1219 if (wc->status != IB_WC_WR_FLUSH_ERR)
1220 iser_err("%s (%d): wr id %llx vend_err %x\n",
1221 ib_wc_status_msg(wc->status), wc->status,
1222 wc->wr_id, wc->vendor_err);
1223 else
1224 iser_dbg("%s (%d): wr id %llx\n",
1225 ib_wc_status_msg(wc->status), wc->status,
1226 wc->wr_id);
1227
1228 if (wc->wr_id == ISER_BEACON_WRID)
1229 /* all flush errors were consumed */
1230 complete(&ib_conn->flush_comp);
1231 else
1232 iser_handle_comp_error(ib_conn, wc);
1233 }
1234}
1235
1236/**
1237 * iser_cq_tasklet_fn - iSER completion polling loop
1238 * @data: iSER completion context
1239 *
1240 * Soft-IRQ context, polling connection CQ until
1241 * either CQ was empty or we exausted polling budget
1242 */
1243static void iser_cq_tasklet_fn(unsigned long data)
1244{
1245 struct iser_comp *comp = (struct iser_comp *)data;
1246 struct ib_cq *cq = comp->cq;
1247 struct ib_wc *const wcs = comp->wcs;
1248 int i, n, completed = 0;
1249
1250 while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
1251 for (i = 0; i < n; i++)
1252 iser_handle_wc(&wcs[i]);
1253
1254 completed += n;
1255 if (completed >= iser_cq_poll_limit)
1256 break;
1257 }
1258
1259 /*
1260 * It is assumed here that arming CQ only once its empty
1261 * would not cause interrupts to be missed.
1262 */
1263 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1264
1265 iser_dbg("got %d completions\n", completed);
1266}
1267
1268static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
1269{
1270 struct iser_comp *comp = cq_context;
1271
1272 tasklet_schedule(&comp->tasklet);
1273}
1274
1275u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, 1116u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
1276 enum iser_data_dir cmd_dir, sector_t *sector) 1117 enum iser_data_dir cmd_dir, sector_t *sector)
1277{ 1118{
@@ -1319,3 +1160,21 @@ err:
1319 /* Not alot we can do here, return ambiguous guard error */ 1160 /* Not alot we can do here, return ambiguous guard error */
1320 return 0x1; 1161 return 0x1;
1321} 1162}
1163
1164void iser_err_comp(struct ib_wc *wc, const char *type)
1165{
1166 if (wc->status != IB_WC_WR_FLUSH_ERR) {
1167 struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);
1168
1169 iser_err("%s failure: %s (%d) vend_err %x\n", type,
1170 ib_wc_status_msg(wc->status), wc->status,
1171 wc->vendor_err);
1172
1173 if (iser_conn->iscsi_conn)
1174 iscsi_conn_failure(iser_conn->iscsi_conn,
1175 ISCSI_ERR_CONN_FAILED);
1176 } else {
1177 iser_dbg("%s failure: %s (%d)\n", type,
1178 ib_wc_status_msg(wc->status), wc->status);
1179 }
1180}
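
The hunks above replace the driver-private completion path (wr_id decoding, iser_handle_wc, the CQ tasklet) with the core CQ abstraction: completion queues are managed through ib_alloc_cq()/ib_free_cq(), every work request carries a struct ib_cqe, and the core invokes its ->done callback, so error handling collapses into iser_err_comp(). A minimal sketch of that pattern under those assumptions; my_desc, my_done and my_post_recv are illustrative names, not the iser structures:

#include <rdma/ib_verbs.h>

struct my_desc {
	struct ib_cqe	cqe;	/* embedded completion entry */
	struct ib_sge	sge;
};

static void my_done(struct ib_cq *cq, struct ib_wc *wc)
{
	/* recover the posting descriptor from the completed work request */
	struct my_desc *desc = container_of(wc->wr_cqe, struct my_desc, cqe);

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		pr_err("recv failed: %s (%d)\n",
		       ib_wc_status_msg(wc->status), wc->status);
		return;
	}
	/* ... consume desc ... */
}

static int my_post_recv(struct ib_qp *qp, struct my_desc *desc)
{
	struct ib_recv_wr wr, *bad_wr;

	desc->cqe.done = my_done;	/* dispatched by the CQ polling context */
	wr.next = NULL;
	wr.wr_cqe = &desc->cqe;		/* replaces the old opaque wr_id */
	wr.sg_list = &desc->sge;
	wr.num_sge = 1;

	return ib_post_recv(qp, &wr, &bad_wr);
}

The same idea is behind the ib_conn->last SEND WR above: it is just another ib_cqe whose completion (last_comp) signals that all earlier flush errors have already been delivered.
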
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 468c5e132563..f121e6129339 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -29,7 +29,6 @@
29#include <target/iscsi/iscsi_transport.h> 29#include <target/iscsi/iscsi_transport.h>
30#include <linux/semaphore.h> 30#include <linux/semaphore.h>
31 31
32#include "isert_proto.h"
33#include "ib_isert.h" 32#include "ib_isert.h"
34 33
35#define ISERT_MAX_CONN 8 34#define ISERT_MAX_CONN 8
@@ -95,22 +94,6 @@ isert_qp_event_callback(struct ib_event *e, void *context)
95 } 94 }
96} 95}
97 96
98static int
99isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
100{
101 int ret;
102
103 ret = ib_query_device(ib_dev, devattr);
104 if (ret) {
105 isert_err("ib_query_device() failed: %d\n", ret);
106 return ret;
107 }
108 isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
109 isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
110
111 return 0;
112}
113
114static struct isert_comp * 97static struct isert_comp *
115isert_comp_get(struct isert_conn *isert_conn) 98isert_comp_get(struct isert_conn *isert_conn)
116{ 99{
@@ -157,9 +140,9 @@ isert_create_qp(struct isert_conn *isert_conn,
157 attr.recv_cq = comp->cq; 140 attr.recv_cq = comp->cq;
158 attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS; 141 attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
159 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; 142 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
160 attr.cap.max_send_sge = device->dev_attr.max_sge; 143 attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
161 isert_conn->max_sge = min(device->dev_attr.max_sge, 144 isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
162 device->dev_attr.max_sge_rd); 145 device->ib_device->attrs.max_sge_rd);
163 attr.cap.max_recv_sge = 1; 146 attr.cap.max_recv_sge = 1;
164 attr.sq_sig_type = IB_SIGNAL_REQ_WR; 147 attr.sq_sig_type = IB_SIGNAL_REQ_WR;
165 attr.qp_type = IB_QPT_RC; 148 attr.qp_type = IB_QPT_RC;
@@ -287,8 +270,7 @@ isert_free_comps(struct isert_device *device)
287} 270}
288 271
289static int 272static int
290isert_alloc_comps(struct isert_device *device, 273isert_alloc_comps(struct isert_device *device)
291 struct ib_device_attr *attr)
292{ 274{
293 int i, max_cqe, ret = 0; 275 int i, max_cqe, ret = 0;
294 276
@@ -308,7 +290,7 @@ isert_alloc_comps(struct isert_device *device,
308 return -ENOMEM; 290 return -ENOMEM;
309 } 291 }
310 292
311 max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe); 293 max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
312 294
313 for (i = 0; i < device->comps_used; i++) { 295 for (i = 0; i < device->comps_used; i++) {
314 struct ib_cq_init_attr cq_attr = {}; 296 struct ib_cq_init_attr cq_attr = {};
@@ -344,17 +326,15 @@ out_cq:
344static int 326static int
345isert_create_device_ib_res(struct isert_device *device) 327isert_create_device_ib_res(struct isert_device *device)
346{ 328{
347 struct ib_device_attr *dev_attr; 329 struct ib_device *ib_dev = device->ib_device;
348 int ret; 330 int ret;
349 331
350 dev_attr = &device->dev_attr; 332 isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
351 ret = isert_query_device(device->ib_device, dev_attr); 333 isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
352 if (ret)
353 goto out;
354 334
 355 /* assign function handlers */ 335 /* assign function handlers */
356 if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS && 336 if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
357 dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) { 337 ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
358 device->use_fastreg = 1; 338 device->use_fastreg = 1;
359 device->reg_rdma_mem = isert_reg_rdma; 339 device->reg_rdma_mem = isert_reg_rdma;
360 device->unreg_rdma_mem = isert_unreg_rdma; 340 device->unreg_rdma_mem = isert_unreg_rdma;
@@ -364,11 +344,11 @@ isert_create_device_ib_res(struct isert_device *device)
364 device->unreg_rdma_mem = isert_unmap_cmd; 344 device->unreg_rdma_mem = isert_unmap_cmd;
365 } 345 }
366 346
367 ret = isert_alloc_comps(device, dev_attr); 347 ret = isert_alloc_comps(device);
368 if (ret) 348 if (ret)
369 goto out; 349 goto out;
370 350
371 device->pd = ib_alloc_pd(device->ib_device); 351 device->pd = ib_alloc_pd(ib_dev);
372 if (IS_ERR(device->pd)) { 352 if (IS_ERR(device->pd)) {
373 ret = PTR_ERR(device->pd); 353 ret = PTR_ERR(device->pd);
374 isert_err("failed to allocate pd, device %p, ret=%d\n", 354 isert_err("failed to allocate pd, device %p, ret=%d\n",
@@ -377,7 +357,7 @@ isert_create_device_ib_res(struct isert_device *device)
377 } 357 }
378 358
379 /* Check signature cap */ 359 /* Check signature cap */
380 device->pi_capable = dev_attr->device_cap_flags & 360 device->pi_capable = ib_dev->attrs.device_cap_flags &
381 IB_DEVICE_SIGNATURE_HANDOVER ? true : false; 361 IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
382 362
383 return 0; 363 return 0;
@@ -676,6 +656,32 @@ out_login_buf:
676 return ret; 656 return ret;
677} 657}
678 658
659static void
660isert_set_nego_params(struct isert_conn *isert_conn,
661 struct rdma_conn_param *param)
662{
663 struct ib_device_attr *attr = &isert_conn->device->ib_device->attrs;
664
665 /* Set max inflight RDMA READ requests */
666 isert_conn->initiator_depth = min_t(u8, param->initiator_depth,
667 attr->max_qp_init_rd_atom);
668 isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
669
670 if (param->private_data) {
671 u8 flags = *(u8 *)param->private_data;
672
673 /*
 674 * use remote invalidation if both the initiator
675 * and the HCA support it
676 */
677 isert_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP) &&
678 (attr->device_cap_flags &
679 IB_DEVICE_MEM_MGT_EXTENSIONS);
680 if (isert_conn->snd_w_inv)
681 isert_info("Using remote invalidation\n");
682 }
683}
684
679static int 685static int
680isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 686isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
681{ 687{
@@ -714,11 +720,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
714 } 720 }
715 isert_conn->device = device; 721 isert_conn->device = device;
716 722
717 /* Set max inflight RDMA READ requests */ 723 isert_set_nego_params(isert_conn, &event->param.conn);
718 isert_conn->initiator_depth = min_t(u8,
719 event->param.conn.initiator_depth,
720 device->dev_attr.max_qp_init_rd_atom);
721 isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
722 724
723 ret = isert_conn_setup_qp(isert_conn, cma_id); 725 ret = isert_conn_setup_qp(isert_conn, cma_id);
724 if (ret) 726 if (ret)
@@ -1050,8 +1052,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
1050 ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr, 1052 ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
1051 ISER_HEADERS_LEN, DMA_TO_DEVICE); 1053 ISER_HEADERS_LEN, DMA_TO_DEVICE);
1052 1054
1053 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); 1055 memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
1054 tx_desc->iser_header.flags = ISER_VER; 1056 tx_desc->iser_header.flags = ISCSI_CTRL;
1055 1057
1056 tx_desc->num_sge = 1; 1058 tx_desc->num_sge = 1;
1057 tx_desc->isert_cmd = isert_cmd; 1059 tx_desc->isert_cmd = isert_cmd;
@@ -1097,7 +1099,14 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
1097 1099
1098 isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND; 1100 isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
1099 send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; 1101 send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
1100 send_wr->opcode = IB_WR_SEND; 1102
1103 if (isert_conn->snd_w_inv && isert_cmd->inv_rkey) {
1104 send_wr->opcode = IB_WR_SEND_WITH_INV;
1105 send_wr->ex.invalidate_rkey = isert_cmd->inv_rkey;
1106 } else {
1107 send_wr->opcode = IB_WR_SEND;
1108 }
1109
1101 send_wr->sg_list = &tx_desc->tx_sg[0]; 1110 send_wr->sg_list = &tx_desc->tx_sg[0];
1102 send_wr->num_sge = isert_cmd->tx_desc.num_sge; 1111 send_wr->num_sge = isert_cmd->tx_desc.num_sge;
1103 send_wr->send_flags = IB_SEND_SIGNALED; 1112 send_wr->send_flags = IB_SEND_SIGNALED;
@@ -1486,6 +1495,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
1486 isert_cmd->read_va = read_va; 1495 isert_cmd->read_va = read_va;
1487 isert_cmd->write_stag = write_stag; 1496 isert_cmd->write_stag = write_stag;
1488 isert_cmd->write_va = write_va; 1497 isert_cmd->write_va = write_va;
1498 isert_cmd->inv_rkey = read_stag ? read_stag : write_stag;
1489 1499
1490 ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd, 1500 ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
1491 rx_desc, (unsigned char *)hdr); 1501 rx_desc, (unsigned char *)hdr);
@@ -1543,21 +1553,21 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
1543static void 1553static void
1544isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn) 1554isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
1545{ 1555{
1546 struct iser_hdr *iser_hdr = &rx_desc->iser_header; 1556 struct iser_ctrl *iser_ctrl = &rx_desc->iser_header;
1547 uint64_t read_va = 0, write_va = 0; 1557 uint64_t read_va = 0, write_va = 0;
1548 uint32_t read_stag = 0, write_stag = 0; 1558 uint32_t read_stag = 0, write_stag = 0;
1549 1559
1550 switch (iser_hdr->flags & 0xF0) { 1560 switch (iser_ctrl->flags & 0xF0) {
1551 case ISCSI_CTRL: 1561 case ISCSI_CTRL:
1552 if (iser_hdr->flags & ISER_RSV) { 1562 if (iser_ctrl->flags & ISER_RSV) {
1553 read_stag = be32_to_cpu(iser_hdr->read_stag); 1563 read_stag = be32_to_cpu(iser_ctrl->read_stag);
1554 read_va = be64_to_cpu(iser_hdr->read_va); 1564 read_va = be64_to_cpu(iser_ctrl->read_va);
1555 isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n", 1565 isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
1556 read_stag, (unsigned long long)read_va); 1566 read_stag, (unsigned long long)read_va);
1557 } 1567 }
1558 if (iser_hdr->flags & ISER_WSV) { 1568 if (iser_ctrl->flags & ISER_WSV) {
1559 write_stag = be32_to_cpu(iser_hdr->write_stag); 1569 write_stag = be32_to_cpu(iser_ctrl->write_stag);
1560 write_va = be64_to_cpu(iser_hdr->write_va); 1570 write_va = be64_to_cpu(iser_ctrl->write_va);
1561 isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n", 1571 isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
1562 write_stag, (unsigned long long)write_va); 1572 write_stag, (unsigned long long)write_va);
1563 } 1573 }
@@ -1568,7 +1578,7 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
1568 isert_err("iSER Hello message\n"); 1578 isert_err("iSER Hello message\n");
1569 break; 1579 break;
1570 default: 1580 default:
1571 isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags); 1581 isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_ctrl->flags);
1572 break; 1582 break;
1573 } 1583 }
1574 1584
@@ -3095,12 +3105,20 @@ isert_rdma_accept(struct isert_conn *isert_conn)
3095 struct rdma_cm_id *cm_id = isert_conn->cm_id; 3105 struct rdma_cm_id *cm_id = isert_conn->cm_id;
3096 struct rdma_conn_param cp; 3106 struct rdma_conn_param cp;
3097 int ret; 3107 int ret;
3108 struct iser_cm_hdr rsp_hdr;
3098 3109
3099 memset(&cp, 0, sizeof(struct rdma_conn_param)); 3110 memset(&cp, 0, sizeof(struct rdma_conn_param));
3100 cp.initiator_depth = isert_conn->initiator_depth; 3111 cp.initiator_depth = isert_conn->initiator_depth;
3101 cp.retry_count = 7; 3112 cp.retry_count = 7;
3102 cp.rnr_retry_count = 7; 3113 cp.rnr_retry_count = 7;
3103 3114
3115 memset(&rsp_hdr, 0, sizeof(rsp_hdr));
3116 rsp_hdr.flags = ISERT_ZBVA_NOT_USED;
3117 if (!isert_conn->snd_w_inv)
3118 rsp_hdr.flags = rsp_hdr.flags | ISERT_SEND_W_INV_NOT_USED;
3119 cp.private_data = (void *)&rsp_hdr;
3120 cp.private_data_len = sizeof(rsp_hdr);
3121
3104 ret = rdma_accept(cm_id, &cp); 3122 ret = rdma_accept(cm_id, &cp);
3105 if (ret) { 3123 if (ret) {
3106 isert_err("rdma_accept() failed with: %d\n", ret); 3124 isert_err("rdma_accept() failed with: %d\n", ret);
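
The isert changes above add the target side of the send-with-invalidate negotiation: the target records the rkey the initiator advertised (inv_rkey), reports its own capability in the rdma_accept() private data, and, once both sides agree (snd_w_inv), replies with IB_WR_SEND_WITH_INV so the initiator's HCA invalidates that rkey on arrival. A small sketch of the opcode selection only; my_set_send_opcode and its arguments are illustrative, not the isert structures:

#include <rdma/ib_verbs.h>

/* sketch: pick SEND vs SEND_WITH_INV once remote invalidation was negotiated */
static void my_set_send_opcode(struct ib_send_wr *wr, bool snd_w_inv,
			       u32 inv_rkey)
{
	if (snd_w_inv && inv_rkey) {
		/* the peer HCA invalidates inv_rkey when this SEND arrives */
		wr->opcode = IB_WR_SEND_WITH_INV;
		wr->ex.invalidate_rkey = inv_rkey;
	} else {
		wr->opcode = IB_WR_SEND;
	}
}

The flag byte exchanged in the CM private data (ISER_SEND_W_INV_NOT_SUP on the initiator, ISERT_SEND_W_INV_NOT_USED on the target) is what feeds snd_w_inv on each side.
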
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 3d7fbc47c343..8d50453eef66 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -3,6 +3,8 @@
3#include <linux/in6.h> 3#include <linux/in6.h>
4#include <rdma/ib_verbs.h> 4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h> 5#include <rdma/rdma_cm.h>
6#include <scsi/iser.h>
7
6 8
7#define DRV_NAME "isert" 9#define DRV_NAME "isert"
8#define PFX DRV_NAME ": " 10#define PFX DRV_NAME ": "
@@ -31,6 +33,38 @@
31#define isert_err(fmt, arg...) \ 33#define isert_err(fmt, arg...) \
32 pr_err(PFX "%s: " fmt, __func__ , ## arg) 34 pr_err(PFX "%s: " fmt, __func__ , ## arg)
33 35
36/* Constant PDU lengths calculations */
37#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + \
38 sizeof(struct iscsi_hdr))
39#define ISER_RECV_DATA_SEG_LEN 8192
40#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
41#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
42
43/* QP settings */
44/* Maximal bounds on received asynchronous PDUs */
45#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
46
47#define ISERT_MAX_RX_MISC_PDUS 6 /*
48 * NOOP_OUT(2), TEXT(1),
49 * SCSI_TMFUNC(2), LOGOUT(1)
50 */
51
52#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
53
54#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
55
56#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
57
58#define ISERT_INFLIGHT_DATAOUTS 8
59
60#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
61 (1 + ISERT_INFLIGHT_DATAOUTS) + \
62 ISERT_MAX_TX_MISC_PDUS + \
63 ISERT_MAX_RX_MISC_PDUS)
64
65#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
66 (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
67
34#define ISCSI_ISER_SG_TABLESIZE 256 68#define ISCSI_ISER_SG_TABLESIZE 256
35#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL 69#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
36#define ISER_BEACON_WRID 0xfffffffffffffffeULL 70#define ISER_BEACON_WRID 0xfffffffffffffffeULL
@@ -56,7 +90,7 @@ enum iser_conn_state {
56}; 90};
57 91
58struct iser_rx_desc { 92struct iser_rx_desc {
59 struct iser_hdr iser_header; 93 struct iser_ctrl iser_header;
60 struct iscsi_hdr iscsi_header; 94 struct iscsi_hdr iscsi_header;
61 char data[ISER_RECV_DATA_SEG_LEN]; 95 char data[ISER_RECV_DATA_SEG_LEN];
62 u64 dma_addr; 96 u64 dma_addr;
@@ -65,7 +99,7 @@ struct iser_rx_desc {
65} __packed; 99} __packed;
66 100
67struct iser_tx_desc { 101struct iser_tx_desc {
68 struct iser_hdr iser_header; 102 struct iser_ctrl iser_header;
69 struct iscsi_hdr iscsi_header; 103 struct iscsi_hdr iscsi_header;
70 enum isert_desc_type type; 104 enum isert_desc_type type;
71 u64 dma_addr; 105 u64 dma_addr;
@@ -129,6 +163,7 @@ struct isert_cmd {
129 uint32_t write_stag; 163 uint32_t write_stag;
130 uint64_t read_va; 164 uint64_t read_va;
131 uint64_t write_va; 165 uint64_t write_va;
166 uint32_t inv_rkey;
132 u64 pdu_buf_dma; 167 u64 pdu_buf_dma;
133 u32 pdu_buf_len; 168 u32 pdu_buf_len;
134 struct isert_conn *conn; 169 struct isert_conn *conn;
@@ -176,6 +211,7 @@ struct isert_conn {
176 struct work_struct release_work; 211 struct work_struct release_work;
177 struct ib_recv_wr beacon; 212 struct ib_recv_wr beacon;
178 bool logout_posted; 213 bool logout_posted;
214 bool snd_w_inv;
179}; 215};
180 216
181#define ISERT_MAX_CQ 64 217#define ISERT_MAX_CQ 64
@@ -207,7 +243,6 @@ struct isert_device {
207 struct isert_comp *comps; 243 struct isert_comp *comps;
208 int comps_used; 244 int comps_used;
209 struct list_head dev_node; 245 struct list_head dev_node;
210 struct ib_device_attr dev_attr;
211 int (*reg_rdma_mem)(struct iscsi_conn *conn, 246 int (*reg_rdma_mem)(struct iscsi_conn *conn,
212 struct iscsi_cmd *cmd, 247 struct iscsi_cmd *cmd,
213 struct isert_rdma_wr *wr); 248 struct isert_rdma_wr *wr);
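
Throughout these hunks the per-driver ib_device_attr cache (the dev_attr member filled by ib_query_device()) is dropped; limits are read straight from ib_device->attrs, which the core keeps for every registered device. A sketch of the resulting capping logic, with my_cap_limits as a hypothetical helper rather than code from this patch:

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

static void my_cap_limits(struct ib_device *ib_dev,
			  struct ib_qp_init_attr *init_attr, int *max_cqe)
{
	struct ib_device_attr *attrs = &ib_dev->attrs;

	/* stay within the device's per-QP and per-CQ limits */
	init_attr->cap.max_send_sge = attrs->max_sge;
	if (init_attr->cap.max_send_wr > attrs->max_qp_wr)
		init_attr->cap.max_send_wr = attrs->max_qp_wr;
	*max_cqe = min_t(int, *max_cqe, attrs->max_cqe);
}
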
diff --git a/drivers/infiniband/ulp/isert/isert_proto.h b/drivers/infiniband/ulp/isert/isert_proto.h
deleted file mode 100644
index 4dccd313b777..000000000000
--- a/drivers/infiniband/ulp/isert/isert_proto.h
+++ /dev/null
@@ -1,47 +0,0 @@
1/* From iscsi_iser.h */
2
3struct iser_hdr {
4 u8 flags;
5 u8 rsvd[3];
6 __be32 write_stag; /* write rkey */
7 __be64 write_va;
8 __be32 read_stag; /* read rkey */
9 __be64 read_va;
10} __packed;
11
12/*Constant PDU lengths calculations */
13#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
14
15#define ISER_RECV_DATA_SEG_LEN 8192
16#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
17#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
18
19/* QP settings */
20/* Maximal bounds on received asynchronous PDUs */
21#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
22
23#define ISERT_MAX_RX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
24 * SCSI_TMFUNC(2), LOGOUT(1) */
25
26#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
27
28#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
29
30#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
31
32#define ISERT_INFLIGHT_DATAOUTS 8
33
34#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
35 (1 + ISERT_INFLIGHT_DATAOUTS) + \
36 ISERT_MAX_TX_MISC_PDUS + \
37 ISERT_MAX_RX_MISC_PDUS)
38
39#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
40 (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
41
42#define ISER_VER 0x10
43#define ISER_WSV 0x08
44#define ISER_RSV 0x04
45#define ISCSI_CTRL 0x10
46#define ISER_HELLO 0x20
47#define ISER_HELLORPLY 0x30
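
The private isert_proto.h copy above is removed in favour of the shared <scsi/iser.h>, where the same wire header is called struct iser_ctrl; its flags, write_stag/write_va and read_stag/read_va fields keep the layout of the deleted struct iser_hdr. A sketch of how those fields are decoded, mirroring isert_rx_do_work() earlier in the diff; my_decode_hdr is an illustrative helper:

#include <linux/types.h>
#include <asm/byteorder.h>
#include <scsi/iser.h>

static void my_decode_hdr(struct iser_ctrl *hdr,
			  u32 *read_stag, u64 *read_va,
			  u32 *write_stag, u64 *write_va)
{
	*read_stag = *write_stag = 0;
	*read_va = *write_va = 0;

	if (hdr->flags & ISER_RSV) {	/* remote read stag/VA are valid */
		*read_stag = be32_to_cpu(hdr->read_stag);
		*read_va = be64_to_cpu(hdr->read_va);
	}
	if (hdr->flags & ISER_WSV) {	/* remote write stag/VA are valid */
		*write_stag = be32_to_cpu(hdr->write_stag);
		*write_va = be64_to_cpu(hdr->write_va);
	}
}
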
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 3db9a659719b..03022f6420d7 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -132,8 +132,9 @@ MODULE_PARM_DESC(ch_count,
132 132
133static void srp_add_one(struct ib_device *device); 133static void srp_add_one(struct ib_device *device);
134static void srp_remove_one(struct ib_device *device, void *client_data); 134static void srp_remove_one(struct ib_device *device, void *client_data);
135static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr); 135static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
136static void srp_send_completion(struct ib_cq *cq, void *ch_ptr); 136static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
137 const char *opname);
137static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); 138static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
138 139
139static struct scsi_transport_template *ib_srp_transport_template; 140static struct scsi_transport_template *ib_srp_transport_template;
@@ -445,6 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
445 dev->max_pages_per_mr); 446 dev->max_pages_per_mr);
446} 447}
447 448
449static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
450{
451 struct srp_rdma_ch *ch = cq->cq_context;
452
453 complete(&ch->done);
454}
455
456static struct ib_cqe srp_drain_cqe = {
457 .done = srp_drain_done,
458};
459
448/** 460/**
449 * srp_destroy_qp() - destroy an RDMA queue pair 461 * srp_destroy_qp() - destroy an RDMA queue pair
450 * @ch: SRP RDMA channel. 462 * @ch: SRP RDMA channel.
@@ -457,10 +469,11 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
457static void srp_destroy_qp(struct srp_rdma_ch *ch) 469static void srp_destroy_qp(struct srp_rdma_ch *ch)
458{ 470{
459 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; 471 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
460 static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID }; 472 static struct ib_recv_wr wr = { 0 };
461 struct ib_recv_wr *bad_wr; 473 struct ib_recv_wr *bad_wr;
462 int ret; 474 int ret;
463 475
476 wr.wr_cqe = &srp_drain_cqe;
464 /* Destroying a QP and reusing ch->done is only safe if not connected */ 477 /* Destroying a QP and reusing ch->done is only safe if not connected */
465 WARN_ON_ONCE(ch->connected); 478 WARN_ON_ONCE(ch->connected);
466 479
@@ -489,34 +502,27 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
489 struct ib_fmr_pool *fmr_pool = NULL; 502 struct ib_fmr_pool *fmr_pool = NULL;
490 struct srp_fr_pool *fr_pool = NULL; 503 struct srp_fr_pool *fr_pool = NULL;
491 const int m = dev->use_fast_reg ? 3 : 1; 504 const int m = dev->use_fast_reg ? 3 : 1;
492 struct ib_cq_init_attr cq_attr = {};
493 int ret; 505 int ret;
494 506
495 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 507 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
496 if (!init_attr) 508 if (!init_attr)
497 return -ENOMEM; 509 return -ENOMEM;
498 510
499 /* + 1 for SRP_LAST_WR_ID */ 511 /* queue_size + 1 for ib_drain_qp */
500 cq_attr.cqe = target->queue_size + 1; 512 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
501 cq_attr.comp_vector = ch->comp_vector; 513 ch->comp_vector, IB_POLL_SOFTIRQ);
502 recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
503 &cq_attr);
504 if (IS_ERR(recv_cq)) { 514 if (IS_ERR(recv_cq)) {
505 ret = PTR_ERR(recv_cq); 515 ret = PTR_ERR(recv_cq);
506 goto err; 516 goto err;
507 } 517 }
508 518
509 cq_attr.cqe = m * target->queue_size; 519 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
510 cq_attr.comp_vector = ch->comp_vector; 520 ch->comp_vector, IB_POLL_DIRECT);
511 send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
512 &cq_attr);
513 if (IS_ERR(send_cq)) { 521 if (IS_ERR(send_cq)) {
514 ret = PTR_ERR(send_cq); 522 ret = PTR_ERR(send_cq);
515 goto err_recv_cq; 523 goto err_recv_cq;
516 } 524 }
517 525
518 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
519
520 init_attr->event_handler = srp_qp_event; 526 init_attr->event_handler = srp_qp_event;
521 init_attr->cap.max_send_wr = m * target->queue_size; 527 init_attr->cap.max_send_wr = m * target->queue_size;
522 init_attr->cap.max_recv_wr = target->queue_size + 1; 528 init_attr->cap.max_recv_wr = target->queue_size + 1;
@@ -558,9 +564,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
558 if (ch->qp) 564 if (ch->qp)
559 srp_destroy_qp(ch); 565 srp_destroy_qp(ch);
560 if (ch->recv_cq) 566 if (ch->recv_cq)
561 ib_destroy_cq(ch->recv_cq); 567 ib_free_cq(ch->recv_cq);
562 if (ch->send_cq) 568 if (ch->send_cq)
563 ib_destroy_cq(ch->send_cq); 569 ib_free_cq(ch->send_cq);
564 570
565 ch->qp = qp; 571 ch->qp = qp;
566 ch->recv_cq = recv_cq; 572 ch->recv_cq = recv_cq;
@@ -580,13 +586,13 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
580 return 0; 586 return 0;
581 587
582err_qp: 588err_qp:
583 ib_destroy_qp(qp); 589 srp_destroy_qp(ch);
584 590
585err_send_cq: 591err_send_cq:
586 ib_destroy_cq(send_cq); 592 ib_free_cq(send_cq);
587 593
588err_recv_cq: 594err_recv_cq:
589 ib_destroy_cq(recv_cq); 595 ib_free_cq(recv_cq);
590 596
591err: 597err:
592 kfree(init_attr); 598 kfree(init_attr);
@@ -622,9 +628,10 @@ static void srp_free_ch_ib(struct srp_target_port *target,
622 if (ch->fmr_pool) 628 if (ch->fmr_pool)
623 ib_destroy_fmr_pool(ch->fmr_pool); 629 ib_destroy_fmr_pool(ch->fmr_pool);
624 } 630 }
631
625 srp_destroy_qp(ch); 632 srp_destroy_qp(ch);
626 ib_destroy_cq(ch->send_cq); 633 ib_free_cq(ch->send_cq);
627 ib_destroy_cq(ch->recv_cq); 634 ib_free_cq(ch->recv_cq);
628 635
629 /* 636 /*
630 * Avoid that the SCSI error handler tries to use this channel after 637 * Avoid that the SCSI error handler tries to use this channel after
@@ -1041,18 +1048,25 @@ out:
1041 return ret <= 0 ? ret : -ENODEV; 1048 return ret <= 0 ? ret : -ENODEV;
1042} 1049}
1043 1050
1044static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey) 1051static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1052{
1053 srp_handle_qp_err(cq, wc, "INV RKEY");
1054}
1055
1056static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1057 u32 rkey)
1045{ 1058{
1046 struct ib_send_wr *bad_wr; 1059 struct ib_send_wr *bad_wr;
1047 struct ib_send_wr wr = { 1060 struct ib_send_wr wr = {
1048 .opcode = IB_WR_LOCAL_INV, 1061 .opcode = IB_WR_LOCAL_INV,
1049 .wr_id = LOCAL_INV_WR_ID_MASK,
1050 .next = NULL, 1062 .next = NULL,
1051 .num_sge = 0, 1063 .num_sge = 0,
1052 .send_flags = 0, 1064 .send_flags = 0,
1053 .ex.invalidate_rkey = rkey, 1065 .ex.invalidate_rkey = rkey,
1054 }; 1066 };
1055 1067
1068 wr.wr_cqe = &req->reg_cqe;
1069 req->reg_cqe.done = srp_inv_rkey_err_done;
1056 return ib_post_send(ch->qp, &wr, &bad_wr); 1070 return ib_post_send(ch->qp, &wr, &bad_wr);
1057} 1071}
1058 1072
@@ -1074,7 +1088,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
1074 struct srp_fr_desc **pfr; 1088 struct srp_fr_desc **pfr;
1075 1089
1076 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { 1090 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1077 res = srp_inv_rkey(ch, (*pfr)->mr->rkey); 1091 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1078 if (res < 0) { 1092 if (res < 0) {
1079 shost_printk(KERN_ERR, target->scsi_host, PFX 1093 shost_printk(KERN_ERR, target->scsi_host, PFX
1080 "Queueing INV WR for rkey %#x failed (%d)\n", 1094 "Queueing INV WR for rkey %#x failed (%d)\n",
@@ -1312,7 +1326,13 @@ reset_state:
1312 return 0; 1326 return 0;
1313} 1327}
1314 1328
1329static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1330{
1331 srp_handle_qp_err(cq, wc, "FAST REG");
1332}
1333
1315static int srp_map_finish_fr(struct srp_map_state *state, 1334static int srp_map_finish_fr(struct srp_map_state *state,
1335 struct srp_request *req,
1316 struct srp_rdma_ch *ch, int sg_nents) 1336 struct srp_rdma_ch *ch, int sg_nents)
1317{ 1337{
1318 struct srp_target_port *target = ch->target; 1338 struct srp_target_port *target = ch->target;
@@ -1349,9 +1369,11 @@ static int srp_map_finish_fr(struct srp_map_state *state,
1349 if (unlikely(n < 0)) 1369 if (unlikely(n < 0))
1350 return n; 1370 return n;
1351 1371
1372 req->reg_cqe.done = srp_reg_mr_err_done;
1373
1352 wr.wr.next = NULL; 1374 wr.wr.next = NULL;
1353 wr.wr.opcode = IB_WR_REG_MR; 1375 wr.wr.opcode = IB_WR_REG_MR;
1354 wr.wr.wr_id = FAST_REG_WR_ID_MASK; 1376 wr.wr.wr_cqe = &req->reg_cqe;
1355 wr.wr.num_sge = 0; 1377 wr.wr.num_sge = 0;
1356 wr.wr.send_flags = 0; 1378 wr.wr.send_flags = 0;
1357 wr.mr = desc->mr; 1379 wr.mr = desc->mr;
@@ -1455,7 +1477,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1455 while (count) { 1477 while (count) {
1456 int i, n; 1478 int i, n;
1457 1479
1458 n = srp_map_finish_fr(state, ch, count); 1480 n = srp_map_finish_fr(state, req, ch, count);
1459 if (unlikely(n < 0)) 1481 if (unlikely(n < 0))
1460 return n; 1482 return n;
1461 1483
@@ -1524,7 +1546,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1524#ifdef CONFIG_NEED_SG_DMA_LENGTH 1546#ifdef CONFIG_NEED_SG_DMA_LENGTH
1525 idb_sg->dma_length = idb_sg->length; /* hack^2 */ 1547 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1526#endif 1548#endif
1527 ret = srp_map_finish_fr(&state, ch, 1); 1549 ret = srp_map_finish_fr(&state, req, ch, 1);
1528 if (ret < 0) 1550 if (ret < 0)
1529 return ret; 1551 return ret;
1530 } else if (dev->use_fmr) { 1552 } else if (dev->use_fmr) {
@@ -1719,7 +1741,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1719 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; 1741 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1720 struct srp_iu *iu; 1742 struct srp_iu *iu;
1721 1743
1722 srp_send_completion(ch->send_cq, ch); 1744 ib_process_cq_direct(ch->send_cq, -1);
1723 1745
1724 if (list_empty(&ch->free_tx)) 1746 if (list_empty(&ch->free_tx))
1725 return NULL; 1747 return NULL;
@@ -1739,6 +1761,19 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1739 return iu; 1761 return iu;
1740} 1762}
1741 1763
1764static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1765{
1766 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1767 struct srp_rdma_ch *ch = cq->cq_context;
1768
1769 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1770 srp_handle_qp_err(cq, wc, "SEND");
1771 return;
1772 }
1773
1774 list_add(&iu->list, &ch->free_tx);
1775}
1776
1742static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) 1777static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1743{ 1778{
1744 struct srp_target_port *target = ch->target; 1779 struct srp_target_port *target = ch->target;
@@ -1749,8 +1784,10 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1749 list.length = len; 1784 list.length = len;
1750 list.lkey = target->lkey; 1785 list.lkey = target->lkey;
1751 1786
1787 iu->cqe.done = srp_send_done;
1788
1752 wr.next = NULL; 1789 wr.next = NULL;
1753 wr.wr_id = (uintptr_t) iu; 1790 wr.wr_cqe = &iu->cqe;
1754 wr.sg_list = &list; 1791 wr.sg_list = &list;
1755 wr.num_sge = 1; 1792 wr.num_sge = 1;
1756 wr.opcode = IB_WR_SEND; 1793 wr.opcode = IB_WR_SEND;
@@ -1769,8 +1806,10 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1769 list.length = iu->size; 1806 list.length = iu->size;
1770 list.lkey = target->lkey; 1807 list.lkey = target->lkey;
1771 1808
1809 iu->cqe.done = srp_recv_done;
1810
1772 wr.next = NULL; 1811 wr.next = NULL;
1773 wr.wr_id = (uintptr_t) iu; 1812 wr.wr_cqe = &iu->cqe;
1774 wr.sg_list = &list; 1813 wr.sg_list = &list;
1775 wr.num_sge = 1; 1814 wr.num_sge = 1;
1776 1815
@@ -1902,14 +1941,20 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch,
1902 "problems processing SRP_AER_REQ\n"); 1941 "problems processing SRP_AER_REQ\n");
1903} 1942}
1904 1943
1905static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc) 1944static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1906{ 1945{
1946 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1947 struct srp_rdma_ch *ch = cq->cq_context;
1907 struct srp_target_port *target = ch->target; 1948 struct srp_target_port *target = ch->target;
1908 struct ib_device *dev = target->srp_host->srp_dev->dev; 1949 struct ib_device *dev = target->srp_host->srp_dev->dev;
1909 struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1910 int res; 1950 int res;
1911 u8 opcode; 1951 u8 opcode;
1912 1952
1953 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1954 srp_handle_qp_err(cq, wc, "RECV");
1955 return;
1956 }
1957
1913 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, 1958 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1914 DMA_FROM_DEVICE); 1959 DMA_FROM_DEVICE);
1915 1960
@@ -1972,68 +2017,22 @@ static void srp_tl_err_work(struct work_struct *work)
1972 srp_start_tl_fail_timers(target->rport); 2017 srp_start_tl_fail_timers(target->rport);
1973} 2018}
1974 2019
1975static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, 2020static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
1976 bool send_err, struct srp_rdma_ch *ch) 2021 const char *opname)
1977{ 2022{
2023 struct srp_rdma_ch *ch = cq->cq_context;
1978 struct srp_target_port *target = ch->target; 2024 struct srp_target_port *target = ch->target;
1979 2025
1980 if (wr_id == SRP_LAST_WR_ID) {
1981 complete(&ch->done);
1982 return;
1983 }
1984
1985 if (ch->connected && !target->qp_in_error) { 2026 if (ch->connected && !target->qp_in_error) {
1986 if (wr_id & LOCAL_INV_WR_ID_MASK) { 2027 shost_printk(KERN_ERR, target->scsi_host,
1987 shost_printk(KERN_ERR, target->scsi_host, PFX 2028 PFX "failed %s status %s (%d) for CQE %p\n",
1988 "LOCAL_INV failed with status %s (%d)\n", 2029 opname, ib_wc_status_msg(wc->status), wc->status,
1989 ib_wc_status_msg(wc_status), wc_status); 2030 wc->wr_cqe);
1990 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1991 shost_printk(KERN_ERR, target->scsi_host, PFX
1992 "FAST_REG_MR failed status %s (%d)\n",
1993 ib_wc_status_msg(wc_status), wc_status);
1994 } else {
1995 shost_printk(KERN_ERR, target->scsi_host,
1996 PFX "failed %s status %s (%d) for iu %p\n",
1997 send_err ? "send" : "receive",
1998 ib_wc_status_msg(wc_status), wc_status,
1999 (void *)(uintptr_t)wr_id);
2000 }
2001 queue_work(system_long_wq, &target->tl_err_work); 2031 queue_work(system_long_wq, &target->tl_err_work);
2002 } 2032 }
2003 target->qp_in_error = true; 2033 target->qp_in_error = true;
2004} 2034}
2005 2035
2006static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
2007{
2008 struct srp_rdma_ch *ch = ch_ptr;
2009 struct ib_wc wc;
2010
2011 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2012 while (ib_poll_cq(cq, 1, &wc) > 0) {
2013 if (likely(wc.status == IB_WC_SUCCESS)) {
2014 srp_handle_recv(ch, &wc);
2015 } else {
2016 srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
2017 }
2018 }
2019}
2020
2021static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
2022{
2023 struct srp_rdma_ch *ch = ch_ptr;
2024 struct ib_wc wc;
2025 struct srp_iu *iu;
2026
2027 while (ib_poll_cq(cq, 1, &wc) > 0) {
2028 if (likely(wc.status == IB_WC_SUCCESS)) {
2029 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
2030 list_add(&iu->list, &ch->free_tx);
2031 } else {
2032 srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
2033 }
2034 }
2035}
2036
2037static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 2036static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2038{ 2037{
2039 struct srp_target_port *target = host_to_target(shost); 2038 struct srp_target_port *target = host_to_target(shost);
@@ -3439,27 +3438,17 @@ free_host:
3439static void srp_add_one(struct ib_device *device) 3438static void srp_add_one(struct ib_device *device)
3440{ 3439{
3441 struct srp_device *srp_dev; 3440 struct srp_device *srp_dev;
3442 struct ib_device_attr *dev_attr;
3443 struct srp_host *host; 3441 struct srp_host *host;
3444 int mr_page_shift, p; 3442 int mr_page_shift, p;
3445 u64 max_pages_per_mr; 3443 u64 max_pages_per_mr;
3446 3444
3447 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
3448 if (!dev_attr)
3449 return;
3450
3451 if (ib_query_device(device, dev_attr)) {
3452 pr_warn("Query device failed for %s\n", device->name);
3453 goto free_attr;
3454 }
3455
3456 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL); 3445 srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
3457 if (!srp_dev) 3446 if (!srp_dev)
3458 goto free_attr; 3447 return;
3459 3448
3460 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && 3449 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3461 device->map_phys_fmr && device->unmap_fmr); 3450 device->map_phys_fmr && device->unmap_fmr);
3462 srp_dev->has_fr = (dev_attr->device_cap_flags & 3451 srp_dev->has_fr = (device->attrs.device_cap_flags &
3463 IB_DEVICE_MEM_MGT_EXTENSIONS); 3452 IB_DEVICE_MEM_MGT_EXTENSIONS);
3464 if (!srp_dev->has_fmr && !srp_dev->has_fr) 3453 if (!srp_dev->has_fmr && !srp_dev->has_fr)
3465 dev_warn(&device->dev, "neither FMR nor FR is supported\n"); 3454 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
@@ -3473,23 +3462,23 @@ static void srp_add_one(struct ib_device *device)
3473 * minimum of 4096 bytes. We're unlikely to build large sglists 3462 * minimum of 4096 bytes. We're unlikely to build large sglists
3474 * out of smaller entries. 3463 * out of smaller entries.
3475 */ 3464 */
3476 mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1); 3465 mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
3477 srp_dev->mr_page_size = 1 << mr_page_shift; 3466 srp_dev->mr_page_size = 1 << mr_page_shift;
3478 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); 3467 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3479 max_pages_per_mr = dev_attr->max_mr_size; 3468 max_pages_per_mr = device->attrs.max_mr_size;
3480 do_div(max_pages_per_mr, srp_dev->mr_page_size); 3469 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3481 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, 3470 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3482 max_pages_per_mr); 3471 max_pages_per_mr);
3483 if (srp_dev->use_fast_reg) { 3472 if (srp_dev->use_fast_reg) {
3484 srp_dev->max_pages_per_mr = 3473 srp_dev->max_pages_per_mr =
3485 min_t(u32, srp_dev->max_pages_per_mr, 3474 min_t(u32, srp_dev->max_pages_per_mr,
3486 dev_attr->max_fast_reg_page_list_len); 3475 device->attrs.max_fast_reg_page_list_len);
3487 } 3476 }
3488 srp_dev->mr_max_size = srp_dev->mr_page_size * 3477 srp_dev->mr_max_size = srp_dev->mr_page_size *
3489 srp_dev->max_pages_per_mr; 3478 srp_dev->max_pages_per_mr;
3490 pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", 3479 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3491 device->name, mr_page_shift, dev_attr->max_mr_size, 3480 device->name, mr_page_shift, device->attrs.max_mr_size,
3492 dev_attr->max_fast_reg_page_list_len, 3481 device->attrs.max_fast_reg_page_list_len,
3493 srp_dev->max_pages_per_mr, srp_dev->mr_max_size); 3482 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3494 3483
3495 INIT_LIST_HEAD(&srp_dev->dev_list); 3484 INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -3517,17 +3506,13 @@ static void srp_add_one(struct ib_device *device)
3517 } 3506 }
3518 3507
3519 ib_set_client_data(device, &srp_client, srp_dev); 3508 ib_set_client_data(device, &srp_client, srp_dev);
3520 3509 return;
3521 goto free_attr;
3522 3510
3523err_pd: 3511err_pd:
3524 ib_dealloc_pd(srp_dev->pd); 3512 ib_dealloc_pd(srp_dev->pd);
3525 3513
3526free_dev: 3514free_dev:
3527 kfree(srp_dev); 3515 kfree(srp_dev);
3528
3529free_attr:
3530 kfree(dev_attr);
3531} 3516}
3532 3517
3533static void srp_remove_one(struct ib_device *device, void *client_data) 3518static void srp_remove_one(struct ib_device *device, void *client_data)
@@ -3587,8 +3572,6 @@ static int __init srp_init_module(void)
3587{ 3572{
3588 int ret; 3573 int ret;
3589 3574
3590 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3591
3592 if (srp_sg_tablesize) { 3575 if (srp_sg_tablesize) {
3593 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); 3576 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3594 if (!cmd_sg_entries) 3577 if (!cmd_sg_entries)
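
The SRP hunks above move both channel CQs to ib_alloc_cq(): the receive CQ uses IB_POLL_SOFTIRQ, so the core polls it and calls each wr_cqe->done, while the send CQ uses IB_POLL_DIRECT and is reaped explicitly with ib_process_cq_direct() from the submission path (replacing the old srp_send_completion() call). A sketch of that allocation under the same assumptions; my_alloc_cqs and the size arguments are illustrative:

#include <linux/err.h>
#include <rdma/ib_verbs.h>

static int my_alloc_cqs(struct ib_device *ibdev, void *ctx, int queue_size,
			int comp_vector, struct ib_cq **recv_cq,
			struct ib_cq **send_cq)
{
	/* receive CQ: polled by the core in soft-IRQ context */
	*recv_cq = ib_alloc_cq(ibdev, ctx, queue_size + 1, comp_vector,
			       IB_POLL_SOFTIRQ);
	if (IS_ERR(*recv_cq))
		return PTR_ERR(*recv_cq);

	/* send CQ: no interrupts, drained with ib_process_cq_direct() */
	*send_cq = ib_alloc_cq(ibdev, ctx, queue_size, comp_vector,
			       IB_POLL_DIRECT);
	if (IS_ERR(*send_cq)) {
		ib_free_cq(*recv_cq);
		return PTR_ERR(*send_cq);
	}

	return 0;
}

Reclaiming send slots then becomes an explicit ib_process_cq_direct(send_cq, -1) wherever the driver previously polled the send CQ by hand.
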
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index f6af531f9f32..9e05ce4a04fd 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -66,11 +66,6 @@ enum {
66 SRP_TAG_TSK_MGMT = 1U << 31, 66 SRP_TAG_TSK_MGMT = 1U << 31,
67 67
68 SRP_MAX_PAGES_PER_MR = 512, 68 SRP_MAX_PAGES_PER_MR = 512,
69
70 LOCAL_INV_WR_ID_MASK = 1,
71 FAST_REG_WR_ID_MASK = 2,
72
73 SRP_LAST_WR_ID = 0xfffffffcU,
74}; 69};
75 70
76enum srp_target_state { 71enum srp_target_state {
@@ -128,6 +123,7 @@ struct srp_request {
128 struct srp_direct_buf *indirect_desc; 123 struct srp_direct_buf *indirect_desc;
129 dma_addr_t indirect_dma_addr; 124 dma_addr_t indirect_dma_addr;
130 short nmdesc; 125 short nmdesc;
126 struct ib_cqe reg_cqe;
131}; 127};
132 128
133/** 129/**
@@ -231,6 +227,7 @@ struct srp_iu {
231 void *buf; 227 void *buf;
232 size_t size; 228 size_t size;
233 enum dma_data_direction direction; 229 enum dma_data_direction direction;
230 struct ib_cqe cqe;
234}; 231};
235 232
236/** 233/**
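
The new srp_drain_cqe above relies on completions being returned in posting order: once the QP is in the error state, a freshly posted work request is flushed only after everything queued before it, so its ->done callback can signal that all earlier completions have been consumed. A self-contained sketch of that drain idea for a receive queue; the my_drain_* names are illustrative, and the CQ is assumed to be polled in a context that can run while the caller sleeps (e.g. IB_POLL_SOFTIRQ):

#include <linux/completion.h>
#include <rdma/ib_verbs.h>

struct my_drain {
	struct ib_cqe		cqe;
	struct completion	done;
};

static void my_drain_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_drain *d = container_of(wc->wr_cqe, struct my_drain, cqe);

	complete(&d->done);	/* every earlier recv WR has been flushed */
}

static int my_drain_recv_queue(struct ib_qp *qp)
{
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	struct my_drain d = { .cqe.done = my_drain_done };
	struct ib_recv_wr wr = { .wr_cqe = &d.cqe }, *bad_wr;
	int ret;

	init_completion(&d.done);

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);	/* flush the queues */
	if (ret)
		return ret;

	ret = ib_post_recv(qp, &wr, &bad_wr);		/* marker WR */
	if (ret)
		return ret;

	wait_for_completion(&d.done);
	return 0;
}
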
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index bc5470c43d26..0c37fee363b1 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid,
93static struct ib_client srpt_client; 93static struct ib_client srpt_client;
94static void srpt_release_channel(struct srpt_rdma_ch *ch); 94static void srpt_release_channel(struct srpt_rdma_ch *ch);
95static int srpt_queue_status(struct se_cmd *cmd); 95static int srpt_queue_status(struct se_cmd *cmd);
96static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
97static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
96 98
97/** 99/**
98 * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. 100 * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
@@ -341,10 +343,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
341 memset(iocp, 0, sizeof *iocp); 343 memset(iocp, 0, sizeof *iocp);
342 strcpy(iocp->id_string, SRPT_ID_STRING); 344 strcpy(iocp->id_string, SRPT_ID_STRING);
343 iocp->guid = cpu_to_be64(srpt_service_guid); 345 iocp->guid = cpu_to_be64(srpt_service_guid);
344 iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); 346 iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
345 iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id); 347 iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id);
346 iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver); 348 iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver);
347 iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); 349 iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
348 iocp->subsys_device_id = 0x0; 350 iocp->subsys_device_id = 0x0;
349 iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS); 351 iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
350 iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS); 352 iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
@@ -453,6 +455,7 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
453 * srpt_mad_recv_handler() - MAD reception callback function. 455 * srpt_mad_recv_handler() - MAD reception callback function.
454 */ 456 */
455static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, 457static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
458 struct ib_mad_send_buf *send_buf,
456 struct ib_mad_recv_wc *mad_wc) 459 struct ib_mad_recv_wc *mad_wc)
457{ 460{
458 struct srpt_port *sport = (struct srpt_port *)mad_agent->context; 461 struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
@@ -778,12 +781,12 @@ static int srpt_post_recv(struct srpt_device *sdev,
778 struct ib_recv_wr wr, *bad_wr; 781 struct ib_recv_wr wr, *bad_wr;
779 782
780 BUG_ON(!sdev); 783 BUG_ON(!sdev);
781 wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
782
783 list.addr = ioctx->ioctx.dma; 784 list.addr = ioctx->ioctx.dma;
784 list.length = srp_max_req_size; 785 list.length = srp_max_req_size;
785 list.lkey = sdev->pd->local_dma_lkey; 786 list.lkey = sdev->pd->local_dma_lkey;
786 787
788 ioctx->ioctx.cqe.done = srpt_recv_done;
789 wr.wr_cqe = &ioctx->ioctx.cqe;
787 wr.next = NULL; 790 wr.next = NULL;
788 wr.sg_list = &list; 791 wr.sg_list = &list;
789 wr.num_sge = 1; 792 wr.num_sge = 1;
@@ -819,8 +822,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
819 list.length = len; 822 list.length = len;
820 list.lkey = sdev->pd->local_dma_lkey; 823 list.lkey = sdev->pd->local_dma_lkey;
821 824
825 ioctx->ioctx.cqe.done = srpt_send_done;
822 wr.next = NULL; 826 wr.next = NULL;
823 wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index); 827 wr.wr_cqe = &ioctx->ioctx.cqe;
824 wr.sg_list = &list; 828 wr.sg_list = &list;
825 wr.num_sge = 1; 829 wr.num_sge = 1;
826 wr.opcode = IB_WR_SEND; 830 wr.opcode = IB_WR_SEND;
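As the srpt_post_recv()/srpt_post_send() hunks show, a work request now points at its ib_cqe instead of carrying an encoded wr_id. A minimal receive-posting sketch in the same style (function and type names are illustrative, reusing example_iu/example_recv_done from above):

static int example_post_recv(struct ib_qp *qp, struct example_iu *iu,
                             struct ib_sge *sge)
{
        struct ib_recv_wr wr, *bad_wr;

        iu->cqe.done = example_recv_done;       /* dispatched by the CQ core */

        wr.next    = NULL;
        wr.wr_cqe  = &iu->cqe;                  /* replaces wr.wr_id */
        wr.sg_list = sge;
        wr.num_sge = 1;

        return ib_post_recv(qp, &wr, &bad_wr);
}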
@@ -1052,13 +1056,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1052 1056
1053 BUG_ON(!ch); 1057 BUG_ON(!ch);
1054 BUG_ON(!ioctx); 1058 BUG_ON(!ioctx);
1055 BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius); 1059 BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
1056 1060
1057 while (ioctx->n_rdma) 1061 while (ioctx->n_rdma)
1058 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge); 1062 kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
1059 1063
1060 kfree(ioctx->rdma_ius); 1064 kfree(ioctx->rdma_wrs);
1061 ioctx->rdma_ius = NULL; 1065 ioctx->rdma_wrs = NULL;
1062 1066
1063 if (ioctx->mapped_sg_count) { 1067 if (ioctx->mapped_sg_count) {
1064 sg = ioctx->sg; 1068 sg = ioctx->sg;
@@ -1082,7 +1086,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1082 struct scatterlist *sg, *sg_orig; 1086 struct scatterlist *sg, *sg_orig;
1083 int sg_cnt; 1087 int sg_cnt;
1084 enum dma_data_direction dir; 1088 enum dma_data_direction dir;
1085 struct rdma_iu *riu; 1089 struct ib_rdma_wr *riu;
1086 struct srp_direct_buf *db; 1090 struct srp_direct_buf *db;
1087 dma_addr_t dma_addr; 1091 dma_addr_t dma_addr;
1088 struct ib_sge *sge; 1092 struct ib_sge *sge;
@@ -1109,23 +1113,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1109 1113
1110 ioctx->mapped_sg_count = count; 1114 ioctx->mapped_sg_count = count;
1111 1115
1112 if (ioctx->rdma_ius && ioctx->n_rdma_ius) 1116 if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
1113 nrdma = ioctx->n_rdma_ius; 1117 nrdma = ioctx->n_rdma_wrs;
1114 else { 1118 else {
1115 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE 1119 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
1116 + ioctx->n_rbuf; 1120 + ioctx->n_rbuf;
1117 1121
1118 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL); 1122 ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
1119 if (!ioctx->rdma_ius) 1123 GFP_KERNEL);
1124 if (!ioctx->rdma_wrs)
1120 goto free_mem; 1125 goto free_mem;
1121 1126
1122 ioctx->n_rdma_ius = nrdma; 1127 ioctx->n_rdma_wrs = nrdma;
1123 } 1128 }
1124 1129
1125 db = ioctx->rbufs; 1130 db = ioctx->rbufs;
1126 tsize = cmd->data_length; 1131 tsize = cmd->data_length;
1127 dma_len = ib_sg_dma_len(dev, &sg[0]); 1132 dma_len = ib_sg_dma_len(dev, &sg[0]);
1128 riu = ioctx->rdma_ius; 1133 riu = ioctx->rdma_wrs;
1129 1134
1130 /* 1135 /*
1131 * For each remote desc - calculate the #ib_sge. 1136 * For each remote desc - calculate the #ib_sge.
@@ -1139,9 +1144,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1139 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { 1144 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1140 rsize = be32_to_cpu(db->len); 1145 rsize = be32_to_cpu(db->len);
1141 raddr = be64_to_cpu(db->va); 1146 raddr = be64_to_cpu(db->va);
1142 riu->raddr = raddr; 1147 riu->remote_addr = raddr;
1143 riu->rkey = be32_to_cpu(db->key); 1148 riu->rkey = be32_to_cpu(db->key);
1144 riu->sge_cnt = 0; 1149 riu->wr.num_sge = 0;
1145 1150
1146 /* calculate how many sge required for this remote_buf */ 1151 /* calculate how many sge required for this remote_buf */
1147 while (rsize > 0 && tsize > 0) { 1152 while (rsize > 0 && tsize > 0) {
@@ -1165,33 +1170,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1165 rsize = 0; 1170 rsize = 0;
1166 } 1171 }
1167 1172
1168 ++riu->sge_cnt; 1173 ++riu->wr.num_sge;
1169 1174
1170 if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) { 1175 if (rsize > 0 &&
1176 riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
1171 ++ioctx->n_rdma; 1177 ++ioctx->n_rdma;
1172 riu->sge = 1178 riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
1173 kmalloc(riu->sge_cnt * sizeof *riu->sge, 1179 sizeof(*riu->wr.sg_list),
1174 GFP_KERNEL); 1180 GFP_KERNEL);
1175 if (!riu->sge) 1181 if (!riu->wr.sg_list)
1176 goto free_mem; 1182 goto free_mem;
1177 1183
1178 ++riu; 1184 ++riu;
1179 riu->sge_cnt = 0; 1185 riu->wr.num_sge = 0;
1180 riu->raddr = raddr; 1186 riu->remote_addr = raddr;
1181 riu->rkey = be32_to_cpu(db->key); 1187 riu->rkey = be32_to_cpu(db->key);
1182 } 1188 }
1183 } 1189 }
1184 1190
1185 ++ioctx->n_rdma; 1191 ++ioctx->n_rdma;
1186 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge, 1192 riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
1187 GFP_KERNEL); 1193 sizeof(*riu->wr.sg_list),
1188 if (!riu->sge) 1194 GFP_KERNEL);
1195 if (!riu->wr.sg_list)
1189 goto free_mem; 1196 goto free_mem;
1190 } 1197 }
1191 1198
1192 db = ioctx->rbufs; 1199 db = ioctx->rbufs;
1193 tsize = cmd->data_length; 1200 tsize = cmd->data_length;
1194 riu = ioctx->rdma_ius; 1201 riu = ioctx->rdma_wrs;
1195 sg = sg_orig; 1202 sg = sg_orig;
1196 dma_len = ib_sg_dma_len(dev, &sg[0]); 1203 dma_len = ib_sg_dma_len(dev, &sg[0]);
1197 dma_addr = ib_sg_dma_address(dev, &sg[0]); 1204 dma_addr = ib_sg_dma_address(dev, &sg[0]);
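The allocations above also move from open-coded kzalloc(n * sizeof ...) and kmalloc(n * sizeof ...) to kcalloc()/kmalloc_array(), which return NULL if the size multiplication would overflow. A minimal sketch:

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

static struct ib_sge *example_alloc_sges(unsigned int num_sge)
{
        /* Overflow-checked array allocation (use kcalloc() for zeroed memory). */
        return kmalloc_array(num_sge, sizeof(struct ib_sge), GFP_KERNEL);
}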
@@ -1200,7 +1207,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1200 for (i = 0, j = 0; 1207 for (i = 0, j = 0;
1201 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { 1208 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1202 rsize = be32_to_cpu(db->len); 1209 rsize = be32_to_cpu(db->len);
1203 sge = riu->sge; 1210 sge = riu->wr.sg_list;
1204 k = 0; 1211 k = 0;
1205 1212
1206 while (rsize > 0 && tsize > 0) { 1213 while (rsize > 0 && tsize > 0) {
@@ -1232,9 +1239,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1232 } 1239 }
1233 1240
1234 ++k; 1241 ++k;
1235 if (k == riu->sge_cnt && rsize > 0 && tsize > 0) { 1242 if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
1236 ++riu; 1243 ++riu;
1237 sge = riu->sge; 1244 sge = riu->wr.sg_list;
1238 k = 0; 1245 k = 0;
1239 } else if (rsize > 0 && tsize > 0) 1246 } else if (rsize > 0 && tsize > 0)
1240 ++sge; 1247 ++sge;
@@ -1277,8 +1284,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
1277 ioctx->n_rbuf = 0; 1284 ioctx->n_rbuf = 0;
1278 ioctx->rbufs = NULL; 1285 ioctx->rbufs = NULL;
1279 ioctx->n_rdma = 0; 1286 ioctx->n_rdma = 0;
1280 ioctx->n_rdma_ius = 0; 1287 ioctx->n_rdma_wrs = 0;
1281 ioctx->rdma_ius = NULL; 1288 ioctx->rdma_wrs = NULL;
1282 ioctx->mapped_sg_count = 0; 1289 ioctx->mapped_sg_count = 0;
1283 init_completion(&ioctx->tx_done); 1290 init_completion(&ioctx->tx_done);
1284 ioctx->queue_status_only = false; 1291 ioctx->queue_status_only = false;
@@ -1380,118 +1387,44 @@ out:
@@ -1380,118 +1387,44 @@ out:
1380} 1387}
1381 1388
1382/** 1389/**
1383 * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
1384 */
1385static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
1386{
1387 struct srpt_send_ioctx *ioctx;
1388 enum srpt_command_state state;
1389 u32 index;
1390
1391 atomic_inc(&ch->sq_wr_avail);
1392
1393 index = idx_from_wr_id(wr_id);
1394 ioctx = ch->ioctx_ring[index];
1395 state = srpt_get_cmd_state(ioctx);
1396
1397 WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
1398 && state != SRPT_STATE_MGMT_RSP_SENT
1399 && state != SRPT_STATE_NEED_DATA
1400 && state != SRPT_STATE_DONE);
1401
1402 /* If SRP_RSP sending failed, undo the ch->req_lim change. */
1403 if (state == SRPT_STATE_CMD_RSP_SENT
1404 || state == SRPT_STATE_MGMT_RSP_SENT)
1405 atomic_dec(&ch->req_lim);
1406
1407 srpt_abort_cmd(ioctx);
1408}
1409
1410/**
1411 * srpt_handle_send_comp() - Process an IB send completion notification.
1412 */
1413static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
1414 struct srpt_send_ioctx *ioctx)
1415{
1416 enum srpt_command_state state;
1417
1418 atomic_inc(&ch->sq_wr_avail);
1419
1420 state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
1421
1422 if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
1423 && state != SRPT_STATE_MGMT_RSP_SENT
1424 && state != SRPT_STATE_DONE))
1425 pr_debug("state = %d\n", state);
1426
1427 if (state != SRPT_STATE_DONE) {
1428 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1429 transport_generic_free_cmd(&ioctx->cmd, 0);
1430 } else {
1431 pr_err("IB completion has been received too late for"
1432 " wr_id = %u.\n", ioctx->ioctx.index);
1433 }
1434}
1435
1436/**
1437 * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
1438 *
1439 * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping 1390 * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
1440 * the data that has been transferred via IB RDMA had to be postponed until the 1391 * the data that has been transferred via IB RDMA had to be postponed until the
1441 * check_stop_free() callback. None of this is necessary anymore and needs to 1392 * check_stop_free() callback. None of this is necessary anymore and needs to
1442 * be cleaned up. 1393 * be cleaned up.
1443 */ 1394 */
1444static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, 1395static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
1445 struct srpt_send_ioctx *ioctx,
1446 enum srpt_opcode opcode)
1447{ 1396{
1397 struct srpt_rdma_ch *ch = cq->cq_context;
1398 struct srpt_send_ioctx *ioctx =
1399 container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
1400
1448 WARN_ON(ioctx->n_rdma <= 0); 1401 WARN_ON(ioctx->n_rdma <= 0);
1449 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); 1402 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
1450 1403
1451 if (opcode == SRPT_RDMA_READ_LAST) { 1404 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1452 if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, 1405 pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
1453 SRPT_STATE_DATA_IN)) 1406 ioctx, wc->status);
1454 target_execute_cmd(&ioctx->cmd); 1407 srpt_abort_cmd(ioctx);
1455 else 1408 return;
1456 pr_err("%s[%d]: wrong state = %d\n", __func__,
1457 __LINE__, srpt_get_cmd_state(ioctx));
1458 } else if (opcode == SRPT_RDMA_ABORT) {
1459 ioctx->rdma_aborted = true;
1460 } else {
1461 WARN(true, "unexpected opcode %d\n", opcode);
1462 } 1409 }
1410
1411 if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
1412 SRPT_STATE_DATA_IN))
1413 target_execute_cmd(&ioctx->cmd);
1414 else
1415 pr_err("%s[%d]: wrong state = %d\n", __func__,
1416 __LINE__, srpt_get_cmd_state(ioctx));
1463} 1417}
1464 1418
1465/** 1419static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
1466 * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
1467 */
1468static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
1469 struct srpt_send_ioctx *ioctx,
1470 enum srpt_opcode opcode)
1471{ 1420{
1472 enum srpt_command_state state; 1421 struct srpt_send_ioctx *ioctx =
1422 container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
1473 1423
1474 state = srpt_get_cmd_state(ioctx); 1424 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1475 switch (opcode) { 1425 pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
1476 case SRPT_RDMA_READ_LAST: 1426 ioctx, wc->status);
1477 if (ioctx->n_rdma <= 0) { 1427 srpt_abort_cmd(ioctx);
1478 pr_err("Received invalid RDMA read"
1479 " error completion with idx %d\n",
1480 ioctx->ioctx.index);
1481 break;
1482 }
1483 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
1484 if (state == SRPT_STATE_NEED_DATA)
1485 srpt_abort_cmd(ioctx);
1486 else
1487 pr_err("%s[%d]: wrong state = %d\n",
1488 __func__, __LINE__, state);
1489 break;
1490 case SRPT_RDMA_WRITE_LAST:
1491 break;
1492 default:
1493 pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
1494 break;
1495 } 1428 }
1496} 1429}
1497 1430
@@ -1926,32 +1859,26 @@ out:
1926 return; 1859 return;
1927} 1860}
1928 1861
1929static void srpt_process_rcv_completion(struct ib_cq *cq, 1862static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1930 struct srpt_rdma_ch *ch,
1931 struct ib_wc *wc)
1932{ 1863{
1933 struct srpt_device *sdev = ch->sport->sdev; 1864 struct srpt_rdma_ch *ch = cq->cq_context;
1934 struct srpt_recv_ioctx *ioctx; 1865 struct srpt_recv_ioctx *ioctx =
1935 u32 index; 1866 container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
1936 1867
1937 index = idx_from_wr_id(wc->wr_id);
1938 if (wc->status == IB_WC_SUCCESS) { 1868 if (wc->status == IB_WC_SUCCESS) {
1939 int req_lim; 1869 int req_lim;
1940 1870
1941 req_lim = atomic_dec_return(&ch->req_lim); 1871 req_lim = atomic_dec_return(&ch->req_lim);
1942 if (unlikely(req_lim < 0)) 1872 if (unlikely(req_lim < 0))
1943 pr_err("req_lim = %d < 0\n", req_lim); 1873 pr_err("req_lim = %d < 0\n", req_lim);
1944 ioctx = sdev->ioctx_ring[index];
1945 srpt_handle_new_iu(ch, ioctx, NULL); 1874 srpt_handle_new_iu(ch, ioctx, NULL);
1946 } else { 1875 } else {
1947 pr_info("receiving failed for idx %u with status %d\n", 1876 pr_info("receiving failed for ioctx %p with status %d\n",
1948 index, wc->status); 1877 ioctx, wc->status);
1949 } 1878 }
1950} 1879}
1951 1880
1952/** 1881/**
1953 * srpt_process_send_completion() - Process an IB send completion.
1954 *
1955 * Note: Although this has not yet been observed during tests, at least in 1882 * Note: Although this has not yet been observed during tests, at least in
1956 * theory it is possible that the srpt_get_send_ioctx() call invoked by 1883 * theory it is possible that the srpt_get_send_ioctx() call invoked by
1957 * srpt_handle_new_iu() fails. This is possible because the req_lim_delta 1884 * srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1964,109 +1891,52 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
1964 * are queued on cmd_wait_list. The code below processes these delayed 1891 * are queued on cmd_wait_list. The code below processes these delayed
1965 * requests one at a time. 1892 * requests one at a time.
1966 */ 1893 */
1967static void srpt_process_send_completion(struct ib_cq *cq, 1894static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
1968 struct srpt_rdma_ch *ch,
1969 struct ib_wc *wc)
1970{ 1895{
1971 struct srpt_send_ioctx *send_ioctx; 1896 struct srpt_rdma_ch *ch = cq->cq_context;
1972 uint32_t index; 1897 struct srpt_send_ioctx *ioctx =
1973 enum srpt_opcode opcode; 1898 container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
1899 enum srpt_command_state state;
1974 1900
1975 index = idx_from_wr_id(wc->wr_id); 1901 state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
1976 opcode = opcode_from_wr_id(wc->wr_id); 1902
1977 send_ioctx = ch->ioctx_ring[index]; 1903 WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
1978 if (wc->status == IB_WC_SUCCESS) { 1904 state != SRPT_STATE_MGMT_RSP_SENT);
1979 if (opcode == SRPT_SEND) 1905
1980 srpt_handle_send_comp(ch, send_ioctx); 1906 atomic_inc(&ch->sq_wr_avail);
1981 else { 1907
1982 WARN_ON(opcode != SRPT_RDMA_ABORT && 1908 if (wc->status != IB_WC_SUCCESS) {
1983 wc->opcode != IB_WC_RDMA_READ); 1909 pr_info("sending response for ioctx 0x%p failed"
1984 srpt_handle_rdma_comp(ch, send_ioctx, opcode); 1910 " with status %d\n", ioctx, wc->status);
1985 } 1911
1912 atomic_dec(&ch->req_lim);
1913 srpt_abort_cmd(ioctx);
1914 goto out;
1915 }
1916
1917 if (state != SRPT_STATE_DONE) {
1918 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1919 transport_generic_free_cmd(&ioctx->cmd, 0);
1986 } else { 1920 } else {
1987 if (opcode == SRPT_SEND) { 1921 pr_err("IB completion has been received too late for"
1988 pr_info("sending response for idx %u failed" 1922 " wr_id = %u.\n", ioctx->ioctx.index);
1989 " with status %d\n", index, wc->status);
1990 srpt_handle_send_err_comp(ch, wc->wr_id);
1991 } else if (opcode != SRPT_RDMA_MID) {
1992 pr_info("RDMA t %d for idx %u failed with"
1993 " status %d\n", opcode, index, wc->status);
1994 srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
1995 }
1996 } 1923 }
1997 1924
1998 while (unlikely(opcode == SRPT_SEND 1925out:
1999 && !list_empty(&ch->cmd_wait_list) 1926 while (!list_empty(&ch->cmd_wait_list) &&
2000 && srpt_get_ch_state(ch) == CH_LIVE 1927 srpt_get_ch_state(ch) == CH_LIVE &&
2001 && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) { 1928 (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
2002 struct srpt_recv_ioctx *recv_ioctx; 1929 struct srpt_recv_ioctx *recv_ioctx;
2003 1930
2004 recv_ioctx = list_first_entry(&ch->cmd_wait_list, 1931 recv_ioctx = list_first_entry(&ch->cmd_wait_list,
2005 struct srpt_recv_ioctx, 1932 struct srpt_recv_ioctx,
2006 wait_list); 1933 wait_list);
2007 list_del(&recv_ioctx->wait_list); 1934 list_del(&recv_ioctx->wait_list);
2008 srpt_handle_new_iu(ch, recv_ioctx, send_ioctx); 1935 srpt_handle_new_iu(ch, recv_ioctx, ioctx);
2009 }
2010}
2011
2012static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
2013{
2014 struct ib_wc *const wc = ch->wc;
2015 int i, n;
2016
2017 WARN_ON(cq != ch->cq);
2018
2019 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2020 while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
2021 for (i = 0; i < n; i++) {
2022 if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
2023 srpt_process_rcv_completion(cq, ch, &wc[i]);
2024 else
2025 srpt_process_send_completion(cq, ch, &wc[i]);
2026 }
2027 } 1936 }
2028} 1937}
2029 1938
2030/** 1939/**
2031 * srpt_completion() - IB completion queue callback function.
2032 *
2033 * Notes:
2034 * - It is guaranteed that a completion handler will never be invoked
2035 * concurrently on two different CPUs for the same completion queue. See also
2036 * Documentation/infiniband/core_locking.txt and the implementation of
2037 * handle_edge_irq() in kernel/irq/chip.c.
2038 * - When threaded IRQs are enabled, completion handlers are invoked in thread
2039 * context instead of interrupt context.
2040 */
2041static void srpt_completion(struct ib_cq *cq, void *ctx)
2042{
2043 struct srpt_rdma_ch *ch = ctx;
2044
2045 wake_up_interruptible(&ch->wait_queue);
2046}
2047
2048static int srpt_compl_thread(void *arg)
2049{
2050 struct srpt_rdma_ch *ch;
2051
2052 /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2053 current->flags |= PF_NOFREEZE;
2054
2055 ch = arg;
2056 BUG_ON(!ch);
2057 pr_info("Session %s: kernel thread %s (PID %d) started\n",
2058 ch->sess_name, ch->thread->comm, current->pid);
2059 while (!kthread_should_stop()) {
2060 wait_event_interruptible(ch->wait_queue,
2061 (srpt_process_completion(ch->cq, ch),
2062 kthread_should_stop()));
2063 }
2064 pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
2065 ch->sess_name, ch->thread->comm, current->pid);
2066 return 0;
2067}
2068
2069/**
2070 * srpt_create_ch_ib() - Create receive and send completion queues. 1940 * srpt_create_ch_ib() - Create receive and send completion queues.
2071 */ 1941 */
2072static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) 1942static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
@@ -2075,7 +1945,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
2075 struct srpt_port *sport = ch->sport; 1945 struct srpt_port *sport = ch->sport;
2076 struct srpt_device *sdev = sport->sdev; 1946 struct srpt_device *sdev = sport->sdev;
2077 u32 srp_sq_size = sport->port_attrib.srp_sq_size; 1947 u32 srp_sq_size = sport->port_attrib.srp_sq_size;
2078 struct ib_cq_init_attr cq_attr = {};
2079 int ret; 1948 int ret;
2080 1949
2081 WARN_ON(ch->rq_size < 1); 1950 WARN_ON(ch->rq_size < 1);
@@ -2086,9 +1955,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
2086 goto out; 1955 goto out;
2087 1956
2088retry: 1957retry:
2089 cq_attr.cqe = ch->rq_size + srp_sq_size; 1958 ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
2090 ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, 1959 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
2091 &cq_attr);
2092 if (IS_ERR(ch->cq)) { 1960 if (IS_ERR(ch->cq)) {
2093 ret = PTR_ERR(ch->cq); 1961 ret = PTR_ERR(ch->cq);
2094 pr_err("failed to create CQ cqe= %d ret= %d\n", 1962 pr_err("failed to create CQ cqe= %d ret= %d\n",
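Here ib_create_cq() plus the driver's private poll loop gives way to ib_alloc_cq(): the IB core owns polling and dispatches each completion to its wr_cqe->done handler, from a workqueue in the IB_POLL_WORKQUEUE case. A hedged usage sketch reusing the names from the hunk above:

        struct ib_cq *cq;

        cq = ib_alloc_cq(sdev->device, ch /* becomes cq->cq_context */,
                         ch->rq_size + srp_sq_size, 0 /* comp_vector */,
                         IB_POLL_WORKQUEUE);
        if (IS_ERR(cq))
                return PTR_ERR(cq);

        /* ... hand the CQ to QP creation ... */

        ib_free_cq(cq);         /* replaces ib_destroy_cq() */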
@@ -2131,18 +1999,6 @@ retry:
2131 if (ret) 1999 if (ret)
2132 goto err_destroy_qp; 2000 goto err_destroy_qp;
2133 2001
2134 init_waitqueue_head(&ch->wait_queue);
2135
2136 pr_debug("creating thread for session %s\n", ch->sess_name);
2137
2138 ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
2139 if (IS_ERR(ch->thread)) {
2140 pr_err("failed to create kernel thread %ld\n",
2141 PTR_ERR(ch->thread));
2142 ch->thread = NULL;
2143 goto err_destroy_qp;
2144 }
2145
2146out: 2002out:
2147 kfree(qp_init); 2003 kfree(qp_init);
2148 return ret; 2004 return ret;
@@ -2150,17 +2006,14 @@ out:
2150err_destroy_qp: 2006err_destroy_qp:
2151 ib_destroy_qp(ch->qp); 2007 ib_destroy_qp(ch->qp);
2152err_destroy_cq: 2008err_destroy_cq:
2153 ib_destroy_cq(ch->cq); 2009 ib_free_cq(ch->cq);
2154 goto out; 2010 goto out;
2155} 2011}
2156 2012
2157static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) 2013static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
2158{ 2014{
2159 if (ch->thread)
2160 kthread_stop(ch->thread);
2161
2162 ib_destroy_qp(ch->qp); 2015 ib_destroy_qp(ch->qp);
2163 ib_destroy_cq(ch->cq); 2016 ib_free_cq(ch->cq);
2164} 2017}
2165 2018
2166/** 2019/**
@@ -2808,12 +2661,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2808static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, 2661static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2809 struct srpt_send_ioctx *ioctx) 2662 struct srpt_send_ioctx *ioctx)
2810{ 2663{
2811 struct ib_rdma_wr wr;
2812 struct ib_send_wr *bad_wr; 2664 struct ib_send_wr *bad_wr;
2813 struct rdma_iu *riu; 2665 int sq_wr_avail, ret, i;
2814 int i;
2815 int ret;
2816 int sq_wr_avail;
2817 enum dma_data_direction dir; 2666 enum dma_data_direction dir;
2818 const int n_rdma = ioctx->n_rdma; 2667 const int n_rdma = ioctx->n_rdma;
2819 2668
@@ -2829,59 +2678,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2829 } 2678 }
2830 } 2679 }
2831 2680
2832 ioctx->rdma_aborted = false; 2681 for (i = 0; i < n_rdma; i++) {
2833 ret = 0; 2682 struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
2834 riu = ioctx->rdma_ius;
2835 memset(&wr, 0, sizeof wr);
2836
2837 for (i = 0; i < n_rdma; ++i, ++riu) {
2838 if (dir == DMA_FROM_DEVICE) {
2839 wr.wr.opcode = IB_WR_RDMA_WRITE;
2840 wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2841 SRPT_RDMA_WRITE_LAST :
2842 SRPT_RDMA_MID,
2843 ioctx->ioctx.index);
2844 } else {
2845 wr.wr.opcode = IB_WR_RDMA_READ;
2846 wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2847 SRPT_RDMA_READ_LAST :
2848 SRPT_RDMA_MID,
2849 ioctx->ioctx.index);
2850 }
2851 wr.wr.next = NULL;
2852 wr.remote_addr = riu->raddr;
2853 wr.rkey = riu->rkey;
2854 wr.wr.num_sge = riu->sge_cnt;
2855 wr.wr.sg_list = riu->sge;
2856 2683
2857 /* only get completion event for the last rdma write */ 2684 wr->opcode = (dir == DMA_FROM_DEVICE) ?
2858 if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE) 2685 IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2859 wr.wr.send_flags = IB_SEND_SIGNALED;
2860 2686
2861 ret = ib_post_send(ch->qp, &wr.wr, &bad_wr); 2687 if (i == n_rdma - 1) {
2862 if (ret) 2688 /* only get completion event for the last rdma read */
2863 break; 2689 if (dir == DMA_TO_DEVICE) {
2690 wr->send_flags = IB_SEND_SIGNALED;
2691 ioctx->rdma_cqe.done = srpt_rdma_read_done;
2692 } else {
2693 ioctx->rdma_cqe.done = srpt_rdma_write_done;
2694 }
2695 wr->wr_cqe = &ioctx->rdma_cqe;
2696 wr->next = NULL;
2697 } else {
2698 wr->wr_cqe = NULL;
2699 wr->next = &ioctx->rdma_wrs[i + 1].wr;
2700 }
2864 } 2701 }
2865 2702
2703 ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
2866 if (ret) 2704 if (ret)
2867 pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n", 2705 pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
2868 __func__, __LINE__, ret, i, n_rdma); 2706 __func__, __LINE__, ret, i, n_rdma);
2869 if (ret && i > 0) {
2870 wr.wr.num_sge = 0;
2871 wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
2872 wr.wr.send_flags = IB_SEND_SIGNALED;
2873 while (ch->state == CH_LIVE &&
2874 ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
2875 pr_info("Trying to abort failed RDMA transfer [%d]\n",
2876 ioctx->ioctx.index);
2877 msleep(1000);
2878 }
2879 while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
2880 pr_info("Waiting until RDMA abort finished [%d]\n",
2881 ioctx->ioctx.index);
2882 msleep(1000);
2883 }
2884 }
2885out: 2707out:
2886 if (unlikely(dir == DMA_TO_DEVICE && ret < 0)) 2708 if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
2887 atomic_add(n_rdma, &ch->sq_wr_avail); 2709 atomic_add(n_rdma, &ch->sq_wr_avail);
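The rewritten srpt_perform_rdmas() links every RDMA work request of a transfer into one chain and posts it with a single ib_post_send(); only the last request carries a wr_cqe, so one completion covers the whole transfer. A simplified sketch of the chaining (it signals the last request regardless of direction, and assumes wrs[] and rdma_cqe were set up as in the hunk above):

        for (i = 0; i < n_rdma; i++) {
                struct ib_send_wr *wr = &wrs[i].wr;

                wr->opcode = rdma_read ? IB_WR_RDMA_READ : IB_WR_RDMA_WRITE;
                if (i == n_rdma - 1) {
                        wr->wr_cqe = &rdma_cqe;         /* rdma_cqe.done set earlier */
                        wr->send_flags = IB_SEND_SIGNALED;
                        wr->next = NULL;
                } else {
                        wr->wr_cqe = NULL;
                        wr->next = &wrs[i + 1].wr;
                }
        }
        ret = ib_post_send(qp, &wrs[0].wr, &bad_wr);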
@@ -3190,14 +3012,11 @@ static void srpt_add_one(struct ib_device *device)
3190 init_waitqueue_head(&sdev->ch_releaseQ); 3012 init_waitqueue_head(&sdev->ch_releaseQ);
3191 spin_lock_init(&sdev->spinlock); 3013 spin_lock_init(&sdev->spinlock);
3192 3014
3193 if (ib_query_device(device, &sdev->dev_attr))
3194 goto free_dev;
3195
3196 sdev->pd = ib_alloc_pd(device); 3015 sdev->pd = ib_alloc_pd(device);
3197 if (IS_ERR(sdev->pd)) 3016 if (IS_ERR(sdev->pd))
3198 goto free_dev; 3017 goto free_dev;
3199 3018
3200 sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr); 3019 sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr);
3201 3020
3202 srq_attr.event_handler = srpt_srq_event; 3021 srq_attr.event_handler = srpt_srq_event;
3203 srq_attr.srq_context = (void *)sdev; 3022 srq_attr.srq_context = (void *)sdev;
@@ -3211,7 +3030,7 @@ static void srpt_add_one(struct ib_device *device)
3211 goto err_pd; 3030 goto err_pd;
3212 3031
3213 pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n", 3032 pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
3214 __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr, 3033 __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
3215 device->name); 3034 device->name);
3216 3035
3217 if (!srpt_service_guid) 3036 if (!srpt_service_guid)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 5366e0a9fd6d..09037f2b0b51 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -128,36 +128,6 @@ enum {
128 DEFAULT_MAX_RDMA_SIZE = 65536, 128 DEFAULT_MAX_RDMA_SIZE = 65536,
129}; 129};
130 130
131enum srpt_opcode {
132 SRPT_RECV,
133 SRPT_SEND,
134 SRPT_RDMA_MID,
135 SRPT_RDMA_ABORT,
136 SRPT_RDMA_READ_LAST,
137 SRPT_RDMA_WRITE_LAST,
138};
139
140static inline u64 encode_wr_id(u8 opcode, u32 idx)
141{
142 return ((u64)opcode << 32) | idx;
143}
144static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
145{
146 return wr_id >> 32;
147}
148static inline u32 idx_from_wr_id(u64 wr_id)
149{
150 return (u32)wr_id;
151}
152
153struct rdma_iu {
154 u64 raddr;
155 u32 rkey;
156 struct ib_sge *sge;
157 u32 sge_cnt;
158 int mem_id;
159};
160
161/** 131/**
162 * enum srpt_command_state - SCSI command state managed by SRPT. 132 * enum srpt_command_state - SCSI command state managed by SRPT.
163 * @SRPT_STATE_NEW: New command arrived and is being processed. 133 * @SRPT_STATE_NEW: New command arrived and is being processed.
@@ -189,6 +159,7 @@ enum srpt_command_state {
189 * @index: Index of the I/O context in its ioctx_ring array. 159 * @index: Index of the I/O context in its ioctx_ring array.
190 */ 160 */
191struct srpt_ioctx { 161struct srpt_ioctx {
162 struct ib_cqe cqe;
192 void *buf; 163 void *buf;
193 dma_addr_t dma; 164 dma_addr_t dma;
194 uint32_t index; 165 uint32_t index;
@@ -215,32 +186,30 @@ struct srpt_recv_ioctx {
215 * @sg: Pointer to sg-list associated with this I/O context. 186 * @sg: Pointer to sg-list associated with this I/O context.
216 * @sg_cnt: SG-list size. 187 * @sg_cnt: SG-list size.
217 * @mapped_sg_count: ib_dma_map_sg() return value. 188 * @mapped_sg_count: ib_dma_map_sg() return value.
218 * @n_rdma_ius: Number of elements in the rdma_ius array. 189 * @n_rdma_wrs: Number of elements in the rdma_wrs array.
219 * @rdma_ius: Array with information about the RDMA mapping. 190 * @rdma_wrs: Array with information about the RDMA mapping.
220 * @tag: Tag of the received SRP information unit. 191 * @tag: Tag of the received SRP information unit.
221 * @spinlock: Protects 'state'. 192 * @spinlock: Protects 'state'.
222 * @state: I/O context state. 193 * @state: I/O context state.
223 * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
224 * the already initiated transfers have finished.
225 * @cmd: Target core command data structure. 194 * @cmd: Target core command data structure.
226 * @sense_data: SCSI sense data. 195 * @sense_data: SCSI sense data.
227 */ 196 */
228struct srpt_send_ioctx { 197struct srpt_send_ioctx {
229 struct srpt_ioctx ioctx; 198 struct srpt_ioctx ioctx;
230 struct srpt_rdma_ch *ch; 199 struct srpt_rdma_ch *ch;
231 struct rdma_iu *rdma_ius; 200 struct ib_rdma_wr *rdma_wrs;
201 struct ib_cqe rdma_cqe;
232 struct srp_direct_buf *rbufs; 202 struct srp_direct_buf *rbufs;
233 struct srp_direct_buf single_rbuf; 203 struct srp_direct_buf single_rbuf;
234 struct scatterlist *sg; 204 struct scatterlist *sg;
235 struct list_head free_list; 205 struct list_head free_list;
236 spinlock_t spinlock; 206 spinlock_t spinlock;
237 enum srpt_command_state state; 207 enum srpt_command_state state;
238 bool rdma_aborted;
239 struct se_cmd cmd; 208 struct se_cmd cmd;
240 struct completion tx_done; 209 struct completion tx_done;
241 int sg_cnt; 210 int sg_cnt;
242 int mapped_sg_count; 211 int mapped_sg_count;
243 u16 n_rdma_ius; 212 u16 n_rdma_wrs;
244 u8 n_rdma; 213 u8 n_rdma;
245 u8 n_rbuf; 214 u8 n_rbuf;
246 bool queue_status_only; 215 bool queue_status_only;
@@ -267,9 +236,6 @@ enum rdma_ch_state {
267 236
268/** 237/**
269 * struct srpt_rdma_ch - RDMA channel. 238 * struct srpt_rdma_ch - RDMA channel.
270 * @wait_queue: Allows the kernel thread to wait for more work.
271 * @thread: Kernel thread that processes the IB queues associated with
272 * the channel.
273 * @cm_id: IB CM ID associated with the channel. 239 * @cm_id: IB CM ID associated with the channel.
274 * @qp: IB queue pair used for communicating over this channel. 240 * @qp: IB queue pair used for communicating over this channel.
275 * @cq: IB completion queue for this channel. 241 * @cq: IB completion queue for this channel.
@@ -288,7 +254,6 @@ enum rdma_ch_state {
288 * @free_list: Head of list with free send I/O contexts. 254 * @free_list: Head of list with free send I/O contexts.
289 * @state: channel state. See also enum rdma_ch_state. 255 * @state: channel state. See also enum rdma_ch_state.
290 * @ioctx_ring: Send ring. 256 * @ioctx_ring: Send ring.
291 * @wc: IB work completion array for srpt_process_completion().
292 * @list: Node for insertion in the srpt_device.rch_list list. 257 * @list: Node for insertion in the srpt_device.rch_list list.
293 * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This 258 * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
294 * list contains struct srpt_ioctx elements and is protected 259 * list contains struct srpt_ioctx elements and is protected
@@ -299,8 +264,6 @@ enum rdma_ch_state {
299 * @release_done: Enables waiting for srpt_release_channel() completion. 264 * @release_done: Enables waiting for srpt_release_channel() completion.
300 */ 265 */
301struct srpt_rdma_ch { 266struct srpt_rdma_ch {
302 wait_queue_head_t wait_queue;
303 struct task_struct *thread;
304 struct ib_cm_id *cm_id; 267 struct ib_cm_id *cm_id;
305 struct ib_qp *qp; 268 struct ib_qp *qp;
306 struct ib_cq *cq; 269 struct ib_cq *cq;
@@ -317,7 +280,6 @@ struct srpt_rdma_ch {
317 struct list_head free_list; 280 struct list_head free_list;
318 enum rdma_ch_state state; 281 enum rdma_ch_state state;
319 struct srpt_send_ioctx **ioctx_ring; 282 struct srpt_send_ioctx **ioctx_ring;
320 struct ib_wc wc[16];
321 struct list_head list; 283 struct list_head list;
322 struct list_head cmd_wait_list; 284 struct list_head cmd_wait_list;
323 struct se_session *sess; 285 struct se_session *sess;
@@ -377,8 +339,6 @@ struct srpt_port {
377 * @mr: L_Key (local key) with write access to all local memory. 339 * @mr: L_Key (local key) with write access to all local memory.
378 * @srq: Per-HCA SRQ (shared receive queue). 340 * @srq: Per-HCA SRQ (shared receive queue).
379 * @cm_id: Connection identifier. 341 * @cm_id: Connection identifier.
380 * @dev_attr: Attributes of the InfiniBand device as obtained during the
381 * ib_client.add() callback.
382 * @srq_size: SRQ size. 342 * @srq_size: SRQ size.
383 * @ioctx_ring: Per-HCA SRQ. 343 * @ioctx_ring: Per-HCA SRQ.
384 * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. 344 * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
@@ -393,7 +353,6 @@ struct srpt_device {
393 struct ib_pd *pd; 353 struct ib_pd *pd;
394 struct ib_srq *srq; 354 struct ib_srq *srq;
395 struct ib_cm_id *cm_id; 355 struct ib_cm_id *cm_id;
396 struct ib_device_attr dev_attr;
397 int srq_size; 356 int srq_size;
398 struct srpt_recv_ioctx **ioctx_ring; 357 struct srpt_recv_ioctx **ioctx_ring;
399 struct list_head rch_list; 358 struct list_head rch_list;
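Note that a send I/O context now carries two completion hooks: the embedded ioctx.cqe (response SENDs, dispatched to srpt_send_done()) and rdma_cqe (the last RDMA work request, dispatched to srpt_rdma_read_done() or srpt_rdma_write_done()), which is what allows dropping the encode_wr_id()/opcode_from_wr_id() scheme. Reduced to the two hooks (illustrative name):

struct example_send_ctx {
        struct ib_cqe   send_cqe;       /* .done = send completion handler */
        struct ib_cqe   rdma_cqe;       /* .done = RDMA read/write handler,
                                         * attached to the last RDMA WR only */
};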
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 2c2baab9d880..d66c690a8597 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -157,6 +157,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
157 [29] = "802.1ad offload support", 157 [29] = "802.1ad offload support",
158 [31] = "Modifying loopback source checks using UPDATE_QP support", 158 [31] = "Modifying loopback source checks using UPDATE_QP support",
159 [32] = "Loopback source checks support", 159 [32] = "Loopback source checks support",
160 [33] = "RoCEv2 support"
160 }; 161 };
161 int i; 162 int i;
162 163
@@ -626,6 +627,8 @@ out:
626 return err; 627 return err;
627} 628}
628 629
630static void disable_unsupported_roce_caps(void *buf);
631
629int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 632int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
630{ 633{
631 struct mlx4_cmd_mailbox *mailbox; 634 struct mlx4_cmd_mailbox *mailbox;
@@ -738,6 +741,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
738 if (err) 741 if (err)
739 goto out; 742 goto out;
740 743
744 if (mlx4_is_mfunc(dev))
745 disable_unsupported_roce_caps(outbox);
741 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET); 746 MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
742 dev_cap->reserved_qps = 1 << (field & 0xf); 747 dev_cap->reserved_qps = 1 << (field & 0xf);
743 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET); 748 MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
@@ -905,6 +910,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
905 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; 910 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
906 MLX4_GET(dev_cap->bmme_flags, outbox, 911 MLX4_GET(dev_cap->bmme_flags, outbox,
907 QUERY_DEV_CAP_BMME_FLAGS_OFFSET); 912 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
913 if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
914 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
908 if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) 915 if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
909 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; 916 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
910 MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); 917 MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
@@ -1161,6 +1168,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
1161 if (err) 1168 if (err)
1162 return err; 1169 return err;
1163 1170
1171 disable_unsupported_roce_caps(outbox->buf);
1164 /* add port mng change event capability and disable mw type 1 1172 /* add port mng change event capability and disable mw type 1
1165 * unconditionally to slaves 1173 * unconditionally to slaves
1166 */ 1174 */
@@ -1258,6 +1266,21 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
1258 return 0; 1266 return 0;
1259} 1267}
1260 1268
1269static void disable_unsupported_roce_caps(void *buf)
1270{
1271 u32 flags;
1272
1273 MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
1274 flags &= ~(1UL << 31);
1275 MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
1276 MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
1277 flags &= ~(1UL << 24);
1278 MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
1279 MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
1280 flags &= ~(MLX4_FLAG_ROCE_V1_V2);
1281 MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
1282}
1283
1261int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, 1284int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
1262 struct mlx4_vhcr *vhcr, 1285 struct mlx4_vhcr *vhcr,
1263 struct mlx4_cmd_mailbox *inbox, 1286 struct mlx4_cmd_mailbox *inbox,
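disable_unsupported_roce_caps() uses the usual mailbox read-modify-write pattern to hide capability bits from slave functions: MLX4_GET() pulls a flag word out of the QUERY_DEV_CAP output buffer, the bit is cleared, and MLX4_PUT() writes it back. The pattern for a single word, as in the hunk above:

        u32 flags;

        MLX4_GET(flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
        flags &= ~MLX4_FLAG_ROCE_V1_V2;         /* hide RoCE v2 from the slave */
        MLX4_PUT(outbox, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);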
@@ -2239,7 +2262,8 @@ struct mlx4_config_dev {
2239 __be32 rsvd1[3]; 2262 __be32 rsvd1[3];
2240 __be16 vxlan_udp_dport; 2263 __be16 vxlan_udp_dport;
2241 __be16 rsvd2; 2264 __be16 rsvd2;
2242 __be32 rsvd3; 2265 __be16 roce_v2_entropy;
2266 __be16 roce_v2_udp_dport;
2243 __be32 roce_flags; 2267 __be32 roce_flags;
2244 __be32 rsvd4[25]; 2268 __be32 rsvd4[25];
2245 __be16 rsvd5; 2269 __be16 rsvd5;
@@ -2248,6 +2272,7 @@ struct mlx4_config_dev {
2248}; 2272};
2249 2273
2250#define MLX4_VXLAN_UDP_DPORT (1 << 0) 2274#define MLX4_VXLAN_UDP_DPORT (1 << 0)
2275#define MLX4_ROCE_V2_UDP_DPORT BIT(3)
2251#define MLX4_DISABLE_RX_PORT BIT(18) 2276#define MLX4_DISABLE_RX_PORT BIT(18)
2252 2277
2253static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) 2278static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
@@ -2365,6 +2390,18 @@ int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis)
2365 return mlx4_CONFIG_DEV_set(dev, &config_dev); 2390 return mlx4_CONFIG_DEV_set(dev, &config_dev);
2366} 2391}
2367 2392
2393int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port)
2394{
2395 struct mlx4_config_dev config_dev;
2396
2397 memset(&config_dev, 0, sizeof(config_dev));
2398 config_dev.update_flags = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT);
2399 config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port);
2400
2401 return mlx4_CONFIG_DEV_set(dev, &config_dev);
2402}
2403EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port);
2404
2368int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2) 2405int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
2369{ 2406{
2370 struct mlx4_cmd_mailbox *mailbox; 2407 struct mlx4_cmd_mailbox *mailbox;
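The newly exported mlx4_config_roce_v2_port() wraps CONFIG_DEV to program the UDP destination port used for RoCE v2. A hedged usage sketch; 4791 is the IANA-assigned RoCE v2 port and the error handling is illustrative:

        int err = mlx4_config_roce_v2_port(dev, 4791);

        if (err)
                pr_err("mlx4: setting the RoCE v2 UDP port failed: %d\n", err);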
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 2404c22ad2b2..7baef52db6b7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -780,7 +780,10 @@ struct mlx4_set_port_general_context {
780 u16 reserved1; 780 u16 reserved1;
781 u8 v_ignore_fcs; 781 u8 v_ignore_fcs;
782 u8 flags; 782 u8 flags;
783 u8 ignore_fcs; 783 union {
784 u8 ignore_fcs;
785 u8 roce_mode;
786 };
784 u8 reserved2; 787 u8 reserved2;
785 __be16 mtu; 788 __be16 mtu;
786 u8 pptx; 789 u8 pptx;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index f2550425c251..787b7bb54d52 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -1520,6 +1520,8 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
1520 return err; 1520 return err;
1521} 1521}
1522 1522
1523#define SET_PORT_ROCE_2_FLAGS 0x10
1524#define MLX4_SET_PORT_ROCE_V1_V2 0x2
1523int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, 1525int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
1524 u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx) 1526 u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
1525{ 1527{
@@ -1539,6 +1541,11 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
1539 context->pprx = (pprx * (!pfcrx)) << 7; 1541 context->pprx = (pprx * (!pfcrx)) << 7;
1540 context->pfcrx = pfcrx; 1542 context->pfcrx = pfcrx;
1541 1543
1544 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
1545 context->flags |= SET_PORT_ROCE_2_FLAGS;
1546 context->roce_mode |=
1547 MLX4_SET_PORT_ROCE_V1_V2 << 4;
1548 }
1542 in_mod = MLX4_SET_PORT_GENERAL << 8 | port; 1549 in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
1543 err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, 1550 err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
1544 MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, 1551 MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 168823dde79f..d1cd9c32a9ae 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -167,6 +167,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
167 context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; 167 context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
168 } 168 }
169 169
170 if ((cur_state == MLX4_QP_STATE_RTR) &&
171 (new_state == MLX4_QP_STATE_RTS) &&
172 dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
173 context->roce_entropy =
174 cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn));
175
170 *(__be32 *) mailbox->buf = cpu_to_be32(optpar); 176 *(__be32 *) mailbox->buf = cpu_to_be32(optpar);
171 memcpy(mailbox->buf + 8, context, sizeof *context); 177 memcpy(mailbox->buf + 8, context, sizeof *context);
172 178
@@ -921,3 +927,23 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
921 return 0; 927 return 0;
922} 928}
923EXPORT_SYMBOL_GPL(mlx4_qp_to_ready); 929EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);
930
931u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn)
932{
933 struct mlx4_qp_context context;
934 struct mlx4_qp qp;
935 int err;
936
937 qp.qpn = qpn;
938 err = mlx4_qp_query(dev, &qp, &context);
939 if (!err) {
940 u32 dest_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff;
941 u16 folded_dst = folded_qp(dest_qpn);
942 u16 folded_src = folded_qp(qpn);
943
944 return (dest_qpn != qpn) ?
945 ((folded_dst ^ folded_src) | 0xC000) :
946 folded_src | 0xC000;
947 }
948 return 0xdead;
949}
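mlx4_qp_roce_entropy() folds the source and destination QP numbers into 16 bits and ORs in 0xC000, so the value programmed into the QP context as roce_entropy on the RTR-to-RTS transition always lands in the dynamic port range 49152-65535 (with 0xdead as the fallback when the QP query fails). A small worked example with an assumed folded value:

        /* Assume folded_dst ^ folded_src == 0x1234 (illustrative): */
        u16 entropy = 0x1234 | 0xC000;  /* = 0xD234 = 53812, i.e. >= 49152 */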
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9ea49a893323..aac071a7e830 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -39,8 +39,8 @@
39#include <linux/mlx5/qp.h> 39#include <linux/mlx5/qp.h>
40#include <linux/mlx5/cq.h> 40#include <linux/mlx5/cq.h>
41#include <linux/mlx5/vport.h> 41#include <linux/mlx5/vport.h>
42#include <linux/mlx5/transobj.h>
42#include "wq.h" 43#include "wq.h"
43#include "transobj.h"
44#include "mlx5_core.h" 44#include "mlx5_core.h"
45 45
46#define MLX5E_MAX_NUM_TC 8 46#define MLX5E_MAX_NUM_TC 8
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c56d91a2812b..6a3e430f1062 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2241,7 +2241,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
2241 goto err_unmap_free_uar; 2241 goto err_unmap_free_uar;
2242 } 2242 }
2243 2243
2244 err = mlx5_alloc_transport_domain(mdev, &priv->tdn); 2244 err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn);
2245 if (err) { 2245 if (err) {
2246 mlx5_core_err(mdev, "alloc td failed, %d\n", err); 2246 mlx5_core_err(mdev, "alloc td failed, %d\n", err);
2247 goto err_dealloc_pd; 2247 goto err_dealloc_pd;
@@ -2324,7 +2324,7 @@ err_destroy_mkey:
2324 mlx5_core_destroy_mkey(mdev, &priv->mr); 2324 mlx5_core_destroy_mkey(mdev, &priv->mr);
2325 2325
2326err_dealloc_transport_domain: 2326err_dealloc_transport_domain:
2327 mlx5_dealloc_transport_domain(mdev, priv->tdn); 2327 mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
2328 2328
2329err_dealloc_pd: 2329err_dealloc_pd:
2330 mlx5_core_dealloc_pd(mdev, priv->pdn); 2330 mlx5_core_dealloc_pd(mdev, priv->pdn);
@@ -2356,7 +2356,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
2356 mlx5e_close_drop_rq(priv); 2356 mlx5e_close_drop_rq(priv);
2357 mlx5e_destroy_tises(priv); 2357 mlx5e_destroy_tises(priv);
2358 mlx5_core_destroy_mkey(priv->mdev, &priv->mr); 2358 mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
2359 mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); 2359 mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
2360 mlx5_core_dealloc_pd(priv->mdev, priv->pdn); 2360 mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
2361 mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); 2361 mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
2362 free_netdev(netdev); 2362 free_netdev(netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 23c244a7e5d7..647a3ca2c2a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -230,6 +230,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
230 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: 230 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
231 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: 231 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
232 rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; 232 rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
233 rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
233 mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", 234 mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
234 eqe_type_str(eqe->type), eqe->type, rsn); 235 eqe_type_str(eqe->type), eqe->type, rsn);
235 mlx5_rsc_event(dev, rsn, eqe->type); 236 mlx5_rsc_event(dev, rsn, eqe->type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b37749a3730e..1545a944c309 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -78,6 +78,11 @@ struct mlx5_device_context {
78 void *context; 78 void *context;
79}; 79};
80 80
81enum {
82 MLX5_ATOMIC_REQ_MODE_BE = 0x0,
83 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
84};
85
81static struct mlx5_profile profile[] = { 86static struct mlx5_profile profile[] = {
82 [0] = { 87 [0] = {
83 .mask = 0, 88 .mask = 0,
@@ -387,7 +392,7 @@ query_ex:
387 return err; 392 return err;
388} 393}
389 394
390static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz) 395static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
391{ 396{
392 u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)]; 397 u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
393 int err; 398 int err;
@@ -395,6 +400,7 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
395 memset(out, 0, sizeof(out)); 400 memset(out, 0, sizeof(out));
396 401
397 MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); 402 MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
403 MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
398 err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); 404 err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
399 if (err) 405 if (err)
400 return err; 406 return err;
@@ -404,6 +410,46 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
404 return err; 410 return err;
405} 411}
406 412
413static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
414{
415 void *set_ctx;
416 void *set_hca_cap;
417 int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
418 int req_endianness;
419 int err;
420
421 if (MLX5_CAP_GEN(dev, atomic)) {
422 err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
423 HCA_CAP_OPMOD_GET_CUR);
424 if (err)
425 return err;
426 } else {
427 return 0;
428 }
429
430 req_endianness =
431 MLX5_CAP_ATOMIC(dev,
432 supported_atomic_req_8B_endianess_mode_1);
433
434 if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
435 return 0;
436
437 set_ctx = kzalloc(set_sz, GFP_KERNEL);
438 if (!set_ctx)
439 return -ENOMEM;
440
441 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
442
443 /* Set requestor to host endianness */
444 MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
445 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
446
447 err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
448
449 kfree(set_ctx);
450 return err;
451}
452
407static int handle_hca_cap(struct mlx5_core_dev *dev) 453static int handle_hca_cap(struct mlx5_core_dev *dev)
408{ 454{
409 void *set_ctx = NULL; 455 void *set_ctx = NULL;
@@ -445,7 +491,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
445 491
446 MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); 492 MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
447 493
448 err = set_caps(dev, set_ctx, set_sz); 494 err = set_caps(dev, set_ctx, set_sz,
495 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
449 496
450query_ex: 497query_ex:
451 kfree(set_ctx); 498 kfree(set_ctx);
@@ -667,7 +714,6 @@ clean:
667 return err; 714 return err;
668} 715}
669 716
670#ifdef CONFIG_MLX5_CORE_EN
671static int mlx5_core_set_issi(struct mlx5_core_dev *dev) 717static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
672{ 718{
673 u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]; 719 u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
@@ -720,7 +766,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
720 766
721 return -ENOTSUPP; 767 return -ENOTSUPP;
722} 768}
723#endif
724 769
725static int map_bf_area(struct mlx5_core_dev *dev) 770static int map_bf_area(struct mlx5_core_dev *dev)
726{ 771{
@@ -966,13 +1011,11 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
966 goto err_pagealloc_cleanup; 1011 goto err_pagealloc_cleanup;
967 } 1012 }
968 1013
969#ifdef CONFIG_MLX5_CORE_EN
970 err = mlx5_core_set_issi(dev); 1014 err = mlx5_core_set_issi(dev);
971 if (err) { 1015 if (err) {
972 dev_err(&pdev->dev, "failed to set issi\n"); 1016 dev_err(&pdev->dev, "failed to set issi\n");
973 goto err_disable_hca; 1017 goto err_disable_hca;
974 } 1018 }
975#endif
976 1019
977 err = mlx5_satisfy_startup_pages(dev, 1); 1020 err = mlx5_satisfy_startup_pages(dev, 1);
978 if (err) { 1021 if (err) {
@@ -992,6 +1035,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
992 goto reclaim_boot_pages; 1035 goto reclaim_boot_pages;
993 } 1036 }
994 1037
1038 err = handle_hca_cap_atomic(dev);
1039 if (err) {
1040 dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
1041 goto reclaim_boot_pages;
1042 }
1043
995 err = mlx5_satisfy_startup_pages(dev, 0); 1044 err = mlx5_satisfy_startup_pages(dev, 0);
996 if (err) { 1045 if (err) {
997 dev_err(&pdev->dev, "failed to allocate init pages\n"); 1046 dev_err(&pdev->dev, "failed to allocate init pages\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 30e2ba3f5f16..def289375ecb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -36,6 +36,7 @@
36#include <linux/mlx5/cmd.h> 36#include <linux/mlx5/cmd.h>
37#include <linux/mlx5/qp.h> 37#include <linux/mlx5/qp.h>
38#include <linux/mlx5/driver.h> 38#include <linux/mlx5/driver.h>
39#include <linux/mlx5/transobj.h>
39 40
40#include "mlx5_core.h" 41#include "mlx5_core.h"
41 42
@@ -67,6 +68,52 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
67 complete(&common->free); 68 complete(&common->free);
68} 69}
69 70
71static u64 qp_allowed_event_types(void)
72{
73 u64 mask;
74
75 mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
76 BIT(MLX5_EVENT_TYPE_COMM_EST) |
77 BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
78 BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
79 BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
80 BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
81 BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
82 BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
83
84 return mask;
85}
86
87static u64 rq_allowed_event_types(void)
88{
89 u64 mask;
90
91 mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
92 BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
93
94 return mask;
95}
96
97static u64 sq_allowed_event_types(void)
98{
99 return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
100}
101
102static bool is_event_type_allowed(int rsc_type, int event_type)
103{
104 switch (rsc_type) {
105 case MLX5_EVENT_QUEUE_TYPE_QP:
106 return BIT(event_type) & qp_allowed_event_types();
107 case MLX5_EVENT_QUEUE_TYPE_RQ:
108 return BIT(event_type) & rq_allowed_event_types();
109 case MLX5_EVENT_QUEUE_TYPE_SQ:
110 return BIT(event_type) & sq_allowed_event_types();
111 default:
112 WARN(1, "Event arrived for unknown resource type");
113 return false;
114 }
115}
116
70void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) 117void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
71{ 118{
72 struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn); 119 struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
@@ -75,8 +122,16 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
75 if (!common) 122 if (!common)
76 return; 123 return;
77 124
125 if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
126 mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
127 event_type, rsn);
128 return;
129 }
130
78 switch (common->res) { 131 switch (common->res) {
79 case MLX5_RES_QP: 132 case MLX5_RES_QP:
133 case MLX5_RES_RQ:
134 case MLX5_RES_SQ:
80 qp = (struct mlx5_core_qp *)common; 135 qp = (struct mlx5_core_qp *)common;
81 qp->event(qp, event_type); 136 qp->event(qp, event_type);
82 break; 137 break;
@@ -177,27 +232,56 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
177} 232}
178#endif 233#endif
179 234
235static int create_qprqsq_common(struct mlx5_core_dev *dev,
236 struct mlx5_core_qp *qp,
237 int rsc_type)
238{
239 struct mlx5_qp_table *table = &dev->priv.qp_table;
240 int err;
241
242 qp->common.res = rsc_type;
243 spin_lock_irq(&table->lock);
244 err = radix_tree_insert(&table->tree,
245 qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
246 qp);
247 spin_unlock_irq(&table->lock);
248 if (err)
249 return err;
250
251 atomic_set(&qp->common.refcount, 1);
252 init_completion(&qp->common.free);
253 qp->pid = current->pid;
254
255 return 0;
256}
257
258static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
259 struct mlx5_core_qp *qp)
260{
261 struct mlx5_qp_table *table = &dev->priv.qp_table;
262 unsigned long flags;
263
264 spin_lock_irqsave(&table->lock, flags);
265 radix_tree_delete(&table->tree,
266 qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
267 spin_unlock_irqrestore(&table->lock, flags);
268 mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
269 wait_for_completion(&qp->common.free);
270}
271
180int mlx5_core_create_qp(struct mlx5_core_dev *dev, 272int mlx5_core_create_qp(struct mlx5_core_dev *dev,
181 struct mlx5_core_qp *qp, 273 struct mlx5_core_qp *qp,
182 struct mlx5_create_qp_mbox_in *in, 274 struct mlx5_create_qp_mbox_in *in,
183 int inlen) 275 int inlen)
184{ 276{
185 struct mlx5_qp_table *table = &dev->priv.qp_table;
186 struct mlx5_create_qp_mbox_out out; 277 struct mlx5_create_qp_mbox_out out;
187 struct mlx5_destroy_qp_mbox_in din; 278 struct mlx5_destroy_qp_mbox_in din;
188 struct mlx5_destroy_qp_mbox_out dout; 279 struct mlx5_destroy_qp_mbox_out dout;
189 int err; 280 int err;
190 void *qpc;
191 281
192 memset(&out, 0, sizeof(out)); 282 memset(&out, 0, sizeof(out));
193 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP); 283 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
194 284
195 if (dev->issi) {
196 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
197 /* 0xffffff means we ask to work with cqe version 0 */
198 MLX5_SET(qpc, qpc, user_index, 0xffffff);
199 }
200
201 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); 285 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
202 if (err) { 286 if (err) {
203 mlx5_core_warn(dev, "ret %d\n", err); 287 mlx5_core_warn(dev, "ret %d\n", err);
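
create_qprqsq_common() and destroy_qprqsq_common() above fold QPs, RQs and SQs into a single radix tree by packing the resource type into the bits above the queue number, which is also what lets mlx5_rsc_event() recover the type from rsn >> MLX5_USER_INDEX_LEN. A small stand-alone sketch of that key encoding follows; the 24-bit field width and the type values are assumptions for illustration, not taken from the mlx5 headers.

/* Sketch of the shared-table key: queue number in the low bits,
 * resource type in the bits above them. */
#include <stdio.h>

#define USER_INDEX_LEN 24			/* assumed queue-number width */

enum { RES_QP = 1, RES_RQ = 2, RES_SQ = 3 };	/* illustrative values */

static unsigned int make_key(unsigned int qpn, int rsc_type)
{
	return qpn | ((unsigned int)rsc_type << USER_INDEX_LEN);
}

static int key_type(unsigned int key)
{
	return key >> USER_INDEX_LEN;
}

int main(void)
{
	unsigned int key = make_key(0x1a2b, RES_RQ);

	printf("key=0x%08x type=%d qpn=0x%06x\n",
	       key, key_type(key), key & ((1u << USER_INDEX_LEN) - 1));
	return 0;
}
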
@@ -213,24 +297,16 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
213 qp->qpn = be32_to_cpu(out.qpn) & 0xffffff; 297 qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
214 mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); 298 mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
215 299
216 qp->common.res = MLX5_RES_QP; 300 err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
217 spin_lock_irq(&table->lock); 301 if (err)
218 err = radix_tree_insert(&table->tree, qp->qpn, qp);
219 spin_unlock_irq(&table->lock);
220 if (err) {
221 mlx5_core_warn(dev, "err %d\n", err);
222 goto err_cmd; 302 goto err_cmd;
223 }
224 303
225 err = mlx5_debug_qp_add(dev, qp); 304 err = mlx5_debug_qp_add(dev, qp);
226 if (err) 305 if (err)
227 mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n", 306 mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
228 qp->qpn); 307 qp->qpn);
229 308
230 qp->pid = current->pid;
231 atomic_set(&qp->common.refcount, 1);
232 atomic_inc(&dev->num_qps); 309 atomic_inc(&dev->num_qps);
233 init_completion(&qp->common.free);
234 310
235 return 0; 311 return 0;
236 312
@@ -250,18 +326,11 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
250{ 326{
251 struct mlx5_destroy_qp_mbox_in in; 327 struct mlx5_destroy_qp_mbox_in in;
252 struct mlx5_destroy_qp_mbox_out out; 328 struct mlx5_destroy_qp_mbox_out out;
253 struct mlx5_qp_table *table = &dev->priv.qp_table;
254 unsigned long flags;
255 int err; 329 int err;
256 330
257 mlx5_debug_qp_remove(dev, qp); 331 mlx5_debug_qp_remove(dev, qp);
258 332
259 spin_lock_irqsave(&table->lock, flags); 333 destroy_qprqsq_common(dev, qp);
260 radix_tree_delete(&table->tree, qp->qpn);
261 spin_unlock_irqrestore(&table->lock, flags);
262
263 mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
264 wait_for_completion(&qp->common.free);
265 334
266 memset(&in, 0, sizeof(in)); 335 memset(&in, 0, sizeof(in));
267 memset(&out, 0, sizeof(out)); 336 memset(&out, 0, sizeof(out));
@@ -279,59 +348,15 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
279} 348}
280EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp); 349EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
281 350
282int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, 351int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
283 enum mlx5_qp_state new_state,
284 struct mlx5_modify_qp_mbox_in *in, int sqd_event, 352 struct mlx5_modify_qp_mbox_in *in, int sqd_event,
285 struct mlx5_core_qp *qp) 353 struct mlx5_core_qp *qp)
286{ 354{
287 static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
288 [MLX5_QP_STATE_RST] = {
289 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
290 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
291 [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
292 },
293 [MLX5_QP_STATE_INIT] = {
294 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
295 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
296 [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
297 [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
298 },
299 [MLX5_QP_STATE_RTR] = {
300 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
301 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
302 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
303 },
304 [MLX5_QP_STATE_RTS] = {
305 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
306 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
307 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
308 },
309 [MLX5_QP_STATE_SQD] = {
310 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
311 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
312 },
313 [MLX5_QP_STATE_SQER] = {
314 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
315 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
316 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
317 },
318 [MLX5_QP_STATE_ERR] = {
319 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
320 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
321 }
322 };
323
324 struct mlx5_modify_qp_mbox_out out; 355 struct mlx5_modify_qp_mbox_out out;
325 int err = 0; 356 int err = 0;
326 u16 op;
327
328 if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE ||
329 !optab[cur_state][new_state])
330 return -EINVAL;
331 357
332 memset(&out, 0, sizeof(out)); 358 memset(&out, 0, sizeof(out));
333 op = optab[cur_state][new_state]; 359 in->hdr.opcode = cpu_to_be16(operation);
334 in->hdr.opcode = cpu_to_be16(op);
335 in->qpn = cpu_to_be32(qp->qpn); 360 in->qpn = cpu_to_be32(qp->qpn);
336 err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); 361 err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
337 if (err) 362 if (err)
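
With the state-transition table removed from mlx5_core_qp_modify(), the caller now chooses the firmware opcode for each transition and passes it in as the new 'operation' argument. The sketch below shows what such a caller-side lookup could look like; the state names and opcode numbers are placeholders, not values from the mlx5 command interface.

/* Caller-side mapping of (current state, next state) to a command opcode,
 * with 0 meaning the transition is invalid. */
#include <stdint.h>
#include <stdio.h>

enum qp_state { ST_RST, ST_INIT, ST_RTR, ST_RTS, ST_NUM };

static const uint16_t optab[ST_NUM][ST_NUM] = {
	[ST_RST]  = { [ST_INIT] = 0x902 /* placeholder RST2INIT */ },
	[ST_INIT] = { [ST_RTR]  = 0x904 /* placeholder INIT2RTR */ },
	[ST_RTR]  = { [ST_RTS]  = 0x905 /* placeholder RTR2RTS  */ },
};

static int state_to_opcode(enum qp_state cur, enum qp_state next, uint16_t *op)
{
	if (cur >= ST_NUM || next >= ST_NUM || !optab[cur][next])
		return -1;		/* invalid transition */
	*op = optab[cur][next];
	return 0;
}

int main(void)
{
	uint16_t op;

	if (!state_to_opcode(ST_INIT, ST_RTR, &op))
		printf("INIT->RTR opcode 0x%x\n", (unsigned int)op);
	return 0;
}
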
@@ -449,3 +474,67 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
449} 474}
450EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); 475EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
451#endif 476#endif
477
478int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
479 struct mlx5_core_qp *rq)
480{
481 int err;
482 u32 rqn;
483
484 err = mlx5_core_create_rq(dev, in, inlen, &rqn);
485 if (err)
486 return err;
487
488 rq->qpn = rqn;
489 err = create_qprqsq_common(dev, rq, MLX5_RES_RQ);
490 if (err)
491 goto err_destroy_rq;
492
493 return 0;
494
495err_destroy_rq:
496 mlx5_core_destroy_rq(dev, rq->qpn);
497
498 return err;
499}
500EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
501
502void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
503 struct mlx5_core_qp *rq)
504{
505 destroy_qprqsq_common(dev, rq);
506 mlx5_core_destroy_rq(dev, rq->qpn);
507}
508EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
509
510int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
511 struct mlx5_core_qp *sq)
512{
513 int err;
514 u32 sqn;
515
516 err = mlx5_core_create_sq(dev, in, inlen, &sqn);
517 if (err)
518 return err;
519
520 sq->qpn = sqn;
521 err = create_qprqsq_common(dev, sq, MLX5_RES_SQ);
522 if (err)
523 goto err_destroy_sq;
524
525 return 0;
526
527err_destroy_sq:
528 mlx5_core_destroy_sq(dev, sq->qpn);
529
530 return err;
531}
532EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
533
534void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
535 struct mlx5_core_qp *sq)
536{
537 destroy_qprqsq_common(dev, sq);
538 mlx5_core_destroy_sq(dev, sq->qpn);
539}
540EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
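
mlx5_core_create_rq_tracked() and its SQ counterpart above follow a create-then-track pattern: the transport object is created first, then inserted into the shared QP table so it receives events, and it is destroyed again if the insertion fails. The stand-alone sketch below shows only that error-handling shape; hw_create() and track() are invented stand-ins, not mlx5 calls.

/* Create, then register; roll the creation back if registration fails. */
#include <stdio.h>

static int hw_create(unsigned int *handle) { *handle = 42; return 0; }
static void hw_destroy(unsigned int handle) { printf("destroyed %u\n", handle); }
static int track(unsigned int handle) { return handle ? 0 : -1; }

static int create_tracked(unsigned int *handle)
{
	int err = hw_create(handle);

	if (err)
		return err;

	err = track(*handle);
	if (err)
		goto err_destroy;	/* roll back the hardware object */

	return 0;

err_destroy:
	hw_destroy(*handle);
	return err;
}

int main(void)
{
	unsigned int h;

	printf("create_tracked: %d\n", create_tracked(&h));
	return 0;
}
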
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index ffada801976b..04bc522605a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -37,7 +37,7 @@
37#include <linux/mlx5/srq.h> 37#include <linux/mlx5/srq.h>
38#include <rdma/ib_verbs.h> 38#include <rdma/ib_verbs.h>
39#include "mlx5_core.h" 39#include "mlx5_core.h"
40#include "transobj.h" 40#include <linux/mlx5/transobj.h>
41 41
42void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) 42void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
43{ 43{
@@ -241,8 +241,6 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
241 241
242 memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc)); 242 memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc));
243 memcpy(pas, in->pas, pas_size); 243 memcpy(pas, in->pas, pas_size);
244 /* 0xffffff means we ask to work with cqe version 0 */
245 MLX5_SET(xrc_srqc, xrc_srqc, user_index, 0xffffff);
246 MLX5_SET(create_xrc_srq_in, create_in, opcode, 244 MLX5_SET(create_xrc_srq_in, create_in, opcode,
247 MLX5_CMD_OP_CREATE_XRC_SRQ); 245 MLX5_CMD_OP_CREATE_XRC_SRQ);
248 246
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index d7068f54e800..03a5093ffeb7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -32,9 +32,9 @@
32 32
33#include <linux/mlx5/driver.h> 33#include <linux/mlx5/driver.h>
34#include "mlx5_core.h" 34#include "mlx5_core.h"
35#include "transobj.h" 35#include <linux/mlx5/transobj.h>
36 36
37int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) 37int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
38{ 38{
39 u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)]; 39 u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)];
40 u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)]; 40 u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)];
@@ -53,8 +53,9 @@ int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
53 53
54 return err; 54 return err;
55} 55}
56EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
56 57
57void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) 58void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
58{ 59{
59 u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)]; 60 u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)];
60 u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)]; 61 u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)];
@@ -68,6 +69,7 @@ void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
68 69
69 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); 70 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
70} 71}
72EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
71 73
72int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) 74int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
73{ 75{
@@ -94,6 +96,7 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
94 memset(out, 0, sizeof(out)); 96 memset(out, 0, sizeof(out));
95 return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); 97 return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
96} 98}
99EXPORT_SYMBOL(mlx5_core_modify_rq);
97 100
98void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) 101void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
99{ 102{
@@ -108,6 +111,18 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
108 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); 111 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
109} 112}
110 113
114int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
115{
116 u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
117 int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
118
119 MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
120 MLX5_SET(query_rq_in, in, rqn, rqn);
121
122 return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
123}
124EXPORT_SYMBOL(mlx5_core_query_rq);
125
111int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) 126int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
112{ 127{
113 u32 out[MLX5_ST_SZ_DW(create_sq_out)]; 128 u32 out[MLX5_ST_SZ_DW(create_sq_out)];
@@ -133,6 +148,7 @@ int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
133 memset(out, 0, sizeof(out)); 148 memset(out, 0, sizeof(out));
134 return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); 149 return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
135} 150}
151EXPORT_SYMBOL(mlx5_core_modify_sq);
136 152
137void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) 153void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
138{ 154{
@@ -147,6 +163,18 @@ void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
147 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); 163 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
148} 164}
149 165
166int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
167{
168 u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
169 int outlen = MLX5_ST_SZ_BYTES(query_sq_out);
170
171 MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
172 MLX5_SET(query_sq_in, in, sqn, sqn);
173
174 return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
175}
176EXPORT_SYMBOL(mlx5_core_query_sq);
177
150int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, 178int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
151 u32 *tirn) 179 u32 *tirn)
152{ 180{
@@ -162,6 +190,7 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
162 190
163 return err; 191 return err;
164} 192}
193EXPORT_SYMBOL(mlx5_core_create_tir);
165 194
166int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, 195int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
167 int inlen) 196 int inlen)
@@ -187,6 +216,7 @@ void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
187 216
188 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); 217 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
189} 218}
219EXPORT_SYMBOL(mlx5_core_destroy_tir);
190 220
191int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, 221int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
192 u32 *tisn) 222 u32 *tisn)
@@ -203,6 +233,19 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
203 233
204 return err; 234 return err;
205} 235}
236EXPORT_SYMBOL(mlx5_core_create_tis);
237
238int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
239 int inlen)
240{
241 u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
242
243 MLX5_SET(modify_tis_in, in, tisn, tisn);
244 MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
245
246 return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
247}
248EXPORT_SYMBOL(mlx5_core_modify_tis);
206 249
207void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) 250void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
208{ 251{
@@ -216,6 +259,7 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
216 259
217 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); 260 mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
218} 261}
262EXPORT_SYMBOL(mlx5_core_destroy_tis);
219 263
220int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, 264int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
221 u32 *rmpn) 265 u32 *rmpn)
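
The new mlx5_core_query_rq(), mlx5_core_query_sq() and mlx5_core_modify_tis() wrappers above all follow the same command idiom: a zero-initialized inbox sized from the structure-layout macros, the opcode and object id written with MLX5_SET(), and a single exec-and-check call. The sketch below imitates that shape with toy macros; the sizes, field positions and opcode value are invented, and the real MLX5_SET() addresses bit fields within a byte layout rather than whole dwords.

/* Toy imitation of the inbox-build-and-exec idiom. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define IN_DWORDS 4			/* pretend layout size of the inbox */
#define F_OPCODE  0			/* field slots, purely illustrative */
#define F_RQN     1

#define TOY_SET(buf, field, val)	((buf)[(field)] = (val))

static int cmd_exec_check_status(const uint32_t *in, size_t inlen,
				 uint32_t *out, size_t outlen)
{
	(void)in; (void)inlen;
	memset(out, 0, outlen);		/* pretend the device answered */
	return 0;
}

static int query_rq(uint32_t rqn, uint32_t *out, size_t outlen)
{
	uint32_t in[IN_DWORDS] = {0};

	TOY_SET(in, F_OPCODE, 0x50e);	/* placeholder QUERY_RQ opcode */
	TOY_SET(in, F_RQN, rqn);

	return cmd_exec_check_status(in, sizeof(in), out, outlen);
}

int main(void)
{
	uint32_t out[8];

	printf("query_rq -> %d\n", query_rq(7, out, sizeof(out)));
	return 0;
}
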
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 076197efea9b..c7398b95aecd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -76,7 +76,7 @@ u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
76 76
77 return MLX5_GET(query_vport_state_out, out, admin_state); 77 return MLX5_GET(query_vport_state_out, out, admin_state);
78} 78}
79EXPORT_SYMBOL(mlx5_query_vport_admin_state); 79EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
80 80
81int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, 81int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
82 u16 vport, u8 state) 82 u16 vport, u8 state)
@@ -104,7 +104,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
104 104
105 return err; 105 return err;
106} 106}
107EXPORT_SYMBOL(mlx5_modify_vport_admin_state); 107EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state);
108 108
109static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, 109static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
110 u32 *out, int outlen) 110 u32 *out, int outlen)
@@ -151,12 +151,9 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
151 nic_vport_context.permanent_address); 151 nic_vport_context.permanent_address);
152 152
153 err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); 153 err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
154 if (err) 154 if (!err)
155 goto out; 155 ether_addr_copy(addr, &out_addr[2]);
156
157 ether_addr_copy(addr, &out_addr[2]);
158 156
159out:
160 kvfree(out); 157 kvfree(out);
161 return err; 158 return err;
162} 159}
@@ -197,7 +194,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
197 194
198 return err; 195 return err;
199} 196}
200EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address); 197EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
201 198
202int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, 199int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
203 u32 vport, 200 u32 vport,
@@ -430,6 +427,68 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
430} 427}
431EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); 428EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
432 429
430int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
431 u64 *system_image_guid)
432{
433 u32 *out;
434 int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
435
436 out = mlx5_vzalloc(outlen);
437 if (!out)
438 return -ENOMEM;
439
440 mlx5_query_nic_vport_context(mdev, 0, out, outlen);
441
442 *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
443 nic_vport_context.system_image_guid);
444
445 kfree(out);
446
447 return 0;
448}
449EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
450
451int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
452{
453 u32 *out;
454 int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
455
456 out = mlx5_vzalloc(outlen);
457 if (!out)
458 return -ENOMEM;
459
460 mlx5_query_nic_vport_context(mdev, 0, out, outlen);
461
462 *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
463 nic_vport_context.node_guid);
464
465 kfree(out);
466
467 return 0;
468}
469EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
470
471int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
472 u16 *qkey_viol_cntr)
473{
474 u32 *out;
475 int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
476
477 out = mlx5_vzalloc(outlen);
478 if (!out)
479 return -ENOMEM;
480
481 mlx5_query_nic_vport_context(mdev, 0, out, outlen);
482
483 *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
484 nic_vport_context.qkey_violation_counter);
485
486 kfree(out);
487
488 return 0;
489}
490EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
491
433int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, 492int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
434 u8 port_num, u16 vf_num, u16 gid_index, 493 u8 port_num, u16 vf_num, u16 gid_index,
435 union ib_gid *gid) 494 union ib_gid *gid)
@@ -750,3 +809,44 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
750 return err; 809 return err;
751} 810}
752EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); 811EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
812
813enum mlx5_vport_roce_state {
814 MLX5_VPORT_ROCE_DISABLED = 0,
815 MLX5_VPORT_ROCE_ENABLED = 1,
816};
817
818static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
819 enum mlx5_vport_roce_state state)
820{
821 void *in;
822 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
823 int err;
824
825 in = mlx5_vzalloc(inlen);
826 if (!in) {
827 mlx5_core_warn(mdev, "failed to allocate inbox\n");
828 return -ENOMEM;
829 }
830
831 MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
832 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
833 state);
834
835 err = mlx5_modify_nic_vport_context(mdev, in, inlen);
836
837 kvfree(in);
838
839 return err;
840}
841
842int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
843{
844 return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
845}
846EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
847
848int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
849{
850 return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
851}
852EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
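
mlx5_nic_vport_update_roce_state() above relies on the modify command's field_select mechanism: the inbox carries both the new roce_en value and a select bit, and only fields whose select bit is set are applied, leaving the rest of the vport context untouched. A toy illustration of that select-mask idea follows; the structure and bit position are invented for the sketch.

/* Only fields flagged in field_select are applied to the target state. */
#include <stdbool.h>
#include <stdio.h>

struct vport_ctx_cmd {
	unsigned int field_select;	/* which fields below are valid */
	bool roce_en;
};

#define SEL_ROCE_EN	(1u << 0)

static void apply(const struct vport_ctx_cmd *cmd, bool *roce_state)
{
	if (cmd->field_select & SEL_ROCE_EN)
		*roce_state = cmd->roce_en;	/* untouched otherwise */
}

int main(void)
{
	bool roce = false;
	struct vport_ctx_cmd cmd = { .field_select = SEL_ROCE_EN, .roce_en = true };

	apply(&cmd, &roce);
	printf("roce enabled: %d\n", roce);
	return 0;
}
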
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 23d862dfdde3..e2f31c93717d 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1106,6 +1106,7 @@ config SCSI_IPR
1106 tristate "IBM Power Linux RAID adapter support" 1106 tristate "IBM Power Linux RAID adapter support"
1107 depends on PCI && SCSI && ATA 1107 depends on PCI && SCSI && ATA
1108 select FW_LOADER 1108 select FW_LOADER
1109 select IRQ_POLL
1109 ---help--- 1110 ---help---
1110 This driver supports the IBM Power Linux family RAID adapters. 1111 This driver supports the IBM Power Linux family RAID adapters.
1111 This includes IBM pSeries 5712, 5703, 5709, and 570A, as well 1112 This includes IBM pSeries 5712, 5703, 5709, and 570A, as well
diff --git a/drivers/scsi/be2iscsi/Kconfig b/drivers/scsi/be2iscsi/Kconfig
index 4e7cad272469..bad5f32e1f67 100644
--- a/drivers/scsi/be2iscsi/Kconfig
+++ b/drivers/scsi/be2iscsi/Kconfig
@@ -3,6 +3,7 @@ config BE2ISCSI
3 depends on PCI && SCSI && NET 3 depends on PCI && SCSI && NET
4 select SCSI_ISCSI_ATTRS 4 select SCSI_ISCSI_ATTRS
5 select ISCSI_BOOT_SYSFS 5 select ISCSI_BOOT_SYSFS
6 select IRQ_POLL
6 7
7 help 8 help
8 This driver implements the iSCSI functionality for Emulex 9 This driver implements the iSCSI functionality for Emulex
diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h
index 77f992e74726..a41c6432f444 100644
--- a/drivers/scsi/be2iscsi/be.h
+++ b/drivers/scsi/be2iscsi/be.h
@@ -20,7 +20,7 @@
20 20
21#include <linux/pci.h> 21#include <linux/pci.h>
22#include <linux/if_vlan.h> 22#include <linux/if_vlan.h>
23#include <linux/blk-iopoll.h> 23#include <linux/irq_poll.h>
24#define FW_VER_LEN 32 24#define FW_VER_LEN 32
25#define MCC_Q_LEN 128 25#define MCC_Q_LEN 128
26#define MCC_CQ_LEN 256 26#define MCC_CQ_LEN 256
@@ -101,7 +101,7 @@ struct be_eq_obj {
101 struct beiscsi_hba *phba; 101 struct beiscsi_hba *phba;
102 struct be_queue_info *cq; 102 struct be_queue_info *cq;
103 struct work_struct work_cqs; /* Work Item */ 103 struct work_struct work_cqs; /* Work Item */
104 struct blk_iopoll iopoll; 104 struct irq_poll iopoll;
105}; 105};
106 106
107struct be_mcc_obj { 107struct be_mcc_obj {
diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
index b7087ba69d8d..022e87b62e40 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.c
+++ b/drivers/scsi/be2iscsi/be_iscsi.c
@@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba)
1292 1292
1293 for (i = 0; i < phba->num_cpus; i++) { 1293 for (i = 0; i < phba->num_cpus; i++) {
1294 pbe_eq = &phwi_context->be_eq[i]; 1294 pbe_eq = &phwi_context->be_eq[i];
1295 blk_iopoll_disable(&pbe_eq->iopoll); 1295 irq_poll_disable(&pbe_eq->iopoll);
1296 beiscsi_process_cq(pbe_eq); 1296 beiscsi_process_cq(pbe_eq);
1297 blk_iopoll_enable(&pbe_eq->iopoll); 1297 irq_poll_enable(&pbe_eq->iopoll);
1298 } 1298 }
1299} 1299}
1300 1300
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index fe0c5143f8e6..cb9072a841be 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id)
910 num_eq_processed = 0; 910 num_eq_processed = 0;
911 while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] 911 while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32]
912 & EQE_VALID_MASK) { 912 & EQE_VALID_MASK) {
913 if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) 913 irq_poll_sched(&pbe_eq->iopoll);
914 blk_iopoll_sched(&pbe_eq->iopoll);
915 914
916 AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); 915 AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
917 queue_tail_inc(eq); 916 queue_tail_inc(eq);
@@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id)
972 spin_unlock_irqrestore(&phba->isr_lock, flags); 971 spin_unlock_irqrestore(&phba->isr_lock, flags);
973 num_mcceq_processed++; 972 num_mcceq_processed++;
974 } else { 973 } else {
975 if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) 974 irq_poll_sched(&pbe_eq->iopoll);
976 blk_iopoll_sched(&pbe_eq->iopoll);
977 num_ioeq_processed++; 975 num_ioeq_processed++;
978 } 976 }
979 AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); 977 AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
@@ -2295,7 +2293,7 @@ void beiscsi_process_all_cqs(struct work_struct *work)
2295 hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1); 2293 hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
2296} 2294}
2297 2295
2298static int be_iopoll(struct blk_iopoll *iop, int budget) 2296static int be_iopoll(struct irq_poll *iop, int budget)
2299{ 2297{
2300 unsigned int ret; 2298 unsigned int ret;
2301 struct beiscsi_hba *phba; 2299 struct beiscsi_hba *phba;
@@ -2306,7 +2304,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget)
2306 pbe_eq->cq_count += ret; 2304 pbe_eq->cq_count += ret;
2307 if (ret < budget) { 2305 if (ret < budget) {
2308 phba = pbe_eq->phba; 2306 phba = pbe_eq->phba;
2309 blk_iopoll_complete(iop); 2307 irq_poll_complete(iop);
2310 beiscsi_log(phba, KERN_INFO, 2308 beiscsi_log(phba, KERN_INFO,
2311 BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO, 2309 BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
2312 "BM_%d : rearm pbe_eq->q.id =%d\n", 2310 "BM_%d : rearm pbe_eq->q.id =%d\n",
@@ -5293,7 +5291,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba,
5293 5291
5294 for (i = 0; i < phba->num_cpus; i++) { 5292 for (i = 0; i < phba->num_cpus; i++) {
5295 pbe_eq = &phwi_context->be_eq[i]; 5293 pbe_eq = &phwi_context->be_eq[i];
5296 blk_iopoll_disable(&pbe_eq->iopoll); 5294 irq_poll_disable(&pbe_eq->iopoll);
5297 } 5295 }
5298 5296
5299 if (unload_state == BEISCSI_CLEAN_UNLOAD) { 5297 if (unload_state == BEISCSI_CLEAN_UNLOAD) {
@@ -5579,9 +5577,8 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev)
5579 5577
5580 for (i = 0; i < phba->num_cpus; i++) { 5578 for (i = 0; i < phba->num_cpus; i++) {
5581 pbe_eq = &phwi_context->be_eq[i]; 5579 pbe_eq = &phwi_context->be_eq[i];
5582 blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, 5580 irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
5583 be_iopoll); 5581 be_iopoll);
5584 blk_iopoll_enable(&pbe_eq->iopoll);
5585 } 5582 }
5586 5583
5587 i = (phba->msix_enabled) ? i : 0; 5584 i = (phba->msix_enabled) ? i : 0;
@@ -5752,9 +5749,8 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev,
5752 5749
5753 for (i = 0; i < phba->num_cpus; i++) { 5750 for (i = 0; i < phba->num_cpus; i++) {
5754 pbe_eq = &phwi_context->be_eq[i]; 5751 pbe_eq = &phwi_context->be_eq[i];
5755 blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, 5752 irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
5756 be_iopoll); 5753 be_iopoll);
5757 blk_iopoll_enable(&pbe_eq->iopoll);
5758 } 5754 }
5759 5755
5760 i = (phba->msix_enabled) ? i : 0; 5756 i = (phba->msix_enabled) ? i : 0;
@@ -5795,7 +5791,7 @@ free_blkenbld:
5795 destroy_workqueue(phba->wq); 5791 destroy_workqueue(phba->wq);
5796 for (i = 0; i < phba->num_cpus; i++) { 5792 for (i = 0; i < phba->num_cpus; i++) {
5797 pbe_eq = &phwi_context->be_eq[i]; 5793 pbe_eq = &phwi_context->be_eq[i];
5798 blk_iopoll_disable(&pbe_eq->iopoll); 5794 irq_poll_disable(&pbe_eq->iopoll);
5799 } 5795 }
5800free_twq: 5796free_twq:
5801 beiscsi_clean_port(phba); 5797 beiscsi_clean_port(phba);
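
The be2iscsi changes above (and the ipr changes that follow) are a mechanical move from blk-iopoll to the renamed irq_poll API: the old blk_iopoll_sched_prep()/blk_iopoll_sched() pair collapses into a single irq_poll_sched() call, with the redundant-schedule check handled inside it, and the separate enable call after irq_poll_init() disappears, as the removed blk_iopoll_enable() lines show. The poll callback contract is unchanged, which the self-contained sketch below illustrates with stand-in functions: consume at most 'budget' completions per pass and complete (re-arm interrupts) only when fewer than 'budget' were found.

/* Sketch of the schedule/poll/complete contract followed by be_iopoll()
 * and ipr_iopoll(); these are stand-ins, not the irq_poll API itself. */
#include <stdbool.h>
#include <stdio.h>

static int pending = 10;		/* completions waiting in the queue */
static bool scheduled;

static void poll_sched(void)		/* irq_poll_sched() analogue */
{
	scheduled = true;		/* re-scheduling while queued is a no-op */
}

static int poll_once(int budget)	/* poll callback analogue */
{
	int done = pending < budget ? pending : budget;

	pending -= done;
	if (done < budget)
		scheduled = false;	/* irq_poll_complete() analogue */
	return done;
}

int main(void)
{
	poll_sched();
	while (scheduled)
		printf("polled %d, remaining %d\n", poll_once(4), pending);
	return 0;
}
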
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 1c3759bab80b..3b3e0998fa6e 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = {
3638 .store = ipr_store_reset_adapter 3638 .store = ipr_store_reset_adapter
3639}; 3639};
3640 3640
3641static int ipr_iopoll(struct blk_iopoll *iop, int budget); 3641static int ipr_iopoll(struct irq_poll *iop, int budget);
3642 /** 3642 /**
3643 * ipr_show_iopoll_weight - Show ipr polling mode 3643 * ipr_show_iopoll_weight - Show ipr polling mode
3644 * @dev: class device struct 3644 * @dev: class device struct
@@ -3681,34 +3681,33 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev,
3681 int i; 3681 int i;
3682 3682
3683 if (!ioa_cfg->sis64) { 3683 if (!ioa_cfg->sis64) {
3684 dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); 3684 dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n");
3685 return -EINVAL; 3685 return -EINVAL;
3686 } 3686 }
3687 if (kstrtoul(buf, 10, &user_iopoll_weight)) 3687 if (kstrtoul(buf, 10, &user_iopoll_weight))
3688 return -EINVAL; 3688 return -EINVAL;
3689 3689
3690 if (user_iopoll_weight > 256) { 3690 if (user_iopoll_weight > 256) {
3691 dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); 3691 dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be less than 256\n");
3692 return -EINVAL; 3692 return -EINVAL;
3693 } 3693 }
3694 3694
3695 if (user_iopoll_weight == ioa_cfg->iopoll_weight) { 3695 if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
3696 dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); 3696 dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n");
3697 return strlen(buf); 3697 return strlen(buf);
3698 } 3698 }
3699 3699
3700 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 3700 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
3701 for (i = 1; i < ioa_cfg->hrrq_num; i++) 3701 for (i = 1; i < ioa_cfg->hrrq_num; i++)
3702 blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); 3702 irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
3703 } 3703 }
3704 3704
3705 spin_lock_irqsave(shost->host_lock, lock_flags); 3705 spin_lock_irqsave(shost->host_lock, lock_flags);
3706 ioa_cfg->iopoll_weight = user_iopoll_weight; 3706 ioa_cfg->iopoll_weight = user_iopoll_weight;
3707 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 3707 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
3708 for (i = 1; i < ioa_cfg->hrrq_num; i++) { 3708 for (i = 1; i < ioa_cfg->hrrq_num; i++) {
3709 blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, 3709 irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
3710 ioa_cfg->iopoll_weight, ipr_iopoll); 3710 ioa_cfg->iopoll_weight, ipr_iopoll);
3711 blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
3712 } 3711 }
3713 } 3712 }
3714 spin_unlock_irqrestore(shost->host_lock, lock_flags); 3713 spin_unlock_irqrestore(shost->host_lock, lock_flags);
@@ -5568,7 +5567,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget,
5568 return num_hrrq; 5567 return num_hrrq;
5569} 5568}
5570 5569
5571static int ipr_iopoll(struct blk_iopoll *iop, int budget) 5570static int ipr_iopoll(struct irq_poll *iop, int budget)
5572{ 5571{
5573 struct ipr_ioa_cfg *ioa_cfg; 5572 struct ipr_ioa_cfg *ioa_cfg;
5574 struct ipr_hrr_queue *hrrq; 5573 struct ipr_hrr_queue *hrrq;
@@ -5584,7 +5583,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget)
5584 completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); 5583 completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
5585 5584
5586 if (completed_ops < budget) 5585 if (completed_ops < budget)
5587 blk_iopoll_complete(iop); 5586 irq_poll_complete(iop);
5588 spin_unlock_irqrestore(hrrq->lock, hrrq_flags); 5587 spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
5589 5588
5590 list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { 5589 list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
@@ -5692,8 +5691,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
5692 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 5691 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
5693 if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == 5692 if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
5694 hrrq->toggle_bit) { 5693 hrrq->toggle_bit) {
5695 if (!blk_iopoll_sched_prep(&hrrq->iopoll)) 5694 irq_poll_sched(&hrrq->iopoll);
5696 blk_iopoll_sched(&hrrq->iopoll);
5697 spin_unlock_irqrestore(hrrq->lock, hrrq_flags); 5695 spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
5698 return IRQ_HANDLED; 5696 return IRQ_HANDLED;
5699 } 5697 }
@@ -10404,9 +10402,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
10404 10402
10405 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 10403 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
10406 for (i = 1; i < ioa_cfg->hrrq_num; i++) { 10404 for (i = 1; i < ioa_cfg->hrrq_num; i++) {
10407 blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, 10405 irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
10408 ioa_cfg->iopoll_weight, ipr_iopoll); 10406 ioa_cfg->iopoll_weight, ipr_iopoll);
10409 blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
10410 } 10407 }
10411 } 10408 }
10412 10409
@@ -10435,7 +10432,7 @@ static void ipr_shutdown(struct pci_dev *pdev)
10435 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 10432 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
10436 ioa_cfg->iopoll_weight = 0; 10433 ioa_cfg->iopoll_weight = 0;
10437 for (i = 1; i < ioa_cfg->hrrq_num; i++) 10434 for (i = 1; i < ioa_cfg->hrrq_num; i++)
10438 blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); 10435 irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
10439 } 10436 }
10440 10437
10441 while (ioa_cfg->in_reset_reload) { 10438 while (ioa_cfg->in_reset_reload) {
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index a34c7a5a995e..56c57068300a 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -32,7 +32,7 @@
32#include <linux/libata.h> 32#include <linux/libata.h>
33#include <linux/list.h> 33#include <linux/list.h>
34#include <linux/kref.h> 34#include <linux/kref.h>
35#include <linux/blk-iopoll.h> 35#include <linux/irq_poll.h>
36#include <scsi/scsi.h> 36#include <scsi/scsi.h>
37#include <scsi/scsi_cmnd.h> 37#include <scsi/scsi_cmnd.h>
38 38
@@ -517,7 +517,7 @@ struct ipr_hrr_queue {
517 u8 allow_cmds:1; 517 u8 allow_cmds:1;
518 u8 removing_ioa:1; 518 u8 removing_ioa:1;
519 519
520 struct blk_iopoll iopoll; 520 struct irq_poll iopoll;
521}; 521};
522 522
523/* Command packet structure */ 523/* Command packet structure */
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 72af486b65df..cb74ae731b95 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -2070,32 +2070,13 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
2070 2070
2071static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev) 2071static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
2072{ 2072{
2073 struct ib_device_attr *attr;
2074 int rc;
2075
2076 /* It's safe to assume a HCA can handle a page size 2073 /* It's safe to assume a HCA can handle a page size
2077 * matching that of the native system */ 2074 * matching that of the native system */
2078 hdev->ibh_page_shift = PAGE_SHIFT; 2075 hdev->ibh_page_shift = PAGE_SHIFT;
2079 hdev->ibh_page_size = 1 << PAGE_SHIFT; 2076 hdev->ibh_page_size = 1 << PAGE_SHIFT;
2080 hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1); 2077 hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1);
2081 2078
2082 LIBCFS_ALLOC(attr, sizeof(*attr)); 2079 hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
2083 if (attr == NULL) {
2084 CERROR("Out of memory\n");
2085 return -ENOMEM;
2086 }
2087
2088 rc = ib_query_device(hdev->ibh_ibdev, attr);
2089 if (rc == 0)
2090 hdev->ibh_mr_size = attr->max_mr_size;
2091
2092 LIBCFS_FREE(attr, sizeof(*attr));
2093
2094 if (rc != 0) {
2095 CERROR("Failed to query IB device: %d\n", rc);
2096 return rc;
2097 }
2098
2099 if (hdev->ibh_mr_size == ~0ULL) { 2080 if (hdev->ibh_mr_size == ~0ULL) {
2100 hdev->ibh_mr_shift = 64; 2081 hdev->ibh_mr_shift = 64;
2101 return 0; 2082 return 0;
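
The o2iblnd hunk above reflects the removal of ib_query_device(): device limits are cached on the ib_device at registration time, so consumers read them directly (here hdev->ibh_ibdev->attrs.max_mr_size) instead of allocating an attribute struct and issuing a query on every use. A generic sketch of the query-once, read-many pattern; the structures below are stand-ins, not the verbs API.

/* Cache device limits once, then read the cached copy everywhere. */
#include <stdint.h>
#include <stdio.h>

struct dev_attrs {			/* stand-in for cached device limits */
	uint64_t max_mr_size;
};

struct device {
	struct dev_attrs attrs;		/* filled once when the device registers */
};

static void register_device(struct device *dev)
{
	dev->attrs.max_mr_size = ~0ULL;	/* pretend the hardware reported this */
}

int main(void)
{
	struct device dev;

	register_device(&dev);
	/* consumers read the cached value; no per-call query/alloc/free */
	printf("max_mr_size=0x%llx\n", (unsigned long long)dev.attrs.max_mr_size);
	return 0;
}
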
diff --git a/drivers/staging/rdma/amso1100/c2_cq.c b/drivers/staging/rdma/amso1100/c2_cq.c
index 3ef881f2da0f..7ad0c082485a 100644
--- a/drivers/staging/rdma/amso1100/c2_cq.c
+++ b/drivers/staging/rdma/amso1100/c2_cq.c
@@ -173,9 +173,6 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
173 case C2_WR_TYPE_RDMA_READ: 173 case C2_WR_TYPE_RDMA_READ:
174 entry->opcode = IB_WC_RDMA_READ; 174 entry->opcode = IB_WC_RDMA_READ;
175 break; 175 break;
176 case C2_WR_TYPE_BIND_MW:
177 entry->opcode = IB_WC_BIND_MW;
178 break;
179 case C2_WR_TYPE_RECV: 176 case C2_WR_TYPE_RECV:
180 entry->byte_len = be32_to_cpu(ce->bytes_rcvd); 177 entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
181 entry->opcode = IB_WC_RECV; 178 entry->opcode = IB_WC_RECV;
diff --git a/drivers/staging/rdma/amso1100/c2_provider.c b/drivers/staging/rdma/amso1100/c2_provider.c
index a092ac743c72..de8d10e1bde3 100644
--- a/drivers/staging/rdma/amso1100/c2_provider.c
+++ b/drivers/staging/rdma/amso1100/c2_provider.c
@@ -337,43 +337,21 @@ static inline u32 c2_convert_access(int acc)
337 C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND; 337 C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
338} 338}
339 339
340static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd, 340static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
341 struct ib_phys_buf *buffer_list,
342 int num_phys_buf, int acc, u64 * iova_start)
343{ 341{
344 struct c2_mr *mr; 342 struct c2_mr *mr;
345 u64 *page_list; 343 u64 *page_list;
346 u32 total_len; 344 const u32 total_len = 0xffffffff; /* AMSO1100 limit */
347 int err, i, j, k, page_shift, pbl_depth; 345 int err, page_shift, pbl_depth, i;
346 u64 kva = 0;
348 347
349 pbl_depth = 0; 348 pr_debug("%s:%u\n", __func__, __LINE__);
350 total_len = 0;
351 349
352 page_shift = PAGE_SHIFT;
353 /* 350 /*
354 * If there is only 1 buffer we assume this could 351 * This is a map of all phy mem...use a 32k page_shift.
355 * be a map of all phy mem...use a 32k page_shift.
356 */ 352 */
357 if (num_phys_buf == 1) 353 page_shift = PAGE_SHIFT + 3;
358 page_shift += 3; 354 pbl_depth = ALIGN(total_len, BIT(page_shift)) >> page_shift;
359
360 for (i = 0; i < num_phys_buf; i++) {
361
362 if (offset_in_page(buffer_list[i].addr)) {
363 pr_debug("Unaligned Memory Buffer: 0x%x\n",
364 (unsigned int) buffer_list[i].addr);
365 return ERR_PTR(-EINVAL);
366 }
367
368 if (!buffer_list[i].size) {
369 pr_debug("Invalid Buffer Size\n");
370 return ERR_PTR(-EINVAL);
371 }
372
373 total_len += buffer_list[i].size;
374 pbl_depth += ALIGN(buffer_list[i].size,
375 BIT(page_shift)) >> page_shift;
376 }
377 355
378 page_list = vmalloc(sizeof(u64) * pbl_depth); 356 page_list = vmalloc(sizeof(u64) * pbl_depth);
379 if (!page_list) { 357 if (!page_list) {
@@ -382,16 +360,8 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
382 return ERR_PTR(-ENOMEM); 360 return ERR_PTR(-ENOMEM);
383 } 361 }
384 362
385 for (i = 0, j = 0; i < num_phys_buf; i++) { 363 for (i = 0; i < pbl_depth; i++)
386 364 page_list[i] = (i << page_shift);
387 int naddrs;
388
389 naddrs = ALIGN(buffer_list[i].size,
390 BIT(page_shift)) >> page_shift;
391 for (k = 0; k < naddrs; k++)
392 page_list[j++] = (buffer_list[i].addr +
393 (k << page_shift));
394 }
395 365
396 mr = kmalloc(sizeof(*mr), GFP_KERNEL); 366 mr = kmalloc(sizeof(*mr), GFP_KERNEL);
397 if (!mr) { 367 if (!mr) {
@@ -399,17 +369,17 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
399 return ERR_PTR(-ENOMEM); 369 return ERR_PTR(-ENOMEM);
400 } 370 }
401 371
402 mr->pd = to_c2pd(ib_pd); 372 mr->pd = to_c2pd(pd);
403 mr->umem = NULL; 373 mr->umem = NULL;
404 pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " 374 pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
405 "*iova_start %llx, first pa %llx, last pa %llx\n", 375 "*iova_start %llx, first pa %llx, last pa %llx\n",
406 __func__, page_shift, pbl_depth, total_len, 376 __func__, page_shift, pbl_depth, total_len,
407 (unsigned long long) *iova_start, 377 (unsigned long long) kva,
408 (unsigned long long) page_list[0], 378 (unsigned long long) page_list[0],
409 (unsigned long long) page_list[pbl_depth-1]); 379 (unsigned long long) page_list[pbl_depth-1]);
410 err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list, 380 err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), page_list,
411 BIT(page_shift), pbl_depth, 381 BIT(page_shift), pbl_depth,
412 total_len, 0, iova_start, 382 total_len, 0, &kva,
413 c2_convert_access(acc), mr); 383 c2_convert_access(acc), mr);
414 vfree(page_list); 384 vfree(page_list);
415 if (err) { 385 if (err) {
@@ -420,19 +390,6 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
420 return &mr->ibmr; 390 return &mr->ibmr;
421} 391}
422 392
423static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
424{
425 struct ib_phys_buf bl;
426 u64 kva = 0;
427
428 pr_debug("%s:%u\n", __func__, __LINE__);
429
430 /* AMSO1100 limit */
431 bl.size = 0xffffffff;
432 bl.addr = 0;
433 return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
434}
435
436static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 393static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
437 u64 virt, int acc, struct ib_udata *udata) 394 u64 virt, int acc, struct ib_udata *udata)
438{ 395{
@@ -840,7 +797,6 @@ int c2_register_device(struct c2_dev *dev)
840 dev->ibdev.destroy_cq = c2_destroy_cq; 797 dev->ibdev.destroy_cq = c2_destroy_cq;
841 dev->ibdev.poll_cq = c2_poll_cq; 798 dev->ibdev.poll_cq = c2_poll_cq;
842 dev->ibdev.get_dma_mr = c2_get_dma_mr; 799 dev->ibdev.get_dma_mr = c2_get_dma_mr;
843 dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
844 dev->ibdev.reg_user_mr = c2_reg_user_mr; 800 dev->ibdev.reg_user_mr = c2_reg_user_mr;
845 dev->ibdev.dereg_mr = c2_dereg_mr; 801 dev->ibdev.dereg_mr = c2_dereg_mr;
846 dev->ibdev.get_port_immutable = c2_port_immutable; 802 dev->ibdev.get_port_immutable = c2_port_immutable;
diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h
index bd45e0f3923f..e8c3387d7aaa 100644
--- a/drivers/staging/rdma/ehca/ehca_classes.h
+++ b/drivers/staging/rdma/ehca/ehca_classes.h
@@ -316,9 +316,8 @@ struct ehca_mr_pginfo {
316 316
317 union { 317 union {
318 struct { /* type EHCA_MR_PGI_PHYS section */ 318 struct { /* type EHCA_MR_PGI_PHYS section */
319 int num_phys_buf; 319 u64 addr;
320 struct ib_phys_buf *phys_buf_array; 320 u16 size;
321 u64 next_buf;
322 } phy; 321 } phy;
323 struct { /* type EHCA_MR_PGI_USER section */ 322 struct { /* type EHCA_MR_PGI_USER section */
324 struct ib_umem *region; 323 struct ib_umem *region;
diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h
index 80e6a3d5df3e..cca5933fcda6 100644
--- a/drivers/staging/rdma/ehca/ehca_iverbs.h
+++ b/drivers/staging/rdma/ehca/ehca_iverbs.h
@@ -80,30 +80,14 @@ int ehca_destroy_ah(struct ib_ah *ah);
80 80
81struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); 81struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
82 82
83struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
84 struct ib_phys_buf *phys_buf_array,
85 int num_phys_buf,
86 int mr_access_flags, u64 *iova_start);
87
88struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 83struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
89 u64 virt, int mr_access_flags, 84 u64 virt, int mr_access_flags,
90 struct ib_udata *udata); 85 struct ib_udata *udata);
91 86
92int ehca_rereg_phys_mr(struct ib_mr *mr,
93 int mr_rereg_mask,
94 struct ib_pd *pd,
95 struct ib_phys_buf *phys_buf_array,
96 int num_phys_buf, int mr_access_flags, u64 *iova_start);
97
98int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
99
100int ehca_dereg_mr(struct ib_mr *mr); 87int ehca_dereg_mr(struct ib_mr *mr);
101 88
102struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); 89struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
103 90
104int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
105 struct ib_mw_bind *mw_bind);
106
107int ehca_dealloc_mw(struct ib_mw *mw); 91int ehca_dealloc_mw(struct ib_mw *mw);
108 92
109struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, 93struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c
index 860b974e9faa..832f22f40862 100644
--- a/drivers/staging/rdma/ehca/ehca_main.c
+++ b/drivers/staging/rdma/ehca/ehca_main.c
@@ -511,13 +511,9 @@ static int ehca_init_device(struct ehca_shca *shca)
511 shca->ib_device.req_notify_cq = ehca_req_notify_cq; 511 shca->ib_device.req_notify_cq = ehca_req_notify_cq;
512 /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ 512 /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */
513 shca->ib_device.get_dma_mr = ehca_get_dma_mr; 513 shca->ib_device.get_dma_mr = ehca_get_dma_mr;
514 shca->ib_device.reg_phys_mr = ehca_reg_phys_mr;
515 shca->ib_device.reg_user_mr = ehca_reg_user_mr; 514 shca->ib_device.reg_user_mr = ehca_reg_user_mr;
516 shca->ib_device.query_mr = ehca_query_mr;
517 shca->ib_device.dereg_mr = ehca_dereg_mr; 515 shca->ib_device.dereg_mr = ehca_dereg_mr;
518 shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr;
519 shca->ib_device.alloc_mw = ehca_alloc_mw; 516 shca->ib_device.alloc_mw = ehca_alloc_mw;
520 shca->ib_device.bind_mw = ehca_bind_mw;
521 shca->ib_device.dealloc_mw = ehca_dealloc_mw; 517 shca->ib_device.dealloc_mw = ehca_dealloc_mw;
522 shca->ib_device.alloc_fmr = ehca_alloc_fmr; 518 shca->ib_device.alloc_fmr = ehca_alloc_fmr;
523 shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; 519 shca->ib_device.map_phys_fmr = ehca_map_phys_fmr;
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
index 553e883a5718..3367205e3160 100644
--- a/drivers/staging/rdma/ehca/ehca_mrmw.c
+++ b/drivers/staging/rdma/ehca/ehca_mrmw.c
@@ -196,120 +196,6 @@ get_dma_mr_exit0:
196 196
197/*----------------------------------------------------------------------*/ 197/*----------------------------------------------------------------------*/
198 198
199struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
200 struct ib_phys_buf *phys_buf_array,
201 int num_phys_buf,
202 int mr_access_flags,
203 u64 *iova_start)
204{
205 struct ib_mr *ib_mr;
206 int ret;
207 struct ehca_mr *e_mr;
208 struct ehca_shca *shca =
209 container_of(pd->device, struct ehca_shca, ib_device);
210 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
211
212 u64 size;
213
214 if ((num_phys_buf <= 0) || !phys_buf_array) {
215 ehca_err(pd->device, "bad input values: num_phys_buf=%x "
216 "phys_buf_array=%p", num_phys_buf, phys_buf_array);
217 ib_mr = ERR_PTR(-EINVAL);
218 goto reg_phys_mr_exit0;
219 }
220 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
221 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
222 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
223 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
224 /*
225 * Remote Write Access requires Local Write Access
226 * Remote Atomic Access requires Local Write Access
227 */
228 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
229 mr_access_flags);
230 ib_mr = ERR_PTR(-EINVAL);
231 goto reg_phys_mr_exit0;
232 }
233
234 /* check physical buffer list and calculate size */
235 ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
236 iova_start, &size);
237 if (ret) {
238 ib_mr = ERR_PTR(ret);
239 goto reg_phys_mr_exit0;
240 }
241 if ((size == 0) ||
242 (((u64)iova_start + size) < (u64)iova_start)) {
243 ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
244 size, iova_start);
245 ib_mr = ERR_PTR(-EINVAL);
246 goto reg_phys_mr_exit0;
247 }
248
249 e_mr = ehca_mr_new();
250 if (!e_mr) {
251 ehca_err(pd->device, "out of memory");
252 ib_mr = ERR_PTR(-ENOMEM);
253 goto reg_phys_mr_exit0;
254 }
255
256 /* register MR on HCA */
257 if (ehca_mr_is_maxmr(size, iova_start)) {
258 e_mr->flags |= EHCA_MR_FLAG_MAXMR;
259 ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
260 e_pd, &e_mr->ib.ib_mr.lkey,
261 &e_mr->ib.ib_mr.rkey);
262 if (ret) {
263 ib_mr = ERR_PTR(ret);
264 goto reg_phys_mr_exit1;
265 }
266 } else {
267 struct ehca_mr_pginfo pginfo;
268 u32 num_kpages;
269 u32 num_hwpages;
270 u64 hw_pgsize;
271
272 num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
273 PAGE_SIZE);
274 /* for kernel space we try most possible pgsize */
275 hw_pgsize = ehca_get_max_hwpage_size(shca);
276 num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
277 hw_pgsize);
278 memset(&pginfo, 0, sizeof(pginfo));
279 pginfo.type = EHCA_MR_PGI_PHYS;
280 pginfo.num_kpages = num_kpages;
281 pginfo.hwpage_size = hw_pgsize;
282 pginfo.num_hwpages = num_hwpages;
283 pginfo.u.phy.num_phys_buf = num_phys_buf;
284 pginfo.u.phy.phys_buf_array = phys_buf_array;
285 pginfo.next_hwpage =
286 ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
287
288 ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
289 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
290 &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
291 if (ret) {
292 ib_mr = ERR_PTR(ret);
293 goto reg_phys_mr_exit1;
294 }
295 }
296
297 /* successful registration of all pages */
298 return &e_mr->ib.ib_mr;
299
300reg_phys_mr_exit1:
301 ehca_mr_delete(e_mr);
302reg_phys_mr_exit0:
303 if (IS_ERR(ib_mr))
304 ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
305 "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
306 PTR_ERR(ib_mr), pd, phys_buf_array,
307 num_phys_buf, mr_access_flags, iova_start);
308 return ib_mr;
309} /* end ehca_reg_phys_mr() */
310
311/*----------------------------------------------------------------------*/
312
313struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 199struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
314 u64 virt, int mr_access_flags, 200 u64 virt, int mr_access_flags,
315 struct ib_udata *udata) 201 struct ib_udata *udata)
@@ -437,207 +323,6 @@ reg_user_mr_exit0:
437 323
438/*----------------------------------------------------------------------*/ 324/*----------------------------------------------------------------------*/
439 325
440int ehca_rereg_phys_mr(struct ib_mr *mr,
441 int mr_rereg_mask,
442 struct ib_pd *pd,
443 struct ib_phys_buf *phys_buf_array,
444 int num_phys_buf,
445 int mr_access_flags,
446 u64 *iova_start)
447{
448 int ret;
449
450 struct ehca_shca *shca =
451 container_of(mr->device, struct ehca_shca, ib_device);
452 struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
453 u64 new_size;
454 u64 *new_start;
455 u32 new_acl;
456 struct ehca_pd *new_pd;
457 u32 tmp_lkey, tmp_rkey;
458 unsigned long sl_flags;
459 u32 num_kpages = 0;
460 u32 num_hwpages = 0;
461 struct ehca_mr_pginfo pginfo;
462
463 if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
464 /* TODO not supported, because PHYP rereg hCall needs pages */
465 ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
466 "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
467 ret = -EINVAL;
468 goto rereg_phys_mr_exit0;
469 }
470
471 if (mr_rereg_mask & IB_MR_REREG_PD) {
472 if (!pd) {
473 ehca_err(mr->device, "rereg with bad pd, pd=%p "
474 "mr_rereg_mask=%x", pd, mr_rereg_mask);
475 ret = -EINVAL;
476 goto rereg_phys_mr_exit0;
477 }
478 }
479
480 if ((mr_rereg_mask &
481 ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
482 (mr_rereg_mask == 0)) {
483 ret = -EINVAL;
484 goto rereg_phys_mr_exit0;
485 }
486
487 /* check other parameters */
488 if (e_mr == shca->maxmr) {
489 /* should be impossible, however reject to be sure */
490 ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
491 "shca->maxmr=%p mr->lkey=%x",
492 mr, shca->maxmr, mr->lkey);
493 ret = -EINVAL;
494 goto rereg_phys_mr_exit0;
495 }
496 if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
497 if (e_mr->flags & EHCA_MR_FLAG_FMR) {
498 ehca_err(mr->device, "not supported for FMR, mr=%p "
499 "flags=%x", mr, e_mr->flags);
500 ret = -EINVAL;
501 goto rereg_phys_mr_exit0;
502 }
503 if (!phys_buf_array || num_phys_buf <= 0) {
504 ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
505 " phys_buf_array=%p num_phys_buf=%x",
506 mr_rereg_mask, phys_buf_array, num_phys_buf);
507 ret = -EINVAL;
508 goto rereg_phys_mr_exit0;
509 }
510 }
511 if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */
512 (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
513 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
514 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
515 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
516 /*
517 * Remote Write Access requires Local Write Access
518 * Remote Atomic Access requires Local Write Access
519 */
520 ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
521 "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
522 ret = -EINVAL;
523 goto rereg_phys_mr_exit0;
524 }
525
526 /* set requested values dependent on rereg request */
527 spin_lock_irqsave(&e_mr->mrlock, sl_flags);
528 new_start = e_mr->start;
529 new_size = e_mr->size;
530 new_acl = e_mr->acl;
531 new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
532
533 if (mr_rereg_mask & IB_MR_REREG_TRANS) {
534 u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
535
536 new_start = iova_start; /* change address */
537 /* check physical buffer list and calculate size */
538 ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
539 num_phys_buf, iova_start,
540 &new_size);
541 if (ret)
542 goto rereg_phys_mr_exit1;
543 if ((new_size == 0) ||
544 (((u64)iova_start + new_size) < (u64)iova_start)) {
545 ehca_err(mr->device, "bad input values: new_size=%llx "
546 "iova_start=%p", new_size, iova_start);
547 ret = -EINVAL;
548 goto rereg_phys_mr_exit1;
549 }
550 num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
551 new_size, PAGE_SIZE);
552 num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
553 new_size, hw_pgsize);
554 memset(&pginfo, 0, sizeof(pginfo));
555 pginfo.type = EHCA_MR_PGI_PHYS;
556 pginfo.num_kpages = num_kpages;
557 pginfo.hwpage_size = hw_pgsize;
558 pginfo.num_hwpages = num_hwpages;
559 pginfo.u.phy.num_phys_buf = num_phys_buf;
560 pginfo.u.phy.phys_buf_array = phys_buf_array;
561 pginfo.next_hwpage =
562 ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
563 }
564 if (mr_rereg_mask & IB_MR_REREG_ACCESS)
565 new_acl = mr_access_flags;
566 if (mr_rereg_mask & IB_MR_REREG_PD)
567 new_pd = container_of(pd, struct ehca_pd, ib_pd);
568
569 ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
570 new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
571 if (ret)
572 goto rereg_phys_mr_exit1;
573
574 /* successful reregistration */
575 if (mr_rereg_mask & IB_MR_REREG_PD)
576 mr->pd = pd;
577 mr->lkey = tmp_lkey;
578 mr->rkey = tmp_rkey;
579
580rereg_phys_mr_exit1:
581 spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
582rereg_phys_mr_exit0:
583 if (ret)
584 ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
585 "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
586 "iova_start=%p",
587 ret, mr, mr_rereg_mask, pd, phys_buf_array,
588 num_phys_buf, mr_access_flags, iova_start);
589 return ret;
590} /* end ehca_rereg_phys_mr() */
591
592/*----------------------------------------------------------------------*/
593
594int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
595{
596 int ret = 0;
597 u64 h_ret;
598 struct ehca_shca *shca =
599 container_of(mr->device, struct ehca_shca, ib_device);
600 struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
601 unsigned long sl_flags;
602 struct ehca_mr_hipzout_parms hipzout;
603
604 if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
605 ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
606 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
607 ret = -EINVAL;
608 goto query_mr_exit0;
609 }
610
611 memset(mr_attr, 0, sizeof(struct ib_mr_attr));
612 spin_lock_irqsave(&e_mr->mrlock, sl_flags);
613
614 h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
615 if (h_ret != H_SUCCESS) {
616 ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
617 "hca_hndl=%llx mr_hndl=%llx lkey=%x",
618 h_ret, mr, shca->ipz_hca_handle.handle,
619 e_mr->ipz_mr_handle.handle, mr->lkey);
620 ret = ehca2ib_return_code(h_ret);
621 goto query_mr_exit1;
622 }
623 mr_attr->pd = mr->pd;
624 mr_attr->device_virt_addr = hipzout.vaddr;
625 mr_attr->size = hipzout.len;
626 mr_attr->lkey = hipzout.lkey;
627 mr_attr->rkey = hipzout.rkey;
628 ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
629
630query_mr_exit1:
631 spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
632query_mr_exit0:
633 if (ret)
634 ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
635 ret, mr, mr_attr);
636 return ret;
637} /* end ehca_query_mr() */
638
639/*----------------------------------------------------------------------*/
640
641int ehca_dereg_mr(struct ib_mr *mr) 326int ehca_dereg_mr(struct ib_mr *mr)
642{ 327{
643 int ret = 0; 328 int ret = 0;
@@ -728,18 +413,6 @@ alloc_mw_exit0:
728 413
729/*----------------------------------------------------------------------*/ 414/*----------------------------------------------------------------------*/
730 415
731int ehca_bind_mw(struct ib_qp *qp,
732 struct ib_mw *mw,
733 struct ib_mw_bind *mw_bind)
734{
735 /* TODO: not supported up to now */
736 ehca_gen_err("bind MW currently not supported by HCAD");
737
738 return -EPERM;
739} /* end ehca_bind_mw() */
740
741/*----------------------------------------------------------------------*/
742
743int ehca_dealloc_mw(struct ib_mw *mw) 416int ehca_dealloc_mw(struct ib_mw *mw)
744{ 417{
745 u64 h_ret; 418 u64 h_ret;
@@ -1616,7 +1289,6 @@ int ehca_reg_internal_maxmr(
1616 u64 *iova_start; 1289 u64 *iova_start;
1617 u64 size_maxmr; 1290 u64 size_maxmr;
1618 struct ehca_mr_pginfo pginfo; 1291 struct ehca_mr_pginfo pginfo;
1619 struct ib_phys_buf ib_pbuf;
1620 u32 num_kpages; 1292 u32 num_kpages;
1621 u32 num_hwpages; 1293 u32 num_hwpages;
1622 u64 hw_pgsize; 1294 u64 hw_pgsize;
@@ -1637,8 +1309,6 @@ int ehca_reg_internal_maxmr(
1637 /* register internal max-MR on HCA */ 1309 /* register internal max-MR on HCA */
1638 size_maxmr = ehca_mr_len; 1310 size_maxmr = ehca_mr_len;
1639 iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); 1311 iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
1640 ib_pbuf.addr = 0;
1641 ib_pbuf.size = size_maxmr;
1642 num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, 1312 num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
1643 PAGE_SIZE); 1313 PAGE_SIZE);
1644 hw_pgsize = ehca_get_max_hwpage_size(shca); 1314 hw_pgsize = ehca_get_max_hwpage_size(shca);
@@ -1650,8 +1320,8 @@ int ehca_reg_internal_maxmr(
1650 pginfo.num_kpages = num_kpages; 1320 pginfo.num_kpages = num_kpages;
1651 pginfo.num_hwpages = num_hwpages; 1321 pginfo.num_hwpages = num_hwpages;
1652 pginfo.hwpage_size = hw_pgsize; 1322 pginfo.hwpage_size = hw_pgsize;
1653 pginfo.u.phy.num_phys_buf = 1; 1323 pginfo.u.phy.addr = 0;
1654 pginfo.u.phy.phys_buf_array = &ib_pbuf; 1324 pginfo.u.phy.size = size_maxmr;
1655 1325
1656 ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, 1326 ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
1657 &pginfo, &e_mr->ib.ib_mr.lkey, 1327 &pginfo, &e_mr->ib.ib_mr.lkey,
@@ -1669,7 +1339,6 @@ int ehca_reg_internal_maxmr(
1669 e_mr->ib.ib_mr.pd = &e_pd->ib_pd; 1339 e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
1670 e_mr->ib.ib_mr.uobject = NULL; 1340 e_mr->ib.ib_mr.uobject = NULL;
1671 atomic_inc(&(e_pd->ib_pd.usecnt)); 1341 atomic_inc(&(e_pd->ib_pd.usecnt));
1672 atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
1673 *e_maxmr = e_mr; 1342 *e_maxmr = e_mr;
1674 return 0; 1343 return 0;
1675 1344
@@ -1762,61 +1431,6 @@ ehca_dereg_internal_maxmr_exit0:
1762 1431
1763/*----------------------------------------------------------------------*/ 1432/*----------------------------------------------------------------------*/
1764 1433
1765/*
1766 * check physical buffer array of MR verbs for validness and
1767 * calculates MR size
1768 */
1769int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
1770 int num_phys_buf,
1771 u64 *iova_start,
1772 u64 *size)
1773{
1774 struct ib_phys_buf *pbuf = phys_buf_array;
1775 u64 size_count = 0;
1776 u32 i;
1777
1778 if (num_phys_buf == 0) {
1779 ehca_gen_err("bad phys buf array len, num_phys_buf=0");
1780 return -EINVAL;
1781 }
1782 /* check first buffer */
1783 if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
1784 ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
1785 "pbuf->addr=%llx pbuf->size=%llx",
1786 iova_start, pbuf->addr, pbuf->size);
1787 return -EINVAL;
1788 }
1789 if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
1790 (num_phys_buf > 1)) {
1791 ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
1792 "pbuf->size=%llx", pbuf->addr, pbuf->size);
1793 return -EINVAL;
1794 }
1795
1796 for (i = 0; i < num_phys_buf; i++) {
1797 if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
1798 ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
1799 "pbuf->size=%llx",
1800 i, pbuf->addr, pbuf->size);
1801 return -EINVAL;
1802 }
1803 if (((i > 0) && /* not 1st */
1804 (i < (num_phys_buf - 1)) && /* not last */
1805 (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
1806 ehca_gen_err("bad size, i=%x pbuf->size=%llx",
1807 i, pbuf->size);
1808 return -EINVAL;
1809 }
1810 size_count += pbuf->size;
1811 pbuf++;
1812 }
1813
1814 *size = size_count;
1815 return 0;
1816} /* end ehca_mr_chk_buf_and_calc_size() */
1817
1818/*----------------------------------------------------------------------*/
1819
1820/* check page list of map FMR verb for validness */ 1434/* check page list of map FMR verb for validness */
1821int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, 1435int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
1822 u64 *page_list, 1436 u64 *page_list,
@@ -2002,57 +1616,54 @@ static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
2002 u32 number, u64 *kpage) 1616 u32 number, u64 *kpage)
2003{ 1617{
2004 int ret = 0; 1618 int ret = 0;
2005 struct ib_phys_buf *pbuf; 1619 u64 addr = pginfo->u.phy.addr;
1620 u64 size = pginfo->u.phy.size;
2006 u64 num_hw, offs_hw; 1621 u64 num_hw, offs_hw;
2007 u32 i = 0; 1622 u32 i = 0;
2008 1623
2009 /* loop over desired phys_buf_array entries */ 1624 num_hw = NUM_CHUNKS((addr % pginfo->hwpage_size) + size,
2010 while (i < number) { 1625 pginfo->hwpage_size);
2011 pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; 1626 offs_hw = (addr & ~(pginfo->hwpage_size - 1)) / pginfo->hwpage_size;
2012 num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + 1627
2013 pbuf->size, pginfo->hwpage_size); 1628 while (pginfo->next_hwpage < offs_hw + num_hw) {
2014 offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / 1629 /* sanity check */
2015 pginfo->hwpage_size; 1630 if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
2016 while (pginfo->next_hwpage < offs_hw + num_hw) { 1631 (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
2017 /* sanity check */ 1632 ehca_gen_err("kpage_cnt >= num_kpages, "
2018 if ((pginfo->kpage_cnt >= pginfo->num_kpages) || 1633 "kpage_cnt=%llx num_kpages=%llx "
2019 (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { 1634 "hwpage_cnt=%llx "
2020 ehca_gen_err("kpage_cnt >= num_kpages, " 1635 "num_hwpages=%llx i=%x",
2021 "kpage_cnt=%llx num_kpages=%llx " 1636 pginfo->kpage_cnt,
2022 "hwpage_cnt=%llx " 1637 pginfo->num_kpages,
2023 "num_hwpages=%llx i=%x", 1638 pginfo->hwpage_cnt,
2024 pginfo->kpage_cnt, 1639 pginfo->num_hwpages, i);
2025 pginfo->num_kpages, 1640 return -EFAULT;
2026 pginfo->hwpage_cnt,
2027 pginfo->num_hwpages, i);
2028 return -EFAULT;
2029 }
2030 *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
2031 (pginfo->next_hwpage * pginfo->hwpage_size);
2032 if ( !(*kpage) && pbuf->addr ) {
2033 ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
2034 "next_hwpage=%llx", pbuf->addr,
2035 pbuf->size, pginfo->next_hwpage);
2036 return -EFAULT;
2037 }
2038 (pginfo->hwpage_cnt)++;
2039 (pginfo->next_hwpage)++;
2040 if (PAGE_SIZE >= pginfo->hwpage_size) {
2041 if (pginfo->next_hwpage %
2042 (PAGE_SIZE / pginfo->hwpage_size) == 0)
2043 (pginfo->kpage_cnt)++;
2044 } else
2045 pginfo->kpage_cnt += pginfo->hwpage_size /
2046 PAGE_SIZE;
2047 kpage++;
2048 i++;
2049 if (i >= number) break;
2050 } 1641 }
2051 if (pginfo->next_hwpage >= offs_hw + num_hw) { 1642 *kpage = (addr & ~(pginfo->hwpage_size - 1)) +
2052 (pginfo->u.phy.next_buf)++; 1643 (pginfo->next_hwpage * pginfo->hwpage_size);
2053 pginfo->next_hwpage = 0; 1644 if ( !(*kpage) && addr ) {
1645 ehca_gen_err("addr=%llx size=%llx "
1646 "next_hwpage=%llx", addr,
1647 size, pginfo->next_hwpage);
1648 return -EFAULT;
2054 } 1649 }
1650 (pginfo->hwpage_cnt)++;
1651 (pginfo->next_hwpage)++;
1652 if (PAGE_SIZE >= pginfo->hwpage_size) {
1653 if (pginfo->next_hwpage %
1654 (PAGE_SIZE / pginfo->hwpage_size) == 0)
1655 (pginfo->kpage_cnt)++;
1656 } else
1657 pginfo->kpage_cnt += pginfo->hwpage_size /
1658 PAGE_SIZE;
1659 kpage++;
1660 i++;
1661 if (i >= number) break;
2055 } 1662 }
1663 if (pginfo->next_hwpage >= offs_hw + num_hw) {
1664 pginfo->next_hwpage = 0;
1665 }
1666
2056 return ret; 1667 return ret;
2057} 1668}
2058 1669
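
Both the rereg path and the rewritten ehca_set_pagebuf_phys() above size a region in kernel pages and HCA hardware pages with NUM_CHUNKS(). A standalone sketch of that arithmetic, assuming NUM_CHUNKS() is the usual round-up division (its definition is outside this diff) and using made-up addresses and page sizes:

#include <stdio.h>
#include <stdint.h>

#define NUM_CHUNKS(length, chunk_size) \
	(((length) + (chunk_size) - 1) / (chunk_size))

int main(void)
{
	uint64_t iova_start = 0x10000234ULL;	/* hypothetical MR start */
	uint64_t size       = 0x3000ULL;	/* 12 KiB region */
	uint64_t kpage_sz   = 0x1000ULL;	/* 4 KiB kernel pages */
	uint64_t hwpage_sz  = 0x10000ULL;	/* 64 KiB HCA pages, for example */

	/* same formulas as num_kpages/num_hwpages in the code above */
	uint64_t num_kpages  = NUM_CHUNKS((iova_start % kpage_sz) + size, kpage_sz);
	uint64_t num_hwpages = NUM_CHUNKS((iova_start % hwpage_sz) + size, hwpage_sz);

	printf("num_kpages=%llu num_hwpages=%llu\n",
	       (unsigned long long)num_kpages,
	       (unsigned long long)num_hwpages);
	/* prints num_kpages=4 num_hwpages=1: the offset into the first page
	 * pushes the 12 KiB region across four 4 KiB pages */
	return 0;
}
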
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h
index 50d8b51306dd..52bfa95697f7 100644
--- a/drivers/staging/rdma/ehca/ehca_mrmw.h
+++ b/drivers/staging/rdma/ehca/ehca_mrmw.h
@@ -98,11 +98,6 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
98 98
99int ehca_dereg_internal_maxmr(struct ehca_shca *shca); 99int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
100 100
101int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
102 int num_phys_buf,
103 u64 *iova_start,
104 u64 *size);
105
106int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, 101int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
107 u64 *page_list, 102 u64 *page_list,
108 int list_len); 103 int list_len);
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
index 10e2074384f5..11813b880e16 100644
--- a/drivers/staging/rdma/ehca/ehca_reqs.c
+++ b/drivers/staging/rdma/ehca/ehca_reqs.c
@@ -614,7 +614,6 @@ int ehca_post_srq_recv(struct ib_srq *srq,
614static const u8 ib_wc_opcode[255] = { 614static const u8 ib_wc_opcode[255] = {
615 [0x01] = IB_WC_RECV+1, 615 [0x01] = IB_WC_RECV+1,
616 [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, 616 [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
617 [0x04] = IB_WC_BIND_MW+1,
618 [0x08] = IB_WC_FETCH_ADD+1, 617 [0x08] = IB_WC_FETCH_ADD+1,
619 [0x10] = IB_WC_COMP_SWAP+1, 618 [0x10] = IB_WC_COMP_SWAP+1,
620 [0x20] = IB_WC_RDMA_WRITE+1, 619 [0x20] = IB_WC_RDMA_WRITE+1,
diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c
index 568f185a022d..a3f8b884fdd6 100644
--- a/drivers/staging/rdma/hfi1/mr.c
+++ b/drivers/staging/rdma/hfi1/mr.c
@@ -167,10 +167,7 @@ static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
167 rval = init_mregion(&mr->mr, pd, count); 167 rval = init_mregion(&mr->mr, pd, count);
168 if (rval) 168 if (rval)
169 goto bail; 169 goto bail;
170 /* 170
171 * ib_reg_phys_mr() will initialize mr->ibmr except for
172 * lkey and rkey.
173 */
174 rval = hfi1_alloc_lkey(&mr->mr, 0); 171 rval = hfi1_alloc_lkey(&mr->mr, 0);
175 if (rval) 172 if (rval)
176 goto bail_mregion; 173 goto bail_mregion;
@@ -188,52 +185,6 @@ bail:
188} 185}
189 186
190/** 187/**
191 * hfi1_reg_phys_mr - register a physical memory region
192 * @pd: protection domain for this memory region
193 * @buffer_list: pointer to the list of physical buffers to register
194 * @num_phys_buf: the number of physical buffers to register
195 * @iova_start: the starting address passed over IB which maps to this MR
196 *
197 * Returns the memory region on success, otherwise returns an errno.
198 */
199struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
200 struct ib_phys_buf *buffer_list,
201 int num_phys_buf, int acc, u64 *iova_start)
202{
203 struct hfi1_mr *mr;
204 int n, m, i;
205 struct ib_mr *ret;
206
207 mr = alloc_mr(num_phys_buf, pd);
208 if (IS_ERR(mr)) {
209 ret = (struct ib_mr *)mr;
210 goto bail;
211 }
212
213 mr->mr.user_base = *iova_start;
214 mr->mr.iova = *iova_start;
215 mr->mr.access_flags = acc;
216
217 m = 0;
218 n = 0;
219 for (i = 0; i < num_phys_buf; i++) {
220 mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
221 mr->mr.map[m]->segs[n].length = buffer_list[i].size;
222 mr->mr.length += buffer_list[i].size;
223 n++;
224 if (n == HFI1_SEGSZ) {
225 m++;
226 n = 0;
227 }
228 }
229
230 ret = &mr->ibmr;
231
232bail:
233 return ret;
234}
235
236/**
237 * hfi1_reg_user_mr - register a userspace memory region 188 * hfi1_reg_user_mr - register a userspace memory region
238 * @pd: protection domain for this memory region 189 * @pd: protection domain for this memory region
239 * @start: starting userspace address 190 * @start: starting userspace address
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index ef0feaa684a4..09b8d412ee90 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -2052,7 +2052,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
2052 ibdev->poll_cq = hfi1_poll_cq; 2052 ibdev->poll_cq = hfi1_poll_cq;
2053 ibdev->req_notify_cq = hfi1_req_notify_cq; 2053 ibdev->req_notify_cq = hfi1_req_notify_cq;
2054 ibdev->get_dma_mr = hfi1_get_dma_mr; 2054 ibdev->get_dma_mr = hfi1_get_dma_mr;
2055 ibdev->reg_phys_mr = hfi1_reg_phys_mr;
2056 ibdev->reg_user_mr = hfi1_reg_user_mr; 2055 ibdev->reg_user_mr = hfi1_reg_user_mr;
2057 ibdev->dereg_mr = hfi1_dereg_mr; 2056 ibdev->dereg_mr = hfi1_dereg_mr;
2058 ibdev->alloc_mr = hfi1_alloc_mr; 2057 ibdev->alloc_mr = hfi1_alloc_mr;
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index 72106e5362b9..286e468b0479 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -1024,10 +1024,6 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
1024 1024
1025struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc); 1025struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
1026 1026
1027struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
1028 struct ib_phys_buf *buffer_list,
1029 int num_phys_buf, int acc, u64 *iova_start);
1030
1031struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1027struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1032 u64 virt_addr, int mr_access_flags, 1028 u64 virt_addr, int mr_access_flags,
1033 struct ib_udata *udata); 1029 struct ib_udata *udata);
diff --git a/drivers/staging/rdma/ipath/ipath_mr.c b/drivers/staging/rdma/ipath/ipath_mr.c
index c7278f6a8217..b76b0ce66709 100644
--- a/drivers/staging/rdma/ipath/ipath_mr.c
+++ b/drivers/staging/rdma/ipath/ipath_mr.c
@@ -98,10 +98,6 @@ static struct ipath_mr *alloc_mr(int count,
98 } 98 }
99 mr->mr.mapsz = m; 99 mr->mr.mapsz = m;
100 100
101 /*
102 * ib_reg_phys_mr() will initialize mr->ibmr except for
103 * lkey and rkey.
104 */
105 if (!ipath_alloc_lkey(lk_table, &mr->mr)) 101 if (!ipath_alloc_lkey(lk_table, &mr->mr))
106 goto bail; 102 goto bail;
107 mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey; 103 mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
@@ -121,57 +117,6 @@ done:
121} 117}
122 118
123/** 119/**
124 * ipath_reg_phys_mr - register a physical memory region
125 * @pd: protection domain for this memory region
126 * @buffer_list: pointer to the list of physical buffers to register
127 * @num_phys_buf: the number of physical buffers to register
128 * @iova_start: the starting address passed over IB which maps to this MR
129 *
130 * Returns the memory region on success, otherwise returns an errno.
131 */
132struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
133 struct ib_phys_buf *buffer_list,
134 int num_phys_buf, int acc, u64 *iova_start)
135{
136 struct ipath_mr *mr;
137 int n, m, i;
138 struct ib_mr *ret;
139
140 mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
141 if (mr == NULL) {
142 ret = ERR_PTR(-ENOMEM);
143 goto bail;
144 }
145
146 mr->mr.pd = pd;
147 mr->mr.user_base = *iova_start;
148 mr->mr.iova = *iova_start;
149 mr->mr.length = 0;
150 mr->mr.offset = 0;
151 mr->mr.access_flags = acc;
152 mr->mr.max_segs = num_phys_buf;
153 mr->umem = NULL;
154
155 m = 0;
156 n = 0;
157 for (i = 0; i < num_phys_buf; i++) {
158 mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
159 mr->mr.map[m]->segs[n].length = buffer_list[i].size;
160 mr->mr.length += buffer_list[i].size;
161 n++;
162 if (n == IPATH_SEGSZ) {
163 m++;
164 n = 0;
165 }
166 }
167
168 ret = &mr->ibmr;
169
170bail:
171 return ret;
172}
173
174/**
175 * ipath_reg_user_mr - register a userspace memory region 120 * ipath_reg_user_mr - register a userspace memory region
176 * @pd: protection domain for this memory region 121 * @pd: protection domain for this memory region
177 * @start: starting userspace address 122 * @start: starting userspace address
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
index 1778dee13f99..53f9dcab180d 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.c
+++ b/drivers/staging/rdma/ipath/ipath_verbs.c
@@ -2201,7 +2201,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
2201 dev->poll_cq = ipath_poll_cq; 2201 dev->poll_cq = ipath_poll_cq;
2202 dev->req_notify_cq = ipath_req_notify_cq; 2202 dev->req_notify_cq = ipath_req_notify_cq;
2203 dev->get_dma_mr = ipath_get_dma_mr; 2203 dev->get_dma_mr = ipath_get_dma_mr;
2204 dev->reg_phys_mr = ipath_reg_phys_mr;
2205 dev->reg_user_mr = ipath_reg_user_mr; 2204 dev->reg_user_mr = ipath_reg_user_mr;
2206 dev->dereg_mr = ipath_dereg_mr; 2205 dev->dereg_mr = ipath_dereg_mr;
2207 dev->alloc_fmr = ipath_alloc_fmr; 2206 dev->alloc_fmr = ipath_alloc_fmr;
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
index 0a90a56870ab..6c70a89667a9 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.h
+++ b/drivers/staging/rdma/ipath/ipath_verbs.h
@@ -828,10 +828,6 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
828 828
829struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc); 829struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
830 830
831struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
832 struct ib_phys_buf *buffer_list,
833 int num_phys_buf, int acc, u64 *iova_start);
834
835struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 831struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
836 u64 virt_addr, int mr_access_flags, 832 u64 virt_addr, int mr_access_flags,
837 struct ib_udata *udata); 833 struct ib_udata *udata);
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
deleted file mode 100644
index 77ae77c0b704..000000000000
--- a/include/linux/blk-iopoll.h
+++ /dev/null
@@ -1,46 +0,0 @@
1#ifndef BLK_IOPOLL_H
2#define BLK_IOPOLL_H
3
4struct blk_iopoll;
5typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
6
7struct blk_iopoll {
8 struct list_head list;
9 unsigned long state;
10 unsigned long data;
11 int weight;
12 int max;
13 blk_iopoll_fn *poll;
14};
15
16enum {
17 IOPOLL_F_SCHED = 0,
18 IOPOLL_F_DISABLE = 1,
19};
20
21/*
22 * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
23 * that we were the first to acquire this iop for scheduling. If this iop
24 * is currently disabled, return "failure".
25 */
26static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
27{
28 if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
29 return test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
30
31 return 1;
32}
33
34static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
35{
36 return test_bit(IOPOLL_F_DISABLE, &iop->state);
37}
38
39extern void blk_iopoll_sched(struct blk_iopoll *);
40extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
41extern void blk_iopoll_complete(struct blk_iopoll *);
42extern void __blk_iopoll_complete(struct blk_iopoll *);
43extern void blk_iopoll_enable(struct blk_iopoll *);
44extern void blk_iopoll_disable(struct blk_iopoll *);
45
46#endif
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index cb30edbfe9fc..0e95fcc75b2a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -413,7 +413,7 @@ enum
413 NET_TX_SOFTIRQ, 413 NET_TX_SOFTIRQ,
414 NET_RX_SOFTIRQ, 414 NET_RX_SOFTIRQ,
415 BLOCK_SOFTIRQ, 415 BLOCK_SOFTIRQ,
416 BLOCK_IOPOLL_SOFTIRQ, 416 IRQ_POLL_SOFTIRQ,
417 TASKLET_SOFTIRQ, 417 TASKLET_SOFTIRQ,
418 SCHED_SOFTIRQ, 418 SCHED_SOFTIRQ,
419 HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the 419 HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h
new file mode 100644
index 000000000000..3e8c1b8fb9be
--- /dev/null
+++ b/include/linux/irq_poll.h
@@ -0,0 +1,25 @@
1#ifndef IRQ_POLL_H
2#define IRQ_POLL_H
3
4struct irq_poll;
5typedef int (irq_poll_fn)(struct irq_poll *, int);
6
7struct irq_poll {
8 struct list_head list;
9 unsigned long state;
10 int weight;
11 irq_poll_fn *poll;
12};
13
14enum {
15 IRQ_POLL_F_SCHED = 0,
16 IRQ_POLL_F_DISABLE = 1,
17};
18
19extern void irq_poll_sched(struct irq_poll *);
20extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *);
21extern void irq_poll_complete(struct irq_poll *);
22extern void irq_poll_enable(struct irq_poll *);
23extern void irq_poll_disable(struct irq_poll *);
24
25#endif
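
The new header replaces the deleted blk_iopoll interface with a leaner irq_poll one. A minimal sketch of how a driver might drive it, based only on the prototypes above and assuming the poll callback's int argument is a work budget and its return value the amount of work done (NAPI-style); my_dev, my_cq_reap() and the weight of 64 are illustrative, not part of the patch:

#include <linux/interrupt.h>
#include <linux/irq_poll.h>
#include <linux/kernel.h>

struct my_dev {
	struct irq_poll iop;
};

/* hypothetical: reap up to 'budget' completions, return how many were done */
static int my_cq_reap(struct my_dev *dev, int budget)
{
	return 0;
}

static int my_poll(struct irq_poll *iop, int budget)
{
	struct my_dev *dev = container_of(iop, struct my_dev, iop);
	int done = my_cq_reap(dev, budget);

	if (done < budget)
		irq_poll_complete(iop);	/* no more work: leave the poll list */
	return done;
}

static irqreturn_t my_irq_handler(int irq, void *data)
{
	struct my_dev *dev = data;

	irq_poll_sched(&dev->iop);	/* defer completion processing to softirq */
	return IRQ_HANDLED;
}

static void my_dev_init_polling(struct my_dev *dev)
{
	irq_poll_init(&dev->iop, 64, my_poll);
}
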
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 58391f2e0414..116b284bc4ce 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -206,7 +206,8 @@ enum {
206 MLX4_SET_PORT_GID_TABLE = 0x5, 206 MLX4_SET_PORT_GID_TABLE = 0x5,
207 MLX4_SET_PORT_PRIO2TC = 0x8, 207 MLX4_SET_PORT_PRIO2TC = 0x8,
208 MLX4_SET_PORT_SCHEDULER = 0x9, 208 MLX4_SET_PORT_SCHEDULER = 0x9,
209 MLX4_SET_PORT_VXLAN = 0xB 209 MLX4_SET_PORT_VXLAN = 0xB,
210 MLX4_SET_PORT_ROCE_ADDR = 0xD
210}; 211};
211 212
212enum { 213enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index d3133be12d92..430a929f048b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -216,6 +216,7 @@ enum {
216 MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, 216 MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30,
217 MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, 217 MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
218 MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, 218 MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
219 MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33,
219}; 220};
220 221
221enum { 222enum {
@@ -267,12 +268,14 @@ enum {
267 MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, 268 MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9,
268 MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, 269 MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10,
269 MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, 270 MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11,
271 MLX4_BMME_FLAG_ROCE_V1_V2 = 1 << 19,
270 MLX4_BMME_FLAG_PORT_REMAP = 1 << 24, 272 MLX4_BMME_FLAG_PORT_REMAP = 1 << 24,
271 MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, 273 MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28,
272}; 274};
273 275
274enum { 276enum {
275 MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP 277 MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP,
278 MLX4_FLAG_ROCE_V1_V2 = MLX4_BMME_FLAG_ROCE_V1_V2
276}; 279};
277 280
278enum mlx4_event { 281enum mlx4_event {
@@ -979,14 +982,11 @@ struct mlx4_mad_ifc {
979 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ 982 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
980 if ((type) == (dev)->caps.port_mask[(port)]) 983 if ((type) == (dev)->caps.port_mask[(port)])
981 984
982#define mlx4_foreach_non_ib_transport_port(port, dev) \
983 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
984 if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
985
986#define mlx4_foreach_ib_transport_port(port, dev) \ 985#define mlx4_foreach_ib_transport_port(port, dev) \
987 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ 986 for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
988 if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ 987 if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
989 ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) 988 ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \
989 ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
990 990
991#define MLX4_INVALID_SLAVE_ID 0xFF 991#define MLX4_INVALID_SLAVE_ID 0xFF
992#define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) 992#define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1)
@@ -1457,6 +1457,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
1457 1457
1458int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); 1458int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
1459int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis); 1459int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis);
1460int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port);
1460int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2); 1461int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2);
1461int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); 1462int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
1462int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); 1463int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index fe052e234906..587cdf943b52 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -194,7 +194,7 @@ struct mlx4_qp_context {
194 u8 mtu_msgmax; 194 u8 mtu_msgmax;
195 u8 rq_size_stride; 195 u8 rq_size_stride;
196 u8 sq_size_stride; 196 u8 sq_size_stride;
197 u8 rlkey; 197 u8 rlkey_roce_mode;
198 __be32 usr_page; 198 __be32 usr_page;
199 __be32 local_qpn; 199 __be32 local_qpn;
200 __be32 remote_qpn; 200 __be32 remote_qpn;
@@ -204,7 +204,8 @@ struct mlx4_qp_context {
204 u32 reserved1; 204 u32 reserved1;
205 __be32 next_send_psn; 205 __be32 next_send_psn;
206 __be32 cqn_send; 206 __be32 cqn_send;
207 u32 reserved2[2]; 207 __be16 roce_entropy;
208 __be16 reserved2[3];
208 __be32 last_acked_psn; 209 __be32 last_acked_psn;
209 __be32 ssn; 210 __be32 ssn;
210 __be32 params2; 211 __be32 params2;
@@ -487,4 +488,14 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
487 488
488void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp); 489void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
489 490
491static inline u16 folded_qp(u32 q)
492{
493 u16 res;
494
495 res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00);
496 return res;
497}
498
499u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn);
500
490#endif /* MLX4_QP_H */ 501#endif /* MLX4_QP_H */
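
The new folded_qp() helper compresses a 24-bit QP number into 16 bits by XOR-ing the top byte into the bottom byte while keeping the middle byte, presumably so mlx4_qp_roce_entropy() can derive a per-QP value (the RoCE v2 UDP source-port use is inferred from the surrounding changes, not shown here). A quick standalone check of the arithmetic:

#include <assert.h>
#include <stdint.h>

/* same folding as the new inline helper above */
static uint16_t folded_qp(uint32_t q)
{
	return ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00);
}

int main(void)
{
	/* low byte 0x56 ^ high byte 0x12 = 0x44; middle byte 0x34 is kept */
	assert(folded_qp(0x123456) == 0x3444);
	return 0;
}
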
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 7be845e30689..987764afa65c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -223,6 +223,14 @@ enum {
223#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) 223#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1)
224#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT 224#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
225 225
226#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)
227
228enum {
229 MLX5_EVENT_QUEUE_TYPE_QP = 0,
230 MLX5_EVENT_QUEUE_TYPE_RQ = 1,
231 MLX5_EVENT_QUEUE_TYPE_SQ = 2,
232};
233
226enum mlx5_event { 234enum mlx5_event {
227 MLX5_EVENT_TYPE_COMP = 0x0, 235 MLX5_EVENT_TYPE_COMP = 0x0,
228 236
@@ -280,6 +288,26 @@ enum {
280}; 288};
281 289
282enum { 290enum {
291 MLX5_ROCE_VERSION_1 = 0,
292 MLX5_ROCE_VERSION_2 = 2,
293};
294
295enum {
296 MLX5_ROCE_VERSION_1_CAP = 1 << MLX5_ROCE_VERSION_1,
297 MLX5_ROCE_VERSION_2_CAP = 1 << MLX5_ROCE_VERSION_2,
298};
299
300enum {
301 MLX5_ROCE_L3_TYPE_IPV4 = 0,
302 MLX5_ROCE_L3_TYPE_IPV6 = 1,
303};
304
305enum {
306 MLX5_ROCE_L3_TYPE_IPV4_CAP = 1 << 1,
307 MLX5_ROCE_L3_TYPE_IPV6_CAP = 1 << 2,
308};
309
310enum {
283 MLX5_OPCODE_NOP = 0x00, 311 MLX5_OPCODE_NOP = 0x00,
284 MLX5_OPCODE_SEND_INVAL = 0x01, 312 MLX5_OPCODE_SEND_INVAL = 0x01,
285 MLX5_OPCODE_RDMA_WRITE = 0x08, 313 MLX5_OPCODE_RDMA_WRITE = 0x08,
@@ -446,7 +474,7 @@ struct mlx5_init_seg {
446 __be32 rsvd2[880]; 474 __be32 rsvd2[880];
447 __be32 internal_timer_h; 475 __be32 internal_timer_h;
448 __be32 internal_timer_l; 476 __be32 internal_timer_l;
449 __be32 rsrv3[2]; 477 __be32 rsvd3[2];
450 __be32 health_counter; 478 __be32 health_counter;
451 __be32 rsvd4[1019]; 479 __be32 rsvd4[1019];
452 __be64 ieee1588_clk; 480 __be64 ieee1588_clk;
@@ -460,7 +488,9 @@ struct mlx5_eqe_comp {
460}; 488};
461 489
462struct mlx5_eqe_qp_srq { 490struct mlx5_eqe_qp_srq {
463 __be32 reserved[6]; 491 __be32 reserved1[5];
492 u8 type;
493 u8 reserved2[3];
464 __be32 qp_srq_n; 494 __be32 qp_srq_n;
465}; 495};
466 496
@@ -651,6 +681,12 @@ enum {
651}; 681};
652 682
653enum { 683enum {
684 MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH = 0x0,
685 MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6 = 0x1,
686 MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4 = 0x2,
687};
688
689enum {
654 CQE_L2_OK = 1 << 0, 690 CQE_L2_OK = 1 << 0,
655 CQE_L3_OK = 1 << 1, 691 CQE_L3_OK = 1 << 1,
656 CQE_L4_OK = 1 << 2, 692 CQE_L4_OK = 1 << 2,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5162f3533042..1e3006dcf35d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -115,6 +115,11 @@ enum {
115 MLX5_REG_HOST_ENDIANNESS = 0x7004, 115 MLX5_REG_HOST_ENDIANNESS = 0x7004,
116}; 116};
117 117
118enum {
119 MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0,
120 MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1,
121};
122
118enum mlx5_page_fault_resume_flags { 123enum mlx5_page_fault_resume_flags {
119 MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0, 124 MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
120 MLX5_PAGE_FAULT_RESUME_WRITE = 1 << 1, 125 MLX5_PAGE_FAULT_RESUME_WRITE = 1 << 1,
@@ -341,9 +346,11 @@ struct mlx5_core_mr {
341}; 346};
342 347
343enum mlx5_res_type { 348enum mlx5_res_type {
344 MLX5_RES_QP, 349 MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP,
345 MLX5_RES_SRQ, 350 MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ,
346 MLX5_RES_XSRQ, 351 MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ,
352 MLX5_RES_SRQ = 3,
353 MLX5_RES_XSRQ = 4,
347}; 354};
348 355
349struct mlx5_core_rsc_common { 356struct mlx5_core_rsc_common {
@@ -651,13 +658,6 @@ extern struct workqueue_struct *mlx5_core_wq;
651 .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ 658 .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
652 .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field 659 .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field
653 660
654struct ib_field {
655 size_t struct_offset_bytes;
656 size_t struct_size_bytes;
657 int offset_bits;
658 int size_bits;
659};
660
661static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) 661static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev)
662{ 662{
663 return pci_get_drvdata(pdev); 663 return pci_get_drvdata(pdev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 68d73f82e009..231ab6bcea76 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -67,6 +67,11 @@ enum {
67}; 67};
68 68
69enum { 69enum {
70 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0,
71 MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
72};
73
74enum {
70 MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, 75 MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
71 MLX5_CMD_OP_QUERY_ADAPTER = 0x101, 76 MLX5_CMD_OP_QUERY_ADAPTER = 0x101,
72 MLX5_CMD_OP_INIT_HCA = 0x102, 77 MLX5_CMD_OP_INIT_HCA = 0x102,
@@ -573,21 +578,24 @@ enum {
573struct mlx5_ifc_atomic_caps_bits { 578struct mlx5_ifc_atomic_caps_bits {
574 u8 reserved_0[0x40]; 579 u8 reserved_0[0x40];
575 580
576 u8 atomic_req_endianness[0x1]; 581 u8 atomic_req_8B_endianess_mode[0x2];
577 u8 reserved_1[0x1f]; 582 u8 reserved_1[0x4];
583 u8 supported_atomic_req_8B_endianess_mode_1[0x1];
578 584
579 u8 reserved_2[0x20]; 585 u8 reserved_2[0x19];
580 586
581 u8 reserved_3[0x10]; 587 u8 reserved_3[0x20];
582 u8 atomic_operations[0x10];
583 588
584 u8 reserved_4[0x10]; 589 u8 reserved_4[0x10];
585 u8 atomic_size_qp[0x10]; 590 u8 atomic_operations[0x10];
586 591
587 u8 reserved_5[0x10]; 592 u8 reserved_5[0x10];
593 u8 atomic_size_qp[0x10];
594
595 u8 reserved_6[0x10];
588 u8 atomic_size_dc[0x10]; 596 u8 atomic_size_dc[0x10];
589 597
590 u8 reserved_6[0x720]; 598 u8 reserved_7[0x720];
591}; 599};
592 600
593struct mlx5_ifc_odp_cap_bits { 601struct mlx5_ifc_odp_cap_bits {
@@ -850,7 +858,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
850 u8 reserved_66[0x8]; 858 u8 reserved_66[0x8];
851 u8 log_uar_page_sz[0x10]; 859 u8 log_uar_page_sz[0x10];
852 860
853 u8 reserved_67[0x40]; 861 u8 reserved_67[0x20];
862 u8 device_frequency_mhz[0x20];
854 u8 device_frequency_khz[0x20]; 863 u8 device_frequency_khz[0x20];
855 u8 reserved_68[0x5f]; 864 u8 reserved_68[0x5f];
856 u8 cqe_zip[0x1]; 865 u8 cqe_zip[0x1];
@@ -2215,19 +2224,25 @@ struct mlx5_ifc_nic_vport_context_bits {
2215 2224
2216 u8 mtu[0x10]; 2225 u8 mtu[0x10];
2217 2226
2218 u8 reserved_3[0x640]; 2227 u8 system_image_guid[0x40];
2228 u8 port_guid[0x40];
2229 u8 node_guid[0x40];
2230
2231 u8 reserved_3[0x140];
2232 u8 qkey_violation_counter[0x10];
2233 u8 reserved_4[0x430];
2219 2234
2220 u8 promisc_uc[0x1]; 2235 u8 promisc_uc[0x1];
2221 u8 promisc_mc[0x1]; 2236 u8 promisc_mc[0x1];
2222 u8 promisc_all[0x1]; 2237 u8 promisc_all[0x1];
2223 u8 reserved_4[0x2]; 2238 u8 reserved_5[0x2];
2224 u8 allowed_list_type[0x3]; 2239 u8 allowed_list_type[0x3];
2225 u8 reserved_5[0xc]; 2240 u8 reserved_6[0xc];
2226 u8 allowed_list_size[0xc]; 2241 u8 allowed_list_size[0xc];
2227 2242
2228 struct mlx5_ifc_mac_address_layout_bits permanent_address; 2243 struct mlx5_ifc_mac_address_layout_bits permanent_address;
2229 2244
2230 u8 reserved_6[0x20]; 2245 u8 reserved_7[0x20];
2231 2246
2232 u8 current_uc_mac_address[0][0x40]; 2247 u8 current_uc_mac_address[0][0x40];
2233}; 2248};
@@ -4199,6 +4214,13 @@ struct mlx5_ifc_modify_tis_out_bits {
4199 u8 reserved_1[0x40]; 4214 u8 reserved_1[0x40];
4200}; 4215};
4201 4216
4217struct mlx5_ifc_modify_tis_bitmask_bits {
4218 u8 reserved_0[0x20];
4219
4220 u8 reserved_1[0x1f];
4221 u8 prio[0x1];
4222};
4223
4202struct mlx5_ifc_modify_tis_in_bits { 4224struct mlx5_ifc_modify_tis_in_bits {
4203 u8 opcode[0x10]; 4225 u8 opcode[0x10];
4204 u8 reserved_0[0x10]; 4226 u8 reserved_0[0x10];
@@ -4211,7 +4233,7 @@ struct mlx5_ifc_modify_tis_in_bits {
4211 4233
4212 u8 reserved_3[0x20]; 4234 u8 reserved_3[0x20];
4213 4235
4214 u8 modify_bitmask[0x40]; 4236 struct mlx5_ifc_modify_tis_bitmask_bits bitmask;
4215 4237
4216 u8 reserved_4[0x40]; 4238 u8 reserved_4[0x40];
4217 4239
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index f079fb1a31f7..5b8c89ffaa58 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -85,7 +85,16 @@ enum mlx5_qp_state {
85 MLX5_QP_STATE_ERR = 6, 85 MLX5_QP_STATE_ERR = 6,
86 MLX5_QP_STATE_SQ_DRAINING = 7, 86 MLX5_QP_STATE_SQ_DRAINING = 7,
87 MLX5_QP_STATE_SUSPENDED = 9, 87 MLX5_QP_STATE_SUSPENDED = 9,
88 MLX5_QP_NUM_STATE 88 MLX5_QP_NUM_STATE,
89 MLX5_QP_STATE,
90 MLX5_QP_STATE_BAD,
91};
92
93enum {
94 MLX5_SQ_STATE_NA = MLX5_SQC_STATE_ERR + 1,
95 MLX5_SQ_NUM_STATE = MLX5_SQ_STATE_NA + 1,
96 MLX5_RQ_STATE_NA = MLX5_RQC_STATE_ERR + 1,
97 MLX5_RQ_NUM_STATE = MLX5_RQ_STATE_NA + 1,
89}; 98};
90 99
91enum { 100enum {
@@ -130,6 +139,9 @@ enum {
130 MLX5_QP_BIT_RWE = 1 << 14, 139 MLX5_QP_BIT_RWE = 1 << 14,
131 MLX5_QP_BIT_RAE = 1 << 13, 140 MLX5_QP_BIT_RAE = 1 << 13,
132 MLX5_QP_BIT_RIC = 1 << 4, 141 MLX5_QP_BIT_RIC = 1 << 4,
142 MLX5_QP_BIT_CC_SLAVE_RECV = 1 << 2,
143 MLX5_QP_BIT_CC_SLAVE_SEND = 1 << 1,
144 MLX5_QP_BIT_CC_MASTER = 1 << 0
133}; 145};
134 146
135enum { 147enum {
@@ -248,8 +260,12 @@ struct mlx5_av {
248 __be32 dqp_dct; 260 __be32 dqp_dct;
249 u8 stat_rate_sl; 261 u8 stat_rate_sl;
250 u8 fl_mlid; 262 u8 fl_mlid;
251 __be16 rlid; 263 union {
252 u8 reserved0[10]; 264 __be16 rlid;
265 __be16 udp_sport;
266 };
267 u8 reserved0[4];
268 u8 rmac[6];
253 u8 tclass; 269 u8 tclass;
254 u8 hop_limit; 270 u8 hop_limit;
255 __be32 grh_gid_fl; 271 __be32 grh_gid_fl;
@@ -456,11 +472,16 @@ struct mlx5_qp_path {
456 u8 static_rate; 472 u8 static_rate;
457 u8 hop_limit; 473 u8 hop_limit;
458 __be32 tclass_flowlabel; 474 __be32 tclass_flowlabel;
459 u8 rgid[16]; 475 union {
460 u8 rsvd1[4]; 476 u8 rgid[16];
461 u8 sl; 477 u8 rip[16];
478 };
479 u8 f_dscp_ecn_prio;
480 u8 ecn_dscp;
481 __be16 udp_sport;
482 u8 dci_cfi_prio_sl;
462 u8 port; 483 u8 port;
463 u8 rsvd2[6]; 484 u8 rmac[6];
464}; 485};
465 486
466struct mlx5_qp_context { 487struct mlx5_qp_context {
@@ -620,8 +641,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
620 struct mlx5_core_qp *qp, 641 struct mlx5_core_qp *qp,
621 struct mlx5_create_qp_mbox_in *in, 642 struct mlx5_create_qp_mbox_in *in,
622 int inlen); 643 int inlen);
623int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, 644int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
624 enum mlx5_qp_state new_state,
625 struct mlx5_modify_qp_mbox_in *in, int sqd_event, 645 struct mlx5_modify_qp_mbox_in *in, int sqd_event,
626 struct mlx5_core_qp *qp); 646 struct mlx5_core_qp *qp);
627int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, 647int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
@@ -639,6 +659,14 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
639int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, 659int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
640 u8 context, int error); 660 u8 context, int error);
641#endif 661#endif
662int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
663 struct mlx5_core_qp *rq);
664void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
665 struct mlx5_core_qp *rq);
666int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
667 struct mlx5_core_qp *sq);
668void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
669 struct mlx5_core_qp *sq);
642 670
643static inline const char *mlx5_qp_type_str(int type) 671static inline const char *mlx5_qp_type_str(int type)
644{ 672{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/include/linux/mlx5/transobj.h
index 74cae51436e4..88441f5ece25 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -33,16 +33,20 @@
33#ifndef __TRANSOBJ_H__ 33#ifndef __TRANSOBJ_H__
34#define __TRANSOBJ_H__ 34#define __TRANSOBJ_H__
35 35
36int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn); 36#include <linux/mlx5/driver.h>
37void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn); 37
38int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
39void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
38int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, 40int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
39 u32 *rqn); 41 u32 *rqn);
40int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); 42int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
41void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); 43void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
44int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out);
42int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, 45int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
43 u32 *sqn); 46 u32 *sqn);
44int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); 47int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
45void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); 48void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
49int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
46int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, 50int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
47 u32 *tirn); 51 u32 *tirn);
48int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, 52int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
@@ -50,6 +54,8 @@ int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
50void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); 54void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
51int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, 55int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
52 u32 *tisn); 56 u32 *tisn);
57int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
58 int inlen);
53void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); 59void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
54int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, 60int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
55 u32 *rmpn); 61 u32 *rmpn);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 638f2ca7a527..123771003e68 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -45,6 +45,11 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
45 u16 vport, u8 *addr); 45 u16 vport, u8 *addr);
46int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, 46int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
47 u16 vport, u8 *addr); 47 u16 vport, u8 *addr);
48int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
49 u64 *system_image_guid);
50int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
51int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
52 u16 *qkey_viol_cntr);
48int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, 53int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
49 u8 port_num, u16 vf_num, u16 gid_index, 54 u8 port_num, u16 vf_num, u16 gid_index,
50 union ib_gid *gid); 55 union ib_gid *gid);
@@ -85,4 +90,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
85 u16 vlans[], 90 u16 vlans[],
86 int list_size); 91 int list_size);
87 92
93int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
94int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
95
88#endif /* __MLX5_VPORT_H__ */ 96#endif /* __MLX5_VPORT_H__ */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f869807a0d0e..5322fea6fe4c 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -51,6 +51,7 @@
51/* RPC/RDMA parameters and stats */ 51/* RPC/RDMA parameters and stats */
52extern unsigned int svcrdma_ord; 52extern unsigned int svcrdma_ord;
53extern unsigned int svcrdma_max_requests; 53extern unsigned int svcrdma_max_requests;
54extern unsigned int svcrdma_max_bc_requests;
54extern unsigned int svcrdma_max_req_size; 55extern unsigned int svcrdma_max_req_size;
55 56
56extern atomic_t rdma_stat_recv; 57extern atomic_t rdma_stat_recv;
@@ -69,6 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
69 * completes. 70 * completes.
70 */ 71 */
71struct svc_rdma_op_ctxt { 72struct svc_rdma_op_ctxt {
73 struct list_head free;
72 struct svc_rdma_op_ctxt *read_hdr; 74 struct svc_rdma_op_ctxt *read_hdr;
73 struct svc_rdma_fastreg_mr *frmr; 75 struct svc_rdma_fastreg_mr *frmr;
74 int hdr_count; 76 int hdr_count;
@@ -112,6 +114,7 @@ struct svc_rdma_fastreg_mr {
112 struct list_head frmr_list; 114 struct list_head frmr_list;
113}; 115};
114struct svc_rdma_req_map { 116struct svc_rdma_req_map {
117 struct list_head free;
115 unsigned long count; 118 unsigned long count;
116 union { 119 union {
117 struct kvec sge[RPCSVC_MAXPAGES]; 120 struct kvec sge[RPCSVC_MAXPAGES];
@@ -132,28 +135,32 @@ struct svcxprt_rdma {
132 int sc_max_sge; 135 int sc_max_sge;
133 int sc_max_sge_rd; /* max sge for read target */ 136 int sc_max_sge_rd; /* max sge for read target */
134 137
135 int sc_sq_depth; /* Depth of SQ */
136 atomic_t sc_sq_count; /* Number of SQ WR on queue */ 138 atomic_t sc_sq_count; /* Number of SQ WR on queue */
137 139 unsigned int sc_sq_depth; /* Depth of SQ */
138 int sc_max_requests; /* Depth of RQ */ 140 unsigned int sc_rq_depth; /* Depth of RQ */
141 u32 sc_max_requests; /* Forward credits */
142 u32 sc_max_bc_requests;/* Backward credits */
139 int sc_max_req_size; /* Size of each RQ WR buf */ 143 int sc_max_req_size; /* Size of each RQ WR buf */
140 144
141 struct ib_pd *sc_pd; 145 struct ib_pd *sc_pd;
142 146
143 atomic_t sc_dma_used; 147 atomic_t sc_dma_used;
144 atomic_t sc_ctxt_used; 148 spinlock_t sc_ctxt_lock;
149 struct list_head sc_ctxts;
150 int sc_ctxt_used;
151 spinlock_t sc_map_lock;
152 struct list_head sc_maps;
153
145 struct list_head sc_rq_dto_q; 154 struct list_head sc_rq_dto_q;
146 spinlock_t sc_rq_dto_lock; 155 spinlock_t sc_rq_dto_lock;
147 struct ib_qp *sc_qp; 156 struct ib_qp *sc_qp;
148 struct ib_cq *sc_rq_cq; 157 struct ib_cq *sc_rq_cq;
149 struct ib_cq *sc_sq_cq; 158 struct ib_cq *sc_sq_cq;
150 struct ib_mr *sc_phys_mr; /* MR for server memory */
151 int (*sc_reader)(struct svcxprt_rdma *, 159 int (*sc_reader)(struct svcxprt_rdma *,
152 struct svc_rqst *, 160 struct svc_rqst *,
153 struct svc_rdma_op_ctxt *, 161 struct svc_rdma_op_ctxt *,
154 int *, u32 *, u32, u32, u64, bool); 162 int *, u32 *, u32, u32, u64, bool);
155 u32 sc_dev_caps; /* distilled device caps */ 163 u32 sc_dev_caps; /* distilled device caps */
156 u32 sc_dma_lkey; /* local dma key */
157 unsigned int sc_frmr_pg_list_len; 164 unsigned int sc_frmr_pg_list_len;
158 struct list_head sc_frmr_q; 165 struct list_head sc_frmr_q;
159 spinlock_t sc_frmr_q_lock; 166 spinlock_t sc_frmr_q_lock;
@@ -179,8 +186,18 @@ struct svcxprt_rdma {
179#define RPCRDMA_MAX_REQUESTS 32 186#define RPCRDMA_MAX_REQUESTS 32
180#define RPCRDMA_MAX_REQ_SIZE 4096 187#define RPCRDMA_MAX_REQ_SIZE 4096
181 188
189/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
190 * current NFSv4.1 implementation supports one backchannel slot.
191 */
192#define RPCRDMA_MAX_BC_REQUESTS 2
193
182#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD 194#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
183 195
196/* svc_rdma_backchannel.c */
197extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
198 struct rpcrdma_msg *rmsgp,
199 struct xdr_buf *rcvbuf);
200
184/* svc_rdma_marshal.c */ 201/* svc_rdma_marshal.c */
185extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); 202extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
186extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, 203extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
@@ -206,6 +223,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
206 u32, u32, u64, bool); 223 u32, u32, u64, bool);
207 224
208/* svc_rdma_sendto.c */ 225/* svc_rdma_sendto.c */
226extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
227 struct svc_rdma_req_map *);
209extern int svc_rdma_sendto(struct svc_rqst *); 228extern int svc_rdma_sendto(struct svc_rqst *);
210extern struct rpcrdma_read_chunk * 229extern struct rpcrdma_read_chunk *
211 svc_rdma_get_read_chunk(struct rpcrdma_msg *); 230 svc_rdma_get_read_chunk(struct rpcrdma_msg *);
@@ -214,13 +233,14 @@ extern struct rpcrdma_read_chunk *
214extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); 233extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
215extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, 234extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
216 enum rpcrdma_errcode); 235 enum rpcrdma_errcode);
217extern int svc_rdma_post_recv(struct svcxprt_rdma *); 236extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t);
218extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); 237extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
219extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); 238extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
220extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); 239extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
221extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); 240extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
222extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); 241extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *);
223extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); 242extern void svc_rdma_put_req_map(struct svcxprt_rdma *,
243 struct svc_rdma_req_map *);
224extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); 244extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
225extern void svc_rdma_put_frmr(struct svcxprt_rdma *, 245extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
226 struct svc_rdma_fastreg_mr *); 246 struct svc_rdma_fastreg_mr *);
@@ -234,6 +254,7 @@ extern struct svc_xprt_class svc_rdma_bc_class;
234#endif 254#endif
235 255
236/* svc_rdma.c */ 256/* svc_rdma.c */
257extern struct workqueue_struct *svc_rdma_wq;
237extern int svc_rdma_init(void); 258extern int svc_rdma_init(void);
238extern void svc_rdma_cleanup(void); 259extern void svc_rdma_cleanup(void);
239 260
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 11528591d0d7..c34c9002460c 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -83,6 +83,8 @@ struct rdma_dev_addr {
83 int bound_dev_if; 83 int bound_dev_if;
84 enum rdma_transport_type transport; 84 enum rdma_transport_type transport;
85 struct net *net; 85 struct net *net;
86 enum rdma_network_type network;
87 int hoplimit;
86}; 88};
87 89
88/** 90/**
@@ -91,8 +93,8 @@ struct rdma_dev_addr {
91 * 93 *
92 * The dev_addr->net field must be initialized. 94 * The dev_addr->net field must be initialized.
93 */ 95 */
94int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, 96int rdma_translate_ip(const struct sockaddr *addr,
95 u16 *vlan_id); 97 struct rdma_dev_addr *dev_addr, u16 *vlan_id);
96 98
97/** 99/**
98 * rdma_resolve_ip - Resolve source and destination IP addresses to 100 * rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -117,6 +119,10 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
117 struct rdma_dev_addr *addr, void *context), 119 struct rdma_dev_addr *addr, void *context),
118 void *context); 120 void *context);
119 121
122int rdma_resolve_ip_route(struct sockaddr *src_addr,
123 const struct sockaddr *dst_addr,
124 struct rdma_dev_addr *addr);
125
120void rdma_addr_cancel(struct rdma_dev_addr *addr); 126void rdma_addr_cancel(struct rdma_dev_addr *addr);
121 127
122int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 128int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
@@ -125,8 +131,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
125int rdma_addr_size(struct sockaddr *addr); 131int rdma_addr_size(struct sockaddr *addr);
126 132
127int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); 133int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
128int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, 134int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
129 u8 *smac, u16 *vlan_id, int if_index); 135 const union ib_gid *dgid,
136 u8 *smac, u16 *vlan_id, int *if_index,
137 int *hoplimit);
130 138
131static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) 139static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
132{ 140{
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 269a27cf0a46..e30f19bd4a41 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -60,6 +60,7 @@ int ib_get_cached_gid(struct ib_device *device,
60 * a specified GID value occurs. 60 * a specified GID value occurs.
61 * @device: The device to query. 61 * @device: The device to query.
62 * @gid: The GID value to search for. 62 * @gid: The GID value to search for.
63 * @gid_type: The GID type to search for.
63 * @ndev: In RoCE, the net device of the device. NULL means ignore. 64 * @ndev: In RoCE, the net device of the device. NULL means ignore.
64 * @port_num: The port number of the device where the GID value was found. 65 * @port_num: The port number of the device where the GID value was found.
65 * @index: The index into the cached GID table where the GID was found. This 66 * @index: The index into the cached GID table where the GID was found. This
@@ -70,6 +71,7 @@ int ib_get_cached_gid(struct ib_device *device,
70 */ 71 */
71int ib_find_cached_gid(struct ib_device *device, 72int ib_find_cached_gid(struct ib_device *device,
72 const union ib_gid *gid, 73 const union ib_gid *gid,
74 enum ib_gid_type gid_type,
73 struct net_device *ndev, 75 struct net_device *ndev,
74 u8 *port_num, 76 u8 *port_num,
75 u16 *index); 77 u16 *index);
@@ -79,6 +81,7 @@ int ib_find_cached_gid(struct ib_device *device,
79 * GID value occurs 81 * GID value occurs
80 * @device: The device to query. 82 * @device: The device to query.
81 * @gid: The GID value to search for. 83 * @gid: The GID value to search for.
84 * @gid_type: The GID type to search for.
 82 * @port_num: The port number of the device where the GID value should be 85
83 * searched. 86 * searched.
84 * @ndev: In RoCE, the net device of the device. Null means ignore. 87 * @ndev: In RoCE, the net device of the device. Null means ignore.
@@ -90,6 +93,7 @@ int ib_find_cached_gid(struct ib_device *device,
90 */ 93 */
91int ib_find_cached_gid_by_port(struct ib_device *device, 94int ib_find_cached_gid_by_port(struct ib_device *device,
92 const union ib_gid *gid, 95 const union ib_gid *gid,
96 enum ib_gid_type gid_type,
93 u8 port_num, 97 u8 port_num,
94 struct net_device *ndev, 98 struct net_device *ndev,
95 u16 *index); 99 u16 *index);
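
Both cached-GID lookups grow an explicit gid_type argument, so existing callers now have to say which GID flavour they want. A hedged sketch of an adapted caller; IB_GID_TYPE_IB is assumed to be the pre-existing default type, and the wrapper itself is illustrative:

#include <rdma/ib_cache.h>

static int find_gid_port(struct ib_device *device, const union ib_gid *gid,
			 struct net_device *ndev, u8 *port_num)
{
	u16 index;

	/* previously: ib_find_cached_gid(device, gid, ndev, port_num, &index) */
	return ib_find_cached_gid(device, gid, IB_GID_TYPE_IB, ndev,
				  port_num, &index);
}
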
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index ec9b44dd3d80..0ff049bd9ad4 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -438,6 +438,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
438/** 438/**
439 * ib_mad_recv_handler - callback handler for a received MAD. 439 * ib_mad_recv_handler - callback handler for a received MAD.
440 * @mad_agent: MAD agent requesting the received MAD. 440 * @mad_agent: MAD agent requesting the received MAD.
441 * @send_buf: Send buffer if found, else NULL
441 * @mad_recv_wc: Received work completion information on the received MAD. 442 * @mad_recv_wc: Received work completion information on the received MAD.
442 * 443 *
443 * MADs received in response to a send request operation will be handed to 444 * MADs received in response to a send request operation will be handed to
@@ -447,6 +448,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
447 * modify the data referenced by @mad_recv_wc. 448 * modify the data referenced by @mad_recv_wc.
448 */ 449 */
449typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, 450typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
451 struct ib_mad_send_buf *send_buf,
450 struct ib_mad_recv_wc *mad_recv_wc); 452 struct ib_mad_recv_wc *mad_recv_wc);
451 453
452/** 454/**
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index e99d8f9a4551..0f3daae44bf9 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -41,6 +41,8 @@ enum {
41 IB_ETH_BYTES = 14, 41 IB_ETH_BYTES = 14,
42 IB_VLAN_BYTES = 4, 42 IB_VLAN_BYTES = 4,
43 IB_GRH_BYTES = 40, 43 IB_GRH_BYTES = 40,
44 IB_IP4_BYTES = 20,
45 IB_UDP_BYTES = 8,
44 IB_BTH_BYTES = 12, 46 IB_BTH_BYTES = 12,
45 IB_DETH_BYTES = 8 47 IB_DETH_BYTES = 8
46}; 48};
@@ -223,6 +225,27 @@ struct ib_unpacked_eth {
223 __be16 type; 225 __be16 type;
224}; 226};
225 227
228struct ib_unpacked_ip4 {
229 u8 ver;
230 u8 hdr_len;
231 u8 tos;
232 __be16 tot_len;
233 __be16 id;
234 __be16 frag_off;
235 u8 ttl;
236 u8 protocol;
237 __sum16 check;
238 __be32 saddr;
239 __be32 daddr;
240};
241
242struct ib_unpacked_udp {
243 __be16 sport;
244 __be16 dport;
245 __be16 length;
246 __be16 csum;
247};
248
226struct ib_unpacked_vlan { 249struct ib_unpacked_vlan {
227 __be16 tag; 250 __be16 tag;
228 __be16 type; 251 __be16 type;
@@ -237,6 +260,10 @@ struct ib_ud_header {
237 struct ib_unpacked_vlan vlan; 260 struct ib_unpacked_vlan vlan;
238 int grh_present; 261 int grh_present;
239 struct ib_unpacked_grh grh; 262 struct ib_unpacked_grh grh;
263 int ipv4_present;
264 struct ib_unpacked_ip4 ip4;
265 int udp_present;
266 struct ib_unpacked_udp udp;
240 struct ib_unpacked_bth bth; 267 struct ib_unpacked_bth bth;
241 struct ib_unpacked_deth deth; 268 struct ib_unpacked_deth deth;
242 int immediate_present; 269 int immediate_present;
@@ -253,13 +280,17 @@ void ib_unpack(const struct ib_field *desc,
253 void *buf, 280 void *buf,
254 void *structure); 281 void *structure);
255 282
256void ib_ud_header_init(int payload_bytes, 283__sum16 ib_ud_ip4_csum(struct ib_ud_header *header);
257 int lrh_present, 284
258 int eth_present, 285int ib_ud_header_init(int payload_bytes,
259 int vlan_present, 286 int lrh_present,
260 int grh_present, 287 int eth_present,
261 int immediate_present, 288 int vlan_present,
262 struct ib_ud_header *header); 289 int grh_present,
290 int ip_version,
291 int udp_present,
292 int immediate_present,
293 struct ib_ud_header *header);
263 294
264int ib_ud_header_pack(struct ib_ud_header *header, 295int ib_ud_header_pack(struct ib_ud_header *header,
265 void *buf); 296 void *buf);
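
ib_ud_header_init() now returns an error code and takes ip_version/udp_present, so a RoCE v2 UD header can carry IPv4 plus UDP instead of a GRH. A sketch under those assumptions; the helper name and the field values are illustrative, and the caller is assumed to fill the remaining ip4 fields (addresses, tot_len) before computing the checksum:

#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>

static int build_rocev2_ipv4_header(int payload_bytes, struct ib_ud_header *hdr)
{
	int ret;

	ret = ib_ud_header_init(payload_bytes,
				0,	/* lrh_present */
				1,	/* eth_present */
				0,	/* vlan_present */
				0,	/* grh_present: IPv4 replaces the GRH */
				4,	/* ip_version */
				1,	/* udp_present */
				0,	/* immediate_present */
				hdr);
	if (ret)
		return ret;

	hdr->udp.dport = cpu_to_be16(ROCE_V2_UDP_DPORT);
	/* ... fill hdr->ip4.saddr/daddr/tot_len here, then: */
	hdr->ip4.check = ib_ud_ip4_csum(hdr);
	return 0;
}
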
diff --git a/include/rdma/ib_pma.h b/include/rdma/ib_pma.h
index a5889f18807b..2f8a65c1fca7 100644
--- a/include/rdma/ib_pma.h
+++ b/include/rdma/ib_pma.h
@@ -42,6 +42,7 @@
42 */ 42 */
43#define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8) 43#define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8)
44#define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9) 44#define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9)
45#define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10)
45#define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12) 46#define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12)
46 47
47#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) 48#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001)
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 301969552d0a..cdc1c81aa275 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -160,6 +160,7 @@ struct ib_sa_path_rec {
160 int ifindex; 160 int ifindex;
161 /* ignored in IB */ 161 /* ignored in IB */
162 struct net *net; 162 struct net *net;
163 enum ib_gid_type gid_type;
163}; 164};
164 165
165static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec) 166static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
@@ -402,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
402 */ 403 */
403int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, 404int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
404 struct ib_sa_mcmember_rec *rec, 405 struct ib_sa_mcmember_rec *rec,
406 struct net_device *ndev,
407 enum ib_gid_type gid_type,
405 struct ib_ah_attr *ah_attr); 408 struct ib_ah_attr *ah_attr);
406 409
407/** 410/**
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 120da1d7f57e..284b00c8fea4 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -49,13 +49,19 @@
49#include <linux/scatterlist.h> 49#include <linux/scatterlist.h>
50#include <linux/workqueue.h> 50#include <linux/workqueue.h>
51#include <linux/socket.h> 51#include <linux/socket.h>
52#include <linux/irq_poll.h>
52#include <uapi/linux/if_ether.h> 53#include <uapi/linux/if_ether.h>
54#include <net/ipv6.h>
55#include <net/ip.h>
56#include <linux/string.h>
57#include <linux/slab.h>
53 58
54#include <linux/atomic.h> 59#include <linux/atomic.h>
55#include <linux/mmu_notifier.h> 60#include <linux/mmu_notifier.h>
56#include <asm/uaccess.h> 61#include <asm/uaccess.h>
57 62
58extern struct workqueue_struct *ib_wq; 63extern struct workqueue_struct *ib_wq;
64extern struct workqueue_struct *ib_comp_wq;
59 65
60union ib_gid { 66union ib_gid {
61 u8 raw[16]; 67 u8 raw[16];
@@ -67,7 +73,17 @@ union ib_gid {
67 73
68extern union ib_gid zgid; 74extern union ib_gid zgid;
69 75
76enum ib_gid_type {
77 /* If link layer is Ethernet, this is RoCE V1 */
78 IB_GID_TYPE_IB = 0,
79 IB_GID_TYPE_ROCE = 0,
80 IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
81 IB_GID_TYPE_SIZE
82};
83
84#define ROCE_V2_UDP_DPORT 4791
70struct ib_gid_attr { 85struct ib_gid_attr {
86 enum ib_gid_type gid_type;
71 struct net_device *ndev; 87 struct net_device *ndev;
72}; 88};
73 89
@@ -98,6 +114,35 @@ enum rdma_protocol_type {
98__attribute_const__ enum rdma_transport_type 114__attribute_const__ enum rdma_transport_type
99rdma_node_get_transport(enum rdma_node_type node_type); 115rdma_node_get_transport(enum rdma_node_type node_type);
100 116
117enum rdma_network_type {
118 RDMA_NETWORK_IB,
119 RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
120 RDMA_NETWORK_IPV4,
121 RDMA_NETWORK_IPV6
122};
123
124static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
125{
126 if (network_type == RDMA_NETWORK_IPV4 ||
127 network_type == RDMA_NETWORK_IPV6)
128 return IB_GID_TYPE_ROCE_UDP_ENCAP;
129
130 /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
131 return IB_GID_TYPE_IB;
132}
133
134static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
135 union ib_gid *gid)
136{
137 if (gid_type == IB_GID_TYPE_IB)
138 return RDMA_NETWORK_IB;
139
140 if (ipv6_addr_v4mapped((struct in6_addr *)gid))
141 return RDMA_NETWORK_IPV4;
142 else
143 return RDMA_NETWORK_IPV6;
144}
145
101enum rdma_link_layer { 146enum rdma_link_layer {
102 IB_LINK_LAYER_UNSPECIFIED, 147 IB_LINK_LAYER_UNSPECIFIED,
103 IB_LINK_LAYER_INFINIBAND, 148 IB_LINK_LAYER_INFINIBAND,
@@ -105,24 +150,32 @@ enum rdma_link_layer {
105}; 150};
106 151
107enum ib_device_cap_flags { 152enum ib_device_cap_flags {
108 IB_DEVICE_RESIZE_MAX_WR = 1, 153 IB_DEVICE_RESIZE_MAX_WR = (1 << 0),
109 IB_DEVICE_BAD_PKEY_CNTR = (1<<1), 154 IB_DEVICE_BAD_PKEY_CNTR = (1 << 1),
110 IB_DEVICE_BAD_QKEY_CNTR = (1<<2), 155 IB_DEVICE_BAD_QKEY_CNTR = (1 << 2),
111 IB_DEVICE_RAW_MULTI = (1<<3), 156 IB_DEVICE_RAW_MULTI = (1 << 3),
112 IB_DEVICE_AUTO_PATH_MIG = (1<<4), 157 IB_DEVICE_AUTO_PATH_MIG = (1 << 4),
113 IB_DEVICE_CHANGE_PHY_PORT = (1<<5), 158 IB_DEVICE_CHANGE_PHY_PORT = (1 << 5),
114 IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), 159 IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6),
115 IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), 160 IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7),
116 IB_DEVICE_SHUTDOWN_PORT = (1<<8), 161 IB_DEVICE_SHUTDOWN_PORT = (1 << 8),
117 IB_DEVICE_INIT_TYPE = (1<<9), 162 IB_DEVICE_INIT_TYPE = (1 << 9),
118 IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), 163 IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10),
119 IB_DEVICE_SYS_IMAGE_GUID = (1<<11), 164 IB_DEVICE_SYS_IMAGE_GUID = (1 << 11),
120 IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), 165 IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12),
121 IB_DEVICE_SRQ_RESIZE = (1<<13), 166 IB_DEVICE_SRQ_RESIZE = (1 << 13),
122 IB_DEVICE_N_NOTIFY_CQ = (1<<14), 167 IB_DEVICE_N_NOTIFY_CQ = (1 << 14),
123 IB_DEVICE_LOCAL_DMA_LKEY = (1<<15), 168
124 IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */ 169 /*
125 IB_DEVICE_MEM_WINDOW = (1<<17), 170 * This device supports a per-device lkey or stag that can be
171 * used without performing a memory registration for the local
172 * memory. Note that ULPs should never check this flag, but
 173 * instead use the local_dma_lkey flag in the ib_pd structure,
174 * which will always contain a usable lkey.
175 */
176 IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15),
177 IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16),
178 IB_DEVICE_MEM_WINDOW = (1 << 17),
126 /* 179 /*
127 * Devices should set IB_DEVICE_UD_IP_SUM if they support 180 * Devices should set IB_DEVICE_UD_IP_SUM if they support
128 * insertion of UDP and TCP checksum on outgoing UD IPoIB 181 * insertion of UDP and TCP checksum on outgoing UD IPoIB
@@ -130,18 +183,35 @@ enum ib_device_cap_flags {
130 * incoming messages. Setting this flag implies that the 183 * incoming messages. Setting this flag implies that the
131 * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. 184 * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
132 */ 185 */
133 IB_DEVICE_UD_IP_CSUM = (1<<18), 186 IB_DEVICE_UD_IP_CSUM = (1 << 18),
134 IB_DEVICE_UD_TSO = (1<<19), 187 IB_DEVICE_UD_TSO = (1 << 19),
135 IB_DEVICE_XRC = (1<<20), 188 IB_DEVICE_XRC = (1 << 20),
136 IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), 189
137 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), 190 /*
138 IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), 191 * This device supports the IB "base memory management extension",
139 IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), 192 * which includes support for fast registrations (IB_WR_REG_MR,
140 IB_DEVICE_RC_IP_CSUM = (1<<25), 193 * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs). This flag should
141 IB_DEVICE_RAW_IP_CSUM = (1<<26), 194 * also be set by any iWarp device which must support FRs to comply
 142 IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), 195 * with the iWarp verbs spec. iWarp devices also support the
143 IB_DEVICE_SIGNATURE_HANDOVER = (1<<30), 196 * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
144 IB_DEVICE_ON_DEMAND_PAGING = (1<<31), 197 * stag.
198 */
199 IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21),
200 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22),
201 IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
202 IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
203 IB_DEVICE_RC_IP_CSUM = (1 << 25),
204 IB_DEVICE_RAW_IP_CSUM = (1 << 26),
205 /*
206 * Devices should set IB_DEVICE_CROSS_CHANNEL if they
207 * support execution of WQEs that involve synchronization
208 * of I/O operations with single completion queue managed
209 * by hardware.
210 */
211 IB_DEVICE_CROSS_CHANNEL = (1 << 27),
212 IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
213 IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
214 IB_DEVICE_ON_DEMAND_PAGING = (1 << 31),
145}; 215};
146 216
147enum ib_signature_prot_cap { 217enum ib_signature_prot_cap {
@@ -184,6 +254,7 @@ struct ib_odp_caps {
184 254
185enum ib_cq_creation_flags { 255enum ib_cq_creation_flags {
186 IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, 256 IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
257 IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
187}; 258};
188 259
189struct ib_cq_init_attr { 260struct ib_cq_init_attr {
@@ -393,6 +464,7 @@ union rdma_protocol_stats {
393#define RDMA_CORE_CAP_PROT_IB 0x00100000 464#define RDMA_CORE_CAP_PROT_IB 0x00100000
394#define RDMA_CORE_CAP_PROT_ROCE 0x00200000 465#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
395#define RDMA_CORE_CAP_PROT_IWARP 0x00400000 466#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
467#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
396 468
397#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ 469#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
398 | RDMA_CORE_CAP_IB_MAD \ 470 | RDMA_CORE_CAP_IB_MAD \
@@ -405,6 +477,12 @@ union rdma_protocol_stats {
405 | RDMA_CORE_CAP_IB_CM \ 477 | RDMA_CORE_CAP_IB_CM \
406 | RDMA_CORE_CAP_AF_IB \ 478 | RDMA_CORE_CAP_AF_IB \
407 | RDMA_CORE_CAP_ETH_AH) 479 | RDMA_CORE_CAP_ETH_AH)
480#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP \
481 (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
482 | RDMA_CORE_CAP_IB_MAD \
483 | RDMA_CORE_CAP_IB_CM \
484 | RDMA_CORE_CAP_AF_IB \
485 | RDMA_CORE_CAP_ETH_AH)
408#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \ 486#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \
409 | RDMA_CORE_CAP_IW_CM) 487 | RDMA_CORE_CAP_IW_CM)
410#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \ 488#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
@@ -519,6 +597,17 @@ struct ib_grh {
519 union ib_gid dgid; 597 union ib_gid dgid;
520}; 598};
521 599
600union rdma_network_hdr {
601 struct ib_grh ibgrh;
602 struct {
 603 /* The IB spec states that if it's IPv4, the IPv4 header
 604 * is located in the last 20 bytes of the GRH.
605 */
606 u8 reserved[20];
607 struct iphdr roce4grh;
608 };
609};
610
522enum { 611enum {
523 IB_MULTICAST_QPN = 0xffffff 612 IB_MULTICAST_QPN = 0xffffff
524}; 613};
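
For RoCE v2 traffic, the buffer that used to hold a GRH can now hold an IPv4 header in its last 20 bytes, which is what the union above expresses. A small sketch, assuming buf points at that 40-byte network header (a driver-specific detail not defined by this patch):

#include <linux/ip.h>
#include <rdma/ib_verbs.h>

static u8 rocev2_ipv4_ttl(void *buf)
{
	union rdma_network_hdr *hdr = buf;

	/* Valid only when the completion reported RDMA_NETWORK_IPV4. */
	return hdr->roce4grh.ttl;
}
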
@@ -734,7 +823,6 @@ enum ib_wc_opcode {
734 IB_WC_RDMA_READ, 823 IB_WC_RDMA_READ,
735 IB_WC_COMP_SWAP, 824 IB_WC_COMP_SWAP,
736 IB_WC_FETCH_ADD, 825 IB_WC_FETCH_ADD,
737 IB_WC_BIND_MW,
738 IB_WC_LSO, 826 IB_WC_LSO,
739 IB_WC_LOCAL_INV, 827 IB_WC_LOCAL_INV,
740 IB_WC_REG_MR, 828 IB_WC_REG_MR,
@@ -755,10 +843,14 @@ enum ib_wc_flags {
755 IB_WC_IP_CSUM_OK = (1<<3), 843 IB_WC_IP_CSUM_OK = (1<<3),
756 IB_WC_WITH_SMAC = (1<<4), 844 IB_WC_WITH_SMAC = (1<<4),
757 IB_WC_WITH_VLAN = (1<<5), 845 IB_WC_WITH_VLAN = (1<<5),
846 IB_WC_WITH_NETWORK_HDR_TYPE = (1<<6),
758}; 847};
759 848
760struct ib_wc { 849struct ib_wc {
761 u64 wr_id; 850 union {
851 u64 wr_id;
852 struct ib_cqe *wr_cqe;
853 };
762 enum ib_wc_status status; 854 enum ib_wc_status status;
763 enum ib_wc_opcode opcode; 855 enum ib_wc_opcode opcode;
764 u32 vendor_err; 856 u32 vendor_err;
@@ -777,6 +869,7 @@ struct ib_wc {
777 u8 port_num; /* valid only for DR SMPs on switches */ 869 u8 port_num; /* valid only for DR SMPs on switches */
778 u8 smac[ETH_ALEN]; 870 u8 smac[ETH_ALEN];
779 u16 vlan_id; 871 u16 vlan_id;
872 u8 network_hdr_type;
780}; 873};
781 874
782enum ib_cq_notify_flags { 875enum ib_cq_notify_flags {
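
wc->network_hdr_type is only meaningful when IB_WC_WITH_NETWORK_HDR_TYPE is set and maps back onto a GID type through the helper added earlier in this patch. An illustrative wrapper; the function name is hypothetical:

#include <rdma/ib_verbs.h>

static enum ib_gid_type wc_gid_type(const struct ib_wc *wc)
{
	if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
		return ib_network_to_gid_type(wc->network_hdr_type);

	/* No network header reported: IB or RoCE v1. */
	return IB_GID_TYPE_IB;
}
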
@@ -866,6 +959,9 @@ enum ib_qp_type {
866enum ib_qp_create_flags { 959enum ib_qp_create_flags {
867 IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, 960 IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
868 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, 961 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
962 IB_QP_CREATE_CROSS_CHANNEL = 1 << 2,
963 IB_QP_CREATE_MANAGED_SEND = 1 << 3,
964 IB_QP_CREATE_MANAGED_RECV = 1 << 4,
869 IB_QP_CREATE_NETIF_QP = 1 << 5, 965 IB_QP_CREATE_NETIF_QP = 1 << 5,
870 IB_QP_CREATE_SIGNATURE_EN = 1 << 6, 966 IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
871 IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, 967 IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
@@ -1027,7 +1123,6 @@ enum ib_wr_opcode {
1027 IB_WR_REG_MR, 1123 IB_WR_REG_MR,
1028 IB_WR_MASKED_ATOMIC_CMP_AND_SWP, 1124 IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
1029 IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, 1125 IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
1030 IB_WR_BIND_MW,
1031 IB_WR_REG_SIG_MR, 1126 IB_WR_REG_SIG_MR,
1032 /* reserve values for low level drivers' internal use. 1127 /* reserve values for low level drivers' internal use.
1033 * These values will not be used at all in the ib core layer. 1128 * These values will not be used at all in the ib core layer.
@@ -1062,26 +1157,16 @@ struct ib_sge {
1062 u32 lkey; 1157 u32 lkey;
1063}; 1158};
1064 1159
1065/** 1160struct ib_cqe {
1066 * struct ib_mw_bind_info - Parameters for a memory window bind operation. 1161 void (*done)(struct ib_cq *cq, struct ib_wc *wc);
1067 * @mr: A memory region to bind the memory window to.
1068 * @addr: The address where the memory window should begin.
1069 * @length: The length of the memory window, in bytes.
1070 * @mw_access_flags: Access flags from enum ib_access_flags for the window.
1071 *
1072 * This struct contains the shared parameters for type 1 and type 2
1073 * memory window bind operations.
1074 */
1075struct ib_mw_bind_info {
1076 struct ib_mr *mr;
1077 u64 addr;
1078 u64 length;
1079 int mw_access_flags;
1080}; 1162};
1081 1163
1082struct ib_send_wr { 1164struct ib_send_wr {
1083 struct ib_send_wr *next; 1165 struct ib_send_wr *next;
1084 u64 wr_id; 1166 union {
1167 u64 wr_id;
1168 struct ib_cqe *wr_cqe;
1169 };
1085 struct ib_sge *sg_list; 1170 struct ib_sge *sg_list;
1086 int num_sge; 1171 int num_sge;
1087 enum ib_wr_opcode opcode; 1172 enum ib_wr_opcode opcode;
@@ -1147,19 +1232,6 @@ static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
1147 return container_of(wr, struct ib_reg_wr, wr); 1232 return container_of(wr, struct ib_reg_wr, wr);
1148} 1233}
1149 1234
1150struct ib_bind_mw_wr {
1151 struct ib_send_wr wr;
1152 struct ib_mw *mw;
1153 /* The new rkey for the memory window. */
1154 u32 rkey;
1155 struct ib_mw_bind_info bind_info;
1156};
1157
1158static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
1159{
1160 return container_of(wr, struct ib_bind_mw_wr, wr);
1161}
1162
1163struct ib_sig_handover_wr { 1235struct ib_sig_handover_wr {
1164 struct ib_send_wr wr; 1236 struct ib_send_wr wr;
1165 struct ib_sig_attrs *sig_attrs; 1237 struct ib_sig_attrs *sig_attrs;
@@ -1175,7 +1247,10 @@ static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
1175 1247
1176struct ib_recv_wr { 1248struct ib_recv_wr {
1177 struct ib_recv_wr *next; 1249 struct ib_recv_wr *next;
1178 u64 wr_id; 1250 union {
1251 u64 wr_id;
1252 struct ib_cqe *wr_cqe;
1253 };
1179 struct ib_sge *sg_list; 1254 struct ib_sge *sg_list;
1180 int num_sge; 1255 int num_sge;
1181}; 1256};
@@ -1190,20 +1265,10 @@ enum ib_access_flags {
1190 IB_ACCESS_ON_DEMAND = (1<<6), 1265 IB_ACCESS_ON_DEMAND = (1<<6),
1191}; 1266};
1192 1267
1193struct ib_phys_buf { 1268/*
1194 u64 addr; 1269 * XXX: these are apparently used for ->rereg_user_mr, no idea why they
1195 u64 size; 1270 * are hidden here instead of a uapi header!
1196}; 1271 */
1197
1198struct ib_mr_attr {
1199 struct ib_pd *pd;
1200 u64 device_virt_addr;
1201 u64 size;
1202 int mr_access_flags;
1203 u32 lkey;
1204 u32 rkey;
1205};
1206
1207enum ib_mr_rereg_flags { 1272enum ib_mr_rereg_flags {
1208 IB_MR_REREG_TRANS = 1, 1273 IB_MR_REREG_TRANS = 1,
1209 IB_MR_REREG_PD = (1<<1), 1274 IB_MR_REREG_PD = (1<<1),
@@ -1211,18 +1276,6 @@ enum ib_mr_rereg_flags {
1211 IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1) 1276 IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
1212}; 1277};
1213 1278
1214/**
1215 * struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
1216 * @wr_id: Work request id.
1217 * @send_flags: Flags from ib_send_flags enum.
1218 * @bind_info: More parameters of the bind operation.
1219 */
1220struct ib_mw_bind {
1221 u64 wr_id;
1222 int send_flags;
1223 struct ib_mw_bind_info bind_info;
1224};
1225
1226struct ib_fmr_attr { 1279struct ib_fmr_attr {
1227 int max_pages; 1280 int max_pages;
1228 int max_maps; 1281 int max_maps;
@@ -1307,6 +1360,12 @@ struct ib_ah {
1307 1360
1308typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); 1361typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
1309 1362
1363enum ib_poll_context {
1364 IB_POLL_DIRECT, /* caller context, no hw completions */
1365 IB_POLL_SOFTIRQ, /* poll from softirq context */
1366 IB_POLL_WORKQUEUE, /* poll from workqueue */
1367};
1368
1310struct ib_cq { 1369struct ib_cq {
1311 struct ib_device *device; 1370 struct ib_device *device;
1312 struct ib_uobject *uobject; 1371 struct ib_uobject *uobject;
@@ -1315,6 +1374,12 @@ struct ib_cq {
1315 void *cq_context; 1374 void *cq_context;
1316 int cqe; 1375 int cqe;
1317 atomic_t usecnt; /* count number of work queues */ 1376 atomic_t usecnt; /* count number of work queues */
1377 enum ib_poll_context poll_ctx;
1378 struct ib_wc *wc;
1379 union {
1380 struct irq_poll iop;
1381 struct work_struct work;
1382 };
1318}; 1383};
1319 1384
1320struct ib_srq { 1385struct ib_srq {
@@ -1363,7 +1428,6 @@ struct ib_mr {
1363 u64 iova; 1428 u64 iova;
1364 u32 length; 1429 u32 length;
1365 unsigned int page_size; 1430 unsigned int page_size;
1366 atomic_t usecnt; /* count number of MWs */
1367}; 1431};
1368 1432
1369struct ib_mw { 1433struct ib_mw {
@@ -1724,11 +1788,6 @@ struct ib_device {
1724 int wc_cnt); 1788 int wc_cnt);
1725 struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, 1789 struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
1726 int mr_access_flags); 1790 int mr_access_flags);
1727 struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd,
1728 struct ib_phys_buf *phys_buf_array,
1729 int num_phys_buf,
1730 int mr_access_flags,
1731 u64 *iova_start);
1732 struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, 1791 struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
1733 u64 start, u64 length, 1792 u64 start, u64 length,
1734 u64 virt_addr, 1793 u64 virt_addr,
@@ -1741,8 +1800,6 @@ struct ib_device {
1741 int mr_access_flags, 1800 int mr_access_flags,
1742 struct ib_pd *pd, 1801 struct ib_pd *pd,
1743 struct ib_udata *udata); 1802 struct ib_udata *udata);
1744 int (*query_mr)(struct ib_mr *mr,
1745 struct ib_mr_attr *mr_attr);
1746 int (*dereg_mr)(struct ib_mr *mr); 1803 int (*dereg_mr)(struct ib_mr *mr);
1747 struct ib_mr * (*alloc_mr)(struct ib_pd *pd, 1804 struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
1748 enum ib_mr_type mr_type, 1805 enum ib_mr_type mr_type,
@@ -1750,18 +1807,8 @@ struct ib_device {
1750 int (*map_mr_sg)(struct ib_mr *mr, 1807 int (*map_mr_sg)(struct ib_mr *mr,
1751 struct scatterlist *sg, 1808 struct scatterlist *sg,
1752 int sg_nents); 1809 int sg_nents);
1753 int (*rereg_phys_mr)(struct ib_mr *mr,
1754 int mr_rereg_mask,
1755 struct ib_pd *pd,
1756 struct ib_phys_buf *phys_buf_array,
1757 int num_phys_buf,
1758 int mr_access_flags,
1759 u64 *iova_start);
1760 struct ib_mw * (*alloc_mw)(struct ib_pd *pd, 1810 struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
1761 enum ib_mw_type type); 1811 enum ib_mw_type type);
1762 int (*bind_mw)(struct ib_qp *qp,
1763 struct ib_mw *mw,
1764 struct ib_mw_bind *mw_bind);
1765 int (*dealloc_mw)(struct ib_mw *mw); 1812 int (*dealloc_mw)(struct ib_mw *mw);
1766 struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, 1813 struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
1767 int mr_access_flags, 1814 int mr_access_flags,
@@ -1823,6 +1870,7 @@ struct ib_device {
1823 u16 is_switch:1; 1870 u16 is_switch:1;
1824 u8 node_type; 1871 u8 node_type;
1825 u8 phys_port_cnt; 1872 u8 phys_port_cnt;
1873 struct ib_device_attr attrs;
1826 1874
1827 /** 1875 /**
1828 * The following mandatory functions are used only at device 1876 * The following mandatory functions are used only at device
@@ -1888,6 +1936,31 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
1888 return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; 1936 return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
1889} 1937}
1890 1938
1939static inline bool ib_is_udata_cleared(struct ib_udata *udata,
1940 size_t offset,
1941 size_t len)
1942{
1943 const void __user *p = udata->inbuf + offset;
1944 bool ret = false;
1945 u8 *buf;
1946
1947 if (len > USHRT_MAX)
1948 return false;
1949
1950 buf = kmalloc(len, GFP_KERNEL);
1951 if (!buf)
1952 return false;
1953
1954 if (copy_from_user(buf, p, len))
1955 goto free;
1956
1957 ret = !memchr_inv(buf, 0, len);
1958
1959free:
1960 kfree(buf);
1961 return ret;
1962}
1963
1891/** 1964/**
1892 * ib_modify_qp_is_ok - Check that the supplied attribute mask 1965 * ib_modify_qp_is_ok - Check that the supplied attribute mask
1893 * contains all required attributes and no attributes not allowed for 1966 * contains all required attributes and no attributes not allowed for
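
ib_is_udata_cleared() lets a driver verify that a span of the user command buffer is zeroed, for example a reserved tail that may gain meaning in a later ABI revision. A sketch with a hypothetical command layout; the struct and function names are not from the patch:

#include <linux/stddef.h>
#include <linux/types.h>
#include <rdma/ib_verbs.h>

struct my_create_cmd {			/* hypothetical uverbs command */
	__u64 user_handle;
	__u8  reserved[8];
};

static int my_check_reserved(struct ib_udata *udata)
{
	if (!ib_is_udata_cleared(udata,
				 offsetof(struct my_create_cmd, reserved),
				 sizeof(((struct my_create_cmd *)0)->reserved)))
		return -EINVAL;

	return 0;
}
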
@@ -1912,9 +1985,6 @@ int ib_register_event_handler (struct ib_event_handler *event_handler);
1912int ib_unregister_event_handler(struct ib_event_handler *event_handler); 1985int ib_unregister_event_handler(struct ib_event_handler *event_handler);
1913void ib_dispatch_event(struct ib_event *event); 1986void ib_dispatch_event(struct ib_event *event);
1914 1987
1915int ib_query_device(struct ib_device *device,
1916 struct ib_device_attr *device_attr);
1917
1918int ib_query_port(struct ib_device *device, 1988int ib_query_port(struct ib_device *device,
1919 u8 port_num, struct ib_port_attr *port_attr); 1989 u8 port_num, struct ib_port_attr *port_attr);
1920 1990
@@ -1968,6 +2038,17 @@ static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
1968 2038
1969static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num) 2039static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
1970{ 2040{
2041 return device->port_immutable[port_num].core_cap_flags &
2042 (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
2043}
2044
2045static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
2046{
2047 return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
2048}
2049
2050static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
2051{
1971 return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; 2052 return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
1972} 2053}
1973 2054
@@ -1978,8 +2059,8 @@ static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_n
1978 2059
1979static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) 2060static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
1980{ 2061{
1981 return device->port_immutable[port_num].core_cap_flags & 2062 return rdma_protocol_ib(device, port_num) ||
1982 (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE); 2063 rdma_protocol_roce(device, port_num);
1983} 2064}
1984 2065
1985/** 2066/**
@@ -2220,7 +2301,8 @@ int ib_modify_port(struct ib_device *device,
2220 struct ib_port_modify *port_modify); 2301 struct ib_port_modify *port_modify);
2221 2302
2222int ib_find_gid(struct ib_device *device, union ib_gid *gid, 2303int ib_find_gid(struct ib_device *device, union ib_gid *gid,
2223 struct net_device *ndev, u8 *port_num, u16 *index); 2304 enum ib_gid_type gid_type, struct net_device *ndev,
2305 u8 *port_num, u16 *index);
2224 2306
2225int ib_find_pkey(struct ib_device *device, 2307int ib_find_pkey(struct ib_device *device,
2226 u8 port_num, u16 pkey, u16 *index); 2308 u8 port_num, u16 pkey, u16 *index);
@@ -2454,6 +2536,11 @@ static inline int ib_post_recv(struct ib_qp *qp,
2454 return qp->device->post_recv(qp, recv_wr, bad_recv_wr); 2536 return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
2455} 2537}
2456 2538
2539struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
2540 int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx);
2541void ib_free_cq(struct ib_cq *cq);
2542int ib_process_cq_direct(struct ib_cq *cq, int budget);
2543
2457/** 2544/**
2458 * ib_create_cq - Creates a CQ on the specified device. 2545 * ib_create_cq - Creates a CQ on the specified device.
2459 * @device: The device on which to create the CQ. 2546 * @device: The device on which to create the CQ.
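
ib_alloc_cq() together with the ib_cqe/wr_cqe plumbing above replaces hand-rolled wr_id dispatch: the core polls the CQ in the requested context and invokes each completion's done() callback. A minimal sketch of a consumer; the my_* names are hypothetical and error handling is elided:

#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct my_request {
	struct ib_cqe cqe;		/* embedded so done() can find us */
	/* ... ULP-specific state ... */
};

static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_request *req = container_of(wc->wr_cqe,
					      struct my_request, cqe);

	if (wc->status != IB_WC_SUCCESS)
		pr_err("send failed with status %d\n", wc->status);

	kfree(req);	/* illustrative: release the request */
}

static int my_post_send(struct ib_qp *qp, struct my_request *req,
			struct ib_sge *sge)
{
	struct ib_send_wr wr = {}, *bad_wr;

	req->cqe.done = my_send_done;
	wr.wr_cqe = &req->cqe;		/* instead of a wr_id cookie */
	wr.sg_list = sge;
	wr.num_sge = 1;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	return ib_post_send(qp, &wr, &bad_wr);
}

static struct ib_cq *my_create_cq(struct ib_device *dev, void *priv)
{
	/* 128 CQEs on completion vector 0, polled from softirq context. */
	return ib_alloc_cq(dev, priv, 128, 0, IB_POLL_SOFTIRQ);
}
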
@@ -2839,13 +2926,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
2839} 2926}
2840 2927
2841/** 2928/**
2842 * ib_query_mr - Retrieves information about a specific memory region.
2843 * @mr: The memory region to retrieve information about.
2844 * @mr_attr: The attributes of the specified memory region.
2845 */
2846int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
2847
2848/**
2849 * ib_dereg_mr - Deregisters a memory region and removes it from the 2929 * ib_dereg_mr - Deregisters a memory region and removes it from the
2850 * HCA translation table. 2930 * HCA translation table.
2851 * @mr: The memory region to deregister. 2931 * @mr: The memory region to deregister.
@@ -2882,42 +2962,6 @@ static inline u32 ib_inc_rkey(u32 rkey)
2882} 2962}
2883 2963
2884/** 2964/**
2885 * ib_alloc_mw - Allocates a memory window.
2886 * @pd: The protection domain associated with the memory window.
2887 * @type: The type of the memory window (1 or 2).
2888 */
2889struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
2890
2891/**
2892 * ib_bind_mw - Posts a work request to the send queue of the specified
2893 * QP, which binds the memory window to the given address range and
2894 * remote access attributes.
2895 * @qp: QP to post the bind work request on.
2896 * @mw: The memory window to bind.
2897 * @mw_bind: Specifies information about the memory window, including
2898 * its address range, remote access rights, and associated memory region.
2899 *
2900 * If there is no immediate error, the function will update the rkey member
2901 * of the mw parameter to its new value. The bind operation can still fail
2902 * asynchronously.
2903 */
2904static inline int ib_bind_mw(struct ib_qp *qp,
2905 struct ib_mw *mw,
2906 struct ib_mw_bind *mw_bind)
2907{
2908 /* XXX reference counting in corresponding MR? */
2909 return mw->device->bind_mw ?
2910 mw->device->bind_mw(qp, mw, mw_bind) :
2911 -ENOSYS;
2912}
2913
2914/**
2915 * ib_dealloc_mw - Deallocates a memory window.
2916 * @mw: The memory window to deallocate.
2917 */
2918int ib_dealloc_mw(struct ib_mw *mw);
2919
2920/**
 2921 * ib_alloc_fmr - Allocates an unmapped fast memory region. 2965 * ib_alloc_fmr - Allocates an unmapped fast memory region.
2922 * @pd: The protection domain associated with the unmapped region. 2966 * @pd: The protection domain associated with the unmapped region.
2923 * @mr_access_flags: Specifies the memory access rights. 2967 * @mr_access_flags: Specifies the memory access rights.
diff --git a/include/scsi/iser.h b/include/scsi/iser.h
new file mode 100644
index 000000000000..2e678fa74eca
--- /dev/null
+++ b/include/scsi/iser.h
@@ -0,0 +1,78 @@
1/*
2 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#ifndef ISCSI_ISER_H
33#define ISCSI_ISER_H
34
35#define ISER_ZBVA_NOT_SUP 0x80
36#define ISER_SEND_W_INV_NOT_SUP 0x40
37#define ISERT_ZBVA_NOT_USED 0x80
38#define ISERT_SEND_W_INV_NOT_USED 0x40
39
40#define ISCSI_CTRL 0x10
41#define ISER_HELLO 0x20
42#define ISER_HELLORPLY 0x30
43
44#define ISER_VER 0x10
45#define ISER_WSV 0x08
46#define ISER_RSV 0x04
47
48/**
49 * struct iser_cm_hdr - iSER CM header (from iSER Annex A12)
50 *
51 * @flags: flags support (zbva, send_w_inv)
52 * @rsvd: reserved
53 */
54struct iser_cm_hdr {
55 u8 flags;
56 u8 rsvd[3];
57} __packed;
58
59/**
60 * struct iser_ctrl - iSER header of iSCSI control PDU
61 *
62 * @flags: opcode and read/write valid bits
63 * @rsvd: reserved
64 * @write_stag: write rkey
65 * @write_va: write virtual address
 66 * @read_stag: read rkey
67 * @read_va: read virtual address
68 */
69struct iser_ctrl {
70 u8 flags;
71 u8 rsvd[3];
72 __be32 write_stag;
73 __be64 write_va;
74 __be32 read_stag;
75 __be64 read_va;
76} __packed;
77
78#endif /* ISCSI_ISER_H */
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index ff8f6c091a15..f95f25e786ef 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -15,7 +15,7 @@ struct softirq_action;
15 softirq_name(NET_TX) \ 15 softirq_name(NET_TX) \
16 softirq_name(NET_RX) \ 16 softirq_name(NET_RX) \
17 softirq_name(BLOCK) \ 17 softirq_name(BLOCK) \
18 softirq_name(BLOCK_IOPOLL) \ 18 softirq_name(IRQ_POLL) \
19 softirq_name(TASKLET) \ 19 softirq_name(TASKLET) \
20 softirq_name(SCHED) \ 20 softirq_name(SCHED) \
21 softirq_name(HRTIMER) \ 21 softirq_name(HRTIMER) \
diff --git a/lib/Kconfig b/lib/Kconfig
index 435f7315bc89..133ebc0c1773 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -477,6 +477,11 @@ config DDR
477 information. This data is useful for drivers handling 477 information. This data is useful for drivers handling
478 DDR SDRAM controllers. 478 DDR SDRAM controllers.
479 479
480config IRQ_POLL
481 bool "IRQ polling library"
482 help
 483 Helper library for polling-based interrupt mitigation.
484
480config MPILIB 485config MPILIB
481 tristate 486 tristate
482 select CLZ_TAB 487 select CLZ_TAB
diff --git a/lib/Makefile b/lib/Makefile
index 2d4bc33d09b4..a7c26a41a738 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -165,6 +165,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
165 165
166obj-$(CONFIG_SG_SPLIT) += sg_split.o 166obj-$(CONFIG_SG_SPLIT) += sg_split.o
167obj-$(CONFIG_STMP_DEVICE) += stmp_device.o 167obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
168obj-$(CONFIG_IRQ_POLL) += irq_poll.o
168 169
169libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ 170libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
170 fdt_empty_tree.o 171 fdt_empty_tree.o
diff --git a/block/blk-iopoll.c b/lib/irq_poll.c
index 0736729d6494..836f7db4e548 100644
--- a/block/blk-iopoll.c
+++ b/lib/irq_poll.c
@@ -6,84 +6,84 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/bio.h> 8#include <linux/bio.h>
9#include <linux/blkdev.h>
10#include <linux/interrupt.h> 9#include <linux/interrupt.h>
11#include <linux/cpu.h> 10#include <linux/cpu.h>
12#include <linux/blk-iopoll.h> 11#include <linux/irq_poll.h>
13#include <linux/delay.h> 12#include <linux/delay.h>
14 13
15#include "blk.h" 14static unsigned int irq_poll_budget __read_mostly = 256;
16
17static unsigned int blk_iopoll_budget __read_mostly = 256;
18 15
19static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); 16static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
20 17
21/** 18/**
22 * blk_iopoll_sched - Schedule a run of the iopoll handler 19 * irq_poll_sched - Schedule a run of the iopoll handler
23 * @iop: The parent iopoll structure 20 * @iop: The parent iopoll structure
24 * 21 *
25 * Description: 22 * Description:
26 * Add this blk_iopoll structure to the pending poll list and trigger the 23 * Add this irq_poll structure to the pending poll list and trigger the
27 * raise of the blk iopoll softirq. The driver must already have gotten a 24 * raise of the blk iopoll softirq.
28 * successful return from blk_iopoll_sched_prep() before calling this.
29 **/ 25 **/
30void blk_iopoll_sched(struct blk_iopoll *iop) 26void irq_poll_sched(struct irq_poll *iop)
31{ 27{
32 unsigned long flags; 28 unsigned long flags;
33 29
30 if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
31 return;
32 if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
33 return;
34
34 local_irq_save(flags); 35 local_irq_save(flags);
35 list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); 36 list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
36 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); 37 __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
37 local_irq_restore(flags); 38 local_irq_restore(flags);
38} 39}
39EXPORT_SYMBOL(blk_iopoll_sched); 40EXPORT_SYMBOL(irq_poll_sched);
40 41
41/** 42/**
42 * __blk_iopoll_complete - Mark this @iop as un-polled again 43 * __irq_poll_complete - Mark this @iop as un-polled again
43 * @iop: The parent iopoll structure 44 * @iop: The parent iopoll structure
44 * 45 *
45 * Description: 46 * Description:
46 * See blk_iopoll_complete(). This function must be called with interrupts 47 * See irq_poll_complete(). This function must be called with interrupts
47 * disabled. 48 * disabled.
48 **/ 49 **/
49void __blk_iopoll_complete(struct blk_iopoll *iop) 50static void __irq_poll_complete(struct irq_poll *iop)
50{ 51{
51 list_del(&iop->list); 52 list_del(&iop->list);
52 smp_mb__before_atomic(); 53 smp_mb__before_atomic();
53 clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); 54 clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
54} 55}
55EXPORT_SYMBOL(__blk_iopoll_complete);
56 56
57/** 57/**
58 * blk_iopoll_complete - Mark this @iop as un-polled again 58 * irq_poll_complete - Mark this @iop as un-polled again
59 * @iop: The parent iopoll structure 59 * @iop: The parent iopoll structure
60 * 60 *
61 * Description: 61 * Description:
62 * If a driver consumes less than the assigned budget in its run of the 62 * If a driver consumes less than the assigned budget in its run of the
63 * iopoll handler, it'll end the polled mode by calling this function. The 63 * iopoll handler, it'll end the polled mode by calling this function. The
64 * iopoll handler will not be invoked again before blk_iopoll_sched_prep() 64 * iopoll handler will not be invoked again before irq_poll_sched()
65 * is called. 65 * is called.
66 **/ 66 **/
67void blk_iopoll_complete(struct blk_iopoll *iop) 67void irq_poll_complete(struct irq_poll *iop)
68{ 68{
69 unsigned long flags; 69 unsigned long flags;
70 70
71 local_irq_save(flags); 71 local_irq_save(flags);
72 __blk_iopoll_complete(iop); 72 __irq_poll_complete(iop);
73 local_irq_restore(flags); 73 local_irq_restore(flags);
74} 74}
75EXPORT_SYMBOL(blk_iopoll_complete); 75EXPORT_SYMBOL(irq_poll_complete);
76 76
77static void blk_iopoll_softirq(struct softirq_action *h) 77static void irq_poll_softirq(struct softirq_action *h)
78{ 78{
79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); 79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
80 int rearm = 0, budget = blk_iopoll_budget; 80 int rearm = 0, budget = irq_poll_budget;
81 unsigned long start_time = jiffies; 81 unsigned long start_time = jiffies;
82 82
83 local_irq_disable(); 83 local_irq_disable();
84 84
85 while (!list_empty(list)) { 85 while (!list_empty(list)) {
86 struct blk_iopoll *iop; 86 struct irq_poll *iop;
87 int work, weight; 87 int work, weight;
88 88
89 /* 89 /*
@@ -101,11 +101,11 @@ static void blk_iopoll_softirq(struct softirq_action *h)
101 * entries to the tail of this list, and only ->poll() 101 * entries to the tail of this list, and only ->poll()
102 * calls can remove this head entry from the list. 102 * calls can remove this head entry from the list.
103 */ 103 */
104 iop = list_entry(list->next, struct blk_iopoll, list); 104 iop = list_entry(list->next, struct irq_poll, list);
105 105
106 weight = iop->weight; 106 weight = iop->weight;
107 work = 0; 107 work = 0;
108 if (test_bit(IOPOLL_F_SCHED, &iop->state)) 108 if (test_bit(IRQ_POLL_F_SCHED, &iop->state))
109 work = iop->poll(iop, weight); 109 work = iop->poll(iop, weight);
110 110
111 budget -= work; 111 budget -= work;
@@ -121,72 +121,70 @@ static void blk_iopoll_softirq(struct softirq_action *h)
121 * move the instance around on the list at-will. 121 * move the instance around on the list at-will.
122 */ 122 */
123 if (work >= weight) { 123 if (work >= weight) {
124 if (blk_iopoll_disable_pending(iop)) 124 if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
125 __blk_iopoll_complete(iop); 125 __irq_poll_complete(iop);
126 else 126 else
127 list_move_tail(&iop->list, list); 127 list_move_tail(&iop->list, list);
128 } 128 }
129 } 129 }
130 130
131 if (rearm) 131 if (rearm)
132 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); 132 __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
133 133
134 local_irq_enable(); 134 local_irq_enable();
135} 135}
136 136
137/** 137/**
138 * blk_iopoll_disable - Disable iopoll on this @iop 138 * irq_poll_disable - Disable iopoll on this @iop
139 * @iop: The parent iopoll structure 139 * @iop: The parent iopoll structure
140 * 140 *
141 * Description: 141 * Description:
142 * Disable io polling and wait for any pending callbacks to have completed. 142 * Disable io polling and wait for any pending callbacks to have completed.
143 **/ 143 **/
144void blk_iopoll_disable(struct blk_iopoll *iop) 144void irq_poll_disable(struct irq_poll *iop)
145{ 145{
146 set_bit(IOPOLL_F_DISABLE, &iop->state); 146 set_bit(IRQ_POLL_F_DISABLE, &iop->state);
147 while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state)) 147 while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
148 msleep(1); 148 msleep(1);
149 clear_bit(IOPOLL_F_DISABLE, &iop->state); 149 clear_bit(IRQ_POLL_F_DISABLE, &iop->state);
150} 150}
151EXPORT_SYMBOL(blk_iopoll_disable); 151EXPORT_SYMBOL(irq_poll_disable);
152 152
153/** 153/**
154 * blk_iopoll_enable - Enable iopoll on this @iop 154 * irq_poll_enable - Enable iopoll on this @iop
155 * @iop: The parent iopoll structure 155 * @iop: The parent iopoll structure
156 * 156 *
157 * Description: 157 * Description:
158 * Enable iopoll on this @iop. Note that the handler run will not be 158 * Enable iopoll on this @iop. Note that the handler run will not be
159 * scheduled, it will only mark it as active. 159 * scheduled, it will only mark it as active.
160 **/ 160 **/
161void blk_iopoll_enable(struct blk_iopoll *iop) 161void irq_poll_enable(struct irq_poll *iop)
162{ 162{
163 BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state)); 163 BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state));
164 smp_mb__before_atomic(); 164 smp_mb__before_atomic();
165 clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); 165 clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
166} 166}
167EXPORT_SYMBOL(blk_iopoll_enable); 167EXPORT_SYMBOL(irq_poll_enable);
168 168
169/** 169/**
170 * blk_iopoll_init - Initialize this @iop 170 * irq_poll_init - Initialize this @iop
171 * @iop: The parent iopoll structure 171 * @iop: The parent iopoll structure
172 * @weight: The default weight (or command completion budget) 172 * @weight: The default weight (or command completion budget)
173 * @poll_fn: The handler to invoke 173 * @poll_fn: The handler to invoke
174 * 174 *
175 * Description: 175 * Description:
176 * Initialize this blk_iopoll structure. Before being actively used, the 176 * Initialize and enable this irq_poll structure.
177 * driver must call blk_iopoll_enable().
178 **/ 177 **/
179void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) 178void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
180{ 179{
181 memset(iop, 0, sizeof(*iop)); 180 memset(iop, 0, sizeof(*iop));
182 INIT_LIST_HEAD(&iop->list); 181 INIT_LIST_HEAD(&iop->list);
183 iop->weight = weight; 182 iop->weight = weight;
184 iop->poll = poll_fn; 183 iop->poll = poll_fn;
185 set_bit(IOPOLL_F_SCHED, &iop->state);
186} 184}
187EXPORT_SYMBOL(blk_iopoll_init); 185EXPORT_SYMBOL(irq_poll_init);
188 186
189static int blk_iopoll_cpu_notify(struct notifier_block *self, 187static int irq_poll_cpu_notify(struct notifier_block *self,
190 unsigned long action, void *hcpu) 188 unsigned long action, void *hcpu)
191{ 189{
192 /* 190 /*
@@ -199,26 +197,26 @@ static int blk_iopoll_cpu_notify(struct notifier_block *self,
199 local_irq_disable(); 197 local_irq_disable();
200 list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), 198 list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
201 this_cpu_ptr(&blk_cpu_iopoll)); 199 this_cpu_ptr(&blk_cpu_iopoll));
202 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); 200 __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
203 local_irq_enable(); 201 local_irq_enable();
204 } 202 }
205 203
206 return NOTIFY_OK; 204 return NOTIFY_OK;
207} 205}
208 206
209static struct notifier_block blk_iopoll_cpu_notifier = { 207static struct notifier_block irq_poll_cpu_notifier = {
210 .notifier_call = blk_iopoll_cpu_notify, 208 .notifier_call = irq_poll_cpu_notify,
211}; 209};
212 210
213static __init int blk_iopoll_setup(void) 211static __init int irq_poll_setup(void)
214{ 212{
215 int i; 213 int i;
216 214
217 for_each_possible_cpu(i) 215 for_each_possible_cpu(i)
218 INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); 216 INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
219 217
220 open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); 218 open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
221 register_hotcpu_notifier(&blk_iopoll_cpu_notifier); 219 register_hotcpu_notifier(&irq_poll_cpu_notifier);
222 return 0; 220 return 0;
223} 221}
224subsys_initcall(blk_iopoll_setup); 222subsys_initcall(irq_poll_setup);
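
The renamed library keeps the blk-iopoll model: the IRQ handler schedules the instance, the softirq calls ->poll() with a budget, and the driver calls irq_poll_complete() when it consumes less than that budget. A hedged sketch of a consumer; the my_* names and the weight of 64 are illustrative:

#include <linux/interrupt.h>
#include <linux/irq_poll.h>
#include <linux/kernel.h>

struct my_dev {
	struct irq_poll iop;
	/* ... hardware completion queue ... */
};

static int my_poll(struct irq_poll *iop, int budget)
{
	/* container_of(iop, struct my_dev, iop) recovers the device; drain
	 * up to @budget completions here and count them in done. */
	int done = 0;

	if (done < budget)
		irq_poll_complete(iop);	/* under budget: leave polled mode */
	return done;
}

static irqreturn_t my_irq_handler(int irq, void *data)
{
	struct my_dev *dev = data;

	irq_poll_sched(&dev->iop);	/* defer the work to IRQ_POLL_SOFTIRQ */
	return IRQ_HANDLED;
}

static void my_dev_setup(struct my_dev *dev)
{
	/* irq_poll_init() now leaves the instance enabled. */
	irq_poll_init(&dev->iop, 64, my_poll);
}
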
diff --git a/net/rds/ib.c b/net/rds/ib.c
index f222885ac0c7..9481d55ff6cb 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -122,44 +122,34 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
122static void rds_ib_add_one(struct ib_device *device) 122static void rds_ib_add_one(struct ib_device *device)
123{ 123{
124 struct rds_ib_device *rds_ibdev; 124 struct rds_ib_device *rds_ibdev;
125 struct ib_device_attr *dev_attr;
126 125
127 /* Only handle IB (no iWARP) devices */ 126 /* Only handle IB (no iWARP) devices */
128 if (device->node_type != RDMA_NODE_IB_CA) 127 if (device->node_type != RDMA_NODE_IB_CA)
129 return; 128 return;
130 129
131 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
132 if (!dev_attr)
133 return;
134
135 if (ib_query_device(device, dev_attr)) {
136 rdsdebug("Query device failed for %s\n", device->name);
137 goto free_attr;
138 }
139
140 rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, 130 rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
141 ibdev_to_node(device)); 131 ibdev_to_node(device));
142 if (!rds_ibdev) 132 if (!rds_ibdev)
143 goto free_attr; 133 return;
144 134
145 spin_lock_init(&rds_ibdev->spinlock); 135 spin_lock_init(&rds_ibdev->spinlock);
146 atomic_set(&rds_ibdev->refcount, 1); 136 atomic_set(&rds_ibdev->refcount, 1);
147 INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); 137 INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
148 138
149 rds_ibdev->max_wrs = dev_attr->max_qp_wr; 139 rds_ibdev->max_wrs = device->attrs.max_qp_wr;
150 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); 140 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
151 141
152 rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32; 142 rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
153 rds_ibdev->max_1m_fmrs = dev_attr->max_mr ? 143 rds_ibdev->max_1m_fmrs = device->attrs.max_mr ?
154 min_t(unsigned int, (dev_attr->max_mr / 2), 144 min_t(unsigned int, (device->attrs.max_mr / 2),
155 rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size; 145 rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
156 146
157 rds_ibdev->max_8k_fmrs = dev_attr->max_mr ? 147 rds_ibdev->max_8k_fmrs = device->attrs.max_mr ?
158 min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE), 148 min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
159 rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size; 149 rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
160 150
161 rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom; 151 rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
162 rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom; 152 rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
163 153
164 rds_ibdev->dev = device; 154 rds_ibdev->dev = device;
165 rds_ibdev->pd = ib_alloc_pd(device); 155 rds_ibdev->pd = ib_alloc_pd(device);
@@ -183,7 +173,7 @@ static void rds_ib_add_one(struct ib_device *device)
183 } 173 }
184 174
185 rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n", 175 rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
186 dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, 176 device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
187 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs, 177 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
188 rds_ibdev->max_8k_fmrs); 178 rds_ibdev->max_8k_fmrs);
189 179
@@ -202,8 +192,6 @@ static void rds_ib_add_one(struct ib_device *device)
202 192
203put_dev: 193put_dev:
204 rds_ib_dev_put(rds_ibdev); 194 rds_ib_dev_put(rds_ibdev);
205free_attr:
206 kfree(dev_attr);
207} 195}
208 196
209/* 197/*
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 576f1825fc55..f4a9fff829e0 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -60,30 +60,20 @@ LIST_HEAD(iw_nodev_conns);
60static void rds_iw_add_one(struct ib_device *device) 60static void rds_iw_add_one(struct ib_device *device)
61{ 61{
62 struct rds_iw_device *rds_iwdev; 62 struct rds_iw_device *rds_iwdev;
63 struct ib_device_attr *dev_attr;
64 63
65 /* Only handle iwarp devices */ 64 /* Only handle iwarp devices */
66 if (device->node_type != RDMA_NODE_RNIC) 65 if (device->node_type != RDMA_NODE_RNIC)
67 return; 66 return;
68 67
69 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
70 if (!dev_attr)
71 return;
72
73 if (ib_query_device(device, dev_attr)) {
74 rdsdebug("Query device failed for %s\n", device->name);
75 goto free_attr;
76 }
77
78 rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL); 68 rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
79 if (!rds_iwdev) 69 if (!rds_iwdev)
80 goto free_attr; 70 return;
81 71
82 spin_lock_init(&rds_iwdev->spinlock); 72 spin_lock_init(&rds_iwdev->spinlock);
83 73
84 rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY); 74 rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
85 rds_iwdev->max_wrs = dev_attr->max_qp_wr; 75 rds_iwdev->max_wrs = device->attrs.max_qp_wr;
86 rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE); 76 rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
87 77
88 rds_iwdev->dev = device; 78 rds_iwdev->dev = device;
89 rds_iwdev->pd = ib_alloc_pd(device); 79 rds_iwdev->pd = ib_alloc_pd(device);
@@ -111,8 +101,7 @@ static void rds_iw_add_one(struct ib_device *device)
111 list_add_tail(&rds_iwdev->list, &rds_iw_devices); 101 list_add_tail(&rds_iwdev->list, &rds_iw_devices);
112 102
113 ib_set_client_data(device, &rds_iw_client, rds_iwdev); 103 ib_set_client_data(device, &rds_iw_client, rds_iwdev);
114 104 return;
115 goto free_attr;
116 105
117err_mr: 106err_mr:
118 if (rds_iwdev->mr) 107 if (rds_iwdev->mr)
@@ -121,8 +110,6 @@ err_pd:
121 ib_dealloc_pd(rds_iwdev->pd); 110 ib_dealloc_pd(rds_iwdev->pd);
122free_dev: 111free_dev:
123 kfree(rds_iwdev); 112 kfree(rds_iwdev);
124free_attr:
125 kfree(dev_attr);
126} 113}
127 114
128static void rds_iw_remove_one(struct ib_device *device, void *client_data) 115static void rds_iw_remove_one(struct ib_device *device, void *client_data)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2e98f4a243e5..37edea6fa92d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt)
1425 if (atomic_dec_and_test(&xprt->count)) 1425 if (atomic_dec_and_test(&xprt->count))
1426 xprt_destroy(xprt); 1426 xprt_destroy(xprt);
1427} 1427}
1428EXPORT_SYMBOL_GPL(xprt_put);
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 33f99d3004f2..dc9f3b513a05 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
2 2
3rpcrdma-y := transport.o rpc_rdma.o verbs.o \ 3rpcrdma-y := transport.o rpc_rdma.o verbs.o \
4 fmr_ops.o frwr_ops.o physical_ops.o \ 4 fmr_ops.o frwr_ops.o physical_ops.o \
5 svc_rdma.o svc_rdma_transport.o \ 5 svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
6 svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ 6 svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
7 module.o 7 module.o
8rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o 8rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c6836844bd0e..e16567389e28 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -190,12 +190,11 @@ static int
190frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, 190frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
191 struct rpcrdma_create_data_internal *cdata) 191 struct rpcrdma_create_data_internal *cdata)
192{ 192{
193 struct ib_device_attr *devattr = &ia->ri_devattr;
194 int depth, delta; 193 int depth, delta;
195 194
196 ia->ri_max_frmr_depth = 195 ia->ri_max_frmr_depth =
197 min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, 196 min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
198 devattr->max_fast_reg_page_list_len); 197 ia->ri_device->attrs.max_fast_reg_page_list_len);
199 dprintk("RPC: %s: device's max FR page list len = %u\n", 198 dprintk("RPC: %s: device's max FR page list len = %u\n",
200 __func__, ia->ri_max_frmr_depth); 199 __func__, ia->ri_max_frmr_depth);
201 200
@@ -222,8 +221,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
222 } 221 }
223 222
224 ep->rep_attr.cap.max_send_wr *= depth; 223 ep->rep_attr.cap.max_send_wr *= depth;
225 if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { 224 if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
226 cdata->max_requests = devattr->max_qp_wr / depth; 225 cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
227 if (!cdata->max_requests) 226 if (!cdata->max_requests)
228 return -EINVAL; 227 return -EINVAL;
229 ep->rep_attr.cap.max_send_wr = cdata->max_requests * 228 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 1b7051bdbdc8..c846ca9f1eba 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -55,6 +55,7 @@ unsigned int svcrdma_ord = RPCRDMA_ORD;
55static unsigned int min_ord = 1; 55static unsigned int min_ord = 1;
56static unsigned int max_ord = 4096; 56static unsigned int max_ord = 4096;
57unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS; 57unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
58unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
58static unsigned int min_max_requests = 4; 59static unsigned int min_max_requests = 4;
59static unsigned int max_max_requests = 16384; 60static unsigned int max_max_requests = 16384;
60unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE; 61unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
@@ -71,10 +72,6 @@ atomic_t rdma_stat_rq_prod;
71atomic_t rdma_stat_sq_poll; 72atomic_t rdma_stat_sq_poll;
72atomic_t rdma_stat_sq_prod; 73atomic_t rdma_stat_sq_prod;
73 74
74/* Temporary NFS request map and context caches */
75struct kmem_cache *svc_rdma_map_cachep;
76struct kmem_cache *svc_rdma_ctxt_cachep;
77
78struct workqueue_struct *svc_rdma_wq; 75struct workqueue_struct *svc_rdma_wq;
79 76
80/* 77/*
@@ -243,17 +240,16 @@ void svc_rdma_cleanup(void)
243 svc_unreg_xprt_class(&svc_rdma_bc_class); 240 svc_unreg_xprt_class(&svc_rdma_bc_class);
244#endif 241#endif
245 svc_unreg_xprt_class(&svc_rdma_class); 242 svc_unreg_xprt_class(&svc_rdma_class);
246 kmem_cache_destroy(svc_rdma_map_cachep);
247 kmem_cache_destroy(svc_rdma_ctxt_cachep);
248} 243}
249 244
250int svc_rdma_init(void) 245int svc_rdma_init(void)
251{ 246{
252 dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); 247 dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
253 dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); 248 dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
254 dprintk("\tmax_requests : %d\n", svcrdma_max_requests); 249 dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
255 dprintk("\tsq_depth : %d\n", 250 dprintk("\tsq_depth : %u\n",
256 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); 251 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
252 dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
257 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); 253 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
258 254
259 svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0); 255 svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
@@ -264,39 +260,10 @@ int svc_rdma_init(void)
264 svcrdma_table_header = 260 svcrdma_table_header =
265 register_sysctl_table(svcrdma_root_table); 261 register_sysctl_table(svcrdma_root_table);
266 262
267 /* Create the temporary map cache */
268 svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
269 sizeof(struct svc_rdma_req_map),
270 0,
271 SLAB_HWCACHE_ALIGN,
272 NULL);
273 if (!svc_rdma_map_cachep) {
274 printk(KERN_INFO "Could not allocate map cache.\n");
275 goto err0;
276 }
277
278 /* Create the temporary context cache */
279 svc_rdma_ctxt_cachep =
280 kmem_cache_create("svc_rdma_ctxt_cache",
281 sizeof(struct svc_rdma_op_ctxt),
282 0,
283 SLAB_HWCACHE_ALIGN,
284 NULL);
285 if (!svc_rdma_ctxt_cachep) {
286 printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
287 goto err1;
288 }
289
290 /* Register RDMA with the SVC transport switch */ 263 /* Register RDMA with the SVC transport switch */
291 svc_reg_xprt_class(&svc_rdma_class); 264 svc_reg_xprt_class(&svc_rdma_class);
292#if defined(CONFIG_SUNRPC_BACKCHANNEL) 265#if defined(CONFIG_SUNRPC_BACKCHANNEL)
293 svc_reg_xprt_class(&svc_rdma_bc_class); 266 svc_reg_xprt_class(&svc_rdma_bc_class);
294#endif 267#endif
295 return 0; 268 return 0;
296 err1:
297 kmem_cache_destroy(svc_rdma_map_cachep);
298 err0:
299 unregister_sysctl_table(svcrdma_table_header);
300 destroy_workqueue(svc_rdma_wq);
301 return -ENOMEM;
302} 269}
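
The svc_rdma.c hunks above drop the global map and ctxt kmem_caches (those objects move to per-transport free lists later in this patch) and introduce the svcrdma_max_bc_requests tunable, which the accept path later combines with svcrdma_max_requests to size the receive and send queues. The arithmetic is small enough to show directly; the standalone C sketch below only illustrates it, and SQ_DEPTH_MULT and the device limit are arbitrary stand-ins rather than values taken from the kernel headers.

#include <stdio.h>

#define SQ_DEPTH_MULT 8		/* stand-in for RPCRDMA_SQ_DEPTH_MULT */

static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Derive per-connection queue depths from the two module tunables,
 * clamped by what the device can actually support (max_qp_wr). */
static void derive_depths(unsigned int max_requests,
			  unsigned int max_bc_requests,
			  unsigned int dev_max_qp_wr,
			  unsigned int *rq_depth, unsigned int *sq_depth)
{
	max_requests    = min_u32(max_requests, dev_max_qp_wr);
	max_bc_requests = min_u32(max_bc_requests, dev_max_qp_wr);

	*rq_depth = max_requests + max_bc_requests;
	*sq_depth = SQ_DEPTH_MULT * *rq_depth;
}

int main(void)
{
	unsigned int rq, sq;

	derive_depths(32, 2, 16384, &rq, &sq);
	printf("rq_depth=%u sq_depth=%u\n", rq, sq);
	return 0;
}
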
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
new file mode 100644
index 000000000000..65a7c232a345
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -0,0 +1,371 @@
1/*
2 * Copyright (c) 2015 Oracle. All rights reserved.
3 *
4 * Support for backward direction RPCs on RPC/RDMA (server-side).
5 */
6
7#include <linux/sunrpc/svc_rdma.h>
8#include "xprt_rdma.h"
9
10#define RPCDBG_FACILITY RPCDBG_SVCXPRT
11
12#undef SVCRDMA_BACKCHANNEL_DEBUG
13
14int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
15 struct xdr_buf *rcvbuf)
16{
17 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
18 struct kvec *dst, *src = &rcvbuf->head[0];
19 struct rpc_rqst *req;
20 unsigned long cwnd;
21 u32 credits;
22 size_t len;
23 __be32 xid;
24 __be32 *p;
25 int ret;
26
27 p = (__be32 *)src->iov_base;
28 len = src->iov_len;
29 xid = rmsgp->rm_xid;
30
31#ifdef SVCRDMA_BACKCHANNEL_DEBUG
32 pr_info("%s: xid=%08x, length=%zu\n",
33 __func__, be32_to_cpu(xid), len);
34 pr_info("%s: RPC/RDMA: %*ph\n",
35 __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
36 pr_info("%s: RPC: %*ph\n",
37 __func__, (int)len, p);
38#endif
39
40 ret = -EAGAIN;
41 if (src->iov_len < 24)
42 goto out_shortreply;
43
44 spin_lock_bh(&xprt->transport_lock);
45 req = xprt_lookup_rqst(xprt, xid);
46 if (!req)
47 goto out_notfound;
48
49 dst = &req->rq_private_buf.head[0];
50 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
51 if (dst->iov_len < len)
52 goto out_unlock;
53 memcpy(dst->iov_base, p, len);
54
55 credits = be32_to_cpu(rmsgp->rm_credit);
56 if (credits == 0)
57 credits = 1; /* don't deadlock */
58 else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
59 credits = r_xprt->rx_buf.rb_bc_max_requests;
60
61 cwnd = xprt->cwnd;
62 xprt->cwnd = credits << RPC_CWNDSHIFT;
63 if (xprt->cwnd > cwnd)
64 xprt_release_rqst_cong(req->rq_task);
65
66 ret = 0;
67 xprt_complete_rqst(req->rq_task, rcvbuf->len);
68 rcvbuf->len = 0;
69
70out_unlock:
71 spin_unlock_bh(&xprt->transport_lock);
72out:
73 return ret;
74
75out_shortreply:
76 dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
77 xprt, src->iov_len);
78 goto out;
79
80out_notfound:
81 dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
82 xprt, be32_to_cpu(xid));
83
84 goto out_unlock;
85}
86
87/* Send a backwards direction RPC call.
88 *
89 * Caller holds the connection's mutex and has already marshaled
90 * the RPC/RDMA request.
91 *
92 * This is similar to svc_rdma_reply, but takes an rpc_rqst
93 * instead, does not support chunks, and avoids blocking memory
94 * allocation.
95 *
96 * XXX: There is still an opportunity to block in svc_rdma_send()
97 * if there are no SQ entries to post the Send. This may occur if
98 * the adapter has a small maximum SQ depth.
99 */
100static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
101 struct rpc_rqst *rqst)
102{
103 struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
104 struct svc_rdma_op_ctxt *ctxt;
105 struct svc_rdma_req_map *vec;
106 struct ib_send_wr send_wr;
107 int ret;
108
109 vec = svc_rdma_get_req_map(rdma);
110 ret = svc_rdma_map_xdr(rdma, sndbuf, vec);
111 if (ret)
112 goto out_err;
113
114 /* Post a recv buffer to handle the reply for this request. */
115 ret = svc_rdma_post_recv(rdma, GFP_NOIO);
116 if (ret) {
117 pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
118 ret);
119 pr_err("svcrdma: closing transport %p.\n", rdma);
120 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
121 ret = -ENOTCONN;
122 goto out_err;
123 }
124
125 ctxt = svc_rdma_get_context(rdma);
126 ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
127 ctxt->count = 1;
128
129 ctxt->wr_op = IB_WR_SEND;
130 ctxt->direction = DMA_TO_DEVICE;
131 ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
132 ctxt->sge[0].length = sndbuf->len;
133 ctxt->sge[0].addr =
134 ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
135 sndbuf->len, DMA_TO_DEVICE);
136 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
137 ret = -EIO;
138 goto out_unmap;
139 }
140 atomic_inc(&rdma->sc_dma_used);
141
142 memset(&send_wr, 0, sizeof(send_wr));
143 send_wr.wr_id = (unsigned long)ctxt;
144 send_wr.sg_list = ctxt->sge;
145 send_wr.num_sge = 1;
146 send_wr.opcode = IB_WR_SEND;
147 send_wr.send_flags = IB_SEND_SIGNALED;
148
149 ret = svc_rdma_send(rdma, &send_wr);
150 if (ret) {
151 ret = -EIO;
152 goto out_unmap;
153 }
154
155out_err:
156 svc_rdma_put_req_map(rdma, vec);
157 dprintk("svcrdma: %s returns %d\n", __func__, ret);
158 return ret;
159
160out_unmap:
161 svc_rdma_unmap_dma(ctxt);
162 svc_rdma_put_context(ctxt, 1);
163 goto out_err;
164}
165
166/* Server-side transport endpoint wants a whole page for its send
167 * buffer. The client RPC code constructs the RPC header in this
168 * buffer before it invokes ->send_request.
169 *
170 * Returns NULL if there was a temporary allocation failure.
171 */
172static void *
173xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
174{
175 struct rpc_rqst *rqst = task->tk_rqstp;
176 struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
177 struct svcxprt_rdma *rdma;
178 struct page *page;
179
180 rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
181
182 /* Prevent an infinite loop: try to make this case work */
183 if (size > PAGE_SIZE)
184 WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
185 size);
186
187 page = alloc_page(RPCRDMA_DEF_GFP);
188 if (!page)
189 return NULL;
190
191 return page_address(page);
192}
193
194static void
195xprt_rdma_bc_free(void *buffer)
196{
197 /* No-op: ctxt and page have already been freed. */
198}
199
200static int
201rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
202{
203 struct rpc_xprt *xprt = rqst->rq_xprt;
204 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
205 struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
206 int rc;
207
208 /* Space in the send buffer for an RPC/RDMA header is reserved
209 * via xprt->tsh_size.
210 */
211 headerp->rm_xid = rqst->rq_xid;
212 headerp->rm_vers = rpcrdma_version;
213 headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
214 headerp->rm_type = rdma_msg;
215 headerp->rm_body.rm_chunks[0] = xdr_zero;
216 headerp->rm_body.rm_chunks[1] = xdr_zero;
217 headerp->rm_body.rm_chunks[2] = xdr_zero;
218
219#ifdef SVCRDMA_BACKCHANNEL_DEBUG
220 pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
221#endif
222
223 rc = svc_rdma_bc_sendto(rdma, rqst);
224 if (rc)
225 goto drop_connection;
226 return rc;
227
228drop_connection:
229 dprintk("svcrdma: failed to send bc call\n");
230 xprt_disconnect_done(xprt);
231 return -ENOTCONN;
232}
233
234/* Send an RPC call on the passive end of a transport
235 * connection.
236 */
237static int
238xprt_rdma_bc_send_request(struct rpc_task *task)
239{
240 struct rpc_rqst *rqst = task->tk_rqstp;
241 struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
242 struct svcxprt_rdma *rdma;
243 int ret;
244
245 dprintk("svcrdma: sending bc call with xid: %08x\n",
246 be32_to_cpu(rqst->rq_xid));
247
248 if (!mutex_trylock(&sxprt->xpt_mutex)) {
249 rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
250 if (!mutex_trylock(&sxprt->xpt_mutex))
251 return -EAGAIN;
252 rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
253 }
254
255 ret = -ENOTCONN;
256 rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
257 if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
258 ret = rpcrdma_bc_send_request(rdma, rqst);
259
260 mutex_unlock(&sxprt->xpt_mutex);
261
262 if (ret < 0)
263 return ret;
264 return 0;
265}
266
267static void
268xprt_rdma_bc_close(struct rpc_xprt *xprt)
269{
270 dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
271}
272
273static void
274xprt_rdma_bc_put(struct rpc_xprt *xprt)
275{
276 dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
277
278 xprt_free(xprt);
279 module_put(THIS_MODULE);
280}
281
282static struct rpc_xprt_ops xprt_rdma_bc_procs = {
283 .reserve_xprt = xprt_reserve_xprt_cong,
284 .release_xprt = xprt_release_xprt_cong,
285 .alloc_slot = xprt_alloc_slot,
286 .release_request = xprt_release_rqst_cong,
287 .buf_alloc = xprt_rdma_bc_allocate,
288 .buf_free = xprt_rdma_bc_free,
289 .send_request = xprt_rdma_bc_send_request,
290 .set_retrans_timeout = xprt_set_retrans_timeout_def,
291 .close = xprt_rdma_bc_close,
292 .destroy = xprt_rdma_bc_put,
293 .print_stats = xprt_rdma_print_stats
294};
295
296static const struct rpc_timeout xprt_rdma_bc_timeout = {
297 .to_initval = 60 * HZ,
298 .to_maxval = 60 * HZ,
299};
300
301/* It shouldn't matter if the number of backchannel session slots
302 * doesn't match the number of RPC/RDMA credits. That just means
303 * one or the other will have extra slots that aren't used.
304 */
305static struct rpc_xprt *
306xprt_setup_rdma_bc(struct xprt_create *args)
307{
308 struct rpc_xprt *xprt;
309 struct rpcrdma_xprt *new_xprt;
310
311 if (args->addrlen > sizeof(xprt->addr)) {
312 dprintk("RPC: %s: address too large\n", __func__);
313 return ERR_PTR(-EBADF);
314 }
315
316 xprt = xprt_alloc(args->net, sizeof(*new_xprt),
317 RPCRDMA_MAX_BC_REQUESTS,
318 RPCRDMA_MAX_BC_REQUESTS);
319 if (!xprt) {
320 dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
321 __func__);
322 return ERR_PTR(-ENOMEM);
323 }
324
325 xprt->timeout = &xprt_rdma_bc_timeout;
326 xprt_set_bound(xprt);
327 xprt_set_connected(xprt);
328 xprt->bind_timeout = RPCRDMA_BIND_TO;
329 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
330 xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
331
332 xprt->prot = XPRT_TRANSPORT_BC_RDMA;
333 xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
334 xprt->ops = &xprt_rdma_bc_procs;
335
336 memcpy(&xprt->addr, args->dstaddr, args->addrlen);
337 xprt->addrlen = args->addrlen;
338 xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
339 xprt->resvport = 0;
340
341 xprt->max_payload = xprt_rdma_max_inline_read;
342
343 new_xprt = rpcx_to_rdmax(xprt);
344 new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;
345
346 xprt_get(xprt);
347 args->bc_xprt->xpt_bc_xprt = xprt;
348 xprt->bc_xprt = args->bc_xprt;
349
350 if (!try_module_get(THIS_MODULE))
351 goto out_fail;
352
353 /* Final put for backchannel xprt is in __svc_rdma_free */
354 xprt_get(xprt);
355 return xprt;
356
357out_fail:
358 xprt_rdma_free_addresses(xprt);
359 args->bc_xprt->xpt_bc_xprt = NULL;
360 xprt_put(xprt);
361 xprt_free(xprt);
362 return ERR_PTR(-EINVAL);
363}
364
365struct xprt_class xprt_rdma_bc = {
366 .list = LIST_HEAD_INIT(xprt_rdma_bc.list),
367 .name = "rdma backchannel",
368 .owner = THIS_MODULE,
369 .ident = XPRT_TRANSPORT_BC_RDMA,
370 .setup = xprt_setup_rdma_bc,
371};
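
The new svc_rdma_handle_bc_reply() above matches an incoming backchannel reply to the rpc_rqst waiting on its XID, copies the payload into that request's receive buffer, and clamps the advertised credit count to the configured backchannel maximum before adjusting the congestion window. The standalone sketch below restates that lookup-copy-clamp flow in plain C; pending_find(), deliver_reply(), MAX_BC_CREDITS and the fixed-size table are invented for illustration and are not kernel interfaces.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_BC_CREDITS 8	/* stand-in for rb_bc_max_requests */

struct pending_req {
	uint32_t xid;
	char     buf[256];
	size_t   buflen;
	int      in_use;
};

static struct pending_req table[16];

/* Find the request that is waiting for this XID, if any. */
static struct pending_req *pending_find(uint32_t xid)
{
	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].in_use && table[i].xid == xid)
			return &table[i];
	return NULL;
}

/* Deliver a reply: copy the payload into the waiter's buffer and
 * return the credit value the transport should use, clamped to a
 * sane range (never zero, never above the configured maximum). */
static int deliver_reply(uint32_t xid, const void *p, size_t len,
			 uint32_t credits_advertised, uint32_t *credits_out)
{
	struct pending_req *req = pending_find(xid);

	if (!req)
		return -1;		/* unrecognized XID */
	if (len > req->buflen)
		return -1;		/* reply too large for the buffer */
	memcpy(req->buf, p, len);
	req->in_use = 0;

	if (credits_advertised == 0)
		credits_advertised = 1;	/* don't deadlock */
	else if (credits_advertised > MAX_BC_CREDITS)
		credits_advertised = MAX_BC_CREDITS;
	*credits_out = credits_advertised;
	return 0;
}

int main(void)
{
	uint32_t credits;

	table[0] = (struct pending_req){ .xid = 0x1234, .buflen = 256, .in_use = 1 };
	if (deliver_reply(0x1234, "hello", 5, 0, &credits) == 0)
		printf("delivered, credits=%u\n", credits);
	return 0;
}
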
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ff4f01e527ec..c8b8a8b4181e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -144,6 +144,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
144 144
145 head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; 145 head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
146 head->arg.page_len += len; 146 head->arg.page_len += len;
147
147 head->arg.len += len; 148 head->arg.len += len;
148 if (!pg_off) 149 if (!pg_off)
149 head->count++; 150 head->count++;
@@ -160,8 +161,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
160 goto err; 161 goto err;
161 atomic_inc(&xprt->sc_dma_used); 162 atomic_inc(&xprt->sc_dma_used);
162 163
163 /* The lkey here is either a local dma lkey or a dma_mr lkey */ 164 ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
164 ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
165 ctxt->sge[pno].length = len; 165 ctxt->sge[pno].length = len;
166 ctxt->count++; 166 ctxt->count++;
167 167
@@ -567,6 +567,38 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
567 return ret; 567 return ret;
568} 568}
569 569
570/* By convention, backchannel calls arrive via rdma_msg type
571 * messages, and never populate the chunk lists. This makes
572 * the RPC/RDMA header small and fixed in size, so it is
573 * straightforward to check the RPC header's direction field.
574 */
575static bool
576svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
577{
578 __be32 *p = (__be32 *)rmsgp;
579
580 if (!xprt->xpt_bc_xprt)
581 return false;
582
583 if (rmsgp->rm_type != rdma_msg)
584 return false;
585 if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
586 return false;
587 if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
588 return false;
589 if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
590 return false;
591
592 /* sanity */
593 if (p[7] != rmsgp->rm_xid)
594 return false;
595 /* call direction */
596 if (p[8] == cpu_to_be32(RPC_CALL))
597 return false;
598
599 return true;
600}
601
570/* 602/*
571 * Set up the rqstp thread context to point to the RQ buffer. If 603 * Set up the rqstp thread context to point to the RQ buffer. If
572 * necessary, pull additional data from the client with an RDMA_READ 604 * necessary, pull additional data from the client with an RDMA_READ
@@ -632,6 +664,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
632 goto close_out; 664 goto close_out;
633 } 665 }
634 666
667 if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
668 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
669 &rqstp->rq_arg);
670 svc_rdma_put_context(ctxt, 0);
671 if (ret)
672 goto repost;
673 return ret;
674 }
675
635 /* Read read-list data. */ 676 /* Read read-list data. */
636 ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); 677 ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
637 if (ret > 0) { 678 if (ret > 0) {
@@ -668,4 +709,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
668 set_bit(XPT_CLOSE, &xprt->xpt_flags); 709 set_bit(XPT_CLOSE, &xprt->xpt_flags);
669defer: 710defer:
670 return 0; 711 return 0;
712
713repost:
714 ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
715 if (ret) {
716 pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
717 ret);
718 pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
719 set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
720 ret = -ENOTCONN;
721 }
722 return ret;
671} 723}
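
svc_rdma_is_backchannel_reply() above depends on the convention that backchannel replies always arrive as rdma_msg messages with all three chunk lists empty, so the RPC/RDMA header is fixed-size and the RPC call-direction word sits at a known offset. The sketch below restates that shape test over an array of big-endian 32-bit words; the word offsets mirror the checks in the hunk (3 = message type, 4-6 = chunk lists, 7 = embedded RPC XID, 8 = direction), but the helper and constants are illustrative, not kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* Illustrative constants: in RPC/RDMA the rdma_msg procedure is 0 and
 * an empty chunk list is encoded as the XDR word 0; in RPC, direction
 * 0 is a call and 1 is a reply. */
#define RDMA_MSG  0
#define XDR_ZERO  0
#define RPC_CALL  0

/* Word layout assumed here, matching the checks in the hunk:
 *   p[0] RPC/RDMA XID     p[3] message type
 *   p[4..6] chunk lists   p[7] RPC XID   p[8] RPC direction */
static bool is_backchannel_reply(const uint32_t *p /* big-endian words */)
{
	if (ntohl(p[3]) != RDMA_MSG)
		return false;
	if (p[4] != htonl(XDR_ZERO) || p[5] != htonl(XDR_ZERO) ||
	    p[6] != htonl(XDR_ZERO))
		return false;
	if (p[7] != p[0])		/* sanity: the two XIDs must agree */
		return false;
	if (ntohl(p[8]) == RPC_CALL)	/* forward call, not a bc reply */
		return false;
	return true;
}

int main(void)
{
	uint32_t hdr[9] = {
		htonl(0x1234), htonl(1), htonl(16), htonl(RDMA_MSG),
		htonl(XDR_ZERO), htonl(XDR_ZERO), htonl(XDR_ZERO),
		htonl(0x1234), htonl(1 /* reply */),
	};

	printf("backchannel reply? %s\n",
	       is_backchannel_reply(hdr) ? "yes" : "no");
	return 0;
}
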
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 969a1ab75fc3..df57f3ce6cd2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -50,9 +50,9 @@
50 50
51#define RPCDBG_FACILITY RPCDBG_SVCXPRT 51#define RPCDBG_FACILITY RPCDBG_SVCXPRT
52 52
53static int map_xdr(struct svcxprt_rdma *xprt, 53int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
54 struct xdr_buf *xdr, 54 struct xdr_buf *xdr,
55 struct svc_rdma_req_map *vec) 55 struct svc_rdma_req_map *vec)
56{ 56{
57 int sge_no; 57 int sge_no;
58 u32 sge_bytes; 58 u32 sge_bytes;
@@ -62,7 +62,7 @@ static int map_xdr(struct svcxprt_rdma *xprt,
62 62
63 if (xdr->len != 63 if (xdr->len !=
64 (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { 64 (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
65 pr_err("svcrdma: map_xdr: XDR buffer length error\n"); 65 pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
66 return -EIO; 66 return -EIO;
67 } 67 }
68 68
@@ -97,9 +97,9 @@ static int map_xdr(struct svcxprt_rdma *xprt,
97 sge_no++; 97 sge_no++;
98 } 98 }
99 99
100 dprintk("svcrdma: map_xdr: sge_no %d page_no %d " 100 dprintk("svcrdma: %s: sge_no %d page_no %d "
101 "page_base %u page_len %u head_len %zu tail_len %zu\n", 101 "page_base %u page_len %u head_len %zu tail_len %zu\n",
102 sge_no, page_no, xdr->page_base, xdr->page_len, 102 __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
103 xdr->head[0].iov_len, xdr->tail[0].iov_len); 103 xdr->head[0].iov_len, xdr->tail[0].iov_len);
104 104
105 vec->count = sge_no; 105 vec->count = sge_no;
@@ -265,7 +265,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
265 sge[sge_no].addr)) 265 sge[sge_no].addr))
266 goto err; 266 goto err;
267 atomic_inc(&xprt->sc_dma_used); 267 atomic_inc(&xprt->sc_dma_used);
268 sge[sge_no].lkey = xprt->sc_dma_lkey; 268 sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
269 ctxt->count++; 269 ctxt->count++;
270 sge_off = 0; 270 sge_off = 0;
271 sge_no++; 271 sge_no++;
@@ -465,7 +465,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
465 int ret; 465 int ret;
466 466
467 /* Post a recv buffer to handle another request. */ 467 /* Post a recv buffer to handle another request. */
468 ret = svc_rdma_post_recv(rdma); 468 ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
469 if (ret) { 469 if (ret) {
470 printk(KERN_INFO 470 printk(KERN_INFO
471 "svcrdma: could not post a receive buffer, err=%d." 471 "svcrdma: could not post a receive buffer, err=%d."
@@ -480,7 +480,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
480 ctxt->count = 1; 480 ctxt->count = 1;
481 481
482 /* Prepare the SGE for the RPCRDMA Header */ 482 /* Prepare the SGE for the RPCRDMA Header */
483 ctxt->sge[0].lkey = rdma->sc_dma_lkey; 483 ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
484 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 484 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
485 ctxt->sge[0].addr = 485 ctxt->sge[0].addr =
486 ib_dma_map_page(rdma->sc_cm_id->device, page, 0, 486 ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
@@ -504,7 +504,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
504 ctxt->sge[sge_no].addr)) 504 ctxt->sge[sge_no].addr))
505 goto err; 505 goto err;
506 atomic_inc(&rdma->sc_dma_used); 506 atomic_inc(&rdma->sc_dma_used);
507 ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; 507 ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
508 ctxt->sge[sge_no].length = sge_bytes; 508 ctxt->sge[sge_no].length = sge_bytes;
509 } 509 }
510 if (byte_count != 0) { 510 if (byte_count != 0) {
@@ -591,14 +591,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
591 /* Build an req vec for the XDR */ 591 /* Build an req vec for the XDR */
592 ctxt = svc_rdma_get_context(rdma); 592 ctxt = svc_rdma_get_context(rdma);
593 ctxt->direction = DMA_TO_DEVICE; 593 ctxt->direction = DMA_TO_DEVICE;
594 vec = svc_rdma_get_req_map(); 594 vec = svc_rdma_get_req_map(rdma);
595 ret = map_xdr(rdma, &rqstp->rq_res, vec); 595 ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
596 if (ret) 596 if (ret)
597 goto err0; 597 goto err0;
598 inline_bytes = rqstp->rq_res.len; 598 inline_bytes = rqstp->rq_res.len;
599 599
600 /* Create the RDMA response header */ 600 /* Create the RDMA response header */
601 res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL); 601 ret = -ENOMEM;
602 res_page = alloc_page(GFP_KERNEL);
603 if (!res_page)
604 goto err0;
602 rdma_resp = page_address(res_page); 605 rdma_resp = page_address(res_page);
603 reply_ary = svc_rdma_get_reply_array(rdma_argp); 606 reply_ary = svc_rdma_get_reply_array(rdma_argp);
604 if (reply_ary) 607 if (reply_ary)
@@ -630,14 +633,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
630 633
631 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec, 634 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
632 inline_bytes); 635 inline_bytes);
633 svc_rdma_put_req_map(vec); 636 svc_rdma_put_req_map(rdma, vec);
634 dprintk("svcrdma: send_reply returns %d\n", ret); 637 dprintk("svcrdma: send_reply returns %d\n", ret);
635 return ret; 638 return ret;
636 639
637 err1: 640 err1:
638 put_page(res_page); 641 put_page(res_page);
639 err0: 642 err0:
640 svc_rdma_put_req_map(vec); 643 svc_rdma_put_req_map(rdma, vec);
641 svc_rdma_put_context(ctxt, 0); 644 svc_rdma_put_context(ctxt, 0);
642 return ret; 645 return ret;
643} 646}
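
svc_rdma_map_xdr() (the renamed, now-exported map_xdr()) begins by cross-checking that the xdr_buf's claimed total length equals head + page data + tail before it builds the SGE vector, returning -EIO otherwise; the remaining hunks in this file replace __GFP_NOFAIL allocations with checked ones that unwind through err0. A userspace sketch of the length check follows; struct split_buf and its fields are invented stand-ins for struct xdr_buf.

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-in for struct xdr_buf: a message split into a head
 * kvec, some number of page bytes, and a tail kvec. */
struct split_buf {
	size_t head_len;
	size_t page_len;
	size_t tail_len;
	size_t total_len;	/* claimed overall length */
};

/* Mirror of the sanity check at the top of svc_rdma_map_xdr(): refuse
 * to build a scatter list if the pieces do not add up. */
static int check_split_buf(const struct split_buf *b)
{
	if (b->total_len != b->head_len + b->page_len + b->tail_len) {
		fprintf(stderr, "split_buf: length mismatch (%zu != %zu)\n",
			b->total_len,
			b->head_len + b->page_len + b->tail_len);
		return -1;	/* the kernel code returns -EIO here */
	}
	return 0;
}

int main(void)
{
	struct split_buf ok  = { 64, 4096, 8, 64 + 4096 + 8 };
	struct split_buf bad = { 64, 4096, 8, 5000 };

	printf("ok:  %d\n", check_split_buf(&ok));
	printf("bad: %d\n", check_split_buf(&bad));
	return 0;
}
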
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index b348b4adef29..5763825d09bf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
153} 153}
154#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 154#endif /* CONFIG_SUNRPC_BACKCHANNEL */
155 155
156struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 156static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
157 gfp_t flags)
157{ 158{
158 struct svc_rdma_op_ctxt *ctxt; 159 struct svc_rdma_op_ctxt *ctxt;
159 160
160 ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, 161 ctxt = kmalloc(sizeof(*ctxt), flags);
161 GFP_KERNEL | __GFP_NOFAIL); 162 if (ctxt) {
162 ctxt->xprt = xprt; 163 ctxt->xprt = xprt;
163 INIT_LIST_HEAD(&ctxt->dto_q); 164 INIT_LIST_HEAD(&ctxt->free);
165 INIT_LIST_HEAD(&ctxt->dto_q);
166 }
167 return ctxt;
168}
169
170static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
171{
172 unsigned int i;
173
174 /* Each RPC/RDMA credit can consume a number of send
175 * and receive WQEs. One ctxt is allocated for each.
176 */
177 i = xprt->sc_sq_depth + xprt->sc_rq_depth;
178
179 while (i--) {
180 struct svc_rdma_op_ctxt *ctxt;
181
182 ctxt = alloc_ctxt(xprt, GFP_KERNEL);
183 if (!ctxt) {
184 dprintk("svcrdma: No memory for RDMA ctxt\n");
185 return false;
186 }
187 list_add(&ctxt->free, &xprt->sc_ctxts);
188 }
189 return true;
190}
191
192struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
193{
194 struct svc_rdma_op_ctxt *ctxt = NULL;
195
196 spin_lock_bh(&xprt->sc_ctxt_lock);
197 xprt->sc_ctxt_used++;
198 if (list_empty(&xprt->sc_ctxts))
199 goto out_empty;
200
201 ctxt = list_first_entry(&xprt->sc_ctxts,
202 struct svc_rdma_op_ctxt, free);
203 list_del_init(&ctxt->free);
204 spin_unlock_bh(&xprt->sc_ctxt_lock);
205
206out:
164 ctxt->count = 0; 207 ctxt->count = 0;
165 ctxt->frmr = NULL; 208 ctxt->frmr = NULL;
166 atomic_inc(&xprt->sc_ctxt_used);
167 return ctxt; 209 return ctxt;
210
211out_empty:
212 /* Either pre-allocation missed the mark, or send
213 * queue accounting is broken.
214 */
215 spin_unlock_bh(&xprt->sc_ctxt_lock);
216
217 ctxt = alloc_ctxt(xprt, GFP_NOIO);
218 if (ctxt)
219 goto out;
220
221 spin_lock_bh(&xprt->sc_ctxt_lock);
222 xprt->sc_ctxt_used--;
223 spin_unlock_bh(&xprt->sc_ctxt_lock);
224 WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
225 return NULL;
168} 226}
169 227
170void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) 228void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
@@ -174,11 +232,11 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
174 for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { 232 for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
175 /* 233 /*
176 * Unmap the DMA addr in the SGE if the lkey matches 234 * Unmap the DMA addr in the SGE if the lkey matches
177 * the sc_dma_lkey, otherwise, ignore it since it is 235 * the local_dma_lkey, otherwise, ignore it since it is
178 * an FRMR lkey and will be unmapped later when the 236 * an FRMR lkey and will be unmapped later when the
179 * last WR that uses it completes. 237 * last WR that uses it completes.
180 */ 238 */
181 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { 239 if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
182 atomic_dec(&xprt->sc_dma_used); 240 atomic_dec(&xprt->sc_dma_used);
183 ib_dma_unmap_page(xprt->sc_cm_id->device, 241 ib_dma_unmap_page(xprt->sc_cm_id->device,
184 ctxt->sge[i].addr, 242 ctxt->sge[i].addr,
@@ -190,35 +248,108 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
190 248
191void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) 249void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
192{ 250{
193 struct svcxprt_rdma *xprt; 251 struct svcxprt_rdma *xprt = ctxt->xprt;
194 int i; 252 int i;
195 253
196 xprt = ctxt->xprt;
197 if (free_pages) 254 if (free_pages)
198 for (i = 0; i < ctxt->count; i++) 255 for (i = 0; i < ctxt->count; i++)
199 put_page(ctxt->pages[i]); 256 put_page(ctxt->pages[i]);
200 257
201 kmem_cache_free(svc_rdma_ctxt_cachep, ctxt); 258 spin_lock_bh(&xprt->sc_ctxt_lock);
202 atomic_dec(&xprt->sc_ctxt_used); 259 xprt->sc_ctxt_used--;
260 list_add(&ctxt->free, &xprt->sc_ctxts);
261 spin_unlock_bh(&xprt->sc_ctxt_lock);
203} 262}
204 263
205/* 264static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
206 * Temporary NFS req mappings are shared across all transport 265{
207 * instances. These are short lived and should be bounded by the number 266 while (!list_empty(&xprt->sc_ctxts)) {
208 * of concurrent server threads * depth of the SQ. 267 struct svc_rdma_op_ctxt *ctxt;
209 */ 268
210struct svc_rdma_req_map *svc_rdma_get_req_map(void) 269 ctxt = list_first_entry(&xprt->sc_ctxts,
270 struct svc_rdma_op_ctxt, free);
271 list_del(&ctxt->free);
272 kfree(ctxt);
273 }
274}
275
276static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
211{ 277{
212 struct svc_rdma_req_map *map; 278 struct svc_rdma_req_map *map;
213 map = kmem_cache_alloc(svc_rdma_map_cachep, 279
214 GFP_KERNEL | __GFP_NOFAIL); 280 map = kmalloc(sizeof(*map), flags);
281 if (map)
282 INIT_LIST_HEAD(&map->free);
283 return map;
284}
285
286static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
287{
288 unsigned int i;
289
290 /* One for each receive buffer on this connection. */
291 i = xprt->sc_max_requests;
292
293 while (i--) {
294 struct svc_rdma_req_map *map;
295
296 map = alloc_req_map(GFP_KERNEL);
297 if (!map) {
298 dprintk("svcrdma: No memory for request map\n");
299 return false;
300 }
301 list_add(&map->free, &xprt->sc_maps);
302 }
303 return true;
304}
305
306struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
307{
308 struct svc_rdma_req_map *map = NULL;
309
310 spin_lock(&xprt->sc_map_lock);
311 if (list_empty(&xprt->sc_maps))
312 goto out_empty;
313
314 map = list_first_entry(&xprt->sc_maps,
315 struct svc_rdma_req_map, free);
316 list_del_init(&map->free);
317 spin_unlock(&xprt->sc_map_lock);
318
319out:
215 map->count = 0; 320 map->count = 0;
216 return map; 321 return map;
322
323out_empty:
324 spin_unlock(&xprt->sc_map_lock);
325
326 /* Pre-allocation amount was incorrect */
327 map = alloc_req_map(GFP_NOIO);
328 if (map)
329 goto out;
330
331 WARN_ONCE(1, "svcrdma: empty request map list?\n");
332 return NULL;
333}
334
335void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
336 struct svc_rdma_req_map *map)
337{
338 spin_lock(&xprt->sc_map_lock);
339 list_add(&map->free, &xprt->sc_maps);
340 spin_unlock(&xprt->sc_map_lock);
217} 341}
218 342
219void svc_rdma_put_req_map(struct svc_rdma_req_map *map) 343static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
220{ 344{
221 kmem_cache_free(svc_rdma_map_cachep, map); 345 while (!list_empty(&xprt->sc_maps)) {
346 struct svc_rdma_req_map *map;
347
348 map = list_first_entry(&xprt->sc_maps,
349 struct svc_rdma_req_map, free);
350 list_del(&map->free);
351 kfree(map);
352 }
222} 353}
223 354
224/* ib_cq event handler */ 355/* ib_cq event handler */
@@ -386,46 +517,44 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
386static void process_context(struct svcxprt_rdma *xprt, 517static void process_context(struct svcxprt_rdma *xprt,
387 struct svc_rdma_op_ctxt *ctxt) 518 struct svc_rdma_op_ctxt *ctxt)
388{ 519{
520 struct svc_rdma_op_ctxt *read_hdr;
521 int free_pages = 0;
522
389 svc_rdma_unmap_dma(ctxt); 523 svc_rdma_unmap_dma(ctxt);
390 524
391 switch (ctxt->wr_op) { 525 switch (ctxt->wr_op) {
392 case IB_WR_SEND: 526 case IB_WR_SEND:
393 if (ctxt->frmr) 527 free_pages = 1;
394 pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
395 svc_rdma_put_context(ctxt, 1);
396 break; 528 break;
397 529
398 case IB_WR_RDMA_WRITE: 530 case IB_WR_RDMA_WRITE:
399 if (ctxt->frmr)
400 pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
401 svc_rdma_put_context(ctxt, 0);
402 break; 531 break;
403 532
404 case IB_WR_RDMA_READ: 533 case IB_WR_RDMA_READ:
405 case IB_WR_RDMA_READ_WITH_INV: 534 case IB_WR_RDMA_READ_WITH_INV:
406 svc_rdma_put_frmr(xprt, ctxt->frmr); 535 svc_rdma_put_frmr(xprt, ctxt->frmr);
407 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { 536
408 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; 537 if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags))
409 if (read_hdr) { 538 break;
410 spin_lock_bh(&xprt->sc_rq_dto_lock); 539
411 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 540 read_hdr = ctxt->read_hdr;
412 list_add_tail(&read_hdr->dto_q,
413 &xprt->sc_read_complete_q);
414 spin_unlock_bh(&xprt->sc_rq_dto_lock);
415 } else {
416 pr_err("svcrdma: ctxt->read_hdr == NULL\n");
417 }
418 svc_xprt_enqueue(&xprt->sc_xprt);
419 }
420 svc_rdma_put_context(ctxt, 0); 541 svc_rdma_put_context(ctxt, 0);
421 break; 542
543 spin_lock_bh(&xprt->sc_rq_dto_lock);
544 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
545 list_add_tail(&read_hdr->dto_q,
546 &xprt->sc_read_complete_q);
547 spin_unlock_bh(&xprt->sc_rq_dto_lock);
548 svc_xprt_enqueue(&xprt->sc_xprt);
549 return;
422 550
423 default: 551 default:
424 printk(KERN_ERR "svcrdma: unexpected completion type, " 552 dprintk("svcrdma: unexpected completion opcode=%d\n",
425 "opcode=%d\n", 553 ctxt->wr_op);
426 ctxt->wr_op);
427 break; 554 break;
428 } 555 }
556
557 svc_rdma_put_context(ctxt, free_pages);
429} 558}
430 559
431/* 560/*
@@ -523,19 +652,15 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
523 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); 652 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
524 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); 653 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
525 INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); 654 INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
655 INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
656 INIT_LIST_HEAD(&cma_xprt->sc_maps);
526 init_waitqueue_head(&cma_xprt->sc_send_wait); 657 init_waitqueue_head(&cma_xprt->sc_send_wait);
527 658
528 spin_lock_init(&cma_xprt->sc_lock); 659 spin_lock_init(&cma_xprt->sc_lock);
529 spin_lock_init(&cma_xprt->sc_rq_dto_lock); 660 spin_lock_init(&cma_xprt->sc_rq_dto_lock);
530 spin_lock_init(&cma_xprt->sc_frmr_q_lock); 661 spin_lock_init(&cma_xprt->sc_frmr_q_lock);
531 662 spin_lock_init(&cma_xprt->sc_ctxt_lock);
532 cma_xprt->sc_ord = svcrdma_ord; 663 spin_lock_init(&cma_xprt->sc_map_lock);
533
534 cma_xprt->sc_max_req_size = svcrdma_max_req_size;
535 cma_xprt->sc_max_requests = svcrdma_max_requests;
536 cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
537 atomic_set(&cma_xprt->sc_sq_count, 0);
538 atomic_set(&cma_xprt->sc_ctxt_used, 0);
539 664
540 if (listener) 665 if (listener)
541 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); 666 set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
@@ -543,7 +668,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
543 return cma_xprt; 668 return cma_xprt;
544} 669}
545 670
546int svc_rdma_post_recv(struct svcxprt_rdma *xprt) 671int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
547{ 672{
548 struct ib_recv_wr recv_wr, *bad_recv_wr; 673 struct ib_recv_wr recv_wr, *bad_recv_wr;
549 struct svc_rdma_op_ctxt *ctxt; 674 struct svc_rdma_op_ctxt *ctxt;
@@ -561,7 +686,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
561 pr_err("svcrdma: Too many sges (%d)\n", sge_no); 686 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
562 goto err_put_ctxt; 687 goto err_put_ctxt;
563 } 688 }
564 page = alloc_page(GFP_KERNEL | __GFP_NOFAIL); 689 page = alloc_page(flags);
690 if (!page)
691 goto err_put_ctxt;
565 ctxt->pages[sge_no] = page; 692 ctxt->pages[sge_no] = page;
566 pa = ib_dma_map_page(xprt->sc_cm_id->device, 693 pa = ib_dma_map_page(xprt->sc_cm_id->device,
567 page, 0, PAGE_SIZE, 694 page, 0, PAGE_SIZE,
@@ -571,7 +698,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
571 atomic_inc(&xprt->sc_dma_used); 698 atomic_inc(&xprt->sc_dma_used);
572 ctxt->sge[sge_no].addr = pa; 699 ctxt->sge[sge_no].addr = pa;
573 ctxt->sge[sge_no].length = PAGE_SIZE; 700 ctxt->sge[sge_no].length = PAGE_SIZE;
574 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; 701 ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
575 ctxt->count = sge_no + 1; 702 ctxt->count = sge_no + 1;
576 buflen += PAGE_SIZE; 703 buflen += PAGE_SIZE;
577 } 704 }
@@ -886,11 +1013,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
886 struct rdma_conn_param conn_param; 1013 struct rdma_conn_param conn_param;
887 struct ib_cq_init_attr cq_attr = {}; 1014 struct ib_cq_init_attr cq_attr = {};
888 struct ib_qp_init_attr qp_attr; 1015 struct ib_qp_init_attr qp_attr;
889 struct ib_device_attr devattr; 1016 struct ib_device *dev;
890 int uninitialized_var(dma_mr_acc); 1017 unsigned int i;
891 int need_dma_mr = 0; 1018 int ret = 0;
892 int ret;
893 int i;
894 1019
895 listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); 1020 listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
896 clear_bit(XPT_CONN, &xprt->xpt_flags); 1021 clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -910,37 +1035,42 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
910 dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n", 1035 dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
911 newxprt, newxprt->sc_cm_id); 1036 newxprt, newxprt->sc_cm_id);
912 1037
913 ret = ib_query_device(newxprt->sc_cm_id->device, &devattr); 1038 dev = newxprt->sc_cm_id->device;
914 if (ret) {
915 dprintk("svcrdma: could not query device attributes on "
916 "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret);
917 goto errout;
918 }
919 1039
920 /* Qualify the transport resource defaults with the 1040 /* Qualify the transport resource defaults with the
921 * capabilities of this particular device */ 1041 * capabilities of this particular device */
922 newxprt->sc_max_sge = min((size_t)devattr.max_sge, 1042 newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
923 (size_t)RPCSVC_MAXPAGES); 1043 (size_t)RPCSVC_MAXPAGES);
924 newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd, 1044 newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
925 RPCSVC_MAXPAGES); 1045 RPCSVC_MAXPAGES);
926 newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr, 1046 newxprt->sc_max_req_size = svcrdma_max_req_size;
927 (size_t)svcrdma_max_requests); 1047 newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
928 newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; 1048 svcrdma_max_requests);
1049 newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
1050 svcrdma_max_bc_requests);
1051 newxprt->sc_rq_depth = newxprt->sc_max_requests +
1052 newxprt->sc_max_bc_requests;
1053 newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
1054
1055 if (!svc_rdma_prealloc_ctxts(newxprt))
1056 goto errout;
1057 if (!svc_rdma_prealloc_maps(newxprt))
1058 goto errout;
929 1059
930 /* 1060 /*
931 * Limit ORD based on client limit, local device limit, and 1061 * Limit ORD based on client limit, local device limit, and
932 * configured svcrdma limit. 1062 * configured svcrdma limit.
933 */ 1063 */
934 newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord); 1064 newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
935 newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord); 1065 newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
936 1066
937 newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device); 1067 newxprt->sc_pd = ib_alloc_pd(dev);
938 if (IS_ERR(newxprt->sc_pd)) { 1068 if (IS_ERR(newxprt->sc_pd)) {
939 dprintk("svcrdma: error creating PD for connect request\n"); 1069 dprintk("svcrdma: error creating PD for connect request\n");
940 goto errout; 1070 goto errout;
941 } 1071 }
942 cq_attr.cqe = newxprt->sc_sq_depth; 1072 cq_attr.cqe = newxprt->sc_sq_depth;
943 newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device, 1073 newxprt->sc_sq_cq = ib_create_cq(dev,
944 sq_comp_handler, 1074 sq_comp_handler,
945 cq_event_handler, 1075 cq_event_handler,
946 newxprt, 1076 newxprt,
@@ -949,8 +1079,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
949 dprintk("svcrdma: error creating SQ CQ for connect request\n"); 1079 dprintk("svcrdma: error creating SQ CQ for connect request\n");
950 goto errout; 1080 goto errout;
951 } 1081 }
952 cq_attr.cqe = newxprt->sc_max_requests; 1082 cq_attr.cqe = newxprt->sc_rq_depth;
953 newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device, 1083 newxprt->sc_rq_cq = ib_create_cq(dev,
954 rq_comp_handler, 1084 rq_comp_handler,
955 cq_event_handler, 1085 cq_event_handler,
956 newxprt, 1086 newxprt,
@@ -964,7 +1094,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
964 qp_attr.event_handler = qp_event_handler; 1094 qp_attr.event_handler = qp_event_handler;
965 qp_attr.qp_context = &newxprt->sc_xprt; 1095 qp_attr.qp_context = &newxprt->sc_xprt;
966 qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; 1096 qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
967 qp_attr.cap.max_recv_wr = newxprt->sc_max_requests; 1097 qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
968 qp_attr.cap.max_send_sge = newxprt->sc_max_sge; 1098 qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
969 qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; 1099 qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
970 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 1100 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -978,7 +1108,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
978 " cap.max_send_sge = %d\n" 1108 " cap.max_send_sge = %d\n"
979 " cap.max_recv_sge = %d\n", 1109 " cap.max_recv_sge = %d\n",
980 newxprt->sc_cm_id, newxprt->sc_pd, 1110 newxprt->sc_cm_id, newxprt->sc_pd,
981 newxprt->sc_cm_id->device, newxprt->sc_pd->device, 1111 dev, newxprt->sc_pd->device,
982 qp_attr.cap.max_send_wr, 1112 qp_attr.cap.max_send_wr,
983 qp_attr.cap.max_recv_wr, 1113 qp_attr.cap.max_recv_wr,
984 qp_attr.cap.max_send_sge, 1114 qp_attr.cap.max_send_sge,
@@ -1014,9 +1144,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1014 * of an RDMA_READ. IB does not. 1144 * of an RDMA_READ. IB does not.
1015 */ 1145 */
1016 newxprt->sc_reader = rdma_read_chunk_lcl; 1146 newxprt->sc_reader = rdma_read_chunk_lcl;
1017 if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { 1147 if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
1018 newxprt->sc_frmr_pg_list_len = 1148 newxprt->sc_frmr_pg_list_len =
1019 devattr.max_fast_reg_page_list_len; 1149 dev->attrs.max_fast_reg_page_list_len;
1020 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; 1150 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
1021 newxprt->sc_reader = rdma_read_chunk_frmr; 1151 newxprt->sc_reader = rdma_read_chunk_frmr;
1022 } 1152 }
@@ -1024,44 +1154,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1024 /* 1154 /*
1025 * Determine if a DMA MR is required and if so, what privs are required 1155 * Determine if a DMA MR is required and if so, what privs are required
1026 */ 1156 */
1027 if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device, 1157 if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
1028 newxprt->sc_cm_id->port_num) && 1158 !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
1029 !rdma_ib_or_roce(newxprt->sc_cm_id->device,
1030 newxprt->sc_cm_id->port_num))
1031 goto errout; 1159 goto errout;
1032 1160
1033 if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) || 1161 if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
1034 !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
1035 need_dma_mr = 1;
1036 dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
1037 if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
1038 newxprt->sc_cm_id->port_num) &&
1039 !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
1040 dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
1041 }
1042
1043 if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
1044 newxprt->sc_cm_id->port_num))
1045 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; 1162 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
1046 1163
1047 /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
1048 if (need_dma_mr) {
1049 /* Register all of physical memory */
1050 newxprt->sc_phys_mr =
1051 ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
1052 if (IS_ERR(newxprt->sc_phys_mr)) {
1053 dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
1054 ret);
1055 goto errout;
1056 }
1057 newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
1058 } else
1059 newxprt->sc_dma_lkey =
1060 newxprt->sc_cm_id->device->local_dma_lkey;
1061
1062 /* Post receive buffers */ 1164 /* Post receive buffers */
1063 for (i = 0; i < newxprt->sc_max_requests; i++) { 1165 for (i = 0; i < newxprt->sc_rq_depth; i++) {
1064 ret = svc_rdma_post_recv(newxprt); 1166 ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
1065 if (ret) { 1167 if (ret) {
1066 dprintk("svcrdma: failure posting receive buffers\n"); 1168 dprintk("svcrdma: failure posting receive buffers\n");
1067 goto errout; 1169 goto errout;
@@ -1160,12 +1262,14 @@ static void __svc_rdma_free(struct work_struct *work)
1160{ 1262{
1161 struct svcxprt_rdma *rdma = 1263 struct svcxprt_rdma *rdma =
1162 container_of(work, struct svcxprt_rdma, sc_work); 1264 container_of(work, struct svcxprt_rdma, sc_work);
1163 dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); 1265 struct svc_xprt *xprt = &rdma->sc_xprt;
1266
1267 dprintk("svcrdma: %s(%p)\n", __func__, rdma);
1164 1268
1165 /* We should only be called from kref_put */ 1269 /* We should only be called from kref_put */
1166 if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0) 1270 if (atomic_read(&xprt->xpt_ref.refcount) != 0)
1167 pr_err("svcrdma: sc_xprt still in use? (%d)\n", 1271 pr_err("svcrdma: sc_xprt still in use? (%d)\n",
1168 atomic_read(&rdma->sc_xprt.xpt_ref.refcount)); 1272 atomic_read(&xprt->xpt_ref.refcount));
1169 1273
1170 /* 1274 /*
1171 * Destroy queued, but not processed read completions. Note 1275 * Destroy queued, but not processed read completions. Note
@@ -1193,15 +1297,22 @@ static void __svc_rdma_free(struct work_struct *work)
1193 } 1297 }
1194 1298
1195 /* Warn if we leaked a resource or under-referenced */ 1299 /* Warn if we leaked a resource or under-referenced */
1196 if (atomic_read(&rdma->sc_ctxt_used) != 0) 1300 if (rdma->sc_ctxt_used != 0)
1197 pr_err("svcrdma: ctxt still in use? (%d)\n", 1301 pr_err("svcrdma: ctxt still in use? (%d)\n",
1198 atomic_read(&rdma->sc_ctxt_used)); 1302 rdma->sc_ctxt_used);
1199 if (atomic_read(&rdma->sc_dma_used) != 0) 1303 if (atomic_read(&rdma->sc_dma_used) != 0)
1200 pr_err("svcrdma: dma still in use? (%d)\n", 1304 pr_err("svcrdma: dma still in use? (%d)\n",
1201 atomic_read(&rdma->sc_dma_used)); 1305 atomic_read(&rdma->sc_dma_used));
1202 1306
1203 /* De-allocate fastreg mr */ 1307 /* Final put of backchannel client transport */
1308 if (xprt->xpt_bc_xprt) {
1309 xprt_put(xprt->xpt_bc_xprt);
1310 xprt->xpt_bc_xprt = NULL;
1311 }
1312
1204 rdma_dealloc_frmr_q(rdma); 1313 rdma_dealloc_frmr_q(rdma);
1314 svc_rdma_destroy_ctxts(rdma);
1315 svc_rdma_destroy_maps(rdma);
1205 1316
1206 /* Destroy the QP if present (not a listener) */ 1317 /* Destroy the QP if present (not a listener) */
1207 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 1318 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1213,9 +1324,6 @@ static void __svc_rdma_free(struct work_struct *work)
1213 if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) 1324 if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
1214 ib_destroy_cq(rdma->sc_rq_cq); 1325 ib_destroy_cq(rdma->sc_rq_cq);
1215 1326
1216 if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
1217 ib_dereg_mr(rdma->sc_phys_mr);
1218
1219 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) 1327 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
1220 ib_dealloc_pd(rdma->sc_pd); 1328 ib_dealloc_pd(rdma->sc_pd);
1221 1329
@@ -1321,7 +1429,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1321 int length; 1429 int length;
1322 int ret; 1430 int ret;
1323 1431
1324 p = alloc_page(GFP_KERNEL | __GFP_NOFAIL); 1432 p = alloc_page(GFP_KERNEL);
1433 if (!p)
1434 return;
1325 va = page_address(p); 1435 va = page_address(p);
1326 1436
1327 /* XDR encode error */ 1437 /* XDR encode error */
@@ -1341,7 +1451,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1341 return; 1451 return;
1342 } 1452 }
1343 atomic_inc(&xprt->sc_dma_used); 1453 atomic_inc(&xprt->sc_dma_used);
1344 ctxt->sge[0].lkey = xprt->sc_dma_lkey; 1454 ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
1345 ctxt->sge[0].length = length; 1455 ctxt->sge[0].length = length;
1346 1456
1347 /* Prepare SEND WR */ 1457 /* Prepare SEND WR */
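
The transport changes above replace the global slab caches with per-connection pre-allocated free lists: svc_rdma_prealloc_ctxts() fills sc_ctxts at accept time, svc_rdma_get_context() pops an entry under sc_ctxt_lock and only falls back to a GFP_NOIO allocation if the list unexpectedly runs dry, and svc_rdma_put_context() returns the entry to the list. The standalone sketch below models that pattern with a pthread mutex standing in for the bh spinlock; ctxt_get(), ctxt_put() and struct ctxt_pool are illustrative names, not the kernel API.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ctxt {
	struct ctxt *next;	/* free-list linkage */
	int data;
};

struct ctxt_pool {
	pthread_mutex_t lock;	/* stands in for sc_ctxt_lock */
	struct ctxt *free;	/* stands in for sc_ctxts */
	unsigned int used;	/* stands in for sc_ctxt_used */
};

/* Pre-allocate while it is still safe to sleep (GFP_KERNEL time). */
static int pool_prealloc(struct ctxt_pool *p, unsigned int n)
{
	while (n--) {
		struct ctxt *c = calloc(1, sizeof(*c));

		if (!c)
			return -1;
		c->next = p->free;
		p->free = c;
	}
	return 0;
}

/* Fast path: pop from the free list under the lock.  Slow path:
 * allocate outside the lock (the kernel code uses GFP_NOIO here). */
static struct ctxt *ctxt_get(struct ctxt_pool *p)
{
	struct ctxt *c;

	pthread_mutex_lock(&p->lock);
	p->used++;
	c = p->free;
	if (c)
		p->free = c->next;
	pthread_mutex_unlock(&p->lock);

	if (!c) {
		c = calloc(1, sizeof(*c));	/* emergency allocation */
		if (!c) {
			pthread_mutex_lock(&p->lock);
			p->used--;
			pthread_mutex_unlock(&p->lock);
		}
	}
	return c;
}

static void ctxt_put(struct ctxt_pool *p, struct ctxt *c)
{
	pthread_mutex_lock(&p->lock);
	p->used--;
	c->next = p->free;
	p->free = c;
	pthread_mutex_unlock(&p->lock);
}

int main(void)
{
	struct ctxt_pool pool = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct ctxt *c;

	if (pool_prealloc(&pool, 4))
		return 1;
	c = ctxt_get(&pool);
	printf("got ctxt %p, used=%u\n", (void *)c, pool.used);
	ctxt_put(&pool, c);
	printf("put ctxt, used=%u\n", pool.used);
	return 0;
}
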
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 740bddcf3488..b1b009f10ea3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -63,7 +63,7 @@
63 */ 63 */
64 64
65static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; 65static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
66static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; 66unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
67static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 67static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
68static unsigned int xprt_rdma_inline_write_padding; 68static unsigned int xprt_rdma_inline_write_padding;
69static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; 69static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
@@ -143,12 +143,7 @@ static struct ctl_table sunrpc_table[] = {
143 143
144#endif 144#endif
145 145
146#define RPCRDMA_BIND_TO (60U * HZ) 146static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */
147#define RPCRDMA_INIT_REEST_TO (5U * HZ)
148#define RPCRDMA_MAX_REEST_TO (30U * HZ)
149#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
150
151static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
152 147
153static void 148static void
154xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) 149xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
@@ -174,7 +169,7 @@ xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
174 xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; 169 xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
175} 170}
176 171
177static void 172void
178xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap) 173xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
179{ 174{
180 char buf[128]; 175 char buf[128];
@@ -203,7 +198,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
203 xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; 198 xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
204} 199}
205 200
206static void 201void
207xprt_rdma_free_addresses(struct rpc_xprt *xprt) 202xprt_rdma_free_addresses(struct rpc_xprt *xprt)
208{ 203{
209 unsigned int i; 204 unsigned int i;
@@ -499,7 +494,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
499 if (req == NULL) 494 if (req == NULL)
500 return NULL; 495 return NULL;
501 496
502 flags = GFP_NOIO | __GFP_NOWARN; 497 flags = RPCRDMA_DEF_GFP;
503 if (RPC_IS_SWAPPER(task)) 498 if (RPC_IS_SWAPPER(task))
504 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; 499 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
505 500
@@ -642,7 +637,7 @@ drop_connection:
642 return -ENOTCONN; /* implies disconnect */ 637 return -ENOTCONN; /* implies disconnect */
643} 638}
644 639
645static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 640void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
646{ 641{
647 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 642 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
648 long idle_time = 0; 643 long idle_time = 0;
@@ -743,6 +738,11 @@ void xprt_rdma_cleanup(void)
743 738
744 rpcrdma_destroy_wq(); 739 rpcrdma_destroy_wq();
745 frwr_destroy_recovery_wq(); 740 frwr_destroy_recovery_wq();
741
742 rc = xprt_unregister_transport(&xprt_rdma_bc);
743 if (rc)
744 dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
745 __func__, rc);
746} 746}
747 747
748int xprt_rdma_init(void) 748int xprt_rdma_init(void)
@@ -766,6 +766,14 @@ int xprt_rdma_init(void)
766 return rc; 766 return rc;
767 } 767 }
768 768
769 rc = xprt_register_transport(&xprt_rdma_bc);
770 if (rc) {
771 xprt_unregister_transport(&xprt_rdma);
772 rpcrdma_destroy_wq();
773 frwr_destroy_recovery_wq();
774 return rc;
775 }
776
769 dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); 777 dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
770 778
771 dprintk("Defaults:\n"); 779 dprintk("Defaults:\n");
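
xprt_rdma_init() now registers the backchannel transport class in addition to the forward one, and a failure to register it rolls back the forward registration and the work queues set up before it; xprt_rdma_cleanup() unregisters both. A minimal userspace analog of that paired registration with rollback is below; register_fwd(), register_bc() and their counterparts are invented names standing in for the xprt_register_transport()/xprt_unregister_transport() calls.

#include <stdio.h>

/* Stand-ins for the two transport classes the module now registers.
 * Returning nonzero would simulate a registration failure. */
static int register_fwd(void)    { puts("register rdma");    return 0; }
static int register_bc(void)     { puts("register rdma-bc"); return 0; }
static void unregister_fwd(void) { puts("unregister rdma"); }
static void unregister_bc(void)  { puts("unregister rdma-bc"); }

/* If the second registration fails, everything done so far is torn
 * down again, mirroring the error path in the new xprt_rdma_init(). */
static int demo_init(void)
{
	int rc;

	rc = register_fwd();
	if (rc)
		return rc;

	rc = register_bc();
	if (rc) {
		unregister_fwd();
		return rc;
	}
	return 0;
}

static void demo_cleanup(void)
{
	unregister_bc();
	unregister_fwd();
}

int main(void)
{
	if (demo_init())
		return 1;
	demo_cleanup();
	return 0;
}
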
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 732c71ce5dca..878f1bfb1db9 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -462,7 +462,6 @@ int
462rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) 462rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
463{ 463{
464 struct rpcrdma_ia *ia = &xprt->rx_ia; 464 struct rpcrdma_ia *ia = &xprt->rx_ia;
465 struct ib_device_attr *devattr = &ia->ri_devattr;
466 int rc; 465 int rc;
467 466
468 ia->ri_dma_mr = NULL; 467 ia->ri_dma_mr = NULL;
@@ -482,16 +481,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
482 goto out2; 481 goto out2;
483 } 482 }
484 483
485 rc = ib_query_device(ia->ri_device, devattr);
486 if (rc) {
487 dprintk("RPC: %s: ib_query_device failed %d\n",
488 __func__, rc);
489 goto out3;
490 }
491
492 if (memreg == RPCRDMA_FRMR) { 484 if (memreg == RPCRDMA_FRMR) {
493 if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) || 485 if (!(ia->ri_device->attrs.device_cap_flags &
494 (devattr->max_fast_reg_page_list_len == 0)) { 486 IB_DEVICE_MEM_MGT_EXTENSIONS) ||
487 (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) {
495 dprintk("RPC: %s: FRMR registration " 488 dprintk("RPC: %s: FRMR registration "
496 "not supported by HCA\n", __func__); 489 "not supported by HCA\n", __func__);
497 memreg = RPCRDMA_MTHCAFMR; 490 memreg = RPCRDMA_MTHCAFMR;
@@ -566,24 +559,23 @@ int
566rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, 559rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
567 struct rpcrdma_create_data_internal *cdata) 560 struct rpcrdma_create_data_internal *cdata)
568{ 561{
569 struct ib_device_attr *devattr = &ia->ri_devattr;
570 struct ib_cq *sendcq, *recvcq; 562 struct ib_cq *sendcq, *recvcq;
571 struct ib_cq_init_attr cq_attr = {}; 563 struct ib_cq_init_attr cq_attr = {};
572 unsigned int max_qp_wr; 564 unsigned int max_qp_wr;
573 int rc, err; 565 int rc, err;
574 566
575 if (devattr->max_sge < RPCRDMA_MAX_IOVS) { 567 if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
576 dprintk("RPC: %s: insufficient sge's available\n", 568 dprintk("RPC: %s: insufficient sge's available\n",
577 __func__); 569 __func__);
578 return -ENOMEM; 570 return -ENOMEM;
579 } 571 }
580 572
581 if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) { 573 if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
582 dprintk("RPC: %s: insufficient wqe's available\n", 574 dprintk("RPC: %s: insufficient wqe's available\n",
583 __func__); 575 __func__);
584 return -ENOMEM; 576 return -ENOMEM;
585 } 577 }
586 max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS; 578 max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS;
587 579
588 /* check provider's send/recv wr limits */ 580 /* check provider's send/recv wr limits */
589 if (cdata->max_requests > max_qp_wr) 581 if (cdata->max_requests > max_qp_wr)
@@ -668,11 +660,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
668 660
669 /* Client offers RDMA Read but does not initiate */ 661 /* Client offers RDMA Read but does not initiate */
670 ep->rep_remote_cma.initiator_depth = 0; 662 ep->rep_remote_cma.initiator_depth = 0;
671 if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */ 663 if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
672 ep->rep_remote_cma.responder_resources = 32; 664 ep->rep_remote_cma.responder_resources = 32;
673 else 665 else
674 ep->rep_remote_cma.responder_resources = 666 ep->rep_remote_cma.responder_resources =
675 devattr->max_qp_rd_atom; 667 ia->ri_device->attrs.max_qp_rd_atom;
676 668
677 ep->rep_remote_cma.retry_count = 7; 669 ep->rep_remote_cma.retry_count = 7;
678 ep->rep_remote_cma.flow_control = 0; 670 ep->rep_remote_cma.flow_control = 0;
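
With ib_query_device() removed, the client reads device limits directly from the cached ib_device->attrs structure and derives its connection parameters from them, for example clamping the offered responder_resources to 32 or the device's max_qp_rd_atom, whichever is smaller. The toy sketch below shows just that clamp; struct dev_attrs is an invented stand-in for the cached attribute structure.

#include <stdio.h>

/* Invented stand-in for the subset of ib_device->attrs used here. */
struct dev_attrs {
	int max_sge;
	int max_qp_wr;
	int max_qp_rd_atom;
};

/* Offer RDMA Read responder resources: arbitrary cap of 32, but never
 * more than the device supports (mirrors the rep_remote_cma setup). */
static int responder_resources(const struct dev_attrs *attrs)
{
	return attrs->max_qp_rd_atom > 32 ? 32 : attrs->max_qp_rd_atom;
}

int main(void)
{
	struct dev_attrs a = { .max_sge = 32, .max_qp_wr = 16384,
			       .max_qp_rd_atom = 16 };

	printf("responder_resources=%d\n", responder_resources(&a));
	return 0;
}
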
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 728101ddc44b..38fe11b09875 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -55,6 +55,11 @@
55#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ 55#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
56#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ 56#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
57 57
58#define RPCRDMA_BIND_TO (60U * HZ)
59#define RPCRDMA_INIT_REEST_TO (5U * HZ)
60#define RPCRDMA_MAX_REEST_TO (30U * HZ)
61#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
62
58/* 63/*
59 * Interface Adapter -- one per transport instance 64 * Interface Adapter -- one per transport instance
60 */ 65 */
@@ -68,7 +73,6 @@ struct rpcrdma_ia {
68 struct completion ri_done; 73 struct completion ri_done;
69 int ri_async_rc; 74 int ri_async_rc;
70 unsigned int ri_max_frmr_depth; 75 unsigned int ri_max_frmr_depth;
71 struct ib_device_attr ri_devattr;
72 struct ib_qp_attr ri_qp_attr; 76 struct ib_qp_attr ri_qp_attr;
73 struct ib_qp_init_attr ri_qp_init_attr; 77 struct ib_qp_init_attr ri_qp_init_attr;
74}; 78};
@@ -142,6 +146,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
142 return (struct rpcrdma_msg *)rb->rg_base; 146 return (struct rpcrdma_msg *)rb->rg_base;
143} 147}
144 148
149#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
150
145/* 151/*
146 * struct rpcrdma_rep -- this structure encapsulates state required to recv 152 * struct rpcrdma_rep -- this structure encapsulates state required to recv
147 * and complete a reply, asychronously. It needs several pieces of 153 * and complete a reply, asychronously. It needs several pieces of
@@ -309,6 +315,8 @@ struct rpcrdma_buffer {
309 u32 rb_bc_srv_max_requests; 315 u32 rb_bc_srv_max_requests;
310 spinlock_t rb_reqslock; /* protect rb_allreqs */ 316 spinlock_t rb_reqslock; /* protect rb_allreqs */
311 struct list_head rb_allreqs; 317 struct list_head rb_allreqs;
318
319 u32 rb_bc_max_requests;
312}; 320};
313#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) 321#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
314 322
@@ -516,6 +524,10 @@ int rpcrdma_marshal_req(struct rpc_rqst *);
516 524
517/* RPC/RDMA module init - xprtrdma/transport.c 525/* RPC/RDMA module init - xprtrdma/transport.c
518 */ 526 */
527extern unsigned int xprt_rdma_max_inline_read;
528void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
529void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
530void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
519int xprt_rdma_init(void); 531int xprt_rdma_init(void);
520void xprt_rdma_cleanup(void); 532void xprt_rdma_cleanup(void);
521 533
@@ -531,11 +543,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
531void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); 543void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
532#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 544#endif /* CONFIG_SUNRPC_BACKCHANNEL */
533 545
534/* Temporary NFS request map cache. Created in svc_rdma.c */ 546extern struct xprt_class xprt_rdma_bc;
535extern struct kmem_cache *svc_rdma_map_cachep;
536/* WR context cache. Created in svc_rdma.c */
537extern struct kmem_cache *svc_rdma_ctxt_cachep;
538/* Workqueue created in svc_rdma.c */
539extern struct workqueue_struct *svc_rdma_wq;
540 547
541#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 548#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ea69ce35e902..c3bd294a63d1 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3746,7 +3746,7 @@ static const struct flag flags[] = {
3746 { "NET_TX_SOFTIRQ", 2 }, 3746 { "NET_TX_SOFTIRQ", 2 },
3747 { "NET_RX_SOFTIRQ", 3 }, 3747 { "NET_RX_SOFTIRQ", 3 },
3748 { "BLOCK_SOFTIRQ", 4 }, 3748 { "BLOCK_SOFTIRQ", 4 },
3749 { "BLOCK_IOPOLL_SOFTIRQ", 5 }, 3749 { "IRQ_POLL_SOFTIRQ", 5 },
3750 { "TASKLET_SOFTIRQ", 6 }, 3750 { "TASKLET_SOFTIRQ", 6 },
3751 { "SCHED_SOFTIRQ", 7 }, 3751 { "SCHED_SOFTIRQ", 7 },
3752 { "HRTIMER_SOFTIRQ", 8 }, 3752 { "HRTIMER_SOFTIRQ", 8 },
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 8ff7d620d942..33b52eaa39db 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -209,7 +209,7 @@ static const struct flag flags[] = {
209 { "NET_TX_SOFTIRQ", 2 }, 209 { "NET_TX_SOFTIRQ", 2 },
210 { "NET_RX_SOFTIRQ", 3 }, 210 { "NET_RX_SOFTIRQ", 3 },
211 { "BLOCK_SOFTIRQ", 4 }, 211 { "BLOCK_SOFTIRQ", 4 },
212 { "BLOCK_IOPOLL_SOFTIRQ", 5 }, 212 { "IRQ_POLL_SOFTIRQ", 5 },
213 { "TASKLET_SOFTIRQ", 6 }, 213 { "TASKLET_SOFTIRQ", 6 },
214 { "SCHED_SOFTIRQ", 7 }, 214 { "SCHED_SOFTIRQ", 7 },
215 { "HRTIMER_SOFTIRQ", 8 }, 215 { "HRTIMER_SOFTIRQ", 8 },