author     Adit Ranadive <aditr@vmware.com>      2016-10-02 22:10:22 -0400
committer  Doug Ledford <dledford@redhat.com>    2016-12-14 14:55:10 -0500
commit     29c8d9eba550c6d73d17cc1618a9f5f2a7345aa1 (patch)
tree       e8cef3b3035f1f3c4ad12f0a25ec57b6c789bcab
parent     b1226c7db1d997fa6955cd3b54ba333bd0d8a29c (diff)
IB: Add vmw_pvrdma driver
This patch series adds a driver for a paravirtual RDMA device. The device is developed for VMware's Virtual Machines and allows existing RDMA applications to continue to use the existing Verbs API when deployed in VMs on ESXi. We recently did a presentation in the OFA Workshop [1] regarding this device.

Description and RDMA Support
============================
The virtual device is exposed as a dual function PCIe device. One part is a virtual network device (VMXNet3) which provides networking properties like MAC and IP addresses to the RDMA part of the device. The networking properties are used to register GIDs required by RDMA applications to communicate.

These patches add support and all the required infrastructure for letting applications use such a device. We support the mandatory Verbs API as well as the base memory management extensions (Local Inv, Send with Inv and Fast Register Work Requests). We currently support both Reliable Connected and Unreliable Datagram QPs, but do not support Shared Receive Queues (SRQs).

We also support the following types of Work Requests:
 o Send/Receive (with or without Immediate Data)
 o RDMA Write (with or without Immediate Data)
 o RDMA Read
 o Local Invalidate
 o Send with Invalidate
 o Fast Register Work Requests

This version only adds support for version 1 of RoCE. We will add RoCEv2 support in a future patch. We do support registration of both MAC-based and IP-based GIDs. I have also created a git tree for our user-level driver [2].

Testing
=======
We have tested this internally for various types of Guest OS - Red Hat, CentOS, Ubuntu 12.04/14.04/16.04, Oracle Enterprise Linux, and SLES 12 - using backported versions of this driver. The tests included several runs of the performance tests (included with OFED), the Intel MPI PingPong benchmark on OpenMPI, and krping for FRWRs. Mellanox has been kind enough to test the backported version of the driver internally on their hardware using a VMware-provided ESX build.

I have also applied and tested this with Doug's k.o/for-4.9 branch (commit 5603910b). Note that this patch series should be applied all together; I split out the commits so that it may be easier to review.

PVRDMA Resources
================
[1] OFA Workshop Presentation -
    https://openfabrics.org/images/eventpresos/2016presentations/102parardma.pdf
[2] Libpvrdma User-level library -
    http://git.openfabrics.org/?p=~aditr/libpvrdma.git;a=summary

Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Reviewed-by: George Zhang <georgezhang@vmware.com>
Reviewed-by: Aditya Sarwade <asarwade@vmware.com>
Reviewed-by: Bryan Tan <bryantan@vmware.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
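To illustrate the point above about reusing the existing Verbs API: a guest application needs nothing PVRDMA-specific. The sketch below is not part of this series; it is a minimal, hypothetical libibverbs example (error handling mostly omitted, queue sizes arbitrary) that opens the first RDMA device visible in the VM, allocates a PD and a CQ, and creates an RC QP. Link with -libverbs.

    #include <stdio.h>
    #include <infiniband/verbs.h>

    int main(void)
    {
            int num = 0;
            struct ibv_device **list = ibv_get_device_list(&num);

            if (!list || num == 0) {
                    fprintf(stderr, "no RDMA devices found\n");
                    return 1;
            }

            /* In a VM this is typically the PVRDMA function of the device. */
            struct ibv_context *ctx = ibv_open_device(list[0]);
            if (!ctx)
                    return 1;

            struct ibv_pd *pd = ibv_alloc_pd(ctx);
            struct ibv_cq *cq = ibv_create_cq(ctx, 64, NULL, NULL, 0);

            struct ibv_qp_init_attr attr = {
                    .send_cq = cq,
                    .recv_cq = cq,
                    .qp_type = IBV_QPT_RC, /* RC and UD QPs are supported. */
                    .cap = {
                            .max_send_wr = 16, .max_recv_wr = 16,
                            .max_send_sge = 1, .max_recv_sge = 1,
                    },
            };
            struct ibv_qp *qp = ibv_create_qp(pd, &attr);

            printf("%s: created RC QP 0x%x\n",
                   ibv_get_device_name(list[0]), qp ? qp->qp_num : 0);

            if (qp)
                    ibv_destroy_qp(qp);
            ibv_destroy_cq(cq);
            ibv_dealloc_pd(pd);
            ibv_close_device(ctx);
            ibv_free_device_list(list);
            return 0;
    }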
-rw-r--r--  MAINTAINERS | 7
-rw-r--r--  drivers/infiniband/Kconfig | 1
-rw-r--r--  drivers/infiniband/hw/Makefile | 1
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/Kconfig | 7
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/Makefile | 3
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 474
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c | 119
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 425
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 586
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c | 127
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 1211
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c | 304
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 334
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 972
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h | 131
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 579
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 436
-rw-r--r--  include/uapi/rdma/Kbuild | 1
-rw-r--r--  include/uapi/rdma/vmw_pvrdma-abi.h | 289
19 files changed, 6007 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 3d838cf49f81..75a68f079188 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12928,6 +12928,13 @@ S: Maintained
 F:	drivers/scsi/vmw_pvscsi.c
 F:	drivers/scsi/vmw_pvscsi.h
 
+VMWARE PVRDMA DRIVER
+M:	Adit Ranadive <aditr@vmware.com>
+M:	VMware PV-Drivers <pv-drivers@vmware.com>
+L:	linux-rdma@vger.kernel.org
+S:	Maintained
+F:	drivers/infiniband/hw/vmw_pvrdma/
+
 VOLTAGE AND CURRENT REGULATOR FRAMEWORK
 M:	Liam Girdwood <lgirdwood@gmail.com>
 M:	Mark Brown <broonie@kernel.org>
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index fb3fb89640e5..670917387eda 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -73,6 +73,7 @@ source "drivers/infiniband/hw/mlx4/Kconfig"
 source "drivers/infiniband/hw/mlx5/Kconfig"
 source "drivers/infiniband/hw/nes/Kconfig"
 source "drivers/infiniband/hw/ocrdma/Kconfig"
+source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
 source "drivers/infiniband/hw/usnic/Kconfig"
 source "drivers/infiniband/hw/hns/Kconfig"
 
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index e7a5ed9f6f3f..ed553de2ca12 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
 obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
 obj-$(CONFIG_INFINIBAND_NES) += nes/
 obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
+obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/
 obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
 obj-$(CONFIG_INFINIBAND_HNS) += hns/
diff --git a/drivers/infiniband/hw/vmw_pvrdma/Kconfig b/drivers/infiniband/hw/vmw_pvrdma/Kconfig
new file mode 100644
index 000000000000..5a9790ac0ede
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/Kconfig
@@ -0,0 +1,7 @@
1config INFINIBAND_VMWARE_PVRDMA
2 tristate "VMware Paravirtualized RDMA Driver"
3 depends on NETDEVICES && ETHERNET && PCI && INET && VMXNET3
4 ---help---
5 This driver provides low-level support for VMware Paravirtual
6 RDMA adapter. It interacts with the VMXNet3 driver to provide
7 Ethernet capabilities.
diff --git a/drivers/infiniband/hw/vmw_pvrdma/Makefile b/drivers/infiniband/hw/vmw_pvrdma/Makefile
new file mode 100644
index 000000000000..0194ed19f542
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma.o
2
3vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_verbs.o
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
new file mode 100644
index 000000000000..71e1d55d69d6
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -0,0 +1,474 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#ifndef __PVRDMA_H__
47#define __PVRDMA_H__
48
49#include <linux/compiler.h>
50#include <linux/interrupt.h>
51#include <linux/list.h>
52#include <linux/mutex.h>
53#include <linux/pci.h>
54#include <linux/semaphore.h>
55#include <linux/workqueue.h>
56#include <rdma/ib_umem.h>
57#include <rdma/ib_verbs.h>
58#include <rdma/vmw_pvrdma-abi.h>
59
60#include "pvrdma_ring.h"
61#include "pvrdma_dev_api.h"
62#include "pvrdma_verbs.h"
63
64/* NOT the same as BIT_MASK(). */
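/*
 * PVRDMA_MASK(n) evaluates to ((n << 1) - 1): for a power-of-two n it keeps
 * bit n and every lower bit (e.g. PVRDMA_MASK(0x8) == 0xf). It is used below
 * to clamp flag words to the bits the device defines.
 */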
65#define PVRDMA_MASK(n) ((n << 1) - 1)
66
67/*
68 * VMware PVRDMA PCI device id.
69 */
70#define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820
71
72struct pvrdma_dev;
73
74struct pvrdma_page_dir {
75 dma_addr_t dir_dma;
76 u64 *dir;
77 int ntables;
78 u64 **tables;
79 u64 npages;
80 void **pages;
81};
82
83struct pvrdma_cq {
84 struct ib_cq ibcq;
85 int offset;
86 spinlock_t cq_lock; /* Poll lock. */
87 struct pvrdma_uar_map *uar;
88 struct ib_umem *umem;
89 struct pvrdma_ring_state *ring_state;
90 struct pvrdma_page_dir pdir;
91 u32 cq_handle;
92 bool is_kernel;
93 atomic_t refcnt;
94 wait_queue_head_t wait;
95};
96
97struct pvrdma_id_table {
98 u32 last;
99 u32 top;
100 u32 max;
101 u32 mask;
102 spinlock_t lock; /* Table lock. */
103 unsigned long *table;
104};
105
106struct pvrdma_uar_map {
107 unsigned long pfn;
108 void __iomem *map;
109 int index;
110};
111
112struct pvrdma_uar_table {
113 struct pvrdma_id_table tbl;
114 int size;
115};
116
117struct pvrdma_ucontext {
118 struct ib_ucontext ibucontext;
119 struct pvrdma_dev *dev;
120 struct pvrdma_uar_map uar;
121 u64 ctx_handle;
122};
123
124struct pvrdma_pd {
125 struct ib_pd ibpd;
126 u32 pdn;
127 u32 pd_handle;
128 int privileged;
129};
130
131struct pvrdma_mr {
132 u32 mr_handle;
133 u64 iova;
134 u64 size;
135};
136
137struct pvrdma_user_mr {
138 struct ib_mr ibmr;
139 struct ib_umem *umem;
140 struct pvrdma_mr mmr;
141 struct pvrdma_page_dir pdir;
142 u64 *pages;
143 u32 npages;
144 u32 max_pages;
145 u32 page_shift;
146};
147
148struct pvrdma_wq {
149 struct pvrdma_ring *ring;
150 spinlock_t lock; /* Work queue lock. */
151 int wqe_cnt;
152 int wqe_size;
153 int max_sg;
154 int offset;
155};
156
157struct pvrdma_ah {
158 struct ib_ah ibah;
159 struct pvrdma_av av;
160};
161
162struct pvrdma_qp {
163 struct ib_qp ibqp;
164 u32 qp_handle;
165 u32 qkey;
166 struct pvrdma_wq sq;
167 struct pvrdma_wq rq;
168 struct ib_umem *rumem;
169 struct ib_umem *sumem;
170 struct pvrdma_page_dir pdir;
171 int npages;
172 int npages_send;
173 int npages_recv;
174 u32 flags;
175 u8 port;
176 u8 state;
177 bool is_kernel;
178 struct mutex mutex; /* QP state mutex. */
179 atomic_t refcnt;
180 wait_queue_head_t wait;
181};
182
183struct pvrdma_dev {
184 /* PCI device-related information. */
185 struct ib_device ib_dev;
186 struct pci_dev *pdev;
187 void __iomem *regs;
188 struct pvrdma_device_shared_region *dsr; /* Shared region pointer */
189 dma_addr_t dsrbase; /* Shared region base address */
190 void *cmd_slot;
191 void *resp_slot;
192 unsigned long flags;
193 struct list_head device_link;
194
195 /* Locking and interrupt information. */
196 spinlock_t cmd_lock; /* Command lock. */
197 struct semaphore cmd_sema;
198 struct completion cmd_done;
199 struct {
200 enum pvrdma_intr_type type; /* Intr type */
201 struct msix_entry msix_entry[PVRDMA_MAX_INTERRUPTS];
202 irq_handler_t handler[PVRDMA_MAX_INTERRUPTS];
203 u8 enabled[PVRDMA_MAX_INTERRUPTS];
204 u8 size;
205 } intr;
206
207 /* RDMA-related device information. */
208 union ib_gid *sgid_tbl;
209 struct pvrdma_ring_state *async_ring_state;
210 struct pvrdma_page_dir async_pdir;
211 struct pvrdma_ring_state *cq_ring_state;
212 struct pvrdma_page_dir cq_pdir;
213 struct pvrdma_cq **cq_tbl;
214 spinlock_t cq_tbl_lock;
215 struct pvrdma_qp **qp_tbl;
216 spinlock_t qp_tbl_lock;
217 struct pvrdma_uar_table uar_table;
218 struct pvrdma_uar_map driver_uar;
219 __be64 sys_image_guid;
220 spinlock_t desc_lock; /* Device modification lock. */
221 u32 port_cap_mask;
222 struct mutex port_mutex; /* Port modification mutex. */
223 bool ib_active;
224 atomic_t num_qps;
225 atomic_t num_cqs;
226 atomic_t num_pds;
227 atomic_t num_ahs;
228
229 /* Network device information. */
230 struct net_device *netdev;
231 struct notifier_block nb_netdev;
232};
233
234struct pvrdma_netdevice_work {
235 struct work_struct work;
236 struct net_device *event_netdev;
237 unsigned long event;
238};
239
240static inline struct pvrdma_dev *to_vdev(struct ib_device *ibdev)
241{
242 return container_of(ibdev, struct pvrdma_dev, ib_dev);
243}
244
245static inline struct
246pvrdma_ucontext *to_vucontext(struct ib_ucontext *ibucontext)
247{
248 return container_of(ibucontext, struct pvrdma_ucontext, ibucontext);
249}
250
251static inline struct pvrdma_pd *to_vpd(struct ib_pd *ibpd)
252{
253 return container_of(ibpd, struct pvrdma_pd, ibpd);
254}
255
256static inline struct pvrdma_cq *to_vcq(struct ib_cq *ibcq)
257{
258 return container_of(ibcq, struct pvrdma_cq, ibcq);
259}
260
261static inline struct pvrdma_user_mr *to_vmr(struct ib_mr *ibmr)
262{
263 return container_of(ibmr, struct pvrdma_user_mr, ibmr);
264}
265
266static inline struct pvrdma_qp *to_vqp(struct ib_qp *ibqp)
267{
268 return container_of(ibqp, struct pvrdma_qp, ibqp);
269}
270
271static inline struct pvrdma_ah *to_vah(struct ib_ah *ibah)
272{
273 return container_of(ibah, struct pvrdma_ah, ibah);
274}
275
276static inline void pvrdma_write_reg(struct pvrdma_dev *dev, u32 reg, u32 val)
277{
278 writel(cpu_to_le32(val), dev->regs + reg);
279}
280
281static inline u32 pvrdma_read_reg(struct pvrdma_dev *dev, u32 reg)
282{
283 return le32_to_cpu(readl(dev->regs + reg));
284}
285
286static inline void pvrdma_write_uar_cq(struct pvrdma_dev *dev, u32 val)
287{
288 writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_CQ_OFFSET);
289}
290
291static inline void pvrdma_write_uar_qp(struct pvrdma_dev *dev, u32 val)
292{
293 writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_QP_OFFSET);
294}
295
296static inline void *pvrdma_page_dir_get_ptr(struct pvrdma_page_dir *pdir,
297 u64 offset)
298{
299 return pdir->pages[offset / PAGE_SIZE] + (offset % PAGE_SIZE);
300}
301
302static inline enum pvrdma_mtu ib_mtu_to_pvrdma(enum ib_mtu mtu)
303{
304 return (enum pvrdma_mtu)mtu;
305}
306
307static inline enum ib_mtu pvrdma_mtu_to_ib(enum pvrdma_mtu mtu)
308{
309 return (enum ib_mtu)mtu;
310}
311
312static inline enum pvrdma_port_state ib_port_state_to_pvrdma(
313 enum ib_port_state state)
314{
315 return (enum pvrdma_port_state)state;
316}
317
318static inline enum ib_port_state pvrdma_port_state_to_ib(
319 enum pvrdma_port_state state)
320{
321 return (enum ib_port_state)state;
322}
323
324static inline int ib_port_cap_flags_to_pvrdma(int flags)
325{
326 return flags & PVRDMA_MASK(PVRDMA_PORT_CAP_FLAGS_MAX);
327}
328
329static inline int pvrdma_port_cap_flags_to_ib(int flags)
330{
331 return flags;
332}
333
334static inline enum pvrdma_port_width ib_port_width_to_pvrdma(
335 enum ib_port_width width)
336{
337 return (enum pvrdma_port_width)width;
338}
339
340static inline enum ib_port_width pvrdma_port_width_to_ib(
341 enum pvrdma_port_width width)
342{
343 return (enum ib_port_width)width;
344}
345
346static inline enum pvrdma_port_speed ib_port_speed_to_pvrdma(
347 enum ib_port_speed speed)
348{
349 return (enum pvrdma_port_speed)speed;
350}
351
352static inline enum ib_port_speed pvrdma_port_speed_to_ib(
353 enum pvrdma_port_speed speed)
354{
355 return (enum ib_port_speed)speed;
356}
357
358static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask)
359{
360 return attr_mask;
361}
362
363static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask)
364{
365 return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX);
366}
367
368static inline enum pvrdma_mig_state ib_mig_state_to_pvrdma(
369 enum ib_mig_state state)
370{
371 return (enum pvrdma_mig_state)state;
372}
373
374static inline enum ib_mig_state pvrdma_mig_state_to_ib(
375 enum pvrdma_mig_state state)
376{
377 return (enum ib_mig_state)state;
378}
379
380static inline int ib_access_flags_to_pvrdma(int flags)
381{
382 return flags;
383}
384
385static inline int pvrdma_access_flags_to_ib(int flags)
386{
387 return flags & PVRDMA_MASK(PVRDMA_ACCESS_FLAGS_MAX);
388}
389
390static inline enum pvrdma_qp_type ib_qp_type_to_pvrdma(enum ib_qp_type type)
391{
392 return (enum pvrdma_qp_type)type;
393}
394
395static inline enum ib_qp_type pvrdma_qp_type_to_ib(enum pvrdma_qp_type type)
396{
397 return (enum ib_qp_type)type;
398}
399
400static inline enum pvrdma_qp_state ib_qp_state_to_pvrdma(enum ib_qp_state state)
401{
402 return (enum pvrdma_qp_state)state;
403}
404
405static inline enum ib_qp_state pvrdma_qp_state_to_ib(enum pvrdma_qp_state state)
406{
407 return (enum ib_qp_state)state;
408}
409
410static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op)
411{
412 return (enum pvrdma_wr_opcode)op;
413}
414
415static inline enum ib_wc_status pvrdma_wc_status_to_ib(
416 enum pvrdma_wc_status status)
417{
418 return (enum ib_wc_status)status;
419}
420
421static inline int pvrdma_wc_opcode_to_ib(int opcode)
422{
423 return opcode;
424}
425
426static inline int pvrdma_wc_flags_to_ib(int flags)
427{
428 return flags;
429}
430
431static inline int ib_send_flags_to_pvrdma(int flags)
432{
433 return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX);
434}
435
436void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst,
437 const struct pvrdma_qp_cap *src);
438void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst,
439 const struct ib_qp_cap *src);
440void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src);
441void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src);
442void pvrdma_global_route_to_ib(struct ib_global_route *dst,
443 const struct pvrdma_global_route *src);
444void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
445 const struct ib_global_route *src);
446void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
447 const struct pvrdma_ah_attr *src);
448void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
449 const struct ib_ah_attr *src);
450
451int pvrdma_uar_table_init(struct pvrdma_dev *dev);
452void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev);
453
454int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar);
455void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar);
456
457void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq);
458
459int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir,
460 u64 npages, bool alloc_pages);
461void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev,
462 struct pvrdma_page_dir *pdir);
463int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx,
464 dma_addr_t daddr);
465int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
466 struct ib_umem *umem, u64 offset);
467dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx);
468int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir,
469 u64 *page_list, int num_pages);
470
471int pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req,
472 union pvrdma_cmd_resp *rsp, unsigned resp_code);
473
474#endif /* __PVRDMA_H__ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c
new file mode 100644
index 000000000000..4a78c537d8a1
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c
@@ -0,0 +1,119 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include <linux/list.h>
47
48#include "pvrdma.h"
49
50#define PVRDMA_CMD_TIMEOUT 10000 /* ms */
51
52static inline int pvrdma_cmd_recv(struct pvrdma_dev *dev,
53 union pvrdma_cmd_resp *resp,
54 unsigned resp_code)
55{
56 int err;
57
58 dev_dbg(&dev->pdev->dev, "receive response from device\n");
59
60 err = wait_for_completion_interruptible_timeout(&dev->cmd_done,
61 msecs_to_jiffies(PVRDMA_CMD_TIMEOUT));
62 if (err == 0 || err == -ERESTARTSYS) {
63 dev_warn(&dev->pdev->dev,
64 "completion timeout or interrupted\n");
65 return -ETIMEDOUT;
66 }
67
68 spin_lock(&dev->cmd_lock);
69 memcpy(resp, dev->resp_slot, sizeof(*resp));
70 spin_unlock(&dev->cmd_lock);
71
72 if (resp->hdr.ack != resp_code) {
73 dev_warn(&dev->pdev->dev,
74 "unknown response %#x expected %#x\n",
75 resp->hdr.ack, resp_code);
76 return -EFAULT;
77 }
78
79 return 0;
80}
81
82int
83pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req,
84 union pvrdma_cmd_resp *resp, unsigned resp_code)
85{
86 int err;
87
88 dev_dbg(&dev->pdev->dev, "post request to device\n");
89
90	/* Serialization */
91 down(&dev->cmd_sema);
92
93 BUILD_BUG_ON(sizeof(union pvrdma_cmd_req) !=
94 sizeof(struct pvrdma_cmd_modify_qp));
95
96 spin_lock(&dev->cmd_lock);
97 memcpy(dev->cmd_slot, req, sizeof(*req));
98 spin_unlock(&dev->cmd_lock);
99
100 init_completion(&dev->cmd_done);
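	/* Writing PVRDMA_REG_REQUEST asks the device to process the command
	 * slot; the reply is placed in resp_slot and cmd_done is completed
	 * from the response interrupt path (consumed in pvrdma_cmd_recv()). */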
101 pvrdma_write_reg(dev, PVRDMA_REG_REQUEST, 0);
102
103 /* Make sure the request is written before reading status. */
104 mb();
105
106 err = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
107 if (err == 0) {
108 if (resp != NULL)
109 err = pvrdma_cmd_recv(dev, resp, resp_code);
110 } else {
111 dev_warn(&dev->pdev->dev,
112 "failed to write request error reg: %d\n", err);
113 err = -EFAULT;
114 }
115
116 up(&dev->cmd_sema);
117
118 return err;
119}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
new file mode 100644
index 000000000000..e429ca5b16aa
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -0,0 +1,425 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include <asm/page.h>
47#include <linux/io.h>
48#include <linux/wait.h>
49#include <rdma/ib_addr.h>
50#include <rdma/ib_smi.h>
51#include <rdma/ib_user_verbs.h>
52
53#include "pvrdma.h"
54
55/**
56 * pvrdma_req_notify_cq - request notification for a completion queue
57 * @ibcq: the completion queue
58 * @notify_flags: notification flags
59 *
60 * @return: 0 for success.
61 */
62int pvrdma_req_notify_cq(struct ib_cq *ibcq,
63 enum ib_cq_notify_flags notify_flags)
64{
65 struct pvrdma_dev *dev = to_vdev(ibcq->device);
66 struct pvrdma_cq *cq = to_vcq(ibcq);
67 u32 val = cq->cq_handle;
68
69 val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
70 PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;
71
72 pvrdma_write_uar_cq(dev, val);
73
74 return 0;
75}
76
77/**
78 * pvrdma_create_cq - create completion queue
79 * @ibdev: the device
80 * @attr: completion queue attributes
81 * @context: user context
82 * @udata: user data
83 *
84 * @return: ib_cq completion queue pointer on success,
85 * otherwise returns negative errno.
86 */
87struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
88 const struct ib_cq_init_attr *attr,
89 struct ib_ucontext *context,
90 struct ib_udata *udata)
91{
92 int entries = attr->cqe;
93 struct pvrdma_dev *dev = to_vdev(ibdev);
94 struct pvrdma_cq *cq;
95 int ret;
96 int npages;
97 unsigned long flags;
98 union pvrdma_cmd_req req;
99 union pvrdma_cmd_resp rsp;
100 struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
101 struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
102 struct pvrdma_create_cq ucmd;
103
104 BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);
105
106 entries = roundup_pow_of_two(entries);
107 if (entries < 1 || entries > dev->dsr->caps.max_cqe)
108 return ERR_PTR(-EINVAL);
109
110 if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
111 return ERR_PTR(-ENOMEM);
112
113 cq = kzalloc(sizeof(*cq), GFP_KERNEL);
114 if (!cq) {
115 atomic_dec(&dev->num_cqs);
116 return ERR_PTR(-ENOMEM);
117 }
118
119 cq->ibcq.cqe = entries;
120
121 if (context) {
122 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
123 ret = -EFAULT;
124 goto err_cq;
125 }
126
127 cq->umem = ib_umem_get(context, ucmd.buf_addr, ucmd.buf_size,
128 IB_ACCESS_LOCAL_WRITE, 1);
129 if (IS_ERR(cq->umem)) {
130 ret = PTR_ERR(cq->umem);
131 goto err_cq;
132 }
133
134 npages = ib_umem_page_count(cq->umem);
135 } else {
136 cq->is_kernel = true;
137
138 /* One extra page for shared ring state */
139 npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
140 PAGE_SIZE - 1) / PAGE_SIZE;
141
142 /* Skip header page. */
143 cq->offset = PAGE_SIZE;
144 }
145
146 if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
147 dev_warn(&dev->pdev->dev,
148 "overflow pages in completion queue\n");
149 ret = -EINVAL;
150 goto err_umem;
151 }
152
153 ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
154 if (ret) {
155 dev_warn(&dev->pdev->dev,
156 "could not allocate page directory\n");
157 goto err_umem;
158 }
159
160 /* Ring state is always the first page. Set in library for user cq. */
161 if (cq->is_kernel)
162 cq->ring_state = cq->pdir.pages[0];
163 else
164 pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
165
166 atomic_set(&cq->refcnt, 1);
167 init_waitqueue_head(&cq->wait);
168 spin_lock_init(&cq->cq_lock);
169
170 memset(cmd, 0, sizeof(*cmd));
171 cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
172 cmd->nchunks = npages;
173 cmd->ctx_handle = (context) ?
174 (u64)to_vucontext(context)->ctx_handle : 0;
175 cmd->cqe = entries;
176 cmd->pdir_dma = cq->pdir.dir_dma;
177 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
178 if (ret < 0) {
179 dev_warn(&dev->pdev->dev,
180 "could not create completion queue, error: %d\n", ret);
181 goto err_page_dir;
182 }
183
184 cq->ibcq.cqe = resp->cqe;
185 cq->cq_handle = resp->cq_handle;
186 spin_lock_irqsave(&dev->cq_tbl_lock, flags);
187 dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
188 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
189
190 if (context) {
191 cq->uar = &(to_vucontext(context)->uar);
192
193 /* Copy udata back. */
194 if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) {
195 dev_warn(&dev->pdev->dev,
196 "failed to copy back udata\n");
197 pvrdma_destroy_cq(&cq->ibcq);
198 return ERR_PTR(-EINVAL);
199 }
200 }
201
202 return &cq->ibcq;
203
204err_page_dir:
205 pvrdma_page_dir_cleanup(dev, &cq->pdir);
206err_umem:
207 if (context)
208 ib_umem_release(cq->umem);
209err_cq:
210 atomic_dec(&dev->num_cqs);
211 kfree(cq);
212
213 return ERR_PTR(ret);
214}
215
216static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
217{
218 atomic_dec(&cq->refcnt);
219 wait_event(cq->wait, !atomic_read(&cq->refcnt));
220
221 if (!cq->is_kernel)
222 ib_umem_release(cq->umem);
223
224 pvrdma_page_dir_cleanup(dev, &cq->pdir);
225 kfree(cq);
226}
227
228/**
229 * pvrdma_destroy_cq - destroy completion queue
230 * @cq: the completion queue to destroy.
231 *
232 * @return: 0 for success.
233 */
234int pvrdma_destroy_cq(struct ib_cq *cq)
235{
236 struct pvrdma_cq *vcq = to_vcq(cq);
237 union pvrdma_cmd_req req;
238 struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq;
239 struct pvrdma_dev *dev = to_vdev(cq->device);
240 unsigned long flags;
241 int ret;
242
243 memset(cmd, 0, sizeof(*cmd));
244 cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ;
245 cmd->cq_handle = vcq->cq_handle;
246
247 ret = pvrdma_cmd_post(dev, &req, NULL, 0);
248 if (ret < 0)
249 dev_warn(&dev->pdev->dev,
250 "could not destroy completion queue, error: %d\n",
251 ret);
252
253 /* free cq's resources */
254 spin_lock_irqsave(&dev->cq_tbl_lock, flags);
255 dev->cq_tbl[vcq->cq_handle] = NULL;
256 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
257
258 pvrdma_free_cq(dev, vcq);
259 atomic_dec(&dev->num_cqs);
260
261 return ret;
262}
263
264/**
265 * pvrdma_modify_cq - modify the CQ moderation parameters
266 * @cq: the CQ to modify
267 * @cq_count: number of CQEs that will trigger an event
268 * @cq_period: max period of time in usec before triggering an event
269 *
270 * @return: -EOPNOTSUPP as CQ moderation is not supported.
271 */
272int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
273{
274 return -EOPNOTSUPP;
275}
276
277static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
278{
279 return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
280 &cq->pdir,
281 cq->offset +
282 sizeof(struct pvrdma_cqe) * i);
283}
284
285void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
286{
287 int head;
288 int has_data;
289
290 if (!cq->is_kernel)
291 return;
292
293 /* Lock held */
294 has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
295 cq->ibcq.cqe, &head);
296 if (unlikely(has_data > 0)) {
297 int items;
298 int curr;
299 int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail,
300 cq->ibcq.cqe);
301 struct pvrdma_cqe *cqe;
302 struct pvrdma_cqe *curr_cqe;
303
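		/* Scan backwards from the producer tail: completions belonging
		 * to other QPs are compacted toward the tail, while this QP's
		 * completions are discarded by advancing the consumer head. */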
304 items = (tail > head) ? (tail - head) :
305 (cq->ibcq.cqe - head + tail);
306 curr = --tail;
307 while (items-- > 0) {
308 if (curr < 0)
309 curr = cq->ibcq.cqe - 1;
310 if (tail < 0)
311 tail = cq->ibcq.cqe - 1;
312 curr_cqe = get_cqe(cq, curr);
313 if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) {
314 if (curr != tail) {
315 cqe = get_cqe(cq, tail);
316 *cqe = *curr_cqe;
317 }
318 tail--;
319 } else {
320 pvrdma_idx_ring_inc(
321 &cq->ring_state->rx.cons_head,
322 cq->ibcq.cqe);
323 }
324 curr--;
325 }
326 }
327}
328
329static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp,
330 struct ib_wc *wc)
331{
332 struct pvrdma_dev *dev = to_vdev(cq->ibcq.device);
333 int has_data;
334 unsigned int head;
335 bool tried = false;
336 struct pvrdma_cqe *cqe;
337
338retry:
339 has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
340 cq->ibcq.cqe, &head);
341 if (has_data == 0) {
342 if (tried)
343 return -EAGAIN;
344
345 pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL);
346
347 tried = true;
348 goto retry;
349 } else if (has_data == PVRDMA_INVALID_IDX) {
350 dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
351 return -EAGAIN;
352 }
353
354 cqe = get_cqe(cq, head);
355
356 /* Ensure cqe is valid. */
357 rmb();
358 if (dev->qp_tbl[cqe->qp & 0xffff])
359 *cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff];
360 else
361 return -EAGAIN;
362
363 wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode);
364 wc->status = pvrdma_wc_status_to_ib(cqe->status);
365 wc->wr_id = cqe->wr_id;
366 wc->qp = &(*cur_qp)->ibqp;
367 wc->byte_len = cqe->byte_len;
368 wc->ex.imm_data = cqe->imm_data;
369 wc->src_qp = cqe->src_qp;
370 wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags);
371 wc->pkey_index = cqe->pkey_index;
372 wc->slid = cqe->slid;
373 wc->sl = cqe->sl;
374 wc->dlid_path_bits = cqe->dlid_path_bits;
375 wc->port_num = cqe->port_num;
376 wc->vendor_err = 0;
377
378 /* Update shared ring state */
379 pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);
380
381 return 0;
382}
383
384/**
385 * pvrdma_poll_cq - poll for work completion queue entries
386 * @ibcq: completion queue
387 * @num_entries: the maximum number of entries
388 * @wc: pointer to work completion array
389 *
390 * @return: number of polled completion entries
391 */
392int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
393{
394 struct pvrdma_cq *cq = to_vcq(ibcq);
395 struct pvrdma_qp *cur_qp = NULL;
396 unsigned long flags;
397 int npolled;
398
399 if (num_entries < 1 || wc == NULL)
400 return 0;
401
402 spin_lock_irqsave(&cq->cq_lock, flags);
403 for (npolled = 0; npolled < num_entries; ++npolled) {
404 if (pvrdma_poll_one(cq, &cur_qp, wc + npolled))
405 break;
406 }
407
408 spin_unlock_irqrestore(&cq->cq_lock, flags);
409
410 /* Ensure we do not return errors from poll_cq */
411 return npolled;
412}
413
414/**
415 * pvrdma_resize_cq - resize CQ
416 * @ibcq: the completion queue
417 * @entries: CQ entries
418 * @udata: user data
419 *
420 * @return: -EOPNOTSUPP as CQ resize is not supported.
421 */
422int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
423{
424 return -EOPNOTSUPP;
425}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
new file mode 100644
index 000000000000..c06768635d65
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
@@ -0,0 +1,586 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#ifndef __PVRDMA_DEV_API_H__
47#define __PVRDMA_DEV_API_H__
48
49#include <linux/types.h>
50
51#include "pvrdma_verbs.h"
52
53#define PVRDMA_VERSION 17
54#define PVRDMA_BOARD_ID 1
55#define PVRDMA_REV_ID 1
56
57/*
58 * Masks and accessors for page directory, which is a two-level lookup:
59 * page directory -> page table -> page. Only one directory for now, but we
60 * could expand that easily. 9 bits for tables, 9 bits for pages, gives one
61 * gigabyte for memory regions and so forth.
62 */
63
64#define PVRDMA_PDIR_SHIFT 18
65#define PVRDMA_PTABLE_SHIFT 9
66#define PVRDMA_PAGE_DIR_DIR(x) (((x) >> PVRDMA_PDIR_SHIFT) & 0x1)
67#define PVRDMA_PAGE_DIR_TABLE(x) (((x) >> PVRDMA_PTABLE_SHIFT) & 0x1ff)
68#define PVRDMA_PAGE_DIR_PAGE(x) ((x) & 0x1ff)
69#define PVRDMA_PAGE_DIR_MAX_PAGES (1 * 512 * 512)
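/* With 4 KiB pages, 1 dir * 512 tables * 512 pages * 4 KiB = 1 GiB, matching
 * the "one gigabyte" figure in the comment above. */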
70#define PVRDMA_MAX_FAST_REG_PAGES 128
71
72/*
73 * Max MSI-X vectors.
74 */
75
76#define PVRDMA_MAX_INTERRUPTS 3
77
78/* Register offsets within PCI resource on BAR1. */
79#define PVRDMA_REG_VERSION 0x00 /* R: Version of device. */
80#define PVRDMA_REG_DSRLOW 0x04 /* W: Device shared region low PA. */
81#define PVRDMA_REG_DSRHIGH 0x08 /* W: Device shared region high PA. */
82#define PVRDMA_REG_CTL 0x0c /* W: PVRDMA_DEVICE_CTL */
83#define PVRDMA_REG_REQUEST 0x10 /* W: Indicate device request. */
84#define PVRDMA_REG_ERR 0x14 /* R: Device error. */
85#define PVRDMA_REG_ICR 0x18 /* R: Interrupt cause. */
86#define PVRDMA_REG_IMR 0x1c /* R/W: Interrupt mask. */
87#define PVRDMA_REG_MACL 0x20 /* R/W: MAC address low. */
88#define PVRDMA_REG_MACH 0x24 /* R/W: MAC address high. */
89
90/* Object flags. */
91#define PVRDMA_CQ_FLAG_ARMED_SOL BIT(0) /* Armed for solicited-only. */
92#define PVRDMA_CQ_FLAG_ARMED BIT(1) /* Armed. */
93#define PVRDMA_MR_FLAG_DMA BIT(0) /* DMA region. */
94#define PVRDMA_MR_FLAG_FRMR BIT(1) /* Fast reg memory region. */
95
96/*
97 * Atomic operation capability (masked versions are extended atomic
98 * operations).
99 */
100
101#define PVRDMA_ATOMIC_OP_COMP_SWAP BIT(0) /* Compare and swap. */
102#define PVRDMA_ATOMIC_OP_FETCH_ADD BIT(1) /* Fetch and add. */
103#define PVRDMA_ATOMIC_OP_MASK_COMP_SWAP BIT(2) /* Masked compare and swap. */
104#define PVRDMA_ATOMIC_OP_MASK_FETCH_ADD BIT(3) /* Masked fetch and add. */
105
106/*
107 * Base Memory Management Extension flags to support Fast Reg Memory Regions
108 * and Fast Reg Work Requests. Each flag represents a verb operation and we
109 * must support all of them to qualify for the BMME device cap.
110 */
111
112#define PVRDMA_BMME_FLAG_LOCAL_INV BIT(0) /* Local Invalidate. */
113#define PVRDMA_BMME_FLAG_REMOTE_INV BIT(1) /* Remote Invalidate. */
114#define PVRDMA_BMME_FLAG_FAST_REG_WR BIT(2) /* Fast Reg Work Request. */
115
116/*
117 * GID types. The interpretation of the gid_types bit field in the device
118 * capabilities will depend on the device mode. For now, the device only
119 * supports RoCE as mode, so only the different GID types for RoCE are
120 * defined.
121 */
122
123#define PVRDMA_GID_TYPE_FLAG_ROCE_V1 BIT(0)
124#define PVRDMA_GID_TYPE_FLAG_ROCE_V2 BIT(1)
125
126enum pvrdma_pci_resource {
127 PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */
128 PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. */
129 PVRDMA_PCI_RESOURCE_UAR, /* BAR2: UAR pages, MMIO, 64-bit. */
130 PVRDMA_PCI_RESOURCE_LAST, /* Last. */
131};
132
133enum pvrdma_device_ctl {
134 PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */
135 PVRDMA_DEVICE_CTL_QUIESCE, /* Quiesce device. */
136 PVRDMA_DEVICE_CTL_RESET, /* Reset device. */
137};
138
139enum pvrdma_intr_vector {
140 PVRDMA_INTR_VECTOR_RESPONSE, /* Command response. */
141 PVRDMA_INTR_VECTOR_ASYNC, /* Async events. */
142 PVRDMA_INTR_VECTOR_CQ, /* CQ notification. */
143 /* Additional CQ notification vectors. */
144};
145
146enum pvrdma_intr_cause {
147 PVRDMA_INTR_CAUSE_RESPONSE = (1 << PVRDMA_INTR_VECTOR_RESPONSE),
148 PVRDMA_INTR_CAUSE_ASYNC = (1 << PVRDMA_INTR_VECTOR_ASYNC),
149 PVRDMA_INTR_CAUSE_CQ = (1 << PVRDMA_INTR_VECTOR_CQ),
150};
151
152enum pvrdma_intr_type {
153 PVRDMA_INTR_TYPE_INTX, /* Legacy. */
154 PVRDMA_INTR_TYPE_MSI, /* MSI. */
155 PVRDMA_INTR_TYPE_MSIX, /* MSI-X. */
156};
157
158enum pvrdma_gos_bits {
159 PVRDMA_GOS_BITS_UNK, /* Unknown. */
160 PVRDMA_GOS_BITS_32, /* 32-bit. */
161 PVRDMA_GOS_BITS_64, /* 64-bit. */
162};
163
164enum pvrdma_gos_type {
165 PVRDMA_GOS_TYPE_UNK, /* Unknown. */
166 PVRDMA_GOS_TYPE_LINUX, /* Linux. */
167};
168
169enum pvrdma_device_mode {
170 PVRDMA_DEVICE_MODE_ROCE, /* RoCE. */
171 PVRDMA_DEVICE_MODE_IWARP, /* iWarp. */
172 PVRDMA_DEVICE_MODE_IB, /* InfiniBand. */
173};
174
175struct pvrdma_gos_info {
176 u32 gos_bits:2; /* W: PVRDMA_GOS_BITS_ */
177 u32 gos_type:4; /* W: PVRDMA_GOS_TYPE_ */
178 u32 gos_ver:16; /* W: Guest OS version. */
179 u32 gos_misc:10; /* W: Other. */
180 u32 pad; /* Pad to 8-byte alignment. */
181};
182
183struct pvrdma_device_caps {
184 u64 fw_ver; /* R: Query device. */
185 __be64 node_guid;
186 __be64 sys_image_guid;
187 u64 max_mr_size;
188 u64 page_size_cap;
189 u64 atomic_arg_sizes; /* EX verbs. */
190 u32 ex_comp_mask; /* EX verbs. */
191 u32 device_cap_flags2; /* EX verbs. */
192 u32 max_fa_bit_boundary; /* EX verbs. */
193 u32 log_max_atomic_inline_arg; /* EX verbs. */
194 u32 vendor_id;
195 u32 vendor_part_id;
196 u32 hw_ver;
197 u32 max_qp;
198 u32 max_qp_wr;
199 u32 device_cap_flags;
200 u32 max_sge;
201 u32 max_sge_rd;
202 u32 max_cq;
203 u32 max_cqe;
204 u32 max_mr;
205 u32 max_pd;
206 u32 max_qp_rd_atom;
207 u32 max_ee_rd_atom;
208 u32 max_res_rd_atom;
209 u32 max_qp_init_rd_atom;
210 u32 max_ee_init_rd_atom;
211 u32 max_ee;
212 u32 max_rdd;
213 u32 max_mw;
214 u32 max_raw_ipv6_qp;
215 u32 max_raw_ethy_qp;
216 u32 max_mcast_grp;
217 u32 max_mcast_qp_attach;
218 u32 max_total_mcast_qp_attach;
219 u32 max_ah;
220 u32 max_fmr;
221 u32 max_map_per_fmr;
222 u32 max_srq;
223 u32 max_srq_wr;
224 u32 max_srq_sge;
225 u32 max_uar;
226 u32 gid_tbl_len;
227 u16 max_pkeys;
228 u8 local_ca_ack_delay;
229 u8 phys_port_cnt;
230 u8 mode; /* PVRDMA_DEVICE_MODE_ */
231 u8 atomic_ops; /* PVRDMA_ATOMIC_OP_* bits */
232 u8 bmme_flags; /* FRWR Mem Mgmt Extensions */
233 u8 gid_types; /* PVRDMA_GID_TYPE_FLAG_ */
234 u8 reserved[4];
235};
236
237struct pvrdma_ring_page_info {
238 u32 num_pages; /* Num pages incl. header. */
239 u32 reserved; /* Reserved. */
240 u64 pdir_dma; /* Page directory PA. */
241};
242
243#pragma pack(push, 1)
244
245struct pvrdma_device_shared_region {
246 u32 driver_version; /* W: Driver version. */
247 u32 pad; /* Pad to 8-byte align. */
248 struct pvrdma_gos_info gos_info; /* W: Guest OS information. */
249 u64 cmd_slot_dma; /* W: Command slot address. */
250 u64 resp_slot_dma; /* W: Response slot address. */
251 struct pvrdma_ring_page_info async_ring_pages;
252 /* W: Async ring page info. */
253 struct pvrdma_ring_page_info cq_ring_pages;
254 /* W: CQ ring page info. */
255 u32 uar_pfn; /* W: UAR pageframe. */
256 u32 pad2; /* Pad to 8-byte align. */
257 struct pvrdma_device_caps caps; /* R: Device capabilities. */
258};
259
260#pragma pack(pop)
261
262/* Event types. Currently a 1:1 mapping with enum ib_event. */
263enum pvrdma_eqe_type {
264 PVRDMA_EVENT_CQ_ERR,
265 PVRDMA_EVENT_QP_FATAL,
266 PVRDMA_EVENT_QP_REQ_ERR,
267 PVRDMA_EVENT_QP_ACCESS_ERR,
268 PVRDMA_EVENT_COMM_EST,
269 PVRDMA_EVENT_SQ_DRAINED,
270 PVRDMA_EVENT_PATH_MIG,
271 PVRDMA_EVENT_PATH_MIG_ERR,
272 PVRDMA_EVENT_DEVICE_FATAL,
273 PVRDMA_EVENT_PORT_ACTIVE,
274 PVRDMA_EVENT_PORT_ERR,
275 PVRDMA_EVENT_LID_CHANGE,
276 PVRDMA_EVENT_PKEY_CHANGE,
277 PVRDMA_EVENT_SM_CHANGE,
278 PVRDMA_EVENT_SRQ_ERR,
279 PVRDMA_EVENT_SRQ_LIMIT_REACHED,
280 PVRDMA_EVENT_QP_LAST_WQE_REACHED,
281 PVRDMA_EVENT_CLIENT_REREGISTER,
282 PVRDMA_EVENT_GID_CHANGE,
283};
284
285/* Event queue element. */
286struct pvrdma_eqe {
287 u32 type; /* Event type. */
288 u32 info; /* Handle, other. */
289};
290
291/* CQ notification queue element. */
292struct pvrdma_cqne {
293 u32 info; /* Handle */
294};
295
296enum {
297 PVRDMA_CMD_FIRST,
298 PVRDMA_CMD_QUERY_PORT = PVRDMA_CMD_FIRST,
299 PVRDMA_CMD_QUERY_PKEY,
300 PVRDMA_CMD_CREATE_PD,
301 PVRDMA_CMD_DESTROY_PD,
302 PVRDMA_CMD_CREATE_MR,
303 PVRDMA_CMD_DESTROY_MR,
304 PVRDMA_CMD_CREATE_CQ,
305 PVRDMA_CMD_RESIZE_CQ,
306 PVRDMA_CMD_DESTROY_CQ,
307 PVRDMA_CMD_CREATE_QP,
308 PVRDMA_CMD_MODIFY_QP,
309 PVRDMA_CMD_QUERY_QP,
310 PVRDMA_CMD_DESTROY_QP,
311 PVRDMA_CMD_CREATE_UC,
312 PVRDMA_CMD_DESTROY_UC,
313 PVRDMA_CMD_CREATE_BIND,
314 PVRDMA_CMD_DESTROY_BIND,
315 PVRDMA_CMD_MAX,
316};
317
318enum {
319 PVRDMA_CMD_FIRST_RESP = (1 << 31),
320 PVRDMA_CMD_QUERY_PORT_RESP = PVRDMA_CMD_FIRST_RESP,
321 PVRDMA_CMD_QUERY_PKEY_RESP,
322 PVRDMA_CMD_CREATE_PD_RESP,
323 PVRDMA_CMD_DESTROY_PD_RESP_NOOP,
324 PVRDMA_CMD_CREATE_MR_RESP,
325 PVRDMA_CMD_DESTROY_MR_RESP_NOOP,
326 PVRDMA_CMD_CREATE_CQ_RESP,
327 PVRDMA_CMD_RESIZE_CQ_RESP,
328 PVRDMA_CMD_DESTROY_CQ_RESP_NOOP,
329 PVRDMA_CMD_CREATE_QP_RESP,
330 PVRDMA_CMD_MODIFY_QP_RESP,
331 PVRDMA_CMD_QUERY_QP_RESP,
332 PVRDMA_CMD_DESTROY_QP_RESP,
333 PVRDMA_CMD_CREATE_UC_RESP,
334 PVRDMA_CMD_DESTROY_UC_RESP_NOOP,
335 PVRDMA_CMD_CREATE_BIND_RESP_NOOP,
336 PVRDMA_CMD_DESTROY_BIND_RESP_NOOP,
337 PVRDMA_CMD_MAX_RESP,
338};
339
340struct pvrdma_cmd_hdr {
341 u64 response; /* Key for response lookup. */
342 u32 cmd; /* PVRDMA_CMD_ */
343 u32 reserved; /* Reserved. */
344};
345
346struct pvrdma_cmd_resp_hdr {
347 u64 response; /* From cmd hdr. */
348 u32 ack; /* PVRDMA_CMD_XXX_RESP */
349 u8 err; /* Error. */
350 u8 reserved[3]; /* Reserved. */
351};
352
353struct pvrdma_cmd_query_port {
354 struct pvrdma_cmd_hdr hdr;
355 u8 port_num;
356 u8 reserved[7];
357};
358
359struct pvrdma_cmd_query_port_resp {
360 struct pvrdma_cmd_resp_hdr hdr;
361 struct pvrdma_port_attr attrs;
362};
363
364struct pvrdma_cmd_query_pkey {
365 struct pvrdma_cmd_hdr hdr;
366 u8 port_num;
367 u8 index;
368 u8 reserved[6];
369};
370
371struct pvrdma_cmd_query_pkey_resp {
372 struct pvrdma_cmd_resp_hdr hdr;
373 u16 pkey;
374 u8 reserved[6];
375};
376
377struct pvrdma_cmd_create_uc {
378 struct pvrdma_cmd_hdr hdr;
379 u32 pfn; /* UAR page frame number */
380 u8 reserved[4];
381};
382
383struct pvrdma_cmd_create_uc_resp {
384 struct pvrdma_cmd_resp_hdr hdr;
385 u32 ctx_handle;
386 u8 reserved[4];
387};
388
389struct pvrdma_cmd_destroy_uc {
390 struct pvrdma_cmd_hdr hdr;
391 u32 ctx_handle;
392 u8 reserved[4];
393};
394
395struct pvrdma_cmd_create_pd {
396 struct pvrdma_cmd_hdr hdr;
397 u32 ctx_handle;
398 u8 reserved[4];
399};
400
401struct pvrdma_cmd_create_pd_resp {
402 struct pvrdma_cmd_resp_hdr hdr;
403 u32 pd_handle;
404 u8 reserved[4];
405};
406
407struct pvrdma_cmd_destroy_pd {
408 struct pvrdma_cmd_hdr hdr;
409 u32 pd_handle;
410 u8 reserved[4];
411};
412
413struct pvrdma_cmd_create_mr {
414 struct pvrdma_cmd_hdr hdr;
415 u64 start;
416 u64 length;
417 u64 pdir_dma;
418 u32 pd_handle;
419 u32 access_flags;
420 u32 flags;
421 u32 nchunks;
422};
423
424struct pvrdma_cmd_create_mr_resp {
425 struct pvrdma_cmd_resp_hdr hdr;
426 u32 mr_handle;
427 u32 lkey;
428 u32 rkey;
429 u8 reserved[4];
430};
431
432struct pvrdma_cmd_destroy_mr {
433 struct pvrdma_cmd_hdr hdr;
434 u32 mr_handle;
435 u8 reserved[4];
436};
437
438struct pvrdma_cmd_create_cq {
439 struct pvrdma_cmd_hdr hdr;
440 u64 pdir_dma;
441 u32 ctx_handle;
442 u32 cqe;
443 u32 nchunks;
444 u8 reserved[4];
445};
446
447struct pvrdma_cmd_create_cq_resp {
448 struct pvrdma_cmd_resp_hdr hdr;
449 u32 cq_handle;
450 u32 cqe;
451};
452
453struct pvrdma_cmd_resize_cq {
454 struct pvrdma_cmd_hdr hdr;
455 u32 cq_handle;
456 u32 cqe;
457};
458
459struct pvrdma_cmd_resize_cq_resp {
460 struct pvrdma_cmd_resp_hdr hdr;
461 u32 cqe;
462 u8 reserved[4];
463};
464
465struct pvrdma_cmd_destroy_cq {
466 struct pvrdma_cmd_hdr hdr;
467 u32 cq_handle;
468 u8 reserved[4];
469};
470
471struct pvrdma_cmd_create_qp {
472 struct pvrdma_cmd_hdr hdr;
473 u64 pdir_dma;
474 u32 pd_handle;
475 u32 send_cq_handle;
476 u32 recv_cq_handle;
477 u32 srq_handle;
478 u32 max_send_wr;
479 u32 max_recv_wr;
480 u32 max_send_sge;
481 u32 max_recv_sge;
482 u32 max_inline_data;
483 u32 lkey;
484 u32 access_flags;
485 u16 total_chunks;
486 u16 send_chunks;
487 u16 max_atomic_arg;
488 u8 sq_sig_all;
489 u8 qp_type;
490 u8 is_srq;
491 u8 reserved[3];
492};
493
494struct pvrdma_cmd_create_qp_resp {
495 struct pvrdma_cmd_resp_hdr hdr;
496 u32 qpn;
497 u32 max_send_wr;
498 u32 max_recv_wr;
499 u32 max_send_sge;
500 u32 max_recv_sge;
501 u32 max_inline_data;
502};
503
504struct pvrdma_cmd_modify_qp {
505 struct pvrdma_cmd_hdr hdr;
506 u32 qp_handle;
507 u32 attr_mask;
508 struct pvrdma_qp_attr attrs;
509};
510
511struct pvrdma_cmd_query_qp {
512 struct pvrdma_cmd_hdr hdr;
513 u32 qp_handle;
514 u32 attr_mask;
515};
516
517struct pvrdma_cmd_query_qp_resp {
518 struct pvrdma_cmd_resp_hdr hdr;
519 struct pvrdma_qp_attr attrs;
520};
521
522struct pvrdma_cmd_destroy_qp {
523 struct pvrdma_cmd_hdr hdr;
524 u32 qp_handle;
525 u8 reserved[4];
526};
527
528struct pvrdma_cmd_destroy_qp_resp {
529 struct pvrdma_cmd_resp_hdr hdr;
530 u32 events_reported;
531 u8 reserved[4];
532};
533
534struct pvrdma_cmd_create_bind {
535 struct pvrdma_cmd_hdr hdr;
536 u32 mtu;
537 u32 vlan;
538 u32 index;
539 u8 new_gid[16];
540 u8 gid_type;
541 u8 reserved[3];
542};
543
544struct pvrdma_cmd_destroy_bind {
545 struct pvrdma_cmd_hdr hdr;
546 u32 index;
547 u8 dest_gid[16];
548 u8 reserved[4];
549};
550
551union pvrdma_cmd_req {
552 struct pvrdma_cmd_hdr hdr;
553 struct pvrdma_cmd_query_port query_port;
554 struct pvrdma_cmd_query_pkey query_pkey;
555 struct pvrdma_cmd_create_uc create_uc;
556 struct pvrdma_cmd_destroy_uc destroy_uc;
557 struct pvrdma_cmd_create_pd create_pd;
558 struct pvrdma_cmd_destroy_pd destroy_pd;
559 struct pvrdma_cmd_create_mr create_mr;
560 struct pvrdma_cmd_destroy_mr destroy_mr;
561 struct pvrdma_cmd_create_cq create_cq;
562 struct pvrdma_cmd_resize_cq resize_cq;
563 struct pvrdma_cmd_destroy_cq destroy_cq;
564 struct pvrdma_cmd_create_qp create_qp;
565 struct pvrdma_cmd_modify_qp modify_qp;
566 struct pvrdma_cmd_query_qp query_qp;
567 struct pvrdma_cmd_destroy_qp destroy_qp;
568 struct pvrdma_cmd_create_bind create_bind;
569 struct pvrdma_cmd_destroy_bind destroy_bind;
570};
571
572union pvrdma_cmd_resp {
573 struct pvrdma_cmd_resp_hdr hdr;
574 struct pvrdma_cmd_query_port_resp query_port_resp;
575 struct pvrdma_cmd_query_pkey_resp query_pkey_resp;
576 struct pvrdma_cmd_create_uc_resp create_uc_resp;
577 struct pvrdma_cmd_create_pd_resp create_pd_resp;
578 struct pvrdma_cmd_create_mr_resp create_mr_resp;
579 struct pvrdma_cmd_create_cq_resp create_cq_resp;
580 struct pvrdma_cmd_resize_cq_resp resize_cq_resp;
581 struct pvrdma_cmd_create_qp_resp create_qp_resp;
582 struct pvrdma_cmd_query_qp_resp query_qp_resp;
583 struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp;
584};
585
586#endif /* __PVRDMA_DEV_API_H__ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c
new file mode 100644
index 000000000000..bf51357ea3aa
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c
@@ -0,0 +1,127 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include <linux/bitmap.h>
47#include <linux/errno.h>
48#include <linux/slab.h>
49
50#include "pvrdma.h"
51
52int pvrdma_uar_table_init(struct pvrdma_dev *dev)
53{
54 u32 num = dev->dsr->caps.max_uar;
55 u32 mask = num - 1;
56 struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
57
58 if (!is_power_of_2(num))
59 return -EINVAL;
60
61 tbl->last = 0;
62 tbl->top = 0;
63 tbl->max = num;
64 tbl->mask = mask;
65 spin_lock_init(&tbl->lock);
66 tbl->table = kcalloc(BITS_TO_LONGS(num), sizeof(long), GFP_KERNEL);
67 if (!tbl->table)
68 return -ENOMEM;
69
70 /* 0th UAR is taken by the device. */
71 set_bit(0, tbl->table);
72
73 return 0;
74}
75
76void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev)
77{
78 struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
79
80 kfree(tbl->table);
81}
82
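/*
 * Allocate a UAR index by scanning the bitmap for a free bit starting
 * at 'last', wrapping around once if the first pass finds nothing.
 * The resulting index selects one page of the device's UAR BAR:
 * pfn = (UAR BAR start >> PAGE_SHIFT) + index.
 */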
83int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar)
84{
85 struct pvrdma_id_table *tbl;
86 unsigned long flags;
87 u32 obj;
88
89 tbl = &dev->uar_table.tbl;
90
91 spin_lock_irqsave(&tbl->lock, flags);
92 obj = find_next_zero_bit(tbl->table, tbl->max, tbl->last);
93 if (obj >= tbl->max) {
94 tbl->top = (tbl->top + tbl->max) & tbl->mask;
95 obj = find_first_zero_bit(tbl->table, tbl->max);
96 }
97
98 if (obj >= tbl->max) {
99 spin_unlock_irqrestore(&tbl->lock, flags);
100 return -ENOMEM;
101 }
102
103 set_bit(obj, tbl->table);
104 obj |= tbl->top;
105
106 spin_unlock_irqrestore(&tbl->lock, flags);
107
108 uar->index = obj;
109 uar->pfn = (pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
110 PAGE_SHIFT) + uar->index;
111
112 return 0;
113}
114
115void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar)
116{
117 struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
118 unsigned long flags;
119 u32 obj;
120
121 obj = uar->index & (tbl->max - 1);
122 spin_lock_irqsave(&tbl->lock, flags);
123 clear_bit(obj, tbl->table);
124 tbl->last = min(tbl->last, obj);
125 tbl->top = (tbl->top + tbl->max) & tbl->mask;
126 spin_unlock_irqrestore(&tbl->lock, flags);
127}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
new file mode 100644
index 000000000000..231a1ce1f4be
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -0,0 +1,1211 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include <linux/errno.h>
47#include <linux/inetdevice.h>
48#include <linux/init.h>
49#include <linux/module.h>
50#include <linux/slab.h>
51#include <rdma/ib_addr.h>
52#include <rdma/ib_smi.h>
53#include <rdma/ib_user_verbs.h>
54#include <net/addrconf.h>
55
56#include "pvrdma.h"
57
58#define DRV_NAME "vmw_pvrdma"
59#define DRV_VERSION "1.0.0.0-k"
60
61static DEFINE_MUTEX(pvrdma_device_list_lock);
62static LIST_HEAD(pvrdma_device_list);
63static struct workqueue_struct *event_wq;
64
65static int pvrdma_add_gid(struct ib_device *ibdev,
66 u8 port_num,
67 unsigned int index,
68 const union ib_gid *gid,
69 const struct ib_gid_attr *attr,
70 void **context);
71static int pvrdma_del_gid(struct ib_device *ibdev,
72 u8 port_num,
73 unsigned int index,
74 void **context);
75
76
77static ssize_t show_hca(struct device *device, struct device_attribute *attr,
78 char *buf)
79{
80 return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
81}
82
83static ssize_t show_rev(struct device *device, struct device_attribute *attr,
84 char *buf)
85{
86 return sprintf(buf, "%d\n", PVRDMA_REV_ID);
87}
88
89static ssize_t show_board(struct device *device, struct device_attribute *attr,
90 char *buf)
91{
92 return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
93}
94
95static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
96static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
97static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
98
99static struct device_attribute *pvrdma_class_attributes[] = {
100 &dev_attr_hw_rev,
101 &dev_attr_hca_type,
102 &dev_attr_board_id
103};
104
105static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str,
106 size_t str_len)
107{
108 struct pvrdma_dev *dev =
109 container_of(device, struct pvrdma_dev, ib_dev);
110 snprintf(str, str_len, "%d.%d.%d\n",
111 (int) (dev->dsr->caps.fw_ver >> 32),
112 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
113 (int) dev->dsr->caps.fw_ver & 0xffff);
114}
115
116static int pvrdma_init_device(struct pvrdma_dev *dev)
117{
118	/* Initialize device state: locks, command semaphore and resource counters. */
119 spin_lock_init(&dev->cmd_lock);
120 sema_init(&dev->cmd_sema, 1);
121 atomic_set(&dev->num_qps, 0);
122 atomic_set(&dev->num_cqs, 0);
123 atomic_set(&dev->num_pds, 0);
124 atomic_set(&dev->num_ahs, 0);
125
126 return 0;
127}
128
129static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
130 struct ib_port_immutable *immutable)
131{
132 struct ib_port_attr attr;
133 int err;
134
135 err = pvrdma_query_port(ibdev, port_num, &attr);
136 if (err)
137 return err;
138
139 immutable->pkey_tbl_len = attr.pkey_tbl_len;
140 immutable->gid_tbl_len = attr.gid_tbl_len;
141 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
142 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
143 return 0;
144}
145
146static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
147 u8 port_num)
148{
149 struct net_device *netdev;
150 struct pvrdma_dev *dev = to_vdev(ibdev);
151
152 if (port_num != 1)
153 return NULL;
154
155 rcu_read_lock();
156 netdev = dev->netdev;
157 if (netdev)
158 dev_hold(netdev);
159 rcu_read_unlock();
160
161 return netdev;
162}
163
164static int pvrdma_register_device(struct pvrdma_dev *dev)
165{
166	int ret = -ENOMEM;
167 int i = 0;
168
169 strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
170 dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
171 dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
172 dev->flags = 0;
173 dev->ib_dev.owner = THIS_MODULE;
174 dev->ib_dev.num_comp_vectors = 1;
175 dev->ib_dev.dma_device = &dev->pdev->dev;
176 dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
177 dev->ib_dev.uverbs_cmd_mask =
178 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
179 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
180 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
181 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
182 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
183 (1ull << IB_USER_VERBS_CMD_REG_MR) |
184 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
185 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
186 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
187 (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
188 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
189 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
190 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
191 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
192 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
193 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
194 (1ull << IB_USER_VERBS_CMD_POST_SEND) |
195 (1ull << IB_USER_VERBS_CMD_POST_RECV) |
196 (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
197 (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
198
199 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
200 dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;
201
202 dev->ib_dev.query_device = pvrdma_query_device;
203 dev->ib_dev.query_port = pvrdma_query_port;
204 dev->ib_dev.query_gid = pvrdma_query_gid;
205 dev->ib_dev.query_pkey = pvrdma_query_pkey;
206 dev->ib_dev.modify_port = pvrdma_modify_port;
207 dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext;
208 dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext;
209 dev->ib_dev.mmap = pvrdma_mmap;
210 dev->ib_dev.alloc_pd = pvrdma_alloc_pd;
211 dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd;
212 dev->ib_dev.create_ah = pvrdma_create_ah;
213 dev->ib_dev.destroy_ah = pvrdma_destroy_ah;
214 dev->ib_dev.create_qp = pvrdma_create_qp;
215 dev->ib_dev.modify_qp = pvrdma_modify_qp;
216 dev->ib_dev.query_qp = pvrdma_query_qp;
217 dev->ib_dev.destroy_qp = pvrdma_destroy_qp;
218 dev->ib_dev.post_send = pvrdma_post_send;
219 dev->ib_dev.post_recv = pvrdma_post_recv;
220 dev->ib_dev.create_cq = pvrdma_create_cq;
221 dev->ib_dev.modify_cq = pvrdma_modify_cq;
222 dev->ib_dev.resize_cq = pvrdma_resize_cq;
223 dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
224 dev->ib_dev.poll_cq = pvrdma_poll_cq;
225 dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
226 dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr;
227 dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr;
228 dev->ib_dev.dereg_mr = pvrdma_dereg_mr;
229 dev->ib_dev.alloc_mr = pvrdma_alloc_mr;
230 dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg;
231 dev->ib_dev.add_gid = pvrdma_add_gid;
232 dev->ib_dev.del_gid = pvrdma_del_gid;
233 dev->ib_dev.get_netdev = pvrdma_get_netdev;
234 dev->ib_dev.get_port_immutable = pvrdma_port_immutable;
235 dev->ib_dev.get_link_layer = pvrdma_port_link_layer;
236 dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str;
237
238 mutex_init(&dev->port_mutex);
239 spin_lock_init(&dev->desc_lock);
240
241 dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *),
242 GFP_KERNEL);
243 if (!dev->cq_tbl)
244 return ret;
245 spin_lock_init(&dev->cq_tbl_lock);
246
247 dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *),
248 GFP_KERNEL);
249 if (!dev->qp_tbl)
250 goto err_cq_free;
251 spin_lock_init(&dev->qp_tbl_lock);
252
253 ret = ib_register_device(&dev->ib_dev, NULL);
254 if (ret)
255 goto err_qp_free;
256
257 for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
258 ret = device_create_file(&dev->ib_dev.dev,
259 pvrdma_class_attributes[i]);
260 if (ret)
261 goto err_class;
262 }
263
264 dev->ib_active = true;
265
266 return 0;
267
268err_class:
269 ib_unregister_device(&dev->ib_dev);
270err_qp_free:
271 kfree(dev->qp_tbl);
272err_cq_free:
273 kfree(dev->cq_tbl);
274
275 return ret;
276}
277
278static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
279{
280 u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
281 struct pvrdma_dev *dev = dev_id;
282
283 dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");
284
285 if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) {
286 /* Legacy intr */
287 icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
288 if (icr == 0)
289 return IRQ_NONE;
290 }
291
292 if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
293 complete(&dev->cmd_done);
294
295 return IRQ_HANDLED;
296}
297
298static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
299{
300 struct pvrdma_qp *qp;
301 unsigned long flags;
302
303 spin_lock_irqsave(&dev->qp_tbl_lock, flags);
304 qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
305 if (qp)
306 atomic_inc(&qp->refcnt);
307 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
308
309 if (qp && qp->ibqp.event_handler) {
310 struct ib_qp *ibqp = &qp->ibqp;
311 struct ib_event e;
312
313 e.device = ibqp->device;
314 e.element.qp = ibqp;
315 e.event = type; /* 1:1 mapping for now. */
316 ibqp->event_handler(&e, ibqp->qp_context);
317 }
318 if (qp) {
319 atomic_dec(&qp->refcnt);
320 if (atomic_read(&qp->refcnt) == 0)
321 wake_up(&qp->wait);
322 }
323}
324
325static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
326{
327 struct pvrdma_cq *cq;
328 unsigned long flags;
329
330 spin_lock_irqsave(&dev->cq_tbl_lock, flags);
331 cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
332 if (cq)
333 atomic_inc(&cq->refcnt);
334 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
335
336 if (cq && cq->ibcq.event_handler) {
337 struct ib_cq *ibcq = &cq->ibcq;
338 struct ib_event e;
339
340 e.device = ibcq->device;
341 e.element.cq = ibcq;
342 e.event = type; /* 1:1 mapping for now. */
343 ibcq->event_handler(&e, ibcq->cq_context);
344 }
345 if (cq) {
346 atomic_dec(&cq->refcnt);
347 if (atomic_read(&cq->refcnt) == 0)
348 wake_up(&cq->wait);
349 }
350}
351
352static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
353 enum ib_event_type event)
354{
355 struct ib_event ib_event;
356
357 memset(&ib_event, 0, sizeof(ib_event));
358 ib_event.device = &dev->ib_dev;
359 ib_event.element.port_num = port;
360 ib_event.event = event;
361 ib_dispatch_event(&ib_event);
362}
363
364static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
365{
366 if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
367		dev_warn(&dev->pdev->dev, "event on invalid port %d\n", port);
368 return;
369 }
370
371 pvrdma_dispatch_event(dev, port, type);
372}
373
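/*
 * The async event ring is laid out as one page of ring state followed
 * by pages of pvrdma_eqe entries, so entry i lives at byte offset
 * PAGE_SIZE + i * sizeof(struct pvrdma_eqe) in the page directory.
 */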
374static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
375{
376 return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
377 &dev->async_pdir,
378 PAGE_SIZE +
379 sizeof(struct pvrdma_eqe) * i);
380}
381
382static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
383{
384 struct pvrdma_dev *dev = dev_id;
385 struct pvrdma_ring *ring = &dev->async_ring_state->rx;
386 int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
387 PAGE_SIZE / sizeof(struct pvrdma_eqe);
388 unsigned int head;
389
390 dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");
391
392 /*
393 * Don't process events until the IB device is registered. Otherwise
394 * we'll try to ib_dispatch_event() on an invalid device.
395 */
396 if (!dev->ib_active)
397 return IRQ_HANDLED;
398
399 while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
400 struct pvrdma_eqe *eqe;
401
402 eqe = get_eqe(dev, head);
403
404 switch (eqe->type) {
405 case PVRDMA_EVENT_QP_FATAL:
406 case PVRDMA_EVENT_QP_REQ_ERR:
407 case PVRDMA_EVENT_QP_ACCESS_ERR:
408 case PVRDMA_EVENT_COMM_EST:
409 case PVRDMA_EVENT_SQ_DRAINED:
410 case PVRDMA_EVENT_PATH_MIG:
411 case PVRDMA_EVENT_PATH_MIG_ERR:
412 case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
413 pvrdma_qp_event(dev, eqe->info, eqe->type);
414 break;
415
416 case PVRDMA_EVENT_CQ_ERR:
417 pvrdma_cq_event(dev, eqe->info, eqe->type);
418 break;
419
420 case PVRDMA_EVENT_SRQ_ERR:
421 case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
422 break;
423
424 case PVRDMA_EVENT_PORT_ACTIVE:
425 case PVRDMA_EVENT_PORT_ERR:
426 case PVRDMA_EVENT_LID_CHANGE:
427 case PVRDMA_EVENT_PKEY_CHANGE:
428 case PVRDMA_EVENT_SM_CHANGE:
429 case PVRDMA_EVENT_CLIENT_REREGISTER:
430 case PVRDMA_EVENT_GID_CHANGE:
431 pvrdma_dev_event(dev, eqe->info, eqe->type);
432 break;
433
434 case PVRDMA_EVENT_DEVICE_FATAL:
435 pvrdma_dev_event(dev, 1, eqe->type);
436 break;
437
438 default:
439 break;
440 }
441
442 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
443 }
444
445 return IRQ_HANDLED;
446}
447
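/*
 * The CQ notification ring uses the same layout as the async event
 * ring: one page of ring state followed by pages of pvrdma_cqne
 * entries.
 */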
448static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
449 unsigned int i)
450{
451 return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
452 &dev->cq_pdir,
453 PAGE_SIZE +
454 sizeof(struct pvrdma_cqne) * i);
455}
456
457static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
458{
459 struct pvrdma_dev *dev = dev_id;
460 struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
461 int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
462 sizeof(struct pvrdma_cqne);
463 unsigned int head;
464 unsigned long flags;
465
466 dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");
467
468 while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
469 struct pvrdma_cqne *cqne;
470 struct pvrdma_cq *cq;
471
472 cqne = get_cqne(dev, head);
473 spin_lock_irqsave(&dev->cq_tbl_lock, flags);
474 cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
475 if (cq)
476 atomic_inc(&cq->refcnt);
477 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
478
479 if (cq && cq->ibcq.comp_handler)
480 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
481 if (cq) {
482 atomic_dec(&cq->refcnt);
483			if (atomic_read(&cq->refcnt) == 0)
484 wake_up(&cq->wait);
485 }
486 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
487 }
488
489 return IRQ_HANDLED;
490}
491
492static void pvrdma_disable_msi_all(struct pvrdma_dev *dev)
493{
494 if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX)
495 pci_disable_msix(dev->pdev);
496 else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI)
497 pci_disable_msi(dev->pdev);
498}
499
500static void pvrdma_free_irq(struct pvrdma_dev *dev)
501{
502 int i;
503
504 dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
505
506 if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) {
507 for (i = 0; i < dev->intr.size; i++) {
508 if (dev->intr.enabled[i]) {
509 free_irq(dev->intr.msix_entry[i].vector, dev);
510 dev->intr.enabled[i] = 0;
511 }
512 }
513 } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX ||
514 dev->intr.type == PVRDMA_INTR_TYPE_MSI) {
515 free_irq(dev->pdev->irq, dev);
516 }
517}
518
519static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
520{
521 dev_dbg(&dev->pdev->dev, "enable interrupts\n");
522 pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
523}
524
525static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
526{
527 dev_dbg(&dev->pdev->dev, "disable interrupts\n");
528 pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
529}
530
531static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev)
532{
533 int i;
534 int ret;
535
536 for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) {
537 dev->intr.msix_entry[i].entry = i;
538 dev->intr.msix_entry[i].vector = i;
539
540 switch (i) {
541 case 0:
542 /* CMD ring handler */
543 dev->intr.handler[i] = pvrdma_intr0_handler;
544 break;
545 case 1:
546 /* Async event ring handler */
547 dev->intr.handler[i] = pvrdma_intr1_handler;
548 break;
549 default:
550 /* Completion queue handler */
551 dev->intr.handler[i] = pvrdma_intrx_handler;
552 break;
553 }
554 }
555
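	/*
	 * pci_enable_msix() returns 0 on success, a negative errno on
	 * failure, or a positive count of vectors that could actually be
	 * allocated; in the last case retry with the reduced count.
	 */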
556 ret = pci_enable_msix(pdev, dev->intr.msix_entry,
557 PVRDMA_MAX_INTERRUPTS);
558 if (!ret) {
559 dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
560 dev->intr.size = PVRDMA_MAX_INTERRUPTS;
561 } else if (ret > 0) {
562 ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret);
563 if (!ret) {
564 dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
565 dev->intr.size = ret;
566 } else {
567 dev->intr.size = 0;
568 }
569 }
570
571 dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n",
572 dev->intr.type, dev->intr.size);
573
574 return ret;
575}
576
577static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
578{
579 int ret = 0;
580 int i;
581
582 if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) &&
583 pvrdma_enable_msix(dev->pdev, dev)) {
584 /* Try MSI */
585 ret = pci_enable_msi(dev->pdev);
586 if (!ret) {
587 dev->intr.type = PVRDMA_INTR_TYPE_MSI;
588 } else {
589 /* Legacy INTR */
590 dev->intr.type = PVRDMA_INTR_TYPE_INTX;
591 }
592 }
593
594	/* Request the first IRQ (command-response interrupt). */
595 switch (dev->intr.type) {
596 case PVRDMA_INTR_TYPE_INTX:
597 case PVRDMA_INTR_TYPE_MSI:
598 ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler,
599 IRQF_SHARED, DRV_NAME, dev);
600 if (ret) {
601 dev_err(&dev->pdev->dev,
602 "failed to request interrupt\n");
603 goto disable_msi;
604 }
605 break;
606 case PVRDMA_INTR_TYPE_MSIX:
607 ret = request_irq(dev->intr.msix_entry[0].vector,
608 pvrdma_intr0_handler, 0, DRV_NAME, dev);
609 if (ret) {
610 dev_err(&dev->pdev->dev,
611 "failed to request interrupt 0\n");
612 goto disable_msi;
613 }
614 dev->intr.enabled[0] = 1;
615 break;
616 default:
617 /* Not reached */
618 break;
619 }
620
621 /* For MSIX: request intr for each vector */
622 if (dev->intr.size > 1) {
623 ret = request_irq(dev->intr.msix_entry[1].vector,
624 pvrdma_intr1_handler, 0, DRV_NAME, dev);
625 if (ret) {
626 dev_err(&dev->pdev->dev,
627 "failed to request interrupt 1\n");
628 goto free_irq;
629 }
630 dev->intr.enabled[1] = 1;
631
632 for (i = 2; i < dev->intr.size; i++) {
633 ret = request_irq(dev->intr.msix_entry[i].vector,
634 pvrdma_intrx_handler, 0,
635 DRV_NAME, dev);
636 if (ret) {
637 dev_err(&dev->pdev->dev,
638 "failed to request interrupt %d\n", i);
639 goto free_irq;
640 }
641 dev->intr.enabled[i] = 1;
642 }
643 }
644
645 return 0;
646
647free_irq:
648 pvrdma_free_irq(dev);
649disable_msi:
650 pvrdma_disable_msi_all(dev);
651 return ret;
652}
653
654static void pvrdma_free_slots(struct pvrdma_dev *dev)
655{
656 struct pci_dev *pdev = dev->pdev;
657
658 if (dev->resp_slot)
659 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
660 dev->dsr->resp_slot_dma);
661 if (dev->cmd_slot)
662 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
663 dev->dsr->cmd_slot_dma);
664}
665
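/*
 * GIDs are programmed into the device by posting CREATE_BIND and
 * DESTROY_BIND commands. A shadow copy is kept in dev->sgid_tbl so the
 * GID can be handed back to the device when the binding is destroyed.
 */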
666static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
667 const union ib_gid *gid,
668 int index)
669{
670 int ret;
671 union pvrdma_cmd_req req;
672 struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;
673
674 if (!dev->sgid_tbl) {
675 dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
676 return -EINVAL;
677 }
678
679 memset(cmd_bind, 0, sizeof(*cmd_bind));
680 cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
681 memcpy(cmd_bind->new_gid, gid->raw, 16);
682 cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
683 cmd_bind->vlan = 0xfff;
684 cmd_bind->index = index;
685 cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1;
686
687 ret = pvrdma_cmd_post(dev, &req, NULL, 0);
688 if (ret < 0) {
689 dev_warn(&dev->pdev->dev,
690 "could not create binding, error: %d\n", ret);
691 return -EFAULT;
692 }
693 memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
694 return 0;
695}
696
697static int pvrdma_add_gid(struct ib_device *ibdev,
698 u8 port_num,
699 unsigned int index,
700 const union ib_gid *gid,
701 const struct ib_gid_attr *attr,
702 void **context)
703{
704 struct pvrdma_dev *dev = to_vdev(ibdev);
705
706 return pvrdma_add_gid_at_index(dev, gid, index);
707}
708
709static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
710{
711 int ret;
712 union pvrdma_cmd_req req;
713 struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;
714
715 /* Update sgid table. */
716 if (!dev->sgid_tbl) {
717 dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
718 return -EINVAL;
719 }
720
721 memset(cmd_dest, 0, sizeof(*cmd_dest));
722 cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
723 memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
724 cmd_dest->index = index;
725
726 ret = pvrdma_cmd_post(dev, &req, NULL, 0);
727 if (ret < 0) {
728 dev_warn(&dev->pdev->dev,
729 "could not destroy binding, error: %d\n", ret);
730 return ret;
731 }
732 memset(&dev->sgid_tbl[index], 0, 16);
733 return 0;
734}
735
736static int pvrdma_del_gid(struct ib_device *ibdev,
737 u8 port_num,
738 unsigned int index,
739 void **context)
740{
741 struct pvrdma_dev *dev = to_vdev(ibdev);
742
743	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s\n",
744 index, dev->netdev->name);
745
746 return pvrdma_del_gid_at_index(dev, index);
747}
748
749static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
750 unsigned long event)
751{
752 switch (event) {
753 case NETDEV_REBOOT:
754 case NETDEV_DOWN:
755 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
756 break;
757 case NETDEV_UP:
758 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
759 break;
760 default:
761 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
762 event, dev->ib_dev.name);
763 break;
764 }
765}
766
767static void pvrdma_netdevice_event_work(struct work_struct *work)
768{
769 struct pvrdma_netdevice_work *netdev_work;
770 struct pvrdma_dev *dev;
771
772 netdev_work = container_of(work, struct pvrdma_netdevice_work, work);
773
774 mutex_lock(&pvrdma_device_list_lock);
775 list_for_each_entry(dev, &pvrdma_device_list, device_link) {
776 if (dev->netdev == netdev_work->event_netdev) {
777 pvrdma_netdevice_event_handle(dev, netdev_work->event);
778 break;
779 }
780 }
781 mutex_unlock(&pvrdma_device_list_lock);
782
783 kfree(netdev_work);
784}
785
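/*
 * The notifier only queues work: the event is handled asynchronously
 * on the ordered event workqueue, where the device list mutex can be
 * taken safely.
 */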
786static int pvrdma_netdevice_event(struct notifier_block *this,
787 unsigned long event, void *ptr)
788{
789 struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
790 struct pvrdma_netdevice_work *netdev_work;
791
792 netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
793 if (!netdev_work)
794 return NOTIFY_BAD;
795
796 INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
797 netdev_work->event_netdev = event_netdev;
798 netdev_work->event = event;
799 queue_work(event_wq, &netdev_work->work);
800
801 return NOTIFY_DONE;
802}
803
804static int pvrdma_pci_probe(struct pci_dev *pdev,
805 const struct pci_device_id *id)
806{
807 struct pci_dev *pdev_net;
808 struct pvrdma_dev *dev;
809 int ret;
810 unsigned long start;
811 unsigned long len;
812 unsigned int version;
813 dma_addr_t slot_dma = 0;
814
815 dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));
816
817	/* Allocate a zeroed-out IB device structure. */
818 dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev));
819 if (!dev) {
820 dev_err(&pdev->dev, "failed to allocate IB device\n");
821 return -ENOMEM;
822 }
823
824 mutex_lock(&pvrdma_device_list_lock);
825 list_add(&dev->device_link, &pvrdma_device_list);
826 mutex_unlock(&pvrdma_device_list_lock);
827
828 ret = pvrdma_init_device(dev);
829 if (ret)
830 goto err_free_device;
831
832 dev->pdev = pdev;
833 pci_set_drvdata(pdev, dev);
834
835 ret = pci_enable_device(pdev);
836 if (ret) {
837 dev_err(&pdev->dev, "cannot enable PCI device\n");
838 goto err_free_device;
839 }
840
841 dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
842 pci_resource_flags(pdev, 0));
843 dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
844 (unsigned long long)pci_resource_len(pdev, 0));
845 dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
846 (unsigned long long)pci_resource_start(pdev, 0));
847 dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
848 pci_resource_flags(pdev, 1));
849 dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
850 (unsigned long long)pci_resource_len(pdev, 1));
851 dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
852 (unsigned long long)pci_resource_start(pdev, 1));
853
854 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
855 !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
856 dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
857 ret = -ENOMEM;
858 goto err_free_device;
859 }
860
861 ret = pci_request_regions(pdev, DRV_NAME);
862 if (ret) {
863 dev_err(&pdev->dev, "cannot request PCI resources\n");
864 goto err_disable_pdev;
865 }
866
867 /* Enable 64-Bit DMA */
868 if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
869 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
870 if (ret != 0) {
871 dev_err(&pdev->dev,
872 "pci_set_consistent_dma_mask failed\n");
873 goto err_free_resource;
874 }
875 } else {
876 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
877 if (ret != 0) {
878 dev_err(&pdev->dev,
879 "pci_set_dma_mask failed\n");
880 goto err_free_resource;
881 }
882 }
883
884 pci_set_master(pdev);
885
886 /* Map register space */
887 start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
888 len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
889 dev->regs = ioremap(start, len);
890 if (!dev->regs) {
891 dev_err(&pdev->dev, "register mapping failed\n");
892 ret = -ENOMEM;
893 goto err_free_resource;
894 }
895
896 /* Setup per-device UAR. */
897 dev->driver_uar.index = 0;
898 dev->driver_uar.pfn =
899 pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
900 PAGE_SHIFT;
901 dev->driver_uar.map =
902 ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
903 if (!dev->driver_uar.map) {
904 dev_err(&pdev->dev, "failed to remap UAR pages\n");
905 ret = -ENOMEM;
906 goto err_unmap_regs;
907 }
908
909 version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
910 dev_info(&pdev->dev, "device version %d, driver version %d\n",
911 version, PVRDMA_VERSION);
912 if (version < PVRDMA_VERSION) {
913		dev_err(&pdev->dev, "incompatible device version\n");
		ret = -EFAULT;
914		goto err_uar_unmap;
915 }
916
917 dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
918 &dev->dsrbase, GFP_KERNEL);
919 if (!dev->dsr) {
920 dev_err(&pdev->dev, "failed to allocate shared region\n");
921 ret = -ENOMEM;
922 goto err_uar_unmap;
923 }
924
925 /* Setup the shared region */
926 memset(dev->dsr, 0, sizeof(*dev->dsr));
927 dev->dsr->driver_version = PVRDMA_VERSION;
928 dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
929 PVRDMA_GOS_BITS_32 :
930 PVRDMA_GOS_BITS_64;
931 dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
932 dev->dsr->gos_info.gos_ver = 1;
933 dev->dsr->uar_pfn = dev->driver_uar.pfn;
934
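	/*
	 * The command and response slots are single shared pages: verb
	 * commands are written into the command slot and the device's
	 * reply is read back from the response slot.
	 */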
935 /* Command slot. */
936 dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
937 &slot_dma, GFP_KERNEL);
938 if (!dev->cmd_slot) {
939 ret = -ENOMEM;
940 goto err_free_dsr;
941 }
942
943 dev->dsr->cmd_slot_dma = (u64)slot_dma;
944
945 /* Response slot. */
946 dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
947 &slot_dma, GFP_KERNEL);
948 if (!dev->resp_slot) {
949 ret = -ENOMEM;
950 goto err_free_slots;
951 }
952
953 dev->dsr->resp_slot_dma = (u64)slot_dma;
954
955 /* Async event ring */
956 dev->dsr->async_ring_pages.num_pages = 4;
957 ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
958 dev->dsr->async_ring_pages.num_pages, true);
959 if (ret)
960 goto err_free_slots;
961 dev->async_ring_state = dev->async_pdir.pages[0];
962 dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;
963
964 /* CQ notification ring */
965 dev->dsr->cq_ring_pages.num_pages = 4;
966 ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
967 dev->dsr->cq_ring_pages.num_pages, true);
968 if (ret)
969 goto err_free_async_ring;
970 dev->cq_ring_state = dev->cq_pdir.pages[0];
971 dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;
972
973 /*
974 * Write the PA of the shared region to the device. The writes must be
975 * ordered such that the high bits are written last. When the writes
976 * complete, the device will have filled out the capabilities.
977 */
978
979 pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
980 pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
981 (u32)((u64)(dev->dsrbase) >> 32));
982
983	/* Make sure the write is complete before reading the capabilities. */
984 mb();
985
986 /* Currently, the driver only supports RoCE mode. */
987 if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) {
988 dev_err(&pdev->dev, "unsupported transport %d\n",
989 dev->dsr->caps.mode);
990 ret = -EFAULT;
991 goto err_free_cq_ring;
992 }
993
994 /* Currently, the driver only supports RoCE V1. */
995 if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) {
996 dev_err(&pdev->dev, "driver needs RoCE v1 support\n");
997 ret = -EFAULT;
998 goto err_free_cq_ring;
999 }
1000
1001	/* The paired vmxnet3 device shares this bus and slot, but is at function 0. */
1002 pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
1003 if (!pdev_net) {
1004 dev_err(&pdev->dev, "failed to find paired net device\n");
1005 ret = -ENODEV;
1006 goto err_free_cq_ring;
1007 }
1008
1009 if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
1010 pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
1011 dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
1012 pci_dev_put(pdev_net);
1013 ret = -ENODEV;
1014 goto err_free_cq_ring;
1015 }
1016
1017 dev->netdev = pci_get_drvdata(pdev_net);
1018 pci_dev_put(pdev_net);
1019 if (!dev->netdev) {
1020 dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
1021 ret = -ENODEV;
1022 goto err_free_cq_ring;
1023 }
1024
1025 dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);
1026
1027 /* Interrupt setup */
1028 ret = pvrdma_alloc_intrs(dev);
1029 if (ret) {
1030 dev_err(&pdev->dev, "failed to allocate interrupts\n");
1031 ret = -ENOMEM;
1032 goto err_netdevice;
1033 }
1034
1035 /* Allocate UAR table. */
1036 ret = pvrdma_uar_table_init(dev);
1037 if (ret) {
1038 dev_err(&pdev->dev, "failed to allocate UAR table\n");
1039 ret = -ENOMEM;
1040 goto err_free_intrs;
1041 }
1042
1043 /* Allocate GID table */
1044 dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
1045 sizeof(union ib_gid), GFP_KERNEL);
1046 if (!dev->sgid_tbl) {
1047 ret = -ENOMEM;
1048 goto err_free_uar_table;
1049 }
1050 dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);
1051
1052 pvrdma_enable_intrs(dev);
1053
1054 /* Activate pvrdma device */
1055 pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
1056
1057 /* Make sure the write is complete before reading status. */
1058 mb();
1059
1060 /* Check if device was successfully activated */
1061 ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
1062 if (ret != 0) {
1063 dev_err(&pdev->dev, "failed to activate device\n");
1064 ret = -EFAULT;
1065 goto err_disable_intr;
1066 }
1067
1068 /* Register IB device */
1069 ret = pvrdma_register_device(dev);
1070 if (ret) {
1071 dev_err(&pdev->dev, "failed to register IB device\n");
1072 goto err_disable_intr;
1073 }
1074
1075 dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
1076 ret = register_netdevice_notifier(&dev->nb_netdev);
1077 if (ret) {
1078 dev_err(&pdev->dev, "failed to register netdevice events\n");
1079 goto err_unreg_ibdev;
1080 }
1081
1082 dev_info(&pdev->dev, "attached to device\n");
1083 return 0;
1084
1085err_unreg_ibdev:
1086 ib_unregister_device(&dev->ib_dev);
1087err_disable_intr:
1088 pvrdma_disable_intrs(dev);
1089 kfree(dev->sgid_tbl);
1090err_free_uar_table:
1091 pvrdma_uar_table_cleanup(dev);
1092err_free_intrs:
1093 pvrdma_free_irq(dev);
1094 pvrdma_disable_msi_all(dev);
1095err_netdevice:
1096 unregister_netdevice_notifier(&dev->nb_netdev);
1097err_free_cq_ring:
1098 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
1099err_free_async_ring:
1100 pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
1101err_free_slots:
1102 pvrdma_free_slots(dev);
1103err_free_dsr:
1104 dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
1105 dev->dsrbase);
1106err_uar_unmap:
1107 iounmap(dev->driver_uar.map);
1108err_unmap_regs:
1109 iounmap(dev->regs);
1110err_free_resource:
1111 pci_release_regions(pdev);
1112err_disable_pdev:
1113 pci_disable_device(pdev);
1114 pci_set_drvdata(pdev, NULL);
1115err_free_device:
1116 mutex_lock(&pvrdma_device_list_lock);
1117 list_del(&dev->device_link);
1118 mutex_unlock(&pvrdma_device_list_lock);
1119 ib_dealloc_device(&dev->ib_dev);
1120 return ret;
1121}
1122
1123static void pvrdma_pci_remove(struct pci_dev *pdev)
1124{
1125 struct pvrdma_dev *dev = pci_get_drvdata(pdev);
1126
1127 if (!dev)
1128 return;
1129
1130 dev_info(&pdev->dev, "detaching from device\n");
1131
1132 unregister_netdevice_notifier(&dev->nb_netdev);
1133 dev->nb_netdev.notifier_call = NULL;
1134
1135 flush_workqueue(event_wq);
1136
1137 /* Unregister ib device */
1138 ib_unregister_device(&dev->ib_dev);
1139
1140 mutex_lock(&pvrdma_device_list_lock);
1141 list_del(&dev->device_link);
1142 mutex_unlock(&pvrdma_device_list_lock);
1143
1144 pvrdma_disable_intrs(dev);
1145 pvrdma_free_irq(dev);
1146 pvrdma_disable_msi_all(dev);
1147
1148 /* Deactivate pvrdma device */
1149 pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
1150 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
1151 pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
1152 pvrdma_free_slots(dev);
1153
1154 iounmap(dev->regs);
1155 kfree(dev->sgid_tbl);
1156 kfree(dev->cq_tbl);
1157 kfree(dev->qp_tbl);
1158 pvrdma_uar_table_cleanup(dev);
1159 iounmap(dev->driver_uar.map);
1160
1161 ib_dealloc_device(&dev->ib_dev);
1162
1163 /* Free pci resources */
1164 pci_release_regions(pdev);
1165 pci_disable_device(pdev);
1166 pci_set_drvdata(pdev, NULL);
1167}
1168
1169static struct pci_device_id pvrdma_pci_table[] = {
1170 { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
1171 { 0 },
1172};
1173
1174MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);
1175
1176static struct pci_driver pvrdma_driver = {
1177 .name = DRV_NAME,
1178 .id_table = pvrdma_pci_table,
1179 .probe = pvrdma_pci_probe,
1180 .remove = pvrdma_pci_remove,
1181};
1182
1183static int __init pvrdma_init(void)
1184{
1185 int err;
1186
1187 event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
1188 if (!event_wq)
1189 return -ENOMEM;
1190
1191 err = pci_register_driver(&pvrdma_driver);
1192 if (err)
1193 destroy_workqueue(event_wq);
1194
1195 return err;
1196}
1197
1198static void __exit pvrdma_cleanup(void)
1199{
1200 pci_unregister_driver(&pvrdma_driver);
1201
1202 destroy_workqueue(event_wq);
1203}
1204
1205module_init(pvrdma_init);
1206module_exit(pvrdma_cleanup);
1207
1208MODULE_AUTHOR("VMware, Inc");
1209MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
1210MODULE_VERSION(DRV_VERSION);
1211MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
new file mode 100644
index 000000000000..948b5ccd2a70
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
@@ -0,0 +1,304 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include <linux/errno.h>
47#include <linux/slab.h>
48#include <linux/bitmap.h>
49
50#include "pvrdma.h"
51
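/*
 * A page directory is a two-level structure: a single directory page
 * whose entries hold the DMA addresses of table pages, and table pages
 * whose entries hold the DMA addresses of the data pages themselves.
 * When alloc_pages is set the data pages are allocated here as well.
 */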
52int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir,
53 u64 npages, bool alloc_pages)
54{
55 u64 i;
56
57 if (npages > PVRDMA_PAGE_DIR_MAX_PAGES)
58 return -EINVAL;
59
60 memset(pdir, 0, sizeof(*pdir));
61
62 pdir->dir = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
63 &pdir->dir_dma, GFP_KERNEL);
64 if (!pdir->dir)
65 goto err;
66
67 pdir->ntables = PVRDMA_PAGE_DIR_TABLE(npages - 1) + 1;
68 pdir->tables = kcalloc(pdir->ntables, sizeof(*pdir->tables),
69 GFP_KERNEL);
70 if (!pdir->tables)
71 goto err;
72
73 for (i = 0; i < pdir->ntables; i++) {
74 pdir->tables[i] = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
75 (dma_addr_t *)&pdir->dir[i],
76 GFP_KERNEL);
77 if (!pdir->tables[i])
78 goto err;
79 }
80
81 pdir->npages = npages;
82
83 if (alloc_pages) {
84 pdir->pages = kcalloc(npages, sizeof(*pdir->pages),
85 GFP_KERNEL);
86 if (!pdir->pages)
87 goto err;
88
89 for (i = 0; i < pdir->npages; i++) {
90 dma_addr_t page_dma;
91
92 pdir->pages[i] = dma_alloc_coherent(&dev->pdev->dev,
93 PAGE_SIZE,
94 &page_dma,
95 GFP_KERNEL);
96 if (!pdir->pages[i])
97 goto err;
98
99 pvrdma_page_dir_insert_dma(pdir, i, page_dma);
100 }
101 }
102
103 return 0;
104
105err:
106 pvrdma_page_dir_cleanup(dev, pdir);
107
108 return -ENOMEM;
109}
110
111static u64 *pvrdma_page_dir_table(struct pvrdma_page_dir *pdir, u64 idx)
112{
113 return pdir->tables[PVRDMA_PAGE_DIR_TABLE(idx)];
114}
115
116dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx)
117{
118 return pvrdma_page_dir_table(pdir, idx)[PVRDMA_PAGE_DIR_PAGE(idx)];
119}
120
121static void pvrdma_page_dir_cleanup_pages(struct pvrdma_dev *dev,
122 struct pvrdma_page_dir *pdir)
123{
124 if (pdir->pages) {
125 u64 i;
126
127 for (i = 0; i < pdir->npages && pdir->pages[i]; i++) {
128 dma_addr_t page_dma = pvrdma_page_dir_get_dma(pdir, i);
129
130 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
131 pdir->pages[i], page_dma);
132 }
133
134 kfree(pdir->pages);
135 }
136}
137
138static void pvrdma_page_dir_cleanup_tables(struct pvrdma_dev *dev,
139 struct pvrdma_page_dir *pdir)
140{
141 if (pdir->tables) {
142 int i;
143
144 pvrdma_page_dir_cleanup_pages(dev, pdir);
145
146 for (i = 0; i < pdir->ntables; i++) {
147 u64 *table = pdir->tables[i];
148
149 if (table)
150 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
151 table, pdir->dir[i]);
152 }
153
154 kfree(pdir->tables);
155 }
156}
157
158void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev,
159 struct pvrdma_page_dir *pdir)
160{
161 if (pdir->dir) {
162 pvrdma_page_dir_cleanup_tables(dev, pdir);
163 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
164 pdir->dir, pdir->dir_dma);
165 }
166}
167
168int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx,
169 dma_addr_t daddr)
170{
171 u64 *table;
172
173 if (idx >= pdir->npages)
174 return -EINVAL;
175
176 table = pvrdma_page_dir_table(pdir, idx);
177 table[PVRDMA_PAGE_DIR_PAGE(idx)] = daddr;
178
179 return 0;
180}
181
182int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
183 struct ib_umem *umem, u64 offset)
184{
185 u64 i = offset;
186 int j, entry;
187 int ret = 0, len = 0;
188 struct scatterlist *sg;
189
190 if (offset >= pdir->npages)
191 return -EINVAL;
192
193 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
194 len = sg_dma_len(sg) >> PAGE_SHIFT;
195 for (j = 0; j < len; j++) {
196 dma_addr_t addr = sg_dma_address(sg) +
197 umem->page_size * j;
198
199 ret = pvrdma_page_dir_insert_dma(pdir, i, addr);
200 if (ret)
201 goto exit;
202
203 i++;
204 }
205 }
206
207exit:
208 return ret;
209}
210
211int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir,
212 u64 *page_list,
213 int num_pages)
214{
215 int i;
216 int ret;
217
218 if (num_pages > pdir->npages)
219 return -EINVAL;
220
221 for (i = 0; i < num_pages; i++) {
222 ret = pvrdma_page_dir_insert_dma(pdir, i, page_list[i]);
223 if (ret)
224 return ret;
225 }
226
227 return 0;
228}
229
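/*
 * The helpers below translate field by field between the IB core
 * structures and their pvrdma device ABI counterparts; the GID unions
 * are additionally required to be the same size so they can be copied
 * wholesale.
 */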
230void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, const struct pvrdma_qp_cap *src)
231{
232 dst->max_send_wr = src->max_send_wr;
233 dst->max_recv_wr = src->max_recv_wr;
234 dst->max_send_sge = src->max_send_sge;
235 dst->max_recv_sge = src->max_recv_sge;
236 dst->max_inline_data = src->max_inline_data;
237}
238
239void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, const struct ib_qp_cap *src)
240{
241 dst->max_send_wr = src->max_send_wr;
242 dst->max_recv_wr = src->max_recv_wr;
243 dst->max_send_sge = src->max_send_sge;
244 dst->max_recv_sge = src->max_recv_sge;
245 dst->max_inline_data = src->max_inline_data;
246}
247
248void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src)
249{
250 BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid));
251 memcpy(dst, src, sizeof(*src));
252}
253
254void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src)
255{
256 BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid));
257 memcpy(dst, src, sizeof(*src));
258}
259
260void pvrdma_global_route_to_ib(struct ib_global_route *dst,
261 const struct pvrdma_global_route *src)
262{
263 pvrdma_gid_to_ib(&dst->dgid, &src->dgid);
264 dst->flow_label = src->flow_label;
265 dst->sgid_index = src->sgid_index;
266 dst->hop_limit = src->hop_limit;
267 dst->traffic_class = src->traffic_class;
268}
269
270void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
271 const struct ib_global_route *src)
272{
273 ib_gid_to_pvrdma(&dst->dgid, &src->dgid);
274 dst->flow_label = src->flow_label;
275 dst->sgid_index = src->sgid_index;
276 dst->hop_limit = src->hop_limit;
277 dst->traffic_class = src->traffic_class;
278}
279
280void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
281 const struct pvrdma_ah_attr *src)
282{
283 pvrdma_global_route_to_ib(&dst->grh, &src->grh);
284 dst->dlid = src->dlid;
285 dst->sl = src->sl;
286 dst->src_path_bits = src->src_path_bits;
287 dst->static_rate = src->static_rate;
288 dst->ah_flags = src->ah_flags;
289 dst->port_num = src->port_num;
290 memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
291}
292
293void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
294 const struct ib_ah_attr *src)
295{
296 ib_global_route_to_pvrdma(&dst->grh, &src->grh);
297 dst->dlid = src->dlid;
298 dst->sl = src->sl;
299 dst->src_path_bits = src->src_path_bits;
300 dst->static_rate = src->static_rate;
301 dst->ah_flags = src->ah_flags;
302 dst->port_num = src->port_num;
303 memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
304}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
new file mode 100644
index 000000000000..8519f3212e52
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -0,0 +1,334 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include <linux/list.h>
47#include <linux/slab.h>
48
49#include "pvrdma.h"
50
51/**
52 * pvrdma_get_dma_mr - get a DMA memory region
53 * @pd: protection domain
54 * @acc: access flags
55 *
56 * @return: ib_mr pointer on success, otherwise returns an errno.
57 */
58struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc)
59{
60 struct pvrdma_dev *dev = to_vdev(pd->device);
61 struct pvrdma_user_mr *mr;
62 union pvrdma_cmd_req req;
63 union pvrdma_cmd_resp rsp;
64 struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
65 struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
66 int ret;
67
68 /* Support only LOCAL_WRITE flag for DMA MRs */
69 if (acc & ~IB_ACCESS_LOCAL_WRITE) {
70 dev_warn(&dev->pdev->dev,
71 "unsupported dma mr access flags %#x\n", acc);
72 return ERR_PTR(-EOPNOTSUPP);
73 }
74
75 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
76 if (!mr)
77 return ERR_PTR(-ENOMEM);
78
79 memset(cmd, 0, sizeof(*cmd));
80 cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR;
81 cmd->pd_handle = to_vpd(pd)->pd_handle;
82 cmd->access_flags = acc;
83 cmd->flags = PVRDMA_MR_FLAG_DMA;
84
85 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
86 if (ret < 0) {
87 dev_warn(&dev->pdev->dev,
88 "could not get DMA mem region, error: %d\n", ret);
89 kfree(mr);
90 return ERR_PTR(ret);
91 }
92
93 mr->mmr.mr_handle = resp->mr_handle;
94 mr->ibmr.lkey = resp->lkey;
95 mr->ibmr.rkey = resp->rkey;
96
97 return &mr->ibmr;
98}
99
100/**
101 * pvrdma_reg_user_mr - register a userspace memory region
102 * @pd: protection domain
103 * @start: starting address
104 * @length: length of region
105 * @virt_addr: I/O virtual address
106 * @access_flags: access flags for memory region
107 * @udata: user data
108 *
109 * @return: ib_mr pointer on success, otherwise returns an errno.
110 */
111struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
112 u64 virt_addr, int access_flags,
113 struct ib_udata *udata)
114{
115 struct pvrdma_dev *dev = to_vdev(pd->device);
116 struct pvrdma_user_mr *mr = NULL;
117 struct ib_umem *umem;
118 union pvrdma_cmd_req req;
119 union pvrdma_cmd_resp rsp;
120 struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
121 struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
122 int nchunks;
123 int ret;
124 int entry;
125 struct scatterlist *sg;
126
127 if (length == 0 || length > dev->dsr->caps.max_mr_size) {
128 dev_warn(&dev->pdev->dev, "invalid mem region length\n");
129 return ERR_PTR(-EINVAL);
130 }
131
132 umem = ib_umem_get(pd->uobject->context, start,
133 length, access_flags, 0);
134 if (IS_ERR(umem)) {
135 dev_warn(&dev->pdev->dev,
136 "could not get umem for mem region\n");
137 return ERR_CAST(umem);
138 }
139
140 nchunks = 0;
141 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry)
142 nchunks += sg_dma_len(sg) >> PAGE_SHIFT;
143
144 if (nchunks < 0 || nchunks > PVRDMA_PAGE_DIR_MAX_PAGES) {
145 dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
146 nchunks);
147 ret = -EINVAL;
148 goto err_umem;
149 }
150
151 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
152 if (!mr) {
153 ret = -ENOMEM;
154 goto err_umem;
155 }
156
157 mr->mmr.iova = virt_addr;
158 mr->mmr.size = length;
159 mr->umem = umem;
160
161 ret = pvrdma_page_dir_init(dev, &mr->pdir, nchunks, false);
162 if (ret) {
163 dev_warn(&dev->pdev->dev,
164 "could not allocate page directory\n");
165 goto err_umem;
166 }
167
168 ret = pvrdma_page_dir_insert_umem(&mr->pdir, mr->umem, 0);
169 if (ret)
170 goto err_pdir;
171
172 memset(cmd, 0, sizeof(*cmd));
173 cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR;
174 cmd->start = start;
175 cmd->length = length;
176 cmd->pd_handle = to_vpd(pd)->pd_handle;
177 cmd->access_flags = access_flags;
178 cmd->nchunks = nchunks;
179 cmd->pdir_dma = mr->pdir.dir_dma;
180
181 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
182 if (ret < 0) {
183 dev_warn(&dev->pdev->dev,
184 "could not register mem region, error: %d\n", ret);
185 goto err_pdir;
186 }
187
188 mr->mmr.mr_handle = resp->mr_handle;
189 mr->ibmr.lkey = resp->lkey;
190 mr->ibmr.rkey = resp->rkey;
191
192 return &mr->ibmr;
193
194err_pdir:
195 pvrdma_page_dir_cleanup(dev, &mr->pdir);
196err_umem:
197 ib_umem_release(umem);
198 kfree(mr);
199
200 return ERR_PTR(ret);
201}
202
203/**
204 * pvrdma_alloc_mr - allocate a memory region
205 * @pd: protection domain
206 * @mr_type: type of memory region
207 * @max_num_sg: maximum number of pages
208 *
209 * @return: ib_mr pointer on success, otherwise returns an errno.
210 */
211struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
212 u32 max_num_sg)
213{
214 struct pvrdma_dev *dev = to_vdev(pd->device);
215 struct pvrdma_user_mr *mr;
216 union pvrdma_cmd_req req;
217 union pvrdma_cmd_resp rsp;
218 struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
219 struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
220 int size = max_num_sg * sizeof(u64);
221 int ret;
222
223 if (mr_type != IB_MR_TYPE_MEM_REG ||
224 max_num_sg > PVRDMA_MAX_FAST_REG_PAGES)
225 return ERR_PTR(-EINVAL);
226
227 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
228 if (!mr)
229 return ERR_PTR(-ENOMEM);
230
231 mr->pages = kzalloc(size, GFP_KERNEL);
232 if (!mr->pages) {
233 ret = -ENOMEM;
234 goto freemr;
235 }
236
237 ret = pvrdma_page_dir_init(dev, &mr->pdir, max_num_sg, false);
238 if (ret) {
239 dev_warn(&dev->pdev->dev,
240 "failed to allocate page dir for mr\n");
241 ret = -ENOMEM;
242 goto freepages;
243 }
244
245 memset(cmd, 0, sizeof(*cmd));
246 cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR;
247 cmd->pd_handle = to_vpd(pd)->pd_handle;
248 cmd->access_flags = 0;
249 cmd->flags = PVRDMA_MR_FLAG_FRMR;
250 cmd->nchunks = max_num_sg;
251
252 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
253 if (ret < 0) {
254 dev_warn(&dev->pdev->dev,
255 "could not create FR mem region, error: %d\n", ret);
256 goto freepdir;
257 }
258
259 mr->max_pages = max_num_sg;
260 mr->mmr.mr_handle = resp->mr_handle;
261 mr->ibmr.lkey = resp->lkey;
262 mr->ibmr.rkey = resp->rkey;
263 mr->page_shift = PAGE_SHIFT;
264 mr->umem = NULL;
265
266 return &mr->ibmr;
267
268freepdir:
269 pvrdma_page_dir_cleanup(dev, &mr->pdir);
270freepages:
271 kfree(mr->pages);
272freemr:
273 kfree(mr);
274 return ERR_PTR(ret);
275}
276
277/**
278 * pvrdma_dereg_mr - deregister a memory region
279 * @ibmr: memory region
280 *
281 * @return: 0 on success.
282 */
283int pvrdma_dereg_mr(struct ib_mr *ibmr)
284{
285 struct pvrdma_user_mr *mr = to_vmr(ibmr);
286 struct pvrdma_dev *dev = to_vdev(ibmr->device);
287 union pvrdma_cmd_req req;
288 struct pvrdma_cmd_destroy_mr *cmd = &req.destroy_mr;
289 int ret;
290
291 memset(cmd, 0, sizeof(*cmd));
292 cmd->hdr.cmd = PVRDMA_CMD_DESTROY_MR;
293 cmd->mr_handle = mr->mmr.mr_handle;
294 ret = pvrdma_cmd_post(dev, &req, NULL, 0);
295 if (ret < 0)
296 dev_warn(&dev->pdev->dev,
297 "could not deregister mem region, error: %d\n", ret);
298
299 pvrdma_page_dir_cleanup(dev, &mr->pdir);
300 if (mr->umem)
301 ib_umem_release(mr->umem);
302
303 kfree(mr->pages);
304 kfree(mr);
305
306 return 0;
307}
308
309static int pvrdma_set_page(struct ib_mr *ibmr, u64 addr)
310{
311 struct pvrdma_user_mr *mr = to_vmr(ibmr);
312
313 if (mr->npages == mr->max_pages)
314 return -ENOMEM;
315
316 mr->pages[mr->npages++] = addr;
317 return 0;
318}
319
320int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
321 unsigned int *sg_offset)
322{
323 struct pvrdma_user_mr *mr = to_vmr(ibmr);
324 struct pvrdma_dev *dev = to_vdev(ibmr->device);
325 int ret;
326
327 mr->npages = 0;
328
329 ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, pvrdma_set_page);
330 if (ret < 0)
331 dev_warn(&dev->pdev->dev, "could not map sg to pages\n");
332
333 return ret;
334}
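
The three entry points above (pvrdma_alloc_mr, the pvrdma_set_page callback and pvrdma_map_mr_sg) back the kernel's fast-register MR flow. The sketch below is only an illustration of how a kernel consumer might drive them through the core verbs layer, assuming an already-created PD, RC QP and mapped scatterlist; it is not part of this patch and trims error handling.

/* Illustrative only: fast-register a scatterlist through the core verbs
 * API, which lands in pvrdma_alloc_mr()/pvrdma_map_mr_sg() above.
 */
#include <rdma/ib_verbs.h>

static int example_fast_register(struct ib_pd *pd, struct ib_qp *qp,
				 struct scatterlist *sg, int sg_nents)
{
	struct ib_send_wr *bad_wr;
	struct ib_reg_wr reg_wr = {};
	struct ib_mr *mr;
	int n;

	/* Bounded by PVRDMA_MAX_FAST_REG_PAGES in pvrdma_alloc_mr(). */
	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, sg_nents);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/* Fills mr->pages one address at a time via pvrdma_set_page(). */
	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
	if (n < sg_nents) {
		ib_dereg_mr(mr);
		return n < 0 ? n : -EINVAL;
	}

	/* Posted as IB_WR_REG_MR; pvrdma_post_send() builds the
	 * fast-register WQE in set_reg_seg().
	 */
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}
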
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
new file mode 100644
index 000000000000..c8c01e558125
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -0,0 +1,972 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include <asm/page.h>
47#include <linux/io.h>
48#include <linux/wait.h>
49#include <rdma/ib_addr.h>
50#include <rdma/ib_smi.h>
51#include <rdma/ib_user_verbs.h>
52
53#include "pvrdma.h"
54
55static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
56 struct pvrdma_cq **recv_cq)
57{
58 *send_cq = to_vcq(qp->ibqp.send_cq);
59 *recv_cq = to_vcq(qp->ibqp.recv_cq);
60}
61
62static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
63 unsigned long *scq_flags,
64 unsigned long *rcq_flags)
65 __acquires(scq->cq_lock) __acquires(rcq->cq_lock)
66{
67 if (scq == rcq) {
68 spin_lock_irqsave(&scq->cq_lock, *scq_flags);
69 __acquire(rcq->cq_lock);
70 } else if (scq->cq_handle < rcq->cq_handle) {
71 spin_lock_irqsave(&scq->cq_lock, *scq_flags);
72 spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
73 SINGLE_DEPTH_NESTING);
74 } else {
75 spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
76 spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
77 SINGLE_DEPTH_NESTING);
78 }
79}
80
81static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
82 unsigned long *scq_flags,
83 unsigned long *rcq_flags)
84 __releases(scq->cq_lock) __releases(rcq->cq_lock)
85{
86 if (scq == rcq) {
87 __release(rcq->cq_lock);
88 spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
89 } else if (scq->cq_handle < rcq->cq_handle) {
90 spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
91 spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
92 } else {
93 spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
94 spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
95 }
96}
97
98static void pvrdma_reset_qp(struct pvrdma_qp *qp)
99{
100 struct pvrdma_cq *scq, *rcq;
101 unsigned long scq_flags, rcq_flags;
102
103 /* Clean up cqes */
104 get_cqs(qp, &scq, &rcq);
105 pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
106
107 _pvrdma_flush_cqe(qp, scq);
108 if (scq != rcq)
109 _pvrdma_flush_cqe(qp, rcq);
110
111 pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
112
113 /*
114 * Reset queuepair. The checks are because usermode queuepairs won't
115 * have kernel ringstates.
116 */
117 if (qp->rq.ring) {
118 atomic_set(&qp->rq.ring->cons_head, 0);
119 atomic_set(&qp->rq.ring->prod_tail, 0);
120 }
121 if (qp->sq.ring) {
122 atomic_set(&qp->sq.ring->cons_head, 0);
123 atomic_set(&qp->sq.ring->prod_tail, 0);
124 }
125}
126
127static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
128 struct ib_qp_cap *req_cap,
129 struct pvrdma_qp *qp)
130{
131 if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
132 req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
133 dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
134 return -EINVAL;
135 }
136
137 qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
138 qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));
139
140 /* Write back */
141 req_cap->max_recv_wr = qp->rq.wqe_cnt;
142 req_cap->max_recv_sge = qp->rq.max_sg;
143
144 qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
145 sizeof(struct pvrdma_sge) *
146 qp->rq.max_sg);
147 qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
148 PAGE_SIZE;
149
150 return 0;
151}
152
153static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
154 enum ib_qp_type type, struct pvrdma_qp *qp)
155{
156 if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
157 req_cap->max_send_sge > dev->dsr->caps.max_sge) {
158 dev_warn(&dev->pdev->dev, "send queue size invalid\n");
159 return -EINVAL;
160 }
161
162 qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
163 qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));
164
165 /* Write back */
166 req_cap->max_send_wr = qp->sq.wqe_cnt;
167 req_cap->max_send_sge = qp->sq.max_sg;
168
169 qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
170 sizeof(struct pvrdma_sge) *
171 qp->sq.max_sg);
172 /* Note: one extra page for the header. */
173 qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size +
174 PAGE_SIZE - 1) / PAGE_SIZE;
175
176 return 0;
177}
178
179/**
180 * pvrdma_create_qp - create queue pair
181 * @pd: protection domain
182 * @init_attr: queue pair attributes
183 * @udata: user data
184 *
185 * @return: the ib_qp pointer on success, otherwise returns an errno.
186 */
187struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
188 struct ib_qp_init_attr *init_attr,
189 struct ib_udata *udata)
190{
191 struct pvrdma_qp *qp = NULL;
192 struct pvrdma_dev *dev = to_vdev(pd->device);
193 union pvrdma_cmd_req req;
194 union pvrdma_cmd_resp rsp;
195 struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
196 struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
197 struct pvrdma_create_qp ucmd;
198 unsigned long flags;
199 int ret;
200
201 if (init_attr->create_flags) {
202 dev_warn(&dev->pdev->dev,
203 "invalid create queuepair flags %#x\n",
204 init_attr->create_flags);
205 return ERR_PTR(-EINVAL);
206 }
207
208 if (init_attr->qp_type != IB_QPT_RC &&
209 init_attr->qp_type != IB_QPT_UD &&
210 init_attr->qp_type != IB_QPT_GSI) {
211 dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
212 init_attr->qp_type);
213 return ERR_PTR(-EINVAL);
214 }
215
216 if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
217 return ERR_PTR(-ENOMEM);
218
219 switch (init_attr->qp_type) {
220 case IB_QPT_GSI:
221 if (init_attr->port_num == 0 ||
222 init_attr->port_num > pd->device->phys_port_cnt ||
223 udata) {
224 dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
225 ret = -EINVAL;
226 goto err_qp;
227 }
228 /* fall through */
229 case IB_QPT_RC:
230 case IB_QPT_UD:
231 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
232 if (!qp) {
233 ret = -ENOMEM;
234 goto err_qp;
235 }
236
237 spin_lock_init(&qp->sq.lock);
238 spin_lock_init(&qp->rq.lock);
239 mutex_init(&qp->mutex);
240 atomic_set(&qp->refcnt, 1);
241 init_waitqueue_head(&qp->wait);
242
243 qp->state = IB_QPS_RESET;
244
245 if (pd->uobject && udata) {
246 dev_dbg(&dev->pdev->dev,
247 "create queuepair from user space\n");
248
249 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
250 ret = -EFAULT;
251 goto err_qp;
252 }
253
254			/* Userspace manages the ring layout; just pin its buffers. */
255 qp->rumem = ib_umem_get(pd->uobject->context,
256 ucmd.rbuf_addr,
257 ucmd.rbuf_size, 0, 0);
258 if (IS_ERR(qp->rumem)) {
259 ret = PTR_ERR(qp->rumem);
260 goto err_qp;
261 }
262
263 qp->sumem = ib_umem_get(pd->uobject->context,
264 ucmd.sbuf_addr,
265 ucmd.sbuf_size, 0, 0);
266 if (IS_ERR(qp->sumem)) {
267 ib_umem_release(qp->rumem);
268 ret = PTR_ERR(qp->sumem);
269 goto err_qp;
270 }
271
272 qp->npages_send = ib_umem_page_count(qp->sumem);
273 qp->npages_recv = ib_umem_page_count(qp->rumem);
274 qp->npages = qp->npages_send + qp->npages_recv;
275 } else {
276 qp->is_kernel = true;
277
278 ret = pvrdma_set_sq_size(to_vdev(pd->device),
279 &init_attr->cap,
280 init_attr->qp_type, qp);
281 if (ret)
282 goto err_qp;
283
284 ret = pvrdma_set_rq_size(to_vdev(pd->device),
285 &init_attr->cap, qp);
286 if (ret)
287 goto err_qp;
288
289 qp->npages = qp->npages_send + qp->npages_recv;
290
291 /* Skip header page. */
292 qp->sq.offset = PAGE_SIZE;
293
294 /* Recv queue pages are after send pages. */
295 qp->rq.offset = qp->npages_send * PAGE_SIZE;
296 }
297
298 if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
299 dev_warn(&dev->pdev->dev,
300 "overflow pages in queuepair\n");
301 ret = -EINVAL;
302 goto err_umem;
303 }
304
305 ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
306 qp->is_kernel);
307 if (ret) {
308 dev_warn(&dev->pdev->dev,
309 "could not allocate page directory\n");
310 goto err_umem;
311 }
312
313 if (!qp->is_kernel) {
314 pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
315 pvrdma_page_dir_insert_umem(&qp->pdir, qp->rumem,
316 qp->npages_send);
317 } else {
318 /* Ring state is always the first page. */
319 qp->sq.ring = qp->pdir.pages[0];
320 qp->rq.ring = &qp->sq.ring[1];
321 }
322 break;
323 default:
324 ret = -EINVAL;
325 goto err_qp;
326 }
327
328 /* Not supported */
329 init_attr->cap.max_inline_data = 0;
330
331 memset(cmd, 0, sizeof(*cmd));
332 cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
333 cmd->pd_handle = to_vpd(pd)->pd_handle;
334 cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
335 cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
336 cmd->max_send_wr = init_attr->cap.max_send_wr;
337 cmd->max_recv_wr = init_attr->cap.max_recv_wr;
338 cmd->max_send_sge = init_attr->cap.max_send_sge;
339 cmd->max_recv_sge = init_attr->cap.max_recv_sge;
340 cmd->max_inline_data = init_attr->cap.max_inline_data;
341 cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
342 cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
343 cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
344 cmd->total_chunks = qp->npages;
345 cmd->send_chunks = qp->npages_send - 1;
346 cmd->pdir_dma = qp->pdir.dir_dma;
347
348 dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
349 cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
350 cmd->max_recv_sge);
351
352 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
353 if (ret < 0) {
354 dev_warn(&dev->pdev->dev,
355 "could not create queuepair, error: %d\n", ret);
356 goto err_pdir;
357 }
358
359 /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
360 qp->qp_handle = resp->qpn;
361 qp->port = init_attr->port_num;
362 qp->ibqp.qp_num = resp->qpn;
363 spin_lock_irqsave(&dev->qp_tbl_lock, flags);
364 dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
365 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
366
367 return &qp->ibqp;
368
369err_pdir:
370 pvrdma_page_dir_cleanup(dev, &qp->pdir);
371err_umem:
372 if (pd->uobject && udata) {
373 if (qp->rumem)
374 ib_umem_release(qp->rumem);
375 if (qp->sumem)
376 ib_umem_release(qp->sumem);
377 }
378err_qp:
379 kfree(qp);
380 atomic_dec(&dev->num_qps);
381
382 return ERR_PTR(ret);
383}
384
385static void pvrdma_free_qp(struct pvrdma_qp *qp)
386{
387 struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);
388 struct pvrdma_cq *scq;
389 struct pvrdma_cq *rcq;
390 unsigned long flags, scq_flags, rcq_flags;
391
392 /* In case cq is polling */
393 get_cqs(qp, &scq, &rcq);
394 pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
395
396 _pvrdma_flush_cqe(qp, scq);
397 if (scq != rcq)
398 _pvrdma_flush_cqe(qp, rcq);
399
400 spin_lock_irqsave(&dev->qp_tbl_lock, flags);
401 dev->qp_tbl[qp->qp_handle] = NULL;
402 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
403
404 pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
405
406 atomic_dec(&qp->refcnt);
407 wait_event(qp->wait, !atomic_read(&qp->refcnt));
408
409 pvrdma_page_dir_cleanup(dev, &qp->pdir);
410
411 kfree(qp);
412
413 atomic_dec(&dev->num_qps);
414}
415
416/**
417 * pvrdma_destroy_qp - destroy a queue pair
418 * @qp: the queue pair to destroy
419 *
420 * @return: 0 on success.
421 */
422int pvrdma_destroy_qp(struct ib_qp *qp)
423{
424 struct pvrdma_qp *vqp = to_vqp(qp);
425 union pvrdma_cmd_req req;
426 struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
427 int ret;
428
429 memset(cmd, 0, sizeof(*cmd));
430 cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
431 cmd->qp_handle = vqp->qp_handle;
432
433 ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0);
434 if (ret < 0)
435 dev_warn(&to_vdev(qp->device)->pdev->dev,
436 "destroy queuepair failed, error: %d\n", ret);
437
438 pvrdma_free_qp(vqp);
439
440 return 0;
441}
442
443/**
444 * pvrdma_modify_qp - modify queue pair attributes
445 * @ibqp: the queue pair
446 * @attr: the new queue pair's attributes
447 * @attr_mask: attributes mask
448 * @udata: user data
449 *
450 * @return: 0 on success, otherwise returns an errno.
451 */
452int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
453 int attr_mask, struct ib_udata *udata)
454{
455 struct pvrdma_dev *dev = to_vdev(ibqp->device);
456 struct pvrdma_qp *qp = to_vqp(ibqp);
457 union pvrdma_cmd_req req;
458 union pvrdma_cmd_resp rsp;
459 struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
460 int cur_state, next_state;
461 int ret;
462
463	/* Sanity checking. Hold the QP mutex across the checks and the device command. */
464 mutex_lock(&qp->mutex);
465 cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
466 qp->state;
467 next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
468
469 if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
470 attr_mask, IB_LINK_LAYER_ETHERNET)) {
471 ret = -EINVAL;
472 goto out;
473 }
474
475 if (attr_mask & IB_QP_PORT) {
476 if (attr->port_num == 0 ||
477 attr->port_num > ibqp->device->phys_port_cnt) {
478 ret = -EINVAL;
479 goto out;
480 }
481 }
482
483 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
484 if (attr->min_rnr_timer > 31) {
485 ret = -EINVAL;
486 goto out;
487 }
488 }
489
490 if (attr_mask & IB_QP_PKEY_INDEX) {
491 if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
492 ret = -EINVAL;
493 goto out;
494 }
495 }
496
497 if (attr_mask & IB_QP_QKEY)
498 qp->qkey = attr->qkey;
499
500 if (cur_state == next_state && cur_state == IB_QPS_RESET) {
501 ret = 0;
502 goto out;
503 }
504
505 qp->state = next_state;
506 memset(cmd, 0, sizeof(*cmd));
507 cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
508 cmd->qp_handle = qp->qp_handle;
509 cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
510 cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
511 cmd->attrs.cur_qp_state =
512 ib_qp_state_to_pvrdma(attr->cur_qp_state);
513 cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
514 cmd->attrs.path_mig_state =
515 ib_mig_state_to_pvrdma(attr->path_mig_state);
516 cmd->attrs.qkey = attr->qkey;
517 cmd->attrs.rq_psn = attr->rq_psn;
518 cmd->attrs.sq_psn = attr->sq_psn;
519 cmd->attrs.dest_qp_num = attr->dest_qp_num;
520 cmd->attrs.qp_access_flags =
521 ib_access_flags_to_pvrdma(attr->qp_access_flags);
522 cmd->attrs.pkey_index = attr->pkey_index;
523 cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
524 cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
525 cmd->attrs.sq_draining = attr->sq_draining;
526 cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
527 cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
528 cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
529 cmd->attrs.port_num = attr->port_num;
530 cmd->attrs.timeout = attr->timeout;
531 cmd->attrs.retry_cnt = attr->retry_cnt;
532 cmd->attrs.rnr_retry = attr->rnr_retry;
533 cmd->attrs.alt_port_num = attr->alt_port_num;
534 cmd->attrs.alt_timeout = attr->alt_timeout;
535 ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
536 ib_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
537 ib_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
538
539 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
540 if (ret < 0) {
541 dev_warn(&dev->pdev->dev,
542 "could not modify queuepair, error: %d\n", ret);
543 } else if (rsp.hdr.err > 0) {
544 dev_warn(&dev->pdev->dev,
545 "cannot modify queuepair, error: %d\n", rsp.hdr.err);
546 ret = -EINVAL;
547 }
548
549 if (ret == 0 && next_state == IB_QPS_RESET)
550 pvrdma_reset_qp(qp);
551
552out:
553 mutex_unlock(&qp->mutex);
554
555 return ret;
556}
557
558static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n)
559{
560 return pvrdma_page_dir_get_ptr(&qp->pdir,
561 qp->sq.offset + n * qp->sq.wqe_size);
562}
563
564static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n)
565{
566 return pvrdma_page_dir_get_ptr(&qp->pdir,
567 qp->rq.offset + n * qp->rq.wqe_size);
568}
569
570static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr)
571{
572 struct pvrdma_user_mr *mr = to_vmr(wr->mr);
573
574 wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
575 wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
576 wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
577 wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
578 wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
579 wqe_hdr->wr.fast_reg.access_flags = wr->access;
580 wqe_hdr->wr.fast_reg.rkey = wr->key;
581
582 return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
583 mr->npages);
584}
585
586/**
587 * pvrdma_post_send - post send work request entries on a QP
588 * @ibqp: the QP
589 * @wr: work request list to post
590 * @bad_wr: the first bad WR returned
591 *
592 * @return: 0 on success, otherwise errno returned.
593 */
594int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
595 struct ib_send_wr **bad_wr)
596{
597 struct pvrdma_qp *qp = to_vqp(ibqp);
598 struct pvrdma_dev *dev = to_vdev(ibqp->device);
599 unsigned long flags;
600 struct pvrdma_sq_wqe_hdr *wqe_hdr;
601 struct pvrdma_sge *sge;
602 int i, index;
603 int nreq;
604 int ret;
605
606 /*
607 * In states lower than RTS, we can fail immediately. In other states,
608 * just post and let the device figure it out.
609 */
610 if (qp->state < IB_QPS_RTS) {
611 *bad_wr = wr;
612 return -EINVAL;
613 }
614
615 spin_lock_irqsave(&qp->sq.lock, flags);
616
617 index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt);
618 for (nreq = 0; wr; nreq++, wr = wr->next) {
619 unsigned int tail;
620
621 if (unlikely(!pvrdma_idx_ring_has_space(
622 qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
623 dev_warn_ratelimited(&dev->pdev->dev,
624 "send queue is full\n");
625 *bad_wr = wr;
626 ret = -ENOMEM;
627 goto out;
628 }
629
630 if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
631 dev_warn_ratelimited(&dev->pdev->dev,
632 "send SGE overflow\n");
633 *bad_wr = wr;
634 ret = -EINVAL;
635 goto out;
636 }
637
638 if (unlikely(wr->opcode < 0)) {
639 dev_warn_ratelimited(&dev->pdev->dev,
640 "invalid send opcode\n");
641 *bad_wr = wr;
642 ret = -EINVAL;
643 goto out;
644 }
645
646 /*
647 * Only support UD, RC.
648		 * A thorough per-opcode check against this table is still needed.
649 * opcode _UD _UC _RC
650 * _SEND x x x
651 * _SEND_WITH_IMM x x x
652 * _RDMA_WRITE x x
653 * _RDMA_WRITE_WITH_IMM x x
654 * _LOCAL_INV x x
655 * _SEND_WITH_INV x x
656 * _RDMA_READ x
657 * _ATOMIC_CMP_AND_SWP x
658 * _ATOMIC_FETCH_AND_ADD x
659 * _MASK_ATOMIC_CMP_AND_SWP x
660 * _MASK_ATOMIC_FETCH_AND_ADD x
661 * _REG_MR x
662 *
663 */
664 if (qp->ibqp.qp_type != IB_QPT_UD &&
665 qp->ibqp.qp_type != IB_QPT_RC &&
666 wr->opcode != IB_WR_SEND) {
667 dev_warn_ratelimited(&dev->pdev->dev,
668 "unsupported queuepair type\n");
669 *bad_wr = wr;
670 ret = -EINVAL;
671 goto out;
672 } else if (qp->ibqp.qp_type == IB_QPT_UD ||
673 qp->ibqp.qp_type == IB_QPT_GSI) {
674 if (wr->opcode != IB_WR_SEND &&
675 wr->opcode != IB_WR_SEND_WITH_IMM) {
676 dev_warn_ratelimited(&dev->pdev->dev,
677 "invalid send opcode\n");
678 *bad_wr = wr;
679 ret = -EINVAL;
680 goto out;
681 }
682 }
683
684 wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index);
685 memset(wqe_hdr, 0, sizeof(*wqe_hdr));
686 wqe_hdr->wr_id = wr->wr_id;
687 wqe_hdr->num_sge = wr->num_sge;
688 wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
689 wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
690 if (wr->opcode == IB_WR_SEND_WITH_IMM ||
691 wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
692 wqe_hdr->ex.imm_data = wr->ex.imm_data;
693
694 switch (qp->ibqp.qp_type) {
695 case IB_QPT_GSI:
696 case IB_QPT_UD:
697 if (unlikely(!ud_wr(wr)->ah)) {
698 dev_warn_ratelimited(&dev->pdev->dev,
699 "invalid address handle\n");
700 *bad_wr = wr;
701 ret = -EINVAL;
702 goto out;
703 }
704
705 /*
706 * Use qkey from qp context if high order bit set,
707 * otherwise from work request.
708 */
709 wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
710 wqe_hdr->wr.ud.remote_qkey =
711 ud_wr(wr)->remote_qkey & 0x80000000 ?
712 qp->qkey : ud_wr(wr)->remote_qkey;
713 wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;
714
715 break;
716 case IB_QPT_RC:
717 switch (wr->opcode) {
718 case IB_WR_RDMA_READ:
719 case IB_WR_RDMA_WRITE:
720 case IB_WR_RDMA_WRITE_WITH_IMM:
721 wqe_hdr->wr.rdma.remote_addr =
722 rdma_wr(wr)->remote_addr;
723 wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
724 break;
725 case IB_WR_LOCAL_INV:
726 case IB_WR_SEND_WITH_INV:
727 wqe_hdr->ex.invalidate_rkey =
728 wr->ex.invalidate_rkey;
729 break;
730 case IB_WR_ATOMIC_CMP_AND_SWP:
731 case IB_WR_ATOMIC_FETCH_AND_ADD:
732 wqe_hdr->wr.atomic.remote_addr =
733 atomic_wr(wr)->remote_addr;
734 wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
735 wqe_hdr->wr.atomic.compare_add =
736 atomic_wr(wr)->compare_add;
737 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
738 wqe_hdr->wr.atomic.swap =
739 atomic_wr(wr)->swap;
740 break;
741 case IB_WR_REG_MR:
742 ret = set_reg_seg(wqe_hdr, reg_wr(wr));
743 if (ret < 0) {
744 dev_warn_ratelimited(&dev->pdev->dev,
745 "Failed to set fast register work request\n");
746 *bad_wr = wr;
747 goto out;
748 }
749 break;
750 default:
751 break;
752 }
753
754 break;
755 default:
756 dev_warn_ratelimited(&dev->pdev->dev,
757 "invalid queuepair type\n");
758 ret = -EINVAL;
759 *bad_wr = wr;
760 goto out;
761 }
762
763 sge = (struct pvrdma_sge *)(wqe_hdr + 1);
764 for (i = 0; i < wr->num_sge; i++) {
765 /* Need to check wqe_size 0 or max size */
766 sge->addr = wr->sg_list[i].addr;
767 sge->length = wr->sg_list[i].length;
768 sge->lkey = wr->sg_list[i].lkey;
769 sge++;
770 }
771
772 /* Make sure wqe is written before index update */
773 smp_wmb();
774
775 index++;
776 if (unlikely(index >= qp->sq.wqe_cnt))
777 index = 0;
778 /* Update shared sq ring */
779 pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
780 qp->sq.wqe_cnt);
781 }
782
783 ret = 0;
784
785out:
786 spin_unlock_irqrestore(&qp->sq.lock, flags);
787
788 if (!ret)
789 pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);
790
791 return ret;
792}
793
794/**
795 * pvrdma_post_recv - post receive work request entries on a QP
796 * @ibqp: the QP
797 * @wr: the work request list to post
798 * @bad_wr: the first bad WR returned
799 *
800 * @return: 0 on success, otherwise errno returned.
801 */
802int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
803 struct ib_recv_wr **bad_wr)
804{
805 struct pvrdma_dev *dev = to_vdev(ibqp->device);
806 unsigned long flags;
807 struct pvrdma_qp *qp = to_vqp(ibqp);
808 struct pvrdma_rq_wqe_hdr *wqe_hdr;
809 struct pvrdma_sge *sge;
810 int index, nreq;
811 int ret = 0;
812 int i;
813
814 /*
815 * In the RESET state, we can fail immediately. For other states,
816 * just post and let the device figure it out.
817 */
818 if (qp->state == IB_QPS_RESET) {
819 *bad_wr = wr;
820 return -EINVAL;
821 }
822
823 spin_lock_irqsave(&qp->rq.lock, flags);
824
825 index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt);
826 for (nreq = 0; wr; nreq++, wr = wr->next) {
827 unsigned int tail;
828
829 if (unlikely(wr->num_sge > qp->rq.max_sg ||
830 wr->num_sge < 0)) {
831 ret = -EINVAL;
832 *bad_wr = wr;
833 dev_warn_ratelimited(&dev->pdev->dev,
834 "recv SGE overflow\n");
835 goto out;
836 }
837
838 if (unlikely(!pvrdma_idx_ring_has_space(
839 qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
840 ret = -ENOMEM;
841 *bad_wr = wr;
842 dev_warn_ratelimited(&dev->pdev->dev,
843 "recv queue full\n");
844 goto out;
845 }
846
847 wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index);
848 wqe_hdr->wr_id = wr->wr_id;
849 wqe_hdr->num_sge = wr->num_sge;
850 wqe_hdr->total_len = 0;
851
852 sge = (struct pvrdma_sge *)(wqe_hdr + 1);
853 for (i = 0; i < wr->num_sge; i++) {
854 sge->addr = wr->sg_list[i].addr;
855 sge->length = wr->sg_list[i].length;
856 sge->lkey = wr->sg_list[i].lkey;
857 sge++;
858 }
859
860 /* Make sure wqe is written before index update */
861 smp_wmb();
862
863 index++;
864 if (unlikely(index >= qp->rq.wqe_cnt))
865 index = 0;
866 /* Update shared rq ring */
867 pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
868 qp->rq.wqe_cnt);
869 }
870
871 spin_unlock_irqrestore(&qp->rq.lock, flags);
872
873 pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);
874
875 return ret;
876
877out:
878 spin_unlock_irqrestore(&qp->rq.lock, flags);
879
880 return ret;
881}
882
883/**
884 * pvrdma_query_qp - query a queue pair's attributes
885 * @ibqp: the queue pair to query
886 * @attr: the queue pair's attributes
887 * @attr_mask: attributes mask
888 * @init_attr: initial queue pair attributes
889 *
890 * @return: 0 on success, otherwise returns an errno.
891 */
892int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
893 int attr_mask, struct ib_qp_init_attr *init_attr)
894{
895 struct pvrdma_dev *dev = to_vdev(ibqp->device);
896 struct pvrdma_qp *qp = to_vqp(ibqp);
897 union pvrdma_cmd_req req;
898 union pvrdma_cmd_resp rsp;
899 struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
900 struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
901 int ret = 0;
902
903 mutex_lock(&qp->mutex);
904
905 if (qp->state == IB_QPS_RESET) {
906 attr->qp_state = IB_QPS_RESET;
907 goto out;
908 }
909
910 memset(cmd, 0, sizeof(*cmd));
911 cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
912 cmd->qp_handle = qp->qp_handle;
913 cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
914
915 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
916 if (ret < 0) {
917 dev_warn(&dev->pdev->dev,
918 "could not query queuepair, error: %d\n", ret);
919 goto out;
920 }
921
922 attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
923 attr->cur_qp_state =
924 pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
925 attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
926 attr->path_mig_state =
927 pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
928 attr->qkey = resp->attrs.qkey;
929 attr->rq_psn = resp->attrs.rq_psn;
930 attr->sq_psn = resp->attrs.sq_psn;
931 attr->dest_qp_num = resp->attrs.dest_qp_num;
932 attr->qp_access_flags =
933 pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
934 attr->pkey_index = resp->attrs.pkey_index;
935 attr->alt_pkey_index = resp->attrs.alt_pkey_index;
936 attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
937 attr->sq_draining = resp->attrs.sq_draining;
938 attr->max_rd_atomic = resp->attrs.max_rd_atomic;
939 attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
940 attr->min_rnr_timer = resp->attrs.min_rnr_timer;
941 attr->port_num = resp->attrs.port_num;
942 attr->timeout = resp->attrs.timeout;
943 attr->retry_cnt = resp->attrs.retry_cnt;
944 attr->rnr_retry = resp->attrs.rnr_retry;
945 attr->alt_port_num = resp->attrs.alt_port_num;
946 attr->alt_timeout = resp->attrs.alt_timeout;
947 pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
948 pvrdma_ah_attr_to_ib(&attr->ah_attr, &resp->attrs.ah_attr);
949 pvrdma_ah_attr_to_ib(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);
950
951 qp->state = attr->qp_state;
952
953 ret = 0;
954
955out:
956 attr->cur_qp_state = attr->qp_state;
957
958 init_attr->event_handler = qp->ibqp.event_handler;
959 init_attr->qp_context = qp->ibqp.qp_context;
960 init_attr->send_cq = qp->ibqp.send_cq;
961 init_attr->recv_cq = qp->ibqp.recv_cq;
962 init_attr->srq = qp->ibqp.srq;
963 init_attr->xrcd = NULL;
964 init_attr->cap = attr->cap;
965 init_attr->sq_sig_type = 0;
966 init_attr->qp_type = qp->ibqp.qp_type;
967 init_attr->create_flags = 0;
968 init_attr->port_num = qp->port;
969
970 mutex_unlock(&qp->mutex);
971 return ret;
972}
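
pvrdma_set_sq_size() and pvrdma_set_rq_size() above round the requested WQE count and SGE limit up to powers of two, derive a power-of-two WQE size, and then count the pages the ring needs, with one extra page reserved on the send side for the shared ring state. The standalone mock below reproduces that arithmetic in plain C; the header/SGE sizes are stand-ins chosen for illustration, not the real structure sizes.

/* Userspace mock of the queue sizing arithmetic; not driver code. */
#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int x)
{
	unsigned int r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	const unsigned int page_size = 4096;
	const unsigned int hdr_size = 64;   /* stand-in for sizeof(struct pvrdma_sq_wqe_hdr) */
	const unsigned int sge_size = 16;   /* stand-in for sizeof(struct pvrdma_sge) */
	unsigned int max_send_wr = 100, max_send_sge = 3;

	unsigned int wqe_cnt = roundup_pow_of_two(max_send_wr);                   /* 128 */
	unsigned int max_sg = roundup_pow_of_two(max_send_sge);                   /* 4 */
	unsigned int wqe_size = roundup_pow_of_two(hdr_size + sge_size * max_sg); /* 128 */
	/* One extra page holds the shared ring state (cons_head/prod_tail). */
	unsigned int npages_send =
		1 + (wqe_cnt * wqe_size + page_size - 1) / page_size;             /* 5 */

	printf("wqe_cnt=%u wqe_size=%u npages_send=%u\n",
	       wqe_cnt, wqe_size, npages_send);
	return 0;
}
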
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h
new file mode 100644
index 000000000000..ed9022a91a1d
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h
@@ -0,0 +1,131 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#ifndef __PVRDMA_RING_H__
47#define __PVRDMA_RING_H__
48
49#include <linux/types.h>
50
51#define PVRDMA_INVALID_IDX -1 /* Invalid index. */
52
53struct pvrdma_ring {
54 atomic_t prod_tail; /* Producer tail. */
55 atomic_t cons_head; /* Consumer head. */
56};
57
58struct pvrdma_ring_state {
59 struct pvrdma_ring tx; /* Tx ring. */
60 struct pvrdma_ring rx; /* Rx ring. */
61};
62
63static inline int pvrdma_idx_valid(__u32 idx, __u32 max_elems)
64{
65 /* Generates fewer instructions than a less-than. */
66 return (idx & ~((max_elems << 1) - 1)) == 0;
67}
68
69static inline __s32 pvrdma_idx(atomic_t *var, __u32 max_elems)
70{
71 const unsigned int idx = atomic_read(var);
72
73 if (pvrdma_idx_valid(idx, max_elems))
74 return idx & (max_elems - 1);
75 return PVRDMA_INVALID_IDX;
76}
77
78static inline void pvrdma_idx_ring_inc(atomic_t *var, __u32 max_elems)
79{
80 __u32 idx = atomic_read(var) + 1; /* Increment. */
81
82 idx &= (max_elems << 1) - 1; /* Modulo size, flip gen. */
83 atomic_set(var, idx);
84}
85
86static inline __s32 pvrdma_idx_ring_has_space(const struct pvrdma_ring *r,
87 __u32 max_elems, __u32 *out_tail)
88{
89 const __u32 tail = atomic_read(&r->prod_tail);
90 const __u32 head = atomic_read(&r->cons_head);
91
92 if (pvrdma_idx_valid(tail, max_elems) &&
93 pvrdma_idx_valid(head, max_elems)) {
94 *out_tail = tail & (max_elems - 1);
95 return tail != (head ^ max_elems);
96 }
97 return PVRDMA_INVALID_IDX;
98}
99
100static inline __s32 pvrdma_idx_ring_has_data(const struct pvrdma_ring *r,
101 __u32 max_elems, __u32 *out_head)
102{
103 const __u32 tail = atomic_read(&r->prod_tail);
104 const __u32 head = atomic_read(&r->cons_head);
105
106 if (pvrdma_idx_valid(tail, max_elems) &&
107 pvrdma_idx_valid(head, max_elems)) {
108 *out_head = head & (max_elems - 1);
109 return tail != head;
110 }
111 return PVRDMA_INVALID_IDX;
112}
113
114static inline bool pvrdma_idx_ring_is_valid_idx(const struct pvrdma_ring *r,
115 __u32 max_elems, __u32 *idx)
116{
117 const __u32 tail = atomic_read(&r->prod_tail);
118 const __u32 head = atomic_read(&r->cons_head);
119
120 if (pvrdma_idx_valid(tail, max_elems) &&
121 pvrdma_idx_valid(head, max_elems) &&
122 pvrdma_idx_valid(*idx, max_elems)) {
123 if (tail > head && (*idx < tail && *idx >= head))
124 return true;
125 else if (head > tail && (*idx >= head || *idx < tail))
126 return true;
127 }
128 return false;
129}
130
131#endif /* __PVRDMA_RING_H__ */
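
The ring helpers above keep the producer and consumer indices in the range [0, 2 * max_elems): the extra generation bit lets a full ring (prod_tail == cons_head ^ max_elems) be told apart from an empty one (prod_tail == cons_head), and the actual slot is always idx & (max_elems - 1). A minimal userspace mock of the scheme, with plain unsigned ints standing in for atomic_t:

/* Userspace mock of the pvrdma ring indexing; MAX_ELEMS must be a power of two. */
#include <assert.h>
#include <stdio.h>

#define MAX_ELEMS 4u

static unsigned int ring_inc(unsigned int idx)
{
	return (idx + 1) & ((MAX_ELEMS << 1) - 1); /* modulo 2*size, may flip the generation bit */
}

static int ring_has_space(unsigned int prod_tail, unsigned int cons_head)
{
	return prod_tail != (cons_head ^ MAX_ELEMS); /* full when they differ only in the generation bit */
}

static int ring_has_data(unsigned int prod_tail, unsigned int cons_head)
{
	return prod_tail != cons_head;
}

int main(void)
{
	unsigned int tail = 0, head = 0, i;

	assert(!ring_has_data(tail, head));          /* empty ring */

	for (i = 0; i < MAX_ELEMS; i++) {
		assert(ring_has_space(tail, head));
		tail = ring_inc(tail);               /* producer posts a WQE */
	}
	assert(!ring_has_space(tail, head));         /* full: tail == head ^ MAX_ELEMS */

	head = ring_inc(head);                       /* consumer retires one entry */
	assert(ring_has_space(tail, head));
	printf("tail=%u head=%u next slot=%u\n", tail, head, tail & (MAX_ELEMS - 1));
	return 0;
}
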
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
new file mode 100644
index 000000000000..54891370d18a
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -0,0 +1,579 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include <asm/page.h>
47#include <linux/inet.h>
48#include <linux/io.h>
49#include <rdma/ib_addr.h>
50#include <rdma/ib_smi.h>
51#include <rdma/ib_user_verbs.h>
52#include <rdma/vmw_pvrdma-abi.h>
53
54#include "pvrdma.h"
55
56/**
57 * pvrdma_query_device - query device
58 * @ibdev: the device to query
59 * @props: the device properties
60 * @uhw: user data
61 *
62 * @return: 0 on success, otherwise negative errno
63 */
64int pvrdma_query_device(struct ib_device *ibdev,
65 struct ib_device_attr *props,
66 struct ib_udata *uhw)
67{
68 struct pvrdma_dev *dev = to_vdev(ibdev);
69
70 if (uhw->inlen || uhw->outlen)
71 return -EINVAL;
72
73 memset(props, 0, sizeof(*props));
74
75 props->fw_ver = dev->dsr->caps.fw_ver;
76 props->sys_image_guid = dev->dsr->caps.sys_image_guid;
77 props->max_mr_size = dev->dsr->caps.max_mr_size;
78 props->page_size_cap = dev->dsr->caps.page_size_cap;
79 props->vendor_id = dev->dsr->caps.vendor_id;
80 props->vendor_part_id = dev->pdev->device;
81 props->hw_ver = dev->dsr->caps.hw_ver;
82 props->max_qp = dev->dsr->caps.max_qp;
83 props->max_qp_wr = dev->dsr->caps.max_qp_wr;
84 props->device_cap_flags = dev->dsr->caps.device_cap_flags;
85 props->max_sge = dev->dsr->caps.max_sge;
86 props->max_cq = dev->dsr->caps.max_cq;
87 props->max_cqe = dev->dsr->caps.max_cqe;
88 props->max_mr = dev->dsr->caps.max_mr;
89 props->max_pd = dev->dsr->caps.max_pd;
90 props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom;
91 props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom;
92 props->atomic_cap =
93 dev->dsr->caps.atomic_ops &
94 (PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ?
95 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
96 props->masked_atomic_cap = props->atomic_cap;
97 props->max_ah = dev->dsr->caps.max_ah;
98 props->max_pkeys = dev->dsr->caps.max_pkeys;
99 props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay;
100 if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) &&
101 (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) &&
102 (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) {
103 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
104 }
105
106 return 0;
107}
108
109/**
110 * pvrdma_query_port - query device port attributes
111 * @ibdev: the device to query
112 * @port: the port number
113 * @props: the device properties
114 *
115 * @return: 0 on success, otherwise negative errno
116 */
117int pvrdma_query_port(struct ib_device *ibdev, u8 port,
118 struct ib_port_attr *props)
119{
120 struct pvrdma_dev *dev = to_vdev(ibdev);
121 union pvrdma_cmd_req req;
122 union pvrdma_cmd_resp rsp;
123 struct pvrdma_cmd_query_port *cmd = &req.query_port;
124 struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp;
125 int err;
126
127 memset(cmd, 0, sizeof(*cmd));
128 cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT;
129 cmd->port_num = port;
130
131 err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP);
132 if (err < 0) {
133 dev_warn(&dev->pdev->dev,
134 "could not query port, error: %d\n", err);
135 return err;
136 }
137
138 memset(props, 0, sizeof(*props));
139
140 props->state = pvrdma_port_state_to_ib(resp->attrs.state);
141 props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
142 props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu);
143 props->gid_tbl_len = resp->attrs.gid_tbl_len;
144 props->port_cap_flags =
145 pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
146 props->max_msg_sz = resp->attrs.max_msg_sz;
147 props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
148 props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
149 props->pkey_tbl_len = resp->attrs.pkey_tbl_len;
150 props->lid = resp->attrs.lid;
151 props->sm_lid = resp->attrs.sm_lid;
152 props->lmc = resp->attrs.lmc;
153 props->max_vl_num = resp->attrs.max_vl_num;
154 props->sm_sl = resp->attrs.sm_sl;
155 props->subnet_timeout = resp->attrs.subnet_timeout;
156 props->init_type_reply = resp->attrs.init_type_reply;
157 props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width);
158 props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed);
159 props->phys_state = resp->attrs.phys_state;
160
161 return 0;
162}
163
164/**
165 * pvrdma_query_gid - query device gid
166 * @ibdev: the device to query
167 * @port: the port number
168 * @index: the index
169 * @gid: the device gid value
170 *
171 * @return: 0 on success, otherwise negative errno
172 */
173int pvrdma_query_gid(struct ib_device *ibdev, u8 port, int index,
174 union ib_gid *gid)
175{
176 struct pvrdma_dev *dev = to_vdev(ibdev);
177
178 if (index >= dev->dsr->caps.gid_tbl_len)
179 return -EINVAL;
180
181 memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid));
182
183 return 0;
184}
185
186/**
187 * pvrdma_query_pkey - query device port's P_Key table
188 * @ibdev: the device to query
189 * @port: the port number
190 * @index: the index
191 * @pkey: the device P_Key value
192 *
193 * @return: 0 on success, otherwise negative errno
194 */
195int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
196 u16 *pkey)
197{
198 int err = 0;
199 union pvrdma_cmd_req req;
200 union pvrdma_cmd_resp rsp;
201 struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey;
202
203 memset(cmd, 0, sizeof(*cmd));
204 cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY;
205 cmd->port_num = port;
206 cmd->index = index;
207
208 err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp,
209 PVRDMA_CMD_QUERY_PKEY_RESP);
210 if (err < 0) {
211 dev_warn(&to_vdev(ibdev)->pdev->dev,
212 "could not query pkey, error: %d\n", err);
213 return err;
214 }
215
216 *pkey = rsp.query_pkey_resp.pkey;
217
218 return 0;
219}
220
221enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
222 u8 port)
223{
224 return IB_LINK_LAYER_ETHERNET;
225}
226
227int pvrdma_modify_device(struct ib_device *ibdev, int mask,
228 struct ib_device_modify *props)
229{
230 unsigned long flags;
231
232 if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
233 IB_DEVICE_MODIFY_NODE_DESC)) {
234 dev_warn(&to_vdev(ibdev)->pdev->dev,
235 "unsupported device modify mask %#x\n", mask);
236 return -EOPNOTSUPP;
237 }
238
239 if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
240 spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
241 memcpy(ibdev->node_desc, props->node_desc, 64);
242 spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
243 }
244
245 if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
246 mutex_lock(&to_vdev(ibdev)->port_mutex);
247 to_vdev(ibdev)->sys_image_guid =
248 cpu_to_be64(props->sys_image_guid);
249 mutex_unlock(&to_vdev(ibdev)->port_mutex);
250 }
251
252 return 0;
253}
254
255/**
256 * pvrdma_modify_port - modify device port attributes
257 * @ibdev: the device to modify
258 * @port: the port number
259 * @mask: attributes to modify
260 * @props: the device properties
261 *
262 * @return: 0 on success, otherwise negative errno
263 */
264int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
265 struct ib_port_modify *props)
266{
267 struct ib_port_attr attr;
268 struct pvrdma_dev *vdev = to_vdev(ibdev);
269 int ret;
270
271 if (mask & ~IB_PORT_SHUTDOWN) {
272 dev_warn(&vdev->pdev->dev,
273 "unsupported port modify mask %#x\n", mask);
274 return -EOPNOTSUPP;
275 }
276
277 mutex_lock(&vdev->port_mutex);
278 ret = pvrdma_query_port(ibdev, port, &attr);
279 if (ret)
280 goto out;
281
282 vdev->port_cap_mask |= props->set_port_cap_mask;
283 vdev->port_cap_mask &= ~props->clr_port_cap_mask;
284
285 if (mask & IB_PORT_SHUTDOWN)
286 vdev->ib_active = false;
287
288out:
289 mutex_unlock(&vdev->port_mutex);
290 return ret;
291}
292
293/**
294 * pvrdma_alloc_ucontext - allocate ucontext
295 * @ibdev: the IB device
296 * @udata: user data
297 *
298 * @return: the ib_ucontext pointer on success, otherwise errno.
299 */
300struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
301 struct ib_udata *udata)
302{
303 struct pvrdma_dev *vdev = to_vdev(ibdev);
304 struct pvrdma_ucontext *context;
305 union pvrdma_cmd_req req;
306 union pvrdma_cmd_resp rsp;
307 struct pvrdma_cmd_create_uc *cmd = &req.create_uc;
308 struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp;
309 struct pvrdma_alloc_ucontext_resp uresp;
310 int ret;
311 void *ptr;
312
313 if (!vdev->ib_active)
314 return ERR_PTR(-EAGAIN);
315
316 context = kmalloc(sizeof(*context), GFP_KERNEL);
317 if (!context)
318 return ERR_PTR(-ENOMEM);
319
320 context->dev = vdev;
321 ret = pvrdma_uar_alloc(vdev, &context->uar);
322 if (ret) {
323 kfree(context);
324 return ERR_PTR(-ENOMEM);
325 }
326
327 /* get ctx_handle from host */
328 memset(cmd, 0, sizeof(*cmd));
329 cmd->pfn = context->uar.pfn;
330 cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC;
331 ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP);
332 if (ret < 0) {
333 dev_warn(&vdev->pdev->dev,
334 "could not create ucontext, error: %d\n", ret);
335 ptr = ERR_PTR(ret);
336 goto err;
337 }
338
339 context->ctx_handle = resp->ctx_handle;
340
341 /* copy back to user */
342 uresp.qp_tab_size = vdev->dsr->caps.max_qp;
343 ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
344 if (ret) {
345 pvrdma_uar_free(vdev, &context->uar);
346 context->ibucontext.device = ibdev;
347 pvrdma_dealloc_ucontext(&context->ibucontext);
348 return ERR_PTR(-EFAULT);
349 }
350
351 return &context->ibucontext;
352
353err:
354 pvrdma_uar_free(vdev, &context->uar);
355 kfree(context);
356 return ptr;
357}
358
359/**
360 * pvrdma_dealloc_ucontext - deallocate ucontext
361 * @ibcontext: the ucontext
362 *
363 * @return: 0 on success, otherwise errno.
364 */
365int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
366{
367 struct pvrdma_ucontext *context = to_vucontext(ibcontext);
368 union pvrdma_cmd_req req;
369 struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc;
370 int ret;
371
372 memset(cmd, 0, sizeof(*cmd));
373 cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC;
374 cmd->ctx_handle = context->ctx_handle;
375
376 ret = pvrdma_cmd_post(context->dev, &req, NULL, 0);
377 if (ret < 0)
378 dev_warn(&context->dev->pdev->dev,
379 "destroy ucontext failed, error: %d\n", ret);
380
381 /* Free the UAR even if the device command failed */
382 pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar);
383 kfree(context);
384
385 return ret;
386}
387
388/**
389 * pvrdma_mmap - create mmap region
390 * @ibcontext: the user context
391 * @vma: the VMA
392 *
393 * @return: 0 on success, otherwise errno.
394 */
395int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
396{
397 struct pvrdma_ucontext *context = to_vucontext(ibcontext);
398 unsigned long start = vma->vm_start;
399 unsigned long size = vma->vm_end - vma->vm_start;
400 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
401
402 dev_dbg(&context->dev->pdev->dev, "create mmap region\n");
403
404 if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) {
405 dev_warn(&context->dev->pdev->dev,
406 "invalid params for mmap region\n");
407 return -EINVAL;
408 }
409
410	/* Map the UAR page into the user's address space. */
411 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
412 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
413 if (io_remap_pfn_range(vma, start, context->uar.pfn, size,
414 vma->vm_page_prot))
415 return -EAGAIN;
416
417 return 0;
418}
419
420/**
421 * pvrdma_alloc_pd - allocate protection domain
422 * @ibdev: the IB device
423 * @context: user context
424 * @udata: user data
425 *
426 * @return: the ib_pd protection domain pointer on success, otherwise errno.
427 */
428struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
429 struct ib_ucontext *context,
430 struct ib_udata *udata)
431{
432 struct pvrdma_pd *pd;
433 struct pvrdma_dev *dev = to_vdev(ibdev);
434 union pvrdma_cmd_req req;
435 union pvrdma_cmd_resp rsp;
436 struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
437 struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
438 int ret;
439 void *ptr;
440
441 /* Check allowed max pds */
442 if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd))
443 return ERR_PTR(-ENOMEM);
444
445 pd = kmalloc(sizeof(*pd), GFP_KERNEL);
446 if (!pd) {
447 ptr = ERR_PTR(-ENOMEM);
448 goto err;
449 }
450
451 memset(cmd, 0, sizeof(*cmd));
452 cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD;
453 cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0;
454 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP);
455 if (ret < 0) {
456 dev_warn(&dev->pdev->dev,
457 "failed to allocate protection domain, error: %d\n",
458 ret);
459 ptr = ERR_PTR(ret);
460 goto freepd;
461 }
462
463 pd->privileged = !context;
464 pd->pd_handle = resp->pd_handle;
465 pd->pdn = resp->pd_handle;
466
467 if (context) {
468 if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
469 dev_warn(&dev->pdev->dev,
470 "failed to copy back protection domain\n");
471 pvrdma_dealloc_pd(&pd->ibpd);
472 return ERR_PTR(-EFAULT);
473 }
474 }
475
476 /* u32 pd handle */
477 return &pd->ibpd;
478
479freepd:
480 kfree(pd);
481err:
482 atomic_dec(&dev->num_pds);
483 return ptr;
484}
485
486/**
487 * pvrdma_dealloc_pd - deallocate protection domain
488 * @pd: the protection domain to be released
489 *
490 * @return: 0 on success, otherwise errno.
491 */
492int pvrdma_dealloc_pd(struct ib_pd *pd)
493{
494 struct pvrdma_dev *dev = to_vdev(pd->device);
495 union pvrdma_cmd_req req;
496 struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd;
497 int ret;
498
499 memset(cmd, 0, sizeof(*cmd));
500 cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD;
501 cmd->pd_handle = to_vpd(pd)->pd_handle;
502
503 ret = pvrdma_cmd_post(dev, &req, NULL, 0);
504 if (ret)
505 dev_warn(&dev->pdev->dev,
506 "could not dealloc protection domain, error: %d\n",
507 ret);
508
509 kfree(to_vpd(pd));
510 atomic_dec(&dev->num_pds);
511
512 return 0;
513}
514
515/**
516 * pvrdma_create_ah - create an address handle
517 * @pd: the protection domain
518 * @ah_attr: the attributes of the AH
519 * @udata: user data blob
520 *
521 * @return: the ib_ah pointer on success, otherwise errno.
522 */
523struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
524 struct ib_udata *udata)
525{
526 struct pvrdma_dev *dev = to_vdev(pd->device);
527 struct pvrdma_ah *ah;
528 enum rdma_link_layer ll;
529
530 if (!(ah_attr->ah_flags & IB_AH_GRH))
531 return ERR_PTR(-EINVAL);
532
533 ll = rdma_port_get_link_layer(pd->device, ah_attr->port_num);
534
535 if (ll != IB_LINK_LAYER_ETHERNET ||
536 rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw))
537 return ERR_PTR(-EINVAL);
538
539 if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
540 return ERR_PTR(-ENOMEM);
541
542 ah = kzalloc(sizeof(*ah), GFP_KERNEL);
543 if (!ah) {
544 atomic_dec(&dev->num_ahs);
545 return ERR_PTR(-ENOMEM);
546 }
547
548 ah->av.port_pd = to_vpd(pd)->pd_handle | (ah_attr->port_num << 24);
549 ah->av.src_path_bits = ah_attr->src_path_bits;
550 ah->av.src_path_bits |= 0x80;
551 ah->av.gid_index = ah_attr->grh.sgid_index;
552 ah->av.hop_limit = ah_attr->grh.hop_limit;
553 ah->av.sl_tclass_flowlabel = (ah_attr->grh.traffic_class << 20) |
554 ah_attr->grh.flow_label;
555 memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
556 memcpy(ah->av.dmac, ah_attr->dmac, 6);
557
558 ah->ibah.device = pd->device;
559 ah->ibah.pd = pd;
560 ah->ibah.uobject = NULL;
561
562 return &ah->ibah;
563}
564
565/**
566 * pvrdma_destroy_ah - destroy an address handle
567 * @ah: the address handle to be destroyed
568 *
569 * @return: 0 on success.
570 */
571int pvrdma_destroy_ah(struct ib_ah *ah)
572{
573 struct pvrdma_dev *dev = to_vdev(ah->device);
574
575 kfree(to_vah(ah));
576 atomic_dec(&dev->num_ahs);
577
578 return 0;
579}
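
pvrdma_create_ah() above only accepts address handles that carry a GRH and a unicast destination GID, since the device is RoCE-only. As a hedged userspace illustration (via libibverbs and the libpvrdma provider), an AH for this device would typically be built with is_global set; the GID, index and port arguments here are placeholders.

/* Userspace sketch: build a RoCE address handle with a GRH, as required
 * by the check in pvrdma_create_ah(). dest_gid/gid_index/port are
 * caller-supplied placeholders.
 */
#include <stdint.h>
#include <infiniband/verbs.h>

static struct ibv_ah *make_roce_ah(struct ibv_pd *pd, const union ibv_gid *dest_gid,
				   uint8_t gid_index, uint8_t port)
{
	struct ibv_ah_attr attr = {
		.is_global = 1,                 /* GRH is mandatory for this device */
		.port_num  = port,
		.grh = {
			.dgid       = *dest_gid,
			.sgid_index = gid_index,
			.hop_limit  = 64,
		},
	};

	return ibv_create_ah(pd, &attr);        /* NULL on failure */
}
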
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
new file mode 100644
index 000000000000..bfbe96b56255
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -0,0 +1,436 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#ifndef __PVRDMA_VERBS_H__
47#define __PVRDMA_VERBS_H__
48
49#include <linux/types.h>
50
51union pvrdma_gid {
52 u8 raw[16];
53 struct {
54 __be64 subnet_prefix;
55 __be64 interface_id;
56 } global;
57};
58
59enum pvrdma_link_layer {
60 PVRDMA_LINK_LAYER_UNSPECIFIED,
61 PVRDMA_LINK_LAYER_INFINIBAND,
62 PVRDMA_LINK_LAYER_ETHERNET,
63};
64
65enum pvrdma_mtu {
66 PVRDMA_MTU_256 = 1,
67 PVRDMA_MTU_512 = 2,
68 PVRDMA_MTU_1024 = 3,
69 PVRDMA_MTU_2048 = 4,
70 PVRDMA_MTU_4096 = 5,
71};
72
73static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu)
74{
75 switch (mtu) {
76 case PVRDMA_MTU_256: return 256;
77 case PVRDMA_MTU_512: return 512;
78 case PVRDMA_MTU_1024: return 1024;
79 case PVRDMA_MTU_2048: return 2048;
80 case PVRDMA_MTU_4096: return 4096;
81 default: return -1;
82 }
83}
84
85static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu)
86{
87 switch (mtu) {
88 case 256: return PVRDMA_MTU_256;
89 case 512: return PVRDMA_MTU_512;
90 case 1024: return PVRDMA_MTU_1024;
91 case 2048: return PVRDMA_MTU_2048;
92 case 4096:
93 default: return PVRDMA_MTU_4096;
94 }
95}
96
97enum pvrdma_port_state {
98 PVRDMA_PORT_NOP = 0,
99 PVRDMA_PORT_DOWN = 1,
100 PVRDMA_PORT_INIT = 2,
101 PVRDMA_PORT_ARMED = 3,
102 PVRDMA_PORT_ACTIVE = 4,
103 PVRDMA_PORT_ACTIVE_DEFER = 5,
104};
105
106enum pvrdma_port_cap_flags {
107 PVRDMA_PORT_SM = 1 << 1,
108 PVRDMA_PORT_NOTICE_SUP = 1 << 2,
109 PVRDMA_PORT_TRAP_SUP = 1 << 3,
110 PVRDMA_PORT_OPT_IPD_SUP = 1 << 4,
111 PVRDMA_PORT_AUTO_MIGR_SUP = 1 << 5,
112 PVRDMA_PORT_SL_MAP_SUP = 1 << 6,
113 PVRDMA_PORT_MKEY_NVRAM = 1 << 7,
114 PVRDMA_PORT_PKEY_NVRAM = 1 << 8,
115 PVRDMA_PORT_LED_INFO_SUP = 1 << 9,
116 PVRDMA_PORT_SM_DISABLED = 1 << 10,
117 PVRDMA_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
118 PVRDMA_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
119 PVRDMA_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
120 PVRDMA_PORT_CM_SUP = 1 << 16,
121 PVRDMA_PORT_SNMP_TUNNEL_SUP = 1 << 17,
122 PVRDMA_PORT_REINIT_SUP = 1 << 18,
123 PVRDMA_PORT_DEVICE_MGMT_SUP = 1 << 19,
124 PVRDMA_PORT_VENDOR_CLASS_SUP = 1 << 20,
125 PVRDMA_PORT_DR_NOTICE_SUP = 1 << 21,
126 PVRDMA_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
127 PVRDMA_PORT_BOOT_MGMT_SUP = 1 << 23,
128 PVRDMA_PORT_LINK_LATENCY_SUP = 1 << 24,
129 PVRDMA_PORT_CLIENT_REG_SUP = 1 << 25,
130 PVRDMA_PORT_IP_BASED_GIDS = 1 << 26,
131 PVRDMA_PORT_CAP_FLAGS_MAX = PVRDMA_PORT_IP_BASED_GIDS,
132};
133
134enum pvrdma_port_width {
135 PVRDMA_WIDTH_1X = 1,
136 PVRDMA_WIDTH_4X = 2,
137 PVRDMA_WIDTH_8X = 4,
138 PVRDMA_WIDTH_12X = 8,
139};
140
141static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width)
142{
143 switch (width) {
144 case PVRDMA_WIDTH_1X: return 1;
145 case PVRDMA_WIDTH_4X: return 4;
146 case PVRDMA_WIDTH_8X: return 8;
147 case PVRDMA_WIDTH_12X: return 12;
148 default: return -1;
149 }
150}
151
152enum pvrdma_port_speed {
153 PVRDMA_SPEED_SDR = 1,
154 PVRDMA_SPEED_DDR = 2,
155 PVRDMA_SPEED_QDR = 4,
156 PVRDMA_SPEED_FDR10 = 8,
157 PVRDMA_SPEED_FDR = 16,
158 PVRDMA_SPEED_EDR = 32,
159};
160
161struct pvrdma_port_attr {
162 enum pvrdma_port_state state;
163 enum pvrdma_mtu max_mtu;
164 enum pvrdma_mtu active_mtu;
165 u32 gid_tbl_len;
166 u32 port_cap_flags;
167 u32 max_msg_sz;
168 u32 bad_pkey_cntr;
169 u32 qkey_viol_cntr;
170 u16 pkey_tbl_len;
171 u16 lid;
172 u16 sm_lid;
173 u8 lmc;
174 u8 max_vl_num;
175 u8 sm_sl;
176 u8 subnet_timeout;
177 u8 init_type_reply;
178 u8 active_width;
179 u8 active_speed;
180 u8 phys_state;
181 u8 reserved[2];
182};
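/*
 * Sketch (editor's example, not part of the patch): a consumer of
 * pvrdma_port_attr would normally unpack the enum-coded fields with the
 * helpers above before reporting them; both function names here are
 * hypothetical.
 */
static inline int pvrdma_example_active_mtu_bytes(const struct pvrdma_port_attr *attr)
{
	/* Returns -1 if active_mtu lies outside the defined enum range. */
	return pvrdma_mtu_enum_to_int(attr->active_mtu);
}

static inline int pvrdma_example_active_lanes(const struct pvrdma_port_attr *attr)
{
	/* active_width carries a pvrdma_port_width value packed into a u8. */
	return pvrdma_width_enum_to_int((enum pvrdma_port_width)attr->active_width);
}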
183
184struct pvrdma_global_route {
185 union pvrdma_gid dgid;
186 u32 flow_label;
187 u8 sgid_index;
188 u8 hop_limit;
189 u8 traffic_class;
190 u8 reserved;
191};
192
193struct pvrdma_grh {
194 __be32 version_tclass_flow;
195 __be16 paylen;
196 u8 next_hdr;
197 u8 hop_limit;
198 union pvrdma_gid sgid;
199 union pvrdma_gid dgid;
200};
201
202enum pvrdma_ah_flags {
203 PVRDMA_AH_GRH = 1,
204};
205
206enum pvrdma_rate {
207 PVRDMA_RATE_PORT_CURRENT = 0,
208 PVRDMA_RATE_2_5_GBPS = 2,
209 PVRDMA_RATE_5_GBPS = 5,
210 PVRDMA_RATE_10_GBPS = 3,
211 PVRDMA_RATE_20_GBPS = 6,
212 PVRDMA_RATE_30_GBPS = 4,
213 PVRDMA_RATE_40_GBPS = 7,
214 PVRDMA_RATE_60_GBPS = 8,
215 PVRDMA_RATE_80_GBPS = 9,
216 PVRDMA_RATE_120_GBPS = 10,
217 PVRDMA_RATE_14_GBPS = 11,
218 PVRDMA_RATE_56_GBPS = 12,
219 PVRDMA_RATE_112_GBPS = 13,
220 PVRDMA_RATE_168_GBPS = 14,
221 PVRDMA_RATE_25_GBPS = 15,
222 PVRDMA_RATE_100_GBPS = 16,
223 PVRDMA_RATE_200_GBPS = 17,
224 PVRDMA_RATE_300_GBPS = 18,
225};
226
227struct pvrdma_ah_attr {
228 struct pvrdma_global_route grh;
229 u16 dlid;
230 u16 vlan_id;
231 u8 sl;
232 u8 src_path_bits;
233 u8 static_rate;
234 u8 ah_flags;
235 u8 port_num;
236 u8 dmac[6];
237 u8 reserved;
238};
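/*
 * Sketch (editor's illustration only, not part of the patch): on an
 * Ethernet/RoCE port the GRH is mandatory, so an address handle would be
 * described roughly as below. The helper name and the chosen hop limit are
 * hypothetical examples, not values taken from this driver.
 */
static inline void pvrdma_example_fill_roce_ah(struct pvrdma_ah_attr *ah,
					       const union pvrdma_gid *dgid,
					       u8 sgid_index, u8 port_num)
{
	*ah = (struct pvrdma_ah_attr) {
		.grh = {
			.dgid		= *dgid,
			.sgid_index	= sgid_index,
			.hop_limit	= 0xff,
		},
		.static_rate	= PVRDMA_RATE_PORT_CURRENT,
		.ah_flags	= PVRDMA_AH_GRH,	/* GRH required. */
		.port_num	= port_num,
	};
}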
239
240enum pvrdma_cq_notify_flags {
241 PVRDMA_CQ_SOLICITED = 1 << 0,
242 PVRDMA_CQ_NEXT_COMP = 1 << 1,
243 PVRDMA_CQ_SOLICITED_MASK = PVRDMA_CQ_SOLICITED |
244 PVRDMA_CQ_NEXT_COMP,
245 PVRDMA_CQ_REPORT_MISSED_EVENTS = 1 << 2,
246};
247
248struct pvrdma_qp_cap {
249 u32 max_send_wr;
250 u32 max_recv_wr;
251 u32 max_send_sge;
252 u32 max_recv_sge;
253 u32 max_inline_data;
254 u32 reserved;
255};
256
257enum pvrdma_sig_type {
258 PVRDMA_SIGNAL_ALL_WR,
259 PVRDMA_SIGNAL_REQ_WR,
260};
261
262enum pvrdma_qp_type {
263 PVRDMA_QPT_SMI,
264 PVRDMA_QPT_GSI,
265 PVRDMA_QPT_RC,
266 PVRDMA_QPT_UC,
267 PVRDMA_QPT_UD,
268 PVRDMA_QPT_RAW_IPV6,
269 PVRDMA_QPT_RAW_ETHERTYPE,
270 PVRDMA_QPT_RAW_PACKET = 8,
271 PVRDMA_QPT_XRC_INI = 9,
272 PVRDMA_QPT_XRC_TGT,
273 PVRDMA_QPT_MAX,
274};
275
276enum pvrdma_qp_create_flags {
277 PVRDMA_QP_CREATE_IPOPVRDMA_UD_LSO = 1 << 0,
278 PVRDMA_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
279};
280
281enum pvrdma_qp_attr_mask {
282 PVRDMA_QP_STATE = 1 << 0,
283 PVRDMA_QP_CUR_STATE = 1 << 1,
284 PVRDMA_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2,
285 PVRDMA_QP_ACCESS_FLAGS = 1 << 3,
286 PVRDMA_QP_PKEY_INDEX = 1 << 4,
287 PVRDMA_QP_PORT = 1 << 5,
288 PVRDMA_QP_QKEY = 1 << 6,
289 PVRDMA_QP_AV = 1 << 7,
290 PVRDMA_QP_PATH_MTU = 1 << 8,
291 PVRDMA_QP_TIMEOUT = 1 << 9,
292 PVRDMA_QP_RETRY_CNT = 1 << 10,
293 PVRDMA_QP_RNR_RETRY = 1 << 11,
294 PVRDMA_QP_RQ_PSN = 1 << 12,
295 PVRDMA_QP_MAX_QP_RD_ATOMIC = 1 << 13,
296 PVRDMA_QP_ALT_PATH = 1 << 14,
297 PVRDMA_QP_MIN_RNR_TIMER = 1 << 15,
298 PVRDMA_QP_SQ_PSN = 1 << 16,
299 PVRDMA_QP_MAX_DEST_RD_ATOMIC = 1 << 17,
300 PVRDMA_QP_PATH_MIG_STATE = 1 << 18,
301 PVRDMA_QP_CAP = 1 << 19,
302 PVRDMA_QP_DEST_QPN = 1 << 20,
303 PVRDMA_QP_ATTR_MASK_MAX = PVRDMA_QP_DEST_QPN,
304};
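/*
 * Sketch (editor's example, mirroring standard Verbs usage rather than
 * anything defined in this patch): attribute-mask bits are combined per QP
 * state transition, e.g. a RESET->INIT transition on an RC QP is normally
 * described with the bits below. The macro name is hypothetical.
 */
#define PVRDMA_EXAMPLE_QP_INIT_MASK	(PVRDMA_QP_STATE |	\
					 PVRDMA_QP_PKEY_INDEX |	\
					 PVRDMA_QP_PORT |	\
					 PVRDMA_QP_ACCESS_FLAGS)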
305
306enum pvrdma_qp_state {
307 PVRDMA_QPS_RESET,
308 PVRDMA_QPS_INIT,
309 PVRDMA_QPS_RTR,
310 PVRDMA_QPS_RTS,
311 PVRDMA_QPS_SQD,
312 PVRDMA_QPS_SQE,
313 PVRDMA_QPS_ERR,
314};
315
316enum pvrdma_mig_state {
317 PVRDMA_MIG_MIGRATED,
318 PVRDMA_MIG_REARM,
319 PVRDMA_MIG_ARMED,
320};
321
322enum pvrdma_mw_type {
323 PVRDMA_MW_TYPE_1 = 1,
324 PVRDMA_MW_TYPE_2 = 2,
325};
326
327struct pvrdma_qp_attr {
328 enum pvrdma_qp_state qp_state;
329 enum pvrdma_qp_state cur_qp_state;
330 enum pvrdma_mtu path_mtu;
331 enum pvrdma_mig_state path_mig_state;
332 u32 qkey;
333 u32 rq_psn;
334 u32 sq_psn;
335 u32 dest_qp_num;
336 u32 qp_access_flags;
337 u16 pkey_index;
338 u16 alt_pkey_index;
339 u8 en_sqd_async_notify;
340 u8 sq_draining;
341 u8 max_rd_atomic;
342 u8 max_dest_rd_atomic;
343 u8 min_rnr_timer;
344 u8 port_num;
345 u8 timeout;
346 u8 retry_cnt;
347 u8 rnr_retry;
348 u8 alt_port_num;
349 u8 alt_timeout;
350 u8 reserved[5];
351 struct pvrdma_qp_cap cap;
352 struct pvrdma_ah_attr ah_attr;
353 struct pvrdma_ah_attr alt_ah_attr;
354};
355
356enum pvrdma_send_flags {
357 PVRDMA_SEND_FENCE = 1 << 0,
358 PVRDMA_SEND_SIGNALED = 1 << 1,
359 PVRDMA_SEND_SOLICITED = 1 << 2,
360 PVRDMA_SEND_INLINE = 1 << 3,
361 PVRDMA_SEND_IP_CSUM = 1 << 4,
362 PVRDMA_SEND_FLAGS_MAX = PVRDMA_SEND_IP_CSUM,
363};
364
365enum pvrdma_access_flags {
366 PVRDMA_ACCESS_LOCAL_WRITE = 1 << 0,
367 PVRDMA_ACCESS_REMOTE_WRITE = 1 << 1,
368 PVRDMA_ACCESS_REMOTE_READ = 1 << 2,
369 PVRDMA_ACCESS_REMOTE_ATOMIC = 1 << 3,
370 PVRDMA_ACCESS_MW_BIND = 1 << 4,
371 PVRDMA_ZERO_BASED = 1 << 5,
372 PVRDMA_ACCESS_ON_DEMAND = 1 << 6,
373 PVRDMA_ACCESS_FLAGS_MAX = PVRDMA_ACCESS_ON_DEMAND,
374};
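/*
 * Sketch (editor's example only): access flags combine bitwise, e.g. a
 * memory region that is the target of both RDMA reads and RDMA writes would
 * be registered with a mask like the hypothetical one below.
 */
#define PVRDMA_EXAMPLE_RW_MR_ACCESS	(PVRDMA_ACCESS_LOCAL_WRITE |	\
					 PVRDMA_ACCESS_REMOTE_READ |	\
					 PVRDMA_ACCESS_REMOTE_WRITE)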
375
376int pvrdma_query_device(struct ib_device *ibdev,
377 struct ib_device_attr *props,
378 struct ib_udata *udata);
379int pvrdma_query_port(struct ib_device *ibdev, u8 port,
380 struct ib_port_attr *props);
381int pvrdma_query_gid(struct ib_device *ibdev, u8 port,
382 int index, union ib_gid *gid);
383int pvrdma_query_pkey(struct ib_device *ibdev, u8 port,
384 u16 index, u16 *pkey);
385enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
386 u8 port);
387int pvrdma_modify_device(struct ib_device *ibdev, int mask,
388 struct ib_device_modify *props);
389int pvrdma_modify_port(struct ib_device *ibdev, u8 port,
390 int mask, struct ib_port_modify *props);
391int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
392struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
393 struct ib_udata *udata);
394int pvrdma_dealloc_ucontext(struct ib_ucontext *context);
395struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
396 struct ib_ucontext *context,
397 struct ib_udata *udata);
398int pvrdma_dealloc_pd(struct ib_pd *ibpd);
399struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc);
400struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
401 u64 virt_addr, int access_flags,
402 struct ib_udata *udata);
403int pvrdma_dereg_mr(struct ib_mr *mr);
404struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
405 u32 max_num_sg);
406int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
407 int sg_nents, unsigned int *sg_offset);
408int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
409int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
410 struct ib_udata *udata);
411struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
412 const struct ib_cq_init_attr *attr,
413 struct ib_ucontext *context,
414 struct ib_udata *udata);
415int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
416 struct ib_udata *udata);
417int pvrdma_destroy_cq(struct ib_cq *cq);
418int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
419int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
420struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
421 struct ib_udata *udata);
422int pvrdma_destroy_ah(struct ib_ah *ah);
423struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
424 struct ib_qp_init_attr *init_attr,
425 struct ib_udata *udata);
426int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
427 int attr_mask, struct ib_udata *udata);
428int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
429 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
430int pvrdma_destroy_qp(struct ib_qp *qp);
431int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
432 struct ib_send_wr **bad_wr);
433int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
434 struct ib_recv_wr **bad_wr);
435
436#endif /* __PVRDMA_VERBS_H__ */
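The prototypes above are the driver's implementations of the core Verbs
methods. As orientation only (an editor's sketch, assuming that for the
kernel generation this patch targets struct ib_device still exposes these
methods as direct function pointers; the authoritative wiring lives in the
driver's main source file, not in this header), such entry points are
published by assigning them into the ib_device before registration. The
helper name below is hypothetical.

#include <rdma/ib_verbs.h>

static void pvrdma_example_set_verbs(struct ib_device *ibdev)
{
	/* Device and port queries. */
	ibdev->query_device	= pvrdma_query_device;
	ibdev->query_port	= pvrdma_query_port;
	ibdev->query_gid	= pvrdma_query_gid;
	ibdev->query_pkey	= pvrdma_query_pkey;
	/* Completion queues. */
	ibdev->create_cq	= pvrdma_create_cq;
	ibdev->destroy_cq	= pvrdma_destroy_cq;
	ibdev->poll_cq		= pvrdma_poll_cq;
	ibdev->req_notify_cq	= pvrdma_req_notify_cq;
	/* Queue pairs and the data path. */
	ibdev->create_qp	= pvrdma_create_qp;
	ibdev->modify_qp	= pvrdma_modify_qp;
	ibdev->destroy_qp	= pvrdma_destroy_qp;
	ibdev->post_send	= pvrdma_post_send;
	ibdev->post_recv	= pvrdma_post_recv;
}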
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index f14ab7ff5fee..6a8a934c540c 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -14,3 +14,4 @@ header-y += mlx5-abi.h
 header-y += mthca-abi.h
 header-y += nes-abi.h
 header-y += ocrdma-abi.h
+header-y += vmw_pvrdma-abi.h
diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
new file mode 100644
index 000000000000..5016abc9ee97
--- /dev/null
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -0,0 +1,289 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#ifndef __VMW_PVRDMA_ABI_H__
47#define __VMW_PVRDMA_ABI_H__
48
49#include <linux/types.h>
50
51#define PVRDMA_UVERBS_ABI_VERSION 3 /* ABI Version. */
52#define PVRDMA_UAR_HANDLE_MASK 0x00FFFFFF /* Bottom 24 bits. */
53#define PVRDMA_UAR_QP_OFFSET 0 /* QP doorbell. */
54#define PVRDMA_UAR_QP_SEND BIT(30) /* Send bit. */
55#define PVRDMA_UAR_QP_RECV BIT(31) /* Recv bit. */
56#define PVRDMA_UAR_CQ_OFFSET 4 /* CQ doorbell. */
57#define PVRDMA_UAR_CQ_ARM_SOL BIT(29) /* Arm solicited bit. */
58#define PVRDMA_UAR_CQ_ARM BIT(30) /* Arm bit. */
59#define PVRDMA_UAR_CQ_POLL BIT(31) /* Poll bit. */
60
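/*
 * Sketch (editor's example only, not part of the ABI): a doorbell word is
 * composed by OR-ing a handle, masked to its low 24 bits, with one of the
 * operation bits above; which bit a given request uses is up to the driver
 * and the userspace library. The helper name is hypothetical.
 */
static inline __u32 pvrdma_example_cq_arm_word(__u32 cq_handle, int solicited_only)
{
	__u32 op = solicited_only ? PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;

	return (cq_handle & PVRDMA_UAR_HANDLE_MASK) | op;
}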
61enum pvrdma_wr_opcode {
62 PVRDMA_WR_RDMA_WRITE,
63 PVRDMA_WR_RDMA_WRITE_WITH_IMM,
64 PVRDMA_WR_SEND,
65 PVRDMA_WR_SEND_WITH_IMM,
66 PVRDMA_WR_RDMA_READ,
67 PVRDMA_WR_ATOMIC_CMP_AND_SWP,
68 PVRDMA_WR_ATOMIC_FETCH_AND_ADD,
69 PVRDMA_WR_LSO,
70 PVRDMA_WR_SEND_WITH_INV,
71 PVRDMA_WR_RDMA_READ_WITH_INV,
72 PVRDMA_WR_LOCAL_INV,
73 PVRDMA_WR_FAST_REG_MR,
74 PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP,
75 PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD,
76 PVRDMA_WR_BIND_MW,
77 PVRDMA_WR_REG_SIG_MR,
78};
79
80enum pvrdma_wc_status {
81 PVRDMA_WC_SUCCESS,
82 PVRDMA_WC_LOC_LEN_ERR,
83 PVRDMA_WC_LOC_QP_OP_ERR,
84 PVRDMA_WC_LOC_EEC_OP_ERR,
85 PVRDMA_WC_LOC_PROT_ERR,
86 PVRDMA_WC_WR_FLUSH_ERR,
87 PVRDMA_WC_MW_BIND_ERR,
88 PVRDMA_WC_BAD_RESP_ERR,
89 PVRDMA_WC_LOC_ACCESS_ERR,
90 PVRDMA_WC_REM_INV_REQ_ERR,
91 PVRDMA_WC_REM_ACCESS_ERR,
92 PVRDMA_WC_REM_OP_ERR,
93 PVRDMA_WC_RETRY_EXC_ERR,
94 PVRDMA_WC_RNR_RETRY_EXC_ERR,
95 PVRDMA_WC_LOC_RDD_VIOL_ERR,
96 PVRDMA_WC_REM_INV_RD_REQ_ERR,
97 PVRDMA_WC_REM_ABORT_ERR,
98 PVRDMA_WC_INV_EECN_ERR,
99 PVRDMA_WC_INV_EEC_STATE_ERR,
100 PVRDMA_WC_FATAL_ERR,
101 PVRDMA_WC_RESP_TIMEOUT_ERR,
102 PVRDMA_WC_GENERAL_ERR,
103};
104
105enum pvrdma_wc_opcode {
106 PVRDMA_WC_SEND,
107 PVRDMA_WC_RDMA_WRITE,
108 PVRDMA_WC_RDMA_READ,
109 PVRDMA_WC_COMP_SWAP,
110 PVRDMA_WC_FETCH_ADD,
111 PVRDMA_WC_BIND_MW,
112 PVRDMA_WC_LSO,
113 PVRDMA_WC_LOCAL_INV,
114 PVRDMA_WC_FAST_REG_MR,
115 PVRDMA_WC_MASKED_COMP_SWAP,
116 PVRDMA_WC_MASKED_FETCH_ADD,
117 PVRDMA_WC_RECV = 1 << 7,
118 PVRDMA_WC_RECV_RDMA_WITH_IMM,
119};
120
121enum pvrdma_wc_flags {
122 PVRDMA_WC_GRH = 1 << 0,
123 PVRDMA_WC_WITH_IMM = 1 << 1,
124 PVRDMA_WC_WITH_INVALIDATE = 1 << 2,
125 PVRDMA_WC_IP_CSUM_OK = 1 << 3,
126 PVRDMA_WC_WITH_SMAC = 1 << 4,
127 PVRDMA_WC_WITH_VLAN = 1 << 5,
128 PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_VLAN,
129};
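/*
 * Sketch (editor's illustration only): receive completions are flagged by
 * the PVRDMA_WC_RECV bit in the opcode, and immediate data is only
 * meaningful when PVRDMA_WC_WITH_IMM is set in the completion flags. The
 * helper name is hypothetical.
 */
static inline int pvrdma_example_is_recv_with_imm(__u32 wc_opcode, __u32 wc_flags)
{
	return (wc_opcode & PVRDMA_WC_RECV) && (wc_flags & PVRDMA_WC_WITH_IMM);
}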
130
131struct pvrdma_alloc_ucontext_resp {
132 __u32 qp_tab_size;
133 __u32 reserved;
134};
135
136struct pvrdma_alloc_pd_resp {
137 __u32 pdn;
138 __u32 reserved;
139};
140
141struct pvrdma_create_cq {
142 __u64 buf_addr;
143 __u32 buf_size;
144 __u32 reserved;
145};
146
147struct pvrdma_create_cq_resp {
148 __u32 cqn;
149 __u32 reserved;
150};
151
152struct pvrdma_resize_cq {
153 __u64 buf_addr;
154 __u32 buf_size;
155 __u32 reserved;
156};
157
158struct pvrdma_create_srq {
159 __u64 buf_addr;
160};
161
162struct pvrdma_create_srq_resp {
163 __u32 srqn;
164 __u32 reserved;
165};
166
167struct pvrdma_create_qp {
168 __u64 rbuf_addr;
169 __u64 sbuf_addr;
170 __u32 rbuf_size;
171 __u32 sbuf_size;
172 __u64 qp_addr;
173};
174
175/* PVRDMA masked atomic compare and swap */
176struct pvrdma_ex_cmp_swap {
177 __u64 swap_val;
178 __u64 compare_val;
179 __u64 swap_mask;
180 __u64 compare_mask;
181};
182
183/* PVRDMA masked atomic fetch and add */
184struct pvrdma_ex_fetch_add {
185 __u64 add_val;
186 __u64 field_boundary;
187};
188
189/* PVRDMA address vector. */
190struct pvrdma_av {
191 __u32 port_pd;
192 __u32 sl_tclass_flowlabel;
193 __u8 dgid[16];
194 __u8 src_path_bits;
195 __u8 gid_index;
196 __u8 stat_rate;
197 __u8 hop_limit;
198 __u8 dmac[6];
199 __u8 reserved[6];
200};
201
202/* PVRDMA scatter/gather entry */
203struct pvrdma_sge {
204 __u64 addr;
205 __u32 length;
206 __u32 lkey;
207};
208
209/* PVRDMA receive queue work request */
210struct pvrdma_rq_wqe_hdr {
211 __u64 wr_id; /* wr id */
212 __u32 num_sge; /* size of s/g array */
213 __u32 total_len; /* reserved */
214};
215/* Use pvrdma_sge (ib_sge) for receive queue s/g array elements. */
216
217/* PVRDMA send queue work request */
218struct pvrdma_sq_wqe_hdr {
219 __u64 wr_id; /* wr id */
220 __u32 num_sge; /* size of s/g array */
221 __u32 total_len; /* reserved */
222 __u32 opcode; /* operation type */
223 __u32 send_flags; /* wr flags */
224 union {
225 __u32 imm_data;
226 __u32 invalidate_rkey;
227 } ex;
228 __u32 reserved;
229 union {
230 struct {
231 __u64 remote_addr;
232 __u32 rkey;
233 __u8 reserved[4];
234 } rdma;
235 struct {
236 __u64 remote_addr;
237 __u64 compare_add;
238 __u64 swap;
239 __u32 rkey;
240 __u32 reserved;
241 } atomic;
242 struct {
243 __u64 remote_addr;
244 __u32 log_arg_sz;
245 __u32 rkey;
246 union {
247 struct pvrdma_ex_cmp_swap cmp_swap;
248 struct pvrdma_ex_fetch_add fetch_add;
249 } wr_data;
250 } masked_atomics;
251 struct {
252 __u64 iova_start;
253 __u64 pl_pdir_dma;
254 __u32 page_shift;
255 __u32 page_list_len;
256 __u32 length;
257 __u32 access_flags;
258 __u32 rkey;
259 } fast_reg;
260 struct {
261 __u32 remote_qpn;
262 __u32 remote_qkey;
263 struct pvrdma_av av;
264 } ud;
265 } wr;
266};
267/* Use pvrdma_sge (ib_sge) for send queue s/g array elements. */
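/*
 * Sketch (editor's example, not part of the ABI definition): a send WQE is
 * the header above immediately followed by num_sge scatter/gather entries,
 * so its footprint on the ring can be sized as below. The helper name is
 * hypothetical.
 */
static inline __u32 pvrdma_example_sq_wqe_size(__u32 num_sge)
{
	return (__u32)sizeof(struct pvrdma_sq_wqe_hdr) +
	       num_sge * (__u32)sizeof(struct pvrdma_sge);
}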
268
269/* Completion queue element. */
270struct pvrdma_cqe {
271 __u64 wr_id;
272 __u64 qp;
273 __u32 opcode;
274 __u32 status;
275 __u32 byte_len;
276 __u32 imm_data;
277 __u32 src_qp;
278 __u32 wc_flags;
279 __u32 vendor_err;
280 __u16 pkey_index;
281 __u16 slid;
282 __u8 sl;
283 __u8 dlid_path_bits;
284 __u8 port_num;
285 __u8 smac[6];
286 __u8 reserved2[7]; /* Pad to next power of 2 (64). */
287};
288
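/*
 * Sketch (editor's example only): the reserved2[] tail keeps the CQE at
 * exactly 64 bytes, as noted above. A consumer could assert that at build
 * time with a negative-array-size trick; the function name is hypothetical.
 */
static inline void pvrdma_example_assert_cqe_size(void)
{
	char pvrdma_cqe_is_64_bytes[sizeof(struct pvrdma_cqe) == 64 ? 1 : -1];

	(void)pvrdma_cqe_is_64_bytes;
}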
289#endif /* __VMW_PVRDMA_ABI_H__ */