aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
diff options
context:
space:
mode:
authorAdit Ranadive <aditr@vmware.com>2016-10-02 22:10:22 -0400
committerDoug Ledford <dledford@redhat.com>2016-12-14 14:55:10 -0500
commit29c8d9eba550c6d73d17cc1618a9f5f2a7345aa1 (patch)
treee8cef3b3035f1f3c4ad12f0a25ec57b6c789bcab /drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
parentb1226c7db1d997fa6955cd3b54ba333bd0d8a29c (diff)
IB: Add vmw_pvrdma driver
This patch series adds a driver for a paravirtual RDMA device. The device is developed for VMware's Virtual Machines and allows existing RDMA applications to continue to use existing Verbs API when deployed in VMs on ESXi. We recently did a presentation in the OFA Workshop [1] regarding this device. Description and RDMA Support ============================ The virtual device is exposed as a dual function PCIe device. One part is a virtual network device (VMXNet3) which provides networking properties like MAC, IP addresses to the RDMA part of the device. The networking properties are used to register GIDs required by RDMA applications to communicate. These patches add support and all the required infrastructure for letting applications use such a device. We support the mandatory Verbs API as well as the base memory management extensions (Local Inv, Send with Inv and Fast Register Work Requests). We currently support both Reliable Connected and Unreliable Datagram QPs but do not support Shared Receive Queues (SRQs). Also, we support the following types of Work Requests: o Send/Receive (with or without Immediate Data) o RDMA Write (with or without Immediate Data) o RDMA Read o Local Invalidate o Send with Invalidate o Fast Register Work Requests This version only adds support for version 1 of RoCE. We will add RoCEv2 support in a future patch. We do support registration of both MAC-based and IP-based GIDs. I have also created a git tree for our user-level driver [2]. Testing ======= We have tested this internally for various types of Guest OS - Red Hat, CentOS, Ubuntu 12.04/14.04/16.04, Oracle Enterprise Linux, SLES 12 using backported versions of this driver. The tests included several runs of the performance tests (included with OFED), Intel MPI PingPong benchmark on OpenMPI, krping for FRWRs. Mellanox has been kind enough to test the backported version of the driver internally on their hardware using a VMware provided ESX build.
I have also applied and tested this with Doug's k.o/for-4.9 branch (commit 5603910b). Note, that this patch series should be applied all together. I split out the commits so that it may be easier to review. PVRDMA Resources ================ [1] OFA Workshop Presentation - https://openfabrics.org/images/eventpresos/2016presentations/102parardma.pdf [2] Libpvrdma User-level library - http://git.openfabrics.org/?p=~aditr/libpvrdma.git;a=summary Reviewed-by: Jorgen Hansen <jhansen@vmware.com> Reviewed-by: George Zhang <georgezhang@vmware.com> Reviewed-by: Aditya Sarwade <asarwade@vmware.com> Reviewed-by: Bryan Tan <bryantan@vmware.com> Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Adit Ranadive <aditr@vmware.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/vmw_pvrdma/pvrdma.h')
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h474
1 files changed, 474 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
new file mode 100644
index 000000000000..71e1d55d69d6
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -0,0 +1,474 @@
1/*
2 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of EITHER the GNU General Public License
6 * version 2 as published by the Free Software Foundation or the BSD
7 * 2-Clause License. This program is distributed in the hope that it
8 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
9 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
10 * See the GNU General Public License version 2 for more details at
11 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program available in the file COPYING in the main
15 * directory of this source tree.
16 *
17 * The BSD 2-Clause License
18 *
19 * Redistribution and use in source and binary forms, with or
20 * without modification, are permitted provided that the following
21 * conditions are met:
22 *
23 * - Redistributions of source code must retain the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer.
26 *
27 * - Redistributions in binary form must reproduce the above
28 * copyright notice, this list of conditions and the following
29 * disclaimer in the documentation and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
37 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 * OF THE POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#ifndef __PVRDMA_H__
47#define __PVRDMA_H__
48
49#include <linux/compiler.h>
50#include <linux/interrupt.h>
51#include <linux/list.h>
52#include <linux/mutex.h>
53#include <linux/pci.h>
54#include <linux/semaphore.h>
55#include <linux/workqueue.h>
56#include <rdma/ib_umem.h>
57#include <rdma/ib_verbs.h>
58#include <rdma/vmw_pvrdma-abi.h>
59
60#include "pvrdma_ring.h"
61#include "pvrdma_dev_api.h"
62#include "pvrdma_verbs.h"
63
/*
 * Build a mask covering all flag bits up to and including @n, where @n is
 * the highest defined flag value (assumed to be a power of two), e.g.
 * PVRDMA_MASK(8) == 0xf. NOT the same as BIT_MASK().
 *
 * The argument is parenthesized so that expression arguments (e.g.
 * PVRDMA_MASK(a | b)) expand correctly; the unparenthesized form would
 * bind '<<' after '|' and produce the wrong mask.
 */
#define PVRDMA_MASK(n) (((n) << 1) - 1)

/*
 * VMware PVRDMA PCI device id.
 */
#define PCI_DEVICE_ID_VMWARE_PVRDMA	0x0820
71
struct pvrdma_dev;

/*
 * Page directory used to describe a set of DMA-mapped pages to the
 * device. Presumably a two-level layout (dir -> tables -> pages) —
 * the construction lives in pvrdma_page_dir_init(), declared below.
 * 'pages' holds kernel-virtual mappings of the data pages and may be
 * unset when pages were not allocated by the driver (see the
 * alloc_pages parameter of pvrdma_page_dir_init()).
 */
struct pvrdma_page_dir {
	dma_addr_t dir_dma;	/* DMA address of the top-level directory. */
	u64 *dir;		/* Kernel mapping of the directory. */
	int ntables;		/* Number of second-level tables. */
	u64 **tables;		/* Kernel mappings of the tables. */
	u64 npages;		/* Total number of data pages. */
	void **pages;		/* Kernel mappings of the data pages, if any. */
};
82
/*
 * Driver-private completion queue, embedding the IB core CQ
 * (unwrapped via to_vcq()).
 */
struct pvrdma_cq {
	struct ib_cq ibcq;			/* Embedded IB core CQ. */
	int offset;
	spinlock_t cq_lock;			/* Poll lock. */
	struct pvrdma_uar_map *uar;		/* UAR page used for CQ doorbells. */
	struct ib_umem *umem;			/* Userspace CQ buffer; presumably NULL when is_kernel. */
	struct pvrdma_ring_state *ring_state;
	struct pvrdma_page_dir pdir;
	u32 cq_handle;				/* Handle identifying this CQ to the device. */
	bool is_kernel;				/* True for kernel-consumer CQs. */
	atomic_t refcnt;
	wait_queue_head_t wait;			/* NOTE(review): presumably waited on until refcnt drops at destroy — confirm. */
};
96
/*
 * Simple ID allocator backed by a bitmap; used for UAR indices
 * (see pvrdma_uar_alloc()/pvrdma_uar_free() below).
 */
struct pvrdma_id_table {
	u32 last;		/* Last allocated ID (search hint). */
	u32 top;
	u32 max;		/* Number of allocatable IDs. */
	u32 mask;
	spinlock_t lock;	/* Table lock. */
	unsigned long *table;	/* Allocation bitmap. */
};

/*
 * One mapped User Access Region page; CQ/QP doorbell writes go through
 * 'map' (see pvrdma_write_uar_cq()/pvrdma_write_uar_qp() below).
 */
struct pvrdma_uar_map {
	unsigned long pfn;	/* Page frame number of the UAR page. */
	void __iomem *map;	/* I/O mapping of the UAR page. */
	int index;		/* Index within the UAR table. */
};

struct pvrdma_uar_table {
	struct pvrdma_id_table tbl;	/* Allocator for UAR indices. */
	int size;
};
116
/*
 * Per-process user context, embedding the IB core ucontext
 * (unwrapped via to_vucontext()).
 */
struct pvrdma_ucontext {
	struct ib_ucontext ibucontext;	/* Embedded IB core ucontext. */
	struct pvrdma_dev *dev;		/* Owning device. */
	struct pvrdma_uar_map uar;	/* UAR page assigned to this context. */
	u64 ctx_handle;			/* Handle identifying this context to the device. */
};

/*
 * Protection domain, embedding the IB core PD (unwrapped via to_vpd()).
 */
struct pvrdma_pd {
	struct ib_pd ibpd;	/* Embedded IB core PD. */
	u32 pdn;
	u32 pd_handle;		/* Handle identifying this PD to the device. */
	int privileged;		/* NOTE(review): presumably non-zero for kernel-owned PDs — confirm. */
};

/* Device-side memory-region attributes, shared by MR variants. */
struct pvrdma_mr {
	u32 mr_handle;		/* Handle identifying this MR to the device. */
	u64 iova;		/* Start I/O virtual address of the region. */
	u64 size;		/* Length of the region. */
};
136
/*
 * Memory region, embedding the IB core MR (unwrapped via to_vmr()).
 * Despite the name it also carries the page list used for
 * fast-register work requests.
 */
struct pvrdma_user_mr {
	struct ib_mr ibmr;		/* Embedded IB core MR. */
	struct ib_umem *umem;		/* Pinned user memory backing the MR. */
	struct pvrdma_mr mmr;		/* Device-side MR attributes. */
	struct pvrdma_page_dir pdir;	/* Page directory describing the MR pages. */
	u64 *pages;			/* Page list (fast-register path). */
	u32 npages;			/* Pages currently populated in 'pages'. */
	u32 max_pages;			/* Capacity of 'pages'. */
	u32 page_shift;
};

/* One work queue: the send or receive half of a QP. */
struct pvrdma_wq {
	struct pvrdma_ring *ring;
	spinlock_t lock;	/* Work queue lock. */
	int wqe_cnt;		/* Number of WQEs in the ring. */
	int wqe_size;		/* Size of one WQE. */
	int max_sg;		/* Max scatter/gather entries per WQE. */
	int offset;
};

/* Address handle, embedding the IB core AH (unwrapped via to_vah()). */
struct pvrdma_ah {
	struct ib_ah ibah;	/* Embedded IB core AH. */
	struct pvrdma_av av;	/* Device-format address vector. */
};
161
/*
 * Driver-private queue pair, embedding the IB core QP
 * (unwrapped via to_vqp()).
 */
struct pvrdma_qp {
	struct ib_qp ibqp;		/* Embedded IB core QP. */
	u32 qp_handle;			/* Handle identifying this QP to the device. */
	u32 qkey;
	struct pvrdma_wq sq;		/* Send work queue. */
	struct pvrdma_wq rq;		/* Receive work queue. */
	struct ib_umem *rumem;		/* Userspace receive-queue memory. */
	struct ib_umem *sumem;		/* Userspace send-queue memory. */
	struct pvrdma_page_dir pdir;	/* Pages backing both queues. */
	int npages;			/* Total pages in pdir. */
	int npages_send;		/* Pages used by the send queue. */
	int npages_recv;		/* Pages used by the receive queue. */
	u32 flags;
	u8 port;
	u8 state;			/* Cached QP state. */
	bool is_kernel;			/* True for kernel-consumer QPs. */
	struct mutex mutex;		/* QP state mutex. */
	atomic_t refcnt;
	wait_queue_head_t wait;		/* NOTE(review): presumably waited on until refcnt drops at destroy — confirm. */
};
182
/*
 * Per-device driver state, embedding the IB core device
 * (unwrapped via to_vdev()).
 */
struct pvrdma_dev {
	/* PCI device-related information. */
	struct ib_device ib_dev;	/* Embedded IB core device. */
	struct pci_dev *pdev;
	void __iomem *regs;		/* BAR mapping of the register space. */
	struct pvrdma_device_shared_region *dsr; /* Shared region pointer */
	dma_addr_t dsrbase;		/* Shared region base address */
	void *cmd_slot;			/* Command request slot. */
	void *resp_slot;		/* Command response slot. */
	unsigned long flags;
	struct list_head device_link;

	/* Locking and interrupt information. */
	spinlock_t cmd_lock;		/* Command lock. */
	struct semaphore cmd_sema;
	struct completion cmd_done;
	struct {
		enum pvrdma_intr_type type;	/* Intr type */
		struct msix_entry msix_entry[PVRDMA_MAX_INTERRUPTS];
		irq_handler_t handler[PVRDMA_MAX_INTERRUPTS];
		u8 enabled[PVRDMA_MAX_INTERRUPTS];
		u8 size;		/* Number of interrupt vectors in use. */
	} intr;

	/* RDMA-related device information. */
	union ib_gid *sgid_tbl;
	struct pvrdma_ring_state *async_ring_state;
	struct pvrdma_page_dir async_pdir;	/* Pages of the async-event ring. */
	struct pvrdma_ring_state *cq_ring_state;
	struct pvrdma_page_dir cq_pdir;		/* Pages of the CQ notification ring. */
	struct pvrdma_cq **cq_tbl;		/* Presumably indexed by cq_handle — confirm. */
	spinlock_t cq_tbl_lock;
	struct pvrdma_qp **qp_tbl;		/* Presumably indexed by qp_handle — confirm. */
	spinlock_t qp_tbl_lock;
	struct pvrdma_uar_table uar_table;
	struct pvrdma_uar_map driver_uar;	/* UAR used for kernel doorbells. */
	__be64 sys_image_guid;
	spinlock_t desc_lock;		/* Device modification lock. */
	u32 port_cap_mask;
	struct mutex port_mutex;	/* Port modification mutex. */
	bool ib_active;			/* IB device registered and usable. */
	atomic_t num_qps;
	atomic_t num_cqs;
	atomic_t num_pds;
	atomic_t num_ahs;

	/* Network device information. */
	struct net_device *netdev;	/* Paired VMXNet3 netdev. */
	struct notifier_block nb_netdev;	/* Netdev event notifier. */
};
233
/*
 * Deferred handling of a netdev notifier event (queued from
 * nb_netdev in struct pvrdma_dev).
 */
struct pvrdma_netdevice_work {
	struct work_struct work;
	struct net_device *event_netdev;	/* Netdev the event refers to. */
	unsigned long event;			/* Netdev notifier event code. */
};
239
/*
 * container_of() accessors: recover the driver-private wrapper from the
 * embedded IB core object handed back by the IB core.
 */
static inline struct pvrdma_dev *to_vdev(struct ib_device *ibdev)
{
	return container_of(ibdev, struct pvrdma_dev, ib_dev);
}

static inline struct
pvrdma_ucontext *to_vucontext(struct ib_ucontext *ibucontext)
{
	return container_of(ibucontext, struct pvrdma_ucontext, ibucontext);
}

static inline struct pvrdma_pd *to_vpd(struct ib_pd *ibpd)
{
	return container_of(ibpd, struct pvrdma_pd, ibpd);
}

static inline struct pvrdma_cq *to_vcq(struct ib_cq *ibcq)
{
	return container_of(ibcq, struct pvrdma_cq, ibcq);
}

static inline struct pvrdma_user_mr *to_vmr(struct ib_mr *ibmr)
{
	return container_of(ibmr, struct pvrdma_user_mr, ibmr);
}

static inline struct pvrdma_qp *to_vqp(struct ib_qp *ibqp)
{
	return container_of(ibqp, struct pvrdma_qp, ibqp);
}

static inline struct pvrdma_ah *to_vah(struct ib_ah *ibah)
{
	return container_of(ibah, struct pvrdma_ah, ibah);
}
275
276static inline void pvrdma_write_reg(struct pvrdma_dev *dev, u32 reg, u32 val)
277{
278 writel(cpu_to_le32(val), dev->regs + reg);
279}
280
281static inline u32 pvrdma_read_reg(struct pvrdma_dev *dev, u32 reg)
282{
283 return le32_to_cpu(readl(dev->regs + reg));
284}
285
286static inline void pvrdma_write_uar_cq(struct pvrdma_dev *dev, u32 val)
287{
288 writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_CQ_OFFSET);
289}
290
291static inline void pvrdma_write_uar_qp(struct pvrdma_dev *dev, u32 val)
292{
293 writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_QP_OFFSET);
294}
295
/*
 * Return the kernel-virtual address for byte @offset within the memory
 * described by @pdir. Requires pdir->pages[] to be populated (i.e. the
 * pages were allocated by the driver rather than pinned user memory).
 */
static inline void *pvrdma_page_dir_get_ptr(struct pvrdma_page_dir *pdir,
					    u64 offset)
{
	return pdir->pages[offset / PAGE_SIZE] + (offset % PAGE_SIZE);
}
301
/*
 * IB core <-> PVRDMA ABI enum conversions (MTU, port state).
 *
 * These are plain casts: the driver relies on the PVRDMA ABI enum
 * values being numerically identical to the IB core values.
 * NOTE(review): confirm against pvrdma_dev_api.h / vmw_pvrdma-abi.h
 * whenever either enum set changes.
 */
static inline enum pvrdma_mtu ib_mtu_to_pvrdma(enum ib_mtu mtu)
{
	return (enum pvrdma_mtu)mtu;
}

static inline enum ib_mtu pvrdma_mtu_to_ib(enum pvrdma_mtu mtu)
{
	return (enum ib_mtu)mtu;
}

static inline enum pvrdma_port_state ib_port_state_to_pvrdma(
					enum ib_port_state state)
{
	return (enum pvrdma_port_state)state;
}

static inline enum ib_port_state pvrdma_port_state_to_ib(
					enum pvrdma_port_state state)
{
	return (enum ib_port_state)state;
}
323
/*
 * Port capability / width / speed conversions. Values mirror the IB
 * core definitions; the ib->pvrdma direction additionally masks off
 * any flag bits above the highest one the PVRDMA ABI defines.
 */
static inline int ib_port_cap_flags_to_pvrdma(int flags)
{
	/* Drop flag bits the device ABI does not define. */
	return flags & PVRDMA_MASK(PVRDMA_PORT_CAP_FLAGS_MAX);
}

static inline int pvrdma_port_cap_flags_to_ib(int flags)
{
	return flags;
}

static inline enum pvrdma_port_width ib_port_width_to_pvrdma(
					enum ib_port_width width)
{
	return (enum pvrdma_port_width)width;
}

static inline enum ib_port_width pvrdma_port_width_to_ib(
					enum pvrdma_port_width width)
{
	return (enum ib_port_width)width;
}

static inline enum pvrdma_port_speed ib_port_speed_to_pvrdma(
					enum ib_port_speed speed)
{
	return (enum pvrdma_port_speed)speed;
}

static inline enum ib_port_speed pvrdma_port_speed_to_ib(
					enum pvrdma_port_speed speed)
{
	return (enum ib_port_speed)speed;
}
357
/*
 * QP attribute mask, migration state, and access flag conversions.
 * Same pattern as above: pvrdma->ib passes through, ib->pvrdma masks
 * to the bits the PVRDMA ABI defines; migration state is a value-
 * identical cast.
 */
static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask)
{
	return attr_mask;
}

static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask)
{
	/* Drop attribute bits the device ABI does not define. */
	return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX);
}

static inline enum pvrdma_mig_state ib_mig_state_to_pvrdma(
					enum ib_mig_state state)
{
	return (enum pvrdma_mig_state)state;
}

static inline enum ib_mig_state pvrdma_mig_state_to_ib(
					enum pvrdma_mig_state state)
{
	return (enum ib_mig_state)state;
}

static inline int ib_access_flags_to_pvrdma(int flags)
{
	return flags;
}

static inline int pvrdma_access_flags_to_ib(int flags)
{
	/* Report only access bits the device ABI defines. */
	return flags & PVRDMA_MASK(PVRDMA_ACCESS_FLAGS_MAX);
}
389
/*
 * QP type, QP state, and WR opcode conversions — value-identical casts
 * between the IB core and PVRDMA ABI enums.
 */
static inline enum pvrdma_qp_type ib_qp_type_to_pvrdma(enum ib_qp_type type)
{
	return (enum pvrdma_qp_type)type;
}

static inline enum ib_qp_type pvrdma_qp_type_to_ib(enum pvrdma_qp_type type)
{
	return (enum ib_qp_type)type;
}

static inline enum pvrdma_qp_state ib_qp_state_to_pvrdma(enum ib_qp_state state)
{
	return (enum pvrdma_qp_state)state;
}

static inline enum ib_qp_state pvrdma_qp_state_to_ib(enum pvrdma_qp_state state)
{
	return (enum ib_qp_state)state;
}

static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op)
{
	return (enum pvrdma_wr_opcode)op;
}
414
/*
 * Work-completion status/opcode/flags and send-flag conversions.
 * Completion fields pass through unchanged (values mirror the IB core
 * definitions); send flags are masked to the bits the device ABI
 * defines.
 */
static inline enum ib_wc_status pvrdma_wc_status_to_ib(
					enum pvrdma_wc_status status)
{
	return (enum ib_wc_status)status;
}

static inline int pvrdma_wc_opcode_to_ib(int opcode)
{
	return opcode;
}

static inline int pvrdma_wc_flags_to_ib(int flags)
{
	return flags;
}

static inline int ib_send_flags_to_pvrdma(int flags)
{
	/* Drop send-flag bits the device ABI does not define. */
	return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX);
}
435
/* Conversions between IB core and PVRDMA ABI structures. */
void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst,
			 const struct pvrdma_qp_cap *src);
void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst,
			 const struct ib_qp_cap *src);
void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src);
void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src);
void pvrdma_global_route_to_ib(struct ib_global_route *dst,
			       const struct pvrdma_global_route *src);
void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
			       const struct ib_global_route *src);
void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
			  const struct pvrdma_ah_attr *src);
void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
			  const struct ib_ah_attr *src);

/* UAR table setup/teardown and per-context UAR page allocation. */
int pvrdma_uar_table_init(struct pvrdma_dev *dev);
void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev);

int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar);
void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar);

/* Flush completions of @qp into @cq (internal helper, note the '_'). */
void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq);

/* Page directory construction and population (see struct pvrdma_page_dir). */
int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir,
			 u64 npages, bool alloc_pages);
void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev,
			     struct pvrdma_page_dir *pdir);
int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx,
			       dma_addr_t daddr);
int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
				struct ib_umem *umem, u64 offset);
dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx);
int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir,
				     u64 *page_list, int num_pages);

/*
 * Device command channel; NOTE(review): presumably posts @req and waits
 * for a response of type @resp_code — confirm in pvrdma_cmd.c.
 */
int pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req,
		    union pvrdma_cmd_resp *rsp, unsigned resp_code);

#endif /* __PVRDMA_H__ */