Diffstat (limited to 'include')
-rw-r--r--  include/linux/mlx5/device.h          72
-rw-r--r--  include/linux/mlx5/driver.h          14
-rw-r--r--  include/linux/mlx5/qp.h              65
-rw-r--r--  include/rdma/ib_umem.h               34
-rw-r--r--  include/rdma/ib_umem_odp.h          160
-rw-r--r--  include/rdma/ib_verbs.h              54
-rw-r--r--  include/uapi/rdma/ib_user_verbs.h    29
7 files changed, 420 insertions, 8 deletions
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index ea4f1c46f761..4e5bd813bb9a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -120,6 +120,15 @@ enum {
 };
 
 enum {
+	MLX5_MKEY_INBOX_PG_ACCESS	= 1 << 31
+};
+
+enum {
+	MLX5_PFAULT_SUBTYPE_WQE		= 0,
+	MLX5_PFAULT_SUBTYPE_RDMA	= 1,
+};
+
+enum {
 	MLX5_PERM_LOCAL_READ	= 1 << 2,
 	MLX5_PERM_LOCAL_WRITE	= 1 << 3,
 	MLX5_PERM_REMOTE_READ	= 1 << 4,
@@ -180,6 +189,19 @@ enum {
 	MLX5_MKEY_MASK_FREE		= 1ull << 29,
 };
 
+enum {
+	MLX5_UMR_TRANSLATION_OFFSET_EN	= (1 << 4),
+
+	MLX5_UMR_CHECK_NOT_FREE		= (1 << 5),
+	MLX5_UMR_CHECK_FREE		= (2 << 5),
+
+	MLX5_UMR_INLINE			= (1 << 7),
+};
+
+#define MLX5_UMR_MTT_ALIGNMENT 0x40
+#define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
+#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
+
 enum mlx5_event {
 	MLX5_EVENT_TYPE_COMP		= 0x0,
 
@@ -206,6 +228,8 @@ enum mlx5_event {
 
 	MLX5_EVENT_TYPE_CMD		= 0x0a,
 	MLX5_EVENT_TYPE_PAGE_REQUEST	= 0xb,
+
+	MLX5_EVENT_TYPE_PAGE_FAULT	= 0xc,
 };
 
 enum {
@@ -225,6 +249,7 @@ enum {
 	MLX5_DEV_CAP_FLAG_APM		= 1LL << 17,
 	MLX5_DEV_CAP_FLAG_ATOMIC	= 1LL << 18,
 	MLX5_DEV_CAP_FLAG_BLOCK_MCAST	= 1LL << 23,
+	MLX5_DEV_CAP_FLAG_ON_DMND_PG	= 1LL << 24,
 	MLX5_DEV_CAP_FLAG_CQ_MODER	= 1LL << 29,
 	MLX5_DEV_CAP_FLAG_RESIZE_CQ	= 1LL << 30,
 	MLX5_DEV_CAP_FLAG_DCT		= 1LL << 37,
@@ -290,6 +315,8 @@ enum {
 enum {
 	HCA_CAP_OPMOD_GET_MAX	= 0,
 	HCA_CAP_OPMOD_GET_CUR	= 1,
+	HCA_CAP_OPMOD_GET_ODP_MAX = 4,
+	HCA_CAP_OPMOD_GET_ODP_CUR = 5
 };
 
 struct mlx5_inbox_hdr {
@@ -319,6 +346,23 @@ struct mlx5_cmd_query_adapter_mbox_out {
 	u8			vsd_psid[16];
 };
 
+enum mlx5_odp_transport_cap_bits {
+	MLX5_ODP_SUPPORT_SEND	 = 1 << 31,
+	MLX5_ODP_SUPPORT_RECV	 = 1 << 30,
+	MLX5_ODP_SUPPORT_WRITE	 = 1 << 29,
+	MLX5_ODP_SUPPORT_READ	 = 1 << 28,
+};
+
+struct mlx5_odp_caps {
+	char reserved[0x10];
+	struct {
+		__be32		rc_odp_caps;
+		__be32		uc_odp_caps;
+		__be32		ud_odp_caps;
+	} per_transport_caps;
+	char reserved2[0xe4];
+};
+
 struct mlx5_cmd_init_hca_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	u8			rsvd0[2];
@@ -439,6 +483,27 @@ struct mlx5_eqe_page_req {
 	__be32		rsvd1[5];
 };
 
+struct mlx5_eqe_page_fault {
+	__be32 bytes_committed;
+	union {
+		struct {
+			u16	reserved1;
+			__be16	wqe_index;
+			u16	reserved2;
+			__be16	packet_length;
+			u8	reserved3[12];
+		} __packed wqe;
+		struct {
+			__be32	r_key;
+			u16	reserved1;
+			__be16	packet_length;
+			__be32	rdma_op_len;
+			__be64	rdma_va;
+		} __packed rdma;
+	} __packed;
+	__be32 flags_qpn;
+} __packed;
+
 union ev_data {
 	__be32				raw[7];
 	struct mlx5_eqe_cmd		cmd;
@@ -450,6 +515,7 @@ union ev_data {
 	struct mlx5_eqe_congestion	cong;
 	struct mlx5_eqe_stall_vl	stall_vl;
 	struct mlx5_eqe_page_req	req_pages;
+	struct mlx5_eqe_page_fault	page_fault;
 } __packed;
 
 struct mlx5_eqe {
@@ -776,6 +842,10 @@ struct mlx5_query_eq_mbox_out {
 	struct mlx5_eq_context	ctx;
 };
 
+enum {
+	MLX5_MKEY_STATUS_FREE = 1 << 6,
+};
+
 struct mlx5_mkey_seg {
 	/* This is a two bit field occupying bits 31-30.
 	 * bit 31 is always 0,
@@ -812,7 +882,7 @@ struct mlx5_query_special_ctxs_mbox_out {
 struct mlx5_create_mkey_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	__be32			input_mkey_index;
-	u8			rsvd0[4];
+	__be32			flags;
 	struct mlx5_mkey_seg	seg;
 	u8			rsvd1[16];
 	__be32			xlat_oct_act_size;
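For orientation, a minimal sketch of how an event-queue handler might decode the new MLX5_EVENT_TYPE_PAGE_FAULT payload defined above. Only the structure layout comes from this patch; the helper name, the pr_debug reporting, and the assumption that the low 24 bits of flags_qpn carry the QPN are illustrative.

/* Illustrative only: decode an mlx5_eqe_page_fault into host-endian values. */
static void example_decode_page_fault(struct mlx5_eqe *eqe)
{
	struct mlx5_eqe_page_fault *pf = &eqe->data.page_fault;
	u32 qpn = be32_to_cpu(pf->flags_qpn) & 0xffffff; /* assumed: QPN in low 24 bits */

	switch (eqe->sub_type) {
	case MLX5_PFAULT_SUBTYPE_WQE:
		pr_debug("WQE fault: qpn 0x%x wqe_index 0x%x packet len %u\n",
			 qpn, be16_to_cpu(pf->wqe.wqe_index),
			 be16_to_cpu(pf->wqe.packet_length));
		break;
	case MLX5_PFAULT_SUBTYPE_RDMA:
		pr_debug("RDMA fault: qpn 0x%x rkey 0x%x va 0x%llx len %u\n",
			 qpn, be32_to_cpu(pf->rdma.r_key),
			 (unsigned long long)be64_to_cpu(pf->rdma.rdma_va),
			 be32_to_cpu(pf->rdma.rdma_op_len));
		break;
	}
}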
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b1bf41556b32..166d9315fe4b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -113,6 +113,13 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 };
 
+enum mlx5_page_fault_resume_flags {
+	MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
+	MLX5_PAGE_FAULT_RESUME_WRITE	 = 1 << 1,
+	MLX5_PAGE_FAULT_RESUME_RDMA	 = 1 << 2,
+	MLX5_PAGE_FAULT_RESUME_ERROR	 = 1 << 7,
+};
+
 enum dbg_rsc_type {
 	MLX5_DBG_RSC_QP,
 	MLX5_DBG_RSC_EQ,
@@ -467,7 +474,7 @@ struct mlx5_priv {
 	struct workqueue_struct *pg_wq;
 	struct rb_root		page_root;
 	int			fw_pages;
-	int			reg_pages;
+	atomic_t		reg_pages;
 	struct list_head	free_list;
 
 	struct mlx5_core_health health;
@@ -703,6 +710,9 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+#endif
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
@@ -740,6 +750,8 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
 			 int npsvs, u32 *sig_index);
 int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
 void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
+int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
+			struct mlx5_odp_caps *odp_caps);
 
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {
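The new mlx5_query_odp_caps() export pairs with the MLX5_ODP_SUPPORT_* bits from device.h. A rough sketch, not the patch's actual implementation, of how the IB driver could fold the RC transport word into the generic struct ib_odp_caps introduced in ib_verbs.h further below; the be32 handling of the per-transport words is an assumption.

/* Sketch: fold the RC transport bits into the generic ib_odp_caps. */
static int example_fill_odp_caps(struct mlx5_core_dev *mdev,
				 struct ib_odp_caps *caps)
{
	struct mlx5_odp_caps hw_caps;
	u32 rc;
	int err;

	err = mlx5_query_odp_caps(mdev, &hw_caps);
	if (err)
		return err;

	rc = be32_to_cpu(hw_caps.per_transport_caps.rc_odp_caps);
	caps->general_caps = IB_ODP_SUPPORT;
	if (rc & MLX5_ODP_SUPPORT_SEND)
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
	if (rc & MLX5_ODP_SUPPORT_RECV)
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
	if (rc & MLX5_ODP_SUPPORT_WRITE)
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
	if (rc & MLX5_ODP_SUPPORT_READ)
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
	return 0;
}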
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 3fa075daeb1d..61f7a342d1bf 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -50,6 +50,9 @@
 #define MLX5_BSF_APPTAG_ESCAPE	0x1
 #define MLX5_BSF_APPREF_ESCAPE	0x2
 
+#define MLX5_QPN_BITS		24
+#define MLX5_QPN_MASK		((1 << MLX5_QPN_BITS) - 1)
+
 enum mlx5_qp_optpar {
 	MLX5_QP_OPTPAR_ALT_ADDR_PATH	= 1 << 0,
 	MLX5_QP_OPTPAR_RRE		= 1 << 1,
@@ -189,6 +192,14 @@ struct mlx5_wqe_ctrl_seg {
 	__be32			imm;
 };
 
+#define MLX5_WQE_CTRL_DS_MASK 0x3f
+#define MLX5_WQE_CTRL_QPN_MASK 0xffffff00
+#define MLX5_WQE_CTRL_QPN_SHIFT 8
+#define MLX5_WQE_DS_UNITS 16
+#define MLX5_WQE_CTRL_OPCODE_MASK 0xff
+#define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
+#define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
+
 struct mlx5_wqe_xrc_seg {
 	__be32			xrc_srqn;
 	u8			rsvd[12];
@@ -292,6 +303,8 @@ struct mlx5_wqe_signature_seg {
 	u8	rsvd1[11];
 };
 
+#define MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK 0x3ff
+
 struct mlx5_wqe_inline_seg {
 	__be32	byte_count;
 };
@@ -360,9 +373,46 @@ struct mlx5_stride_block_ctrl_seg {
 	__be16			num_entries;
 };
 
+enum mlx5_pagefault_flags {
+	MLX5_PFAULT_REQUESTOR = 1 << 0,
+	MLX5_PFAULT_WRITE     = 1 << 1,
+	MLX5_PFAULT_RDMA      = 1 << 2,
+};
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+	u32			bytes_committed;
+	u8			event_subtype;
+	enum mlx5_pagefault_flags flags;
+	union {
+		/* Initiator or send message responder pagefault details. */
+		struct {
+			/* Received packet size, only valid for responders. */
+			u32	packet_size;
+			/*
+			 * WQE index. Refers to either the send queue or
+			 * receive queue, according to event_subtype.
+			 */
+			u16	wqe_index;
+		} wqe;
+		/* RDMA responder pagefault details */
+		struct {
+			u32	r_key;
+			/*
+			 * Received packet size, minimal size page fault
+			 * resolution required for forward progress.
+			 */
+			u32	packet_size;
+			u32	rdma_op_len;
+			u64	rdma_va;
+		} rdma;
+	};
+};
+
 struct mlx5_core_qp {
 	struct mlx5_core_rsc_common	common; /* must be first */
 	void (*event) (struct mlx5_core_qp *, int);
+	void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
 	int			qpn;
 	struct mlx5_rsc_debug	*dbg;
 	int			pid;
@@ -530,6 +580,17 @@ static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u
 	return radix_tree_lookup(&dev->priv.mr_table.tree, key);
 }
 
+struct mlx5_page_fault_resume_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	__be32			flags_qpn;
+	u8			reserved[4];
+};
+
+struct mlx5_page_fault_resume_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
 			struct mlx5_create_qp_mbox_in *in,
@@ -549,6 +610,10 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
 int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
+				u8 context, int error);
+#endif
 
 static inline const char *mlx5_qp_type_str(int type)
 {
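A compressed, illustrative sketch of the flow these declarations enable: a pfault_handler attached to an mlx5_core_qp resolves the fault described by struct mlx5_pagefault and then reports back through mlx5_core_page_fault_resume(), setting the error flag when resolution failed. The page-in step and the exact context-flag mapping are placeholders, not taken from this patch.

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/* Illustrative only: resolve a fault for a QP, then resume or abort it. */
static void example_handle_pagefault(struct mlx5_core_dev *dev,
				     struct mlx5_core_qp *qp,
				     struct mlx5_pagefault *pfault)
{
	/* Assumed mapping: RDMA responder faults resume with the RDMA
	 * context bit, everything else as a plain WQE-context resume. */
	u8 context = (pfault->flags & MLX5_PFAULT_RDMA) ?
		     MLX5_PAGE_FAULT_RESUME_RDMA : 0;
	int err = 0;

	/* ... page in the memory named by pfault->wqe or pfault->rdma ... */

	if (mlx5_core_page_fault_resume(dev, qp->qpn, context, err ? 1 : 0))
		pr_warn("failed to resume QP 0x%x after page fault\n", qp->qpn);
}
#endif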
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a2bf41e0bde9..2d83cfd7e6ce 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -38,11 +38,12 @@
 #include <linux/workqueue.h>
 
 struct ib_ucontext;
+struct ib_umem_odp;
 
 struct ib_umem {
 	struct ib_ucontext     *context;
 	size_t			length;
-	int			offset;
+	unsigned long		address;
 	int			page_size;
 	int                     writable;
 	int                     hugetlb;
@@ -50,17 +51,43 @@ struct ib_umem {
 	struct pid             *pid;
 	struct mm_struct       *mm;
 	unsigned long		diff;
+	struct ib_umem_odp     *odp_data;
 	struct sg_table sg_head;
 	int             nmap;
 	int             npages;
 };
 
+/* Returns the offset of the umem start relative to the first page. */
+static inline int ib_umem_offset(struct ib_umem *umem)
+{
+	return umem->address & ((unsigned long)umem->page_size - 1);
+}
+
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem *umem)
+{
+	return umem->address - ib_umem_offset(umem);
+}
+
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem *umem)
+{
+	return PAGE_ALIGN(umem->address + umem->length);
+}
+
+static inline size_t ib_umem_num_pages(struct ib_umem *umem)
+{
+	return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
+}
+
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 			    size_t size, int access, int dmasync);
 void ib_umem_release(struct ib_umem *umem);
 int ib_umem_page_count(struct ib_umem *umem);
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      size_t length);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
@@ -73,7 +100,10 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
 }
 static inline void ib_umem_release(struct ib_umem *umem) { }
 static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
-
+static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+				    size_t length) {
+	return -EINVAL;
+}
 #endif /* CONFIG_INFINIBAND_USER_MEM */
 
 #endif /* IB_UMEM_H */
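A small usage sketch of the new address-based helpers: since ib_umem now records the untruncated start address instead of an offset, a driver can derive the page-aligned extent, page count, and in-page offset as below. The function is illustrative only and assumes PAGE_SIZE-backed pages, as the helpers above do.

/* Sketch: compute the mapping extent of a umem using the new helpers. */
static void example_umem_extent(struct ib_umem *umem)
{
	unsigned long first = ib_umem_start(umem);   /* page-aligned start */
	unsigned long last  = ib_umem_end(umem);     /* one past the last page */
	size_t npages       = ib_umem_num_pages(umem);
	int in_page_off     = ib_umem_offset(umem);  /* replaces the old ->offset */

	pr_debug("umem %p: [%#lx, %#lx) %zu pages, offset %d in first page\n",
		 umem, first, last, npages, in_page_off);
}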
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
new file mode 100644
index 000000000000..3da0b167041b
--- /dev/null
+++ b/include/rdma/ib_umem_odp.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_ODP_H
+#define IB_UMEM_ODP_H
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <linux/interval_tree.h>
+
+struct umem_odp_node {
+	u64 __subtree_last;
+	struct rb_node rb;
+};
+
+struct ib_umem_odp {
+	/*
+	 * An array of the pages included in the on-demand paging umem.
+	 * Indices of pages that are currently not mapped into the device will
+	 * contain NULL.
+	 */
+	struct page		**page_list;
+	/*
+	 * An array of the same size as page_list, with DMA addresses mapped
+	 * for the pages in page_list. The lower two bits designate
+	 * access permissions. See ODP_READ_ALLOWED_BIT and
+	 * ODP_WRITE_ALLOWED_BIT.
+	 */
+	dma_addr_t		*dma_list;
+	/*
+	 * The umem_mutex protects the page_list and dma_list fields of an ODP
+	 * umem, allowing only a single thread to map/unmap pages. The mutex
+	 * also protects access to the mmu notifier counters.
+	 */
+	struct mutex		umem_mutex;
+	void			*private; /* for the HW driver to use. */
+
+	/* When false, use the notifier counter in the ucontext struct. */
+	bool mn_counters_active;
+	int notifiers_seq;
+	int notifiers_count;
+
+	/* A linked list of umems that don't have private mmu notifier
+	 * counters yet. */
+	struct list_head no_private_counters;
+	struct ib_umem		*umem;
+
+	/* Tree tracking */
+	struct umem_odp_node	interval_tree;
+
+	struct completion	notifier_completion;
+	int			dying;
+};
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+
+void ib_umem_odp_release(struct ib_umem *umem);
+
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ */
+#define ODP_READ_ALLOWED_BIT  (1<<0ULL)
+#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
+			      u64 access_mask, unsigned long current_seq);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+				 u64 bound);
+
+void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root);
+void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root);
+typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+			      void *cookie);
+/*
+ * Call the callback on each ib_umem in the range. Returns the logical or of
+ * the return values of the functions called.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
+				  umem_call_back cb, void *cookie);
+
+struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
+					     u64 start, u64 last);
+struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
+					    u64 start, u64 last);
+
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+					     unsigned long mmu_seq)
+{
+	/*
+	 * This code is strongly based on the KVM code from
+	 * mmu_notifier_retry. Should be called with
+	 * the relevant locks taken (item->odp_data->umem_mutex
+	 * and the ucontext umem_rwsem semaphore locked for read).
+	 */
+
+	/* Do not allow page faults while the new ib_umem hasn't seen a state
+	 * with zero notifiers yet, and doesn't have its own valid set of
+	 * private counters. */
+	if (!item->odp_data->mn_counters_active)
+		return 1;
+
+	if (unlikely(item->odp_data->notifiers_count))
+		return 1;
+	if (item->odp_data->notifiers_seq != mmu_seq)
+		return 1;
+	return 0;
+}
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+static inline int ib_umem_odp_get(struct ib_ucontext *context,
+				  struct ib_umem *umem)
+{
+	return -EINVAL;
+}
+
+static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+#endif /* IB_UMEM_ODP_H */
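To make the retry protocol concrete, a hedged sketch of the fault-handling loop these helpers imply: snapshot notifiers_seq, map the pages, and commit the translations only if ib_umem_mmu_notifier_retry() reports that no invalidation raced with the mapping. The commit step is driver-specific and only stubbed here, and the barrier placement follows the header's comment rather than any particular driver.

/* Illustrative only: the map-then-check-sequence pattern for an ODP fault. */
static int example_odp_fault(struct ib_umem *umem, u64 user_va, u64 bcnt,
			     u64 access_mask)
{
	unsigned long current_seq;
	int npages;

	current_seq = ACCESS_ONCE(umem->odp_data->notifiers_seq);
	/* Pairs with the invalidation path that bumps the notifier counters. */
	smp_rmb();

	npages = ib_umem_odp_map_dma_pages(umem, user_va, bcnt, access_mask,
					   current_seq);
	if (npages < 0)
		return npages;

	mutex_lock(&umem->odp_data->umem_mutex);
	if (!ib_umem_mmu_notifier_retry(umem, current_seq)) {
		/* ... safe to push the new translations to the device ... */
	}
	mutex_unlock(&umem->odp_data->umem_mutex);
	return npages;
}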
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 470a011d6fa4..0d74f1de99aa 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -51,6 +51,7 @@
 #include <uapi/linux/if_ether.h>
 
 #include <linux/atomic.h>
+#include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 
 extern struct workqueue_struct *ib_wq;
@@ -123,7 +124,8 @@ enum ib_device_cap_flags {
 	IB_DEVICE_MEM_WINDOW_TYPE_2A	= (1<<23),
 	IB_DEVICE_MEM_WINDOW_TYPE_2B	= (1<<24),
 	IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
-	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30)
+	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30),
+	IB_DEVICE_ON_DEMAND_PAGING	= (1<<31),
 };
 
 enum ib_signature_prot_cap {
@@ -143,6 +145,27 @@ enum ib_atomic_cap {
 	IB_ATOMIC_GLOB
 };
 
+enum ib_odp_general_cap_bits {
+	IB_ODP_SUPPORT = 1 << 0,
+};
+
+enum ib_odp_transport_cap_bits {
+	IB_ODP_SUPPORT_SEND	= 1 << 0,
+	IB_ODP_SUPPORT_RECV	= 1 << 1,
+	IB_ODP_SUPPORT_WRITE	= 1 << 2,
+	IB_ODP_SUPPORT_READ	= 1 << 3,
+	IB_ODP_SUPPORT_ATOMIC	= 1 << 4,
+};
+
+struct ib_odp_caps {
+	uint64_t general_caps;
+	struct {
+		uint32_t rc_odp_caps;
+		uint32_t uc_odp_caps;
+		uint32_t ud_odp_caps;
+	} per_transport_caps;
+};
+
 struct ib_device_attr {
 	u64			fw_ver;
 	__be64			sys_image_guid;
@@ -186,6 +209,7 @@ struct ib_device_attr {
 	u8			local_ca_ack_delay;
 	int			sig_prot_cap;
 	int			sig_guard_cap;
+	struct ib_odp_caps	odp_caps;
 };
 
 enum ib_mtu {
@@ -1073,7 +1097,8 @@ enum ib_access_flags {
 	IB_ACCESS_REMOTE_READ	= (1<<2),
 	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
 	IB_ACCESS_MW_BIND	= (1<<4),
-	IB_ZERO_BASED		= (1<<5)
+	IB_ZERO_BASED		= (1<<5),
+	IB_ACCESS_ON_DEMAND	= (1<<6),
 };
 
 struct ib_phys_buf {
@@ -1115,6 +1140,8 @@ struct ib_fmr_attr {
 	u8	page_shift;
 };
 
+struct ib_umem;
+
 struct ib_ucontext {
 	struct ib_device       *device;
 	struct list_head	pd_list;
@@ -1127,6 +1154,24 @@ struct ib_ucontext {
 	struct list_head	xrcd_list;
 	struct list_head	rule_list;
 	int			closing;
+
+	struct pid             *tgid;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct rb_root		umem_tree;
+	/*
+	 * Protects the umem_tree rbtree, as well as odp_mrs_count and
+	 * mmu notifiers registration.
+	 */
+	struct rw_semaphore	umem_rwsem;
+	void (*invalidate_range)(struct ib_umem *umem,
+				 unsigned long start, unsigned long end);
+
+	struct mmu_notifier	mn;
+	atomic_t		notifier_count;
+	/* A list of umems that don't have private mmu notifier counters yet. */
+	struct list_head	no_private_counters;
+	int			odp_mrs_count;
+#endif
 };
 
 struct ib_uobject {
@@ -1662,7 +1707,10 @@ static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t
 
 static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
 {
-	return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+	size_t copy_sz;
+
+	copy_sz = min_t(size_t, len, udata->outlen);
+	return copy_to_user(udata->outbuf, src, copy_sz) ? -EFAULT : 0;
 }
 
 /**
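From a consumer's perspective the new capability bits compose as follows; a sketch of a kernel ULP checking that on-demand paging may be requested before passing IB_ACCESS_ON_DEMAND to memory registration. The RC send/receive requirement is an example policy, not something this patch mandates.

/* Sketch: verify device-level and RC-transport ODP support before use. */
static bool example_can_use_odp(struct ib_device *device)
{
	struct ib_device_attr attr;
	u32 rc_caps;

	if (ib_query_device(device, &attr))
		return false;

	if (!(attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
		return false;
	if (!(attr.odp_caps.general_caps & IB_ODP_SUPPORT))
		return false;

	/* Example policy: require ODP support for RC send and receive. */
	rc_caps = attr.odp_caps.per_transport_caps.rc_odp_caps;
	return (rc_caps & (IB_ODP_SUPPORT_SEND | IB_ODP_SUPPORT_RECV)) ==
	       (IB_ODP_SUPPORT_SEND | IB_ODP_SUPPORT_RECV);
}

A caller that gets true back would then include IB_ACCESS_ON_DEMAND in the access flags of its registration request.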
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 26daf55ff76e..4275b961bf60 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -90,8 +90,9 @@ enum {
 };
 
 enum {
+	IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
 	IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
-	IB_USER_VERBS_EX_CMD_DESTROY_FLOW
+	IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
 };
 
 /*
@@ -201,6 +202,32 @@ struct ib_uverbs_query_device_resp {
 	__u8  reserved[4];
 };
 
+enum {
+	IB_USER_VERBS_EX_QUERY_DEVICE_ODP = 1ULL << 0,
+};
+
+struct ib_uverbs_ex_query_device {
+	__u32 comp_mask;
+	__u32 reserved;
+};
+
+struct ib_uverbs_odp_caps {
+	__u64 general_caps;
+	struct {
+		__u32 rc_odp_caps;
+		__u32 uc_odp_caps;
+		__u32 ud_odp_caps;
+	} per_transport_caps;
+	__u32 reserved;
+};
+
+struct ib_uverbs_ex_query_device_resp {
+	struct ib_uverbs_query_device_resp base;
+	__u32 comp_mask;
+	__u32 reserved;
+	struct ib_uverbs_odp_caps odp_caps;
+};
+
 struct ib_uverbs_query_port {
 	__u64 response;
 	__u8  port_num;
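For the new uapi structures, a rough userspace-side sketch of interpreting the extended QUERY_DEVICE response. The include path is an assumed install location for this uapi header, and the raw command/response plumbing is omitted; only the comp_mask and odp_caps interpretation reflects the definitions above.

#include <stdio.h>
#include <rdma/ib_user_verbs.h>	/* assumed install path for this uapi header */

/* Sketch: report the ODP capabilities carried in an extended response. */
static int example_parse_ex_query_device(const struct ib_uverbs_ex_query_device_resp *resp)
{
	/* comp_mask advertises which optional response blocks the kernel filled. */
	if (!(resp->comp_mask & IB_USER_VERBS_EX_QUERY_DEVICE_ODP))
		return 0;	/* kernel did not report ODP capabilities */

	printf("ODP general caps 0x%llx RC 0x%x UC 0x%x UD 0x%x\n",
	       (unsigned long long)resp->odp_caps.general_caps,
	       resp->odp_caps.per_transport_caps.rc_odp_caps,
	       resp->odp_caps.per_transport_caps.uc_odp_caps,
	       resp->odp_caps.per_transport_caps.ud_odp_caps);
	return 1;
}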