Diffstat (limited to 'include')
 include/linux/mlx5/device.h       |  72
 include/linux/mlx5/driver.h       |  14
 include/linux/mlx5/qp.h           |  65
 include/rdma/ib_umem.h            |  34
 include/rdma/ib_umem_odp.h        | 160
 include/rdma/ib_verbs.h           |  54
 include/uapi/rdma/ib_user_verbs.h |  29
 7 files changed, 420 insertions(+), 8 deletions(-)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index ea4f1c46f761..4e5bd813bb9a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -120,6 +120,15 @@ enum {
 };
 
 enum {
+	MLX5_MKEY_INBOX_PG_ACCESS = 1 << 31
+};
+
+enum {
+	MLX5_PFAULT_SUBTYPE_WQE		= 0,
+	MLX5_PFAULT_SUBTYPE_RDMA	= 1,
+};
+
+enum {
 	MLX5_PERM_LOCAL_READ	= 1 << 2,
 	MLX5_PERM_LOCAL_WRITE	= 1 << 3,
 	MLX5_PERM_REMOTE_READ	= 1 << 4,
@@ -180,6 +189,19 @@ enum {
 	MLX5_MKEY_MASK_FREE		= 1ull << 29,
 };
 
+enum {
+	MLX5_UMR_TRANSLATION_OFFSET_EN	= (1 << 4),
+
+	MLX5_UMR_CHECK_NOT_FREE		= (1 << 5),
+	MLX5_UMR_CHECK_FREE		= (2 << 5),
+
+	MLX5_UMR_INLINE			= (1 << 7),
+};
+
+#define MLX5_UMR_MTT_ALIGNMENT		0x40
+#define MLX5_UMR_MTT_MASK		(MLX5_UMR_MTT_ALIGNMENT - 1)
+#define MLX5_UMR_MTT_MIN_CHUNK_SIZE	MLX5_UMR_MTT_ALIGNMENT
+
 enum mlx5_event {
 	MLX5_EVENT_TYPE_COMP		= 0x0,
 
@@ -206,6 +228,8 @@ enum mlx5_event {
 
 	MLX5_EVENT_TYPE_CMD		= 0x0a,
 	MLX5_EVENT_TYPE_PAGE_REQUEST	= 0xb,
+
+	MLX5_EVENT_TYPE_PAGE_FAULT	= 0xc,
 };
 
 enum {
@@ -225,6 +249,7 @@ enum {
 	MLX5_DEV_CAP_FLAG_APM		= 1LL << 17,
 	MLX5_DEV_CAP_FLAG_ATOMIC	= 1LL << 18,
 	MLX5_DEV_CAP_FLAG_BLOCK_MCAST	= 1LL << 23,
+	MLX5_DEV_CAP_FLAG_ON_DMND_PG	= 1LL << 24,
 	MLX5_DEV_CAP_FLAG_CQ_MODER	= 1LL << 29,
 	MLX5_DEV_CAP_FLAG_RESIZE_CQ	= 1LL << 30,
 	MLX5_DEV_CAP_FLAG_DCT		= 1LL << 37,
@@ -290,6 +315,8 @@ enum {
 enum {
 	HCA_CAP_OPMOD_GET_MAX	= 0,
 	HCA_CAP_OPMOD_GET_CUR	= 1,
+	HCA_CAP_OPMOD_GET_ODP_MAX = 4,
+	HCA_CAP_OPMOD_GET_ODP_CUR = 5
 };
 
 struct mlx5_inbox_hdr {
@@ -319,6 +346,23 @@ struct mlx5_cmd_query_adapter_mbox_out {
 	u8			vsd_psid[16];
 };
 
+enum mlx5_odp_transport_cap_bits {
+	MLX5_ODP_SUPPORT_SEND	= 1 << 31,
+	MLX5_ODP_SUPPORT_RECV	= 1 << 30,
+	MLX5_ODP_SUPPORT_WRITE	= 1 << 29,
+	MLX5_ODP_SUPPORT_READ	= 1 << 28,
+};
+
+struct mlx5_odp_caps {
+	char reserved[0x10];
+	struct {
+		__be32		rc_odp_caps;
+		__be32		uc_odp_caps;
+		__be32		ud_odp_caps;
+	} per_transport_caps;
+	char reserved2[0xe4];
+};
+
 struct mlx5_cmd_init_hca_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	u8			rsvd0[2];
@@ -439,6 +483,27 @@ struct mlx5_eqe_page_req {
 	__be32		rsvd1[5];
 };
 
+struct mlx5_eqe_page_fault {
+	__be32 bytes_committed;
+	union {
+		struct {
+			u16	reserved1;
+			__be16	wqe_index;
+			u16	reserved2;
+			__be16	packet_length;
+			u8	reserved3[12];
+		} __packed wqe;
+		struct {
+			__be32	r_key;
+			u16	reserved1;
+			__be16	packet_length;
+			__be32	rdma_op_len;
+			__be64	rdma_va;
+		} __packed rdma;
+	} __packed;
+	__be32 flags_qpn;
+} __packed;
+
 union ev_data {
 	__be32				raw[7];
 	struct mlx5_eqe_cmd		cmd;
@@ -450,6 +515,7 @@ union ev_data {
 	struct mlx5_eqe_congestion	cong;
 	struct mlx5_eqe_stall_vl	stall_vl;
 	struct mlx5_eqe_page_req	req_pages;
+	struct mlx5_eqe_page_fault	page_fault;
 } __packed;
 
 struct mlx5_eqe {
@@ -776,6 +842,10 @@ struct mlx5_query_eq_mbox_out {
 	struct mlx5_eq_context	ctx;
 };
 
+enum {
+	MLX5_MKEY_STATUS_FREE = 1 << 6,
+};
+
 struct mlx5_mkey_seg {
 	/* This is a two bit field occupying bits 31-30.
 	 * bit 31 is always 0,
@@ -812,7 +882,7 @@ struct mlx5_query_special_ctxs_mbox_out {
 struct mlx5_create_mkey_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	__be32			input_mkey_index;
-	u8			rsvd0[4];
+	__be32			flags;
 	struct mlx5_mkey_seg	seg;
 	u8			rsvd1[16];
 	__be32			xlat_oct_act_size;
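The new mlx5_eqe_page_fault layout mirrors the two fault subtypes (WQE and RDMA) defined above. The following is an illustrative sketch only, not part of the patch: it assumes the pre-existing struct mlx5_eqe layout with a sub_type byte and a data member of union ev_data, and MLX5_QPN_MASK from the qp.h change below.

#include <linux/mlx5/device.h>
#include <linux/mlx5/qp.h>	/* MLX5_QPN_MASK */

/* Hypothetical helper: decode a page-fault EQE in an EQ handler. */
static void demo_decode_page_fault(struct mlx5_eqe *eqe)
{
	struct mlx5_eqe_page_fault *pf = &eqe->data.page_fault;
	u32 qpn = be32_to_cpu(pf->flags_qpn) & MLX5_QPN_MASK;

	switch (eqe->sub_type) {
	case MLX5_PFAULT_SUBTYPE_WQE:
		pr_debug("WQE fault: qpn 0x%x wqe_index %u packet_length %u\n",
			 qpn, be16_to_cpu(pf->wqe.wqe_index),
			 be16_to_cpu(pf->wqe.packet_length));
		break;
	case MLX5_PFAULT_SUBTYPE_RDMA:
		pr_debug("RDMA fault: qpn 0x%x r_key 0x%x va 0x%llx len %u\n",
			 qpn, be32_to_cpu(pf->rdma.r_key),
			 (unsigned long long)be64_to_cpu(pf->rdma.rdma_va),
			 be32_to_cpu(pf->rdma.rdma_op_len));
		break;
	}
}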
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b1bf41556b32..166d9315fe4b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -113,6 +113,13 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 };
 
+enum mlx5_page_fault_resume_flags {
+	MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
+	MLX5_PAGE_FAULT_RESUME_WRITE	 = 1 << 1,
+	MLX5_PAGE_FAULT_RESUME_RDMA	 = 1 << 2,
+	MLX5_PAGE_FAULT_RESUME_ERROR	 = 1 << 7,
+};
+
 enum dbg_rsc_type {
 	MLX5_DBG_RSC_QP,
 	MLX5_DBG_RSC_EQ,
@@ -467,7 +474,7 @@ struct mlx5_priv {
 	struct workqueue_struct	*pg_wq;
 	struct rb_root		page_root;
 	int			fw_pages;
-	int			reg_pages;
+	atomic_t		reg_pages;
 	struct list_head	free_list;
 
 	struct mlx5_core_health health;
@@ -703,6 +710,9 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+#endif
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
@@ -740,6 +750,8 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
 			 int npsvs, u32 *sig_index);
 int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
 void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
+int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
+			struct mlx5_odp_caps *odp_caps);
 
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {
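A minimal sketch of probing the per-transport ODP capabilities through the new mlx5_query_odp_caps() helper; only the helper, struct mlx5_odp_caps and the MLX5_ODP_SUPPORT_* bits come from this patch, the wrapper name is hypothetical.

#include <linux/mlx5/driver.h>

/* Hypothetical helper: does this device support ODP RDMA write on RC QPs? */
static bool demo_rc_odp_write_supported(struct mlx5_core_dev *dev)
{
	struct mlx5_odp_caps caps;

	if (mlx5_query_odp_caps(dev, &caps))
		return false;

	return be32_to_cpu(caps.per_transport_caps.rc_odp_caps) &
	       MLX5_ODP_SUPPORT_WRITE;
}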
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 3fa075daeb1d..61f7a342d1bf 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -50,6 +50,9 @@
 #define MLX5_BSF_APPTAG_ESCAPE	0x1
 #define MLX5_BSF_APPREF_ESCAPE	0x2
 
+#define MLX5_QPN_BITS		24
+#define MLX5_QPN_MASK		((1 << MLX5_QPN_BITS) - 1)
+
 enum mlx5_qp_optpar {
 	MLX5_QP_OPTPAR_ALT_ADDR_PATH	= 1 << 0,
 	MLX5_QP_OPTPAR_RRE		= 1 << 1,
@@ -189,6 +192,14 @@ struct mlx5_wqe_ctrl_seg {
 	__be32			imm;
 };
 
+#define MLX5_WQE_CTRL_DS_MASK		0x3f
+#define MLX5_WQE_CTRL_QPN_MASK		0xffffff00
+#define MLX5_WQE_CTRL_QPN_SHIFT		8
+#define MLX5_WQE_DS_UNITS		16
+#define MLX5_WQE_CTRL_OPCODE_MASK	0xff
+#define MLX5_WQE_CTRL_WQE_INDEX_MASK	0x00ffff00
+#define MLX5_WQE_CTRL_WQE_INDEX_SHIFT	8
+
 struct mlx5_wqe_xrc_seg {
 	__be32			xrc_srqn;
 	u8			rsvd[12];
@@ -292,6 +303,8 @@ struct mlx5_wqe_signature_seg {
 	u8	rsvd1[11];
 };
 
+#define MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK 0x3ff
+
 struct mlx5_wqe_inline_seg {
 	__be32	byte_count;
 };
@@ -360,9 +373,46 @@ struct mlx5_stride_block_ctrl_seg {
 	__be16			num_entries;
 };
 
+enum mlx5_pagefault_flags {
+	MLX5_PFAULT_REQUESTOR	= 1 << 0,
+	MLX5_PFAULT_WRITE	= 1 << 1,
+	MLX5_PFAULT_RDMA	= 1 << 2,
+};
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+	u32			bytes_committed;
+	u8			event_subtype;
+	enum mlx5_pagefault_flags flags;
+	union {
+		/* Initiator or send message responder pagefault details. */
+		struct {
+			/* Received packet size, only valid for responders. */
+			u32	packet_size;
+			/*
+			 * WQE index. Refers to either the send queue or
+			 * receive queue, according to event_subtype.
+			 */
+			u16	wqe_index;
+		} wqe;
+		/* RDMA responder pagefault details */
+		struct {
+			u32	r_key;
+			/*
+			 * Received packet size, minimal size page fault
+			 * resolution required for forward progress.
+			 */
+			u32	packet_size;
+			u32	rdma_op_len;
+			u64	rdma_va;
+		} rdma;
+	};
+};
+
 struct mlx5_core_qp {
 	struct mlx5_core_rsc_common	common; /* must be first */
 	void (*event)		(struct mlx5_core_qp *, int);
+	void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
 	int			qpn;
 	struct mlx5_rsc_debug	*dbg;
 	int			pid;
@@ -530,6 +580,17 @@ static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u
 	return radix_tree_lookup(&dev->priv.mr_table.tree, key);
 }
 
+struct mlx5_page_fault_resume_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	__be32			flags_qpn;
+	u8			reserved[4];
+};
+
+struct mlx5_page_fault_resume_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
 			struct mlx5_create_qp_mbox_in *in,
@@ -549,6 +610,10 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
 int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
+				u8 context, int error);
+#endif
 
 static inline const char *mlx5_qp_type_str(int type)
 {
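A pfault_handler is expected to end by resuming the stalled QP through the new mlx5_core_page_fault_resume() entry point. The sketch below is illustrative only and assumes the low three MLX5_PAGE_FAULT_RESUME_* bits (driver.h) line up with the MLX5_PFAULT_* fault flags above, so the flags can be passed through; the wrapper name is hypothetical.

#include <linux/mlx5/driver.h>
#include <linux/mlx5/qp.h>

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/* Hypothetical helper: report fault handling completion (or failure) to HW. */
static void demo_pfault_done(struct mlx5_core_dev *dev,
			     struct mlx5_core_qp *qp,
			     struct mlx5_pagefault *pfault, int error)
{
	if (mlx5_core_page_fault_resume(dev, qp->qpn, pfault->flags, error))
		pr_warn("failed to resume QP 0x%x after page fault\n", qp->qpn);
}
#endif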
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a2bf41e0bde9..2d83cfd7e6ce 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -38,11 +38,12 @@
 #include <linux/workqueue.h>
 
 struct ib_ucontext;
+struct ib_umem_odp;
 
 struct ib_umem {
 	struct ib_ucontext     *context;
 	size_t			length;
-	int			offset;
+	unsigned long		address;
 	int			page_size;
 	int			writable;
 	int			hugetlb;
@@ -50,17 +51,43 @@ struct ib_umem {
 	struct pid	       *pid;
 	struct mm_struct       *mm;
 	unsigned long		diff;
+	struct ib_umem_odp     *odp_data;
 	struct sg_table sg_head;
 	int		nmap;
 	int		npages;
 };
 
+/* Returns the offset of the umem start relative to the first page. */
+static inline int ib_umem_offset(struct ib_umem *umem)
+{
+	return umem->address & ((unsigned long)umem->page_size - 1);
+}
+
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem *umem)
+{
+	return umem->address - ib_umem_offset(umem);
+}
+
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem *umem)
+{
+	return PAGE_ALIGN(umem->address + umem->length);
+}
+
+static inline size_t ib_umem_num_pages(struct ib_umem *umem)
+{
+	return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
+}
+
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 			    size_t size, int access, int dmasync);
 void ib_umem_release(struct ib_umem *umem);
 int ib_umem_page_count(struct ib_umem *umem);
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      size_t length);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
@@ -73,7 +100,10 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
 }
 static inline void ib_umem_release(struct ib_umem *umem) { }
 static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
-
+static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+				    size_t length) {
+	return -EINVAL;
+}
 #endif /* CONFIG_INFINIBAND_USER_MEM */
 
 #endif /* IB_UMEM_H */
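Since the umem now records the full user virtual address instead of just a page offset, the new helpers derive page-aligned bounds from it. A worked example with the values plugged into the helpers above (illustrative; the demo_dma_len() name is hypothetical):

/*
 * For a umem registered at address 0x12345 with length 0x3000 and 4 KiB pages:
 *   ib_umem_offset(umem)    == 0x345
 *   ib_umem_start(umem)     == 0x12000
 *   ib_umem_end(umem)       == 0x16000   (PAGE_ALIGN(0x12345 + 0x3000))
 *   ib_umem_num_pages(umem) == 4
 */
static inline size_t demo_dma_len(struct ib_umem *umem)
{
	/* Bytes covered when mapping whole pages for this umem. */
	return ib_umem_num_pages(umem) << PAGE_SHIFT;
}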
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
new file mode 100644
index 000000000000..3da0b167041b
--- /dev/null
+++ b/include/rdma/ib_umem_odp.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_ODP_H
+#define IB_UMEM_ODP_H
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <linux/interval_tree.h>
+
+struct umem_odp_node {
+	u64 __subtree_last;
+	struct rb_node rb;
+};
+
+struct ib_umem_odp {
+	/*
+	 * An array of the pages included in the on-demand paging umem.
+	 * Indices of pages that are currently not mapped into the device will
+	 * contain NULL.
+	 */
+	struct page		**page_list;
+	/*
+	 * An array of the same size as page_list, with DMA addresses mapped
+	 * for pages the pages in page_list. The lower two bits designate
+	 * access permissions. See ODP_READ_ALLOWED_BIT and
+	 * ODP_WRITE_ALLOWED_BIT.
+	 */
+	dma_addr_t		*dma_list;
+	/*
+	 * The umem_mutex protects the page_list and dma_list fields of an ODP
+	 * umem, allowing only a single thread to map/unmap pages. The mutex
+	 * also protects access to the mmu notifier counters.
+	 */
+	struct mutex		umem_mutex;
+	void			*private; /* for the HW driver to use. */
+
+	/* When false, use the notifier counter in the ucontext struct. */
+	bool mn_counters_active;
+	int notifiers_seq;
+	int notifiers_count;
+
+	/* A linked list of umems that don't have private mmu notifier
+	 * counters yet. */
+	struct list_head no_private_counters;
+	struct ib_umem		*umem;
+
+	/* Tree tracking */
+	struct umem_odp_node	interval_tree;
+
+	struct completion	notifier_completion;
+	int			dying;
+};
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+
+void ib_umem_odp_release(struct ib_umem *umem);
+
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ */
+#define ODP_READ_ALLOWED_BIT  (1<<0ULL)
+#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
+			      u64 access_mask, unsigned long current_seq);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+				 u64 bound);
+
+void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root);
+void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root);
+typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+			      void *cookie);
+/*
+ * Call the callback on each ib_umem in the range. Returns the logical or of
+ * the return values of the functions called.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
+				  umem_call_back cb, void *cookie);
+
+struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
+					     u64 start, u64 last);
+struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
+					    u64 start, u64 last);
+
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+					     unsigned long mmu_seq)
+{
+	/*
+	 * This code is strongly based on the KVM code from
+	 * mmu_notifier_retry. Should be called with
+	 * the relevant locks taken (item->odp_data->umem_mutex
+	 * and the ucontext umem_mutex semaphore locked for read).
+	 */
+
+	/* Do not allow page faults while the new ib_umem hasn't seen a state
+	 * with zero notifiers yet, and doesn't have its own valid set of
+	 * private counters. */
+	if (!item->odp_data->mn_counters_active)
+		return 1;
+
+	if (unlikely(item->odp_data->notifiers_count))
+		return 1;
+	if (item->odp_data->notifiers_seq != mmu_seq)
+		return 1;
+	return 0;
+}
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+static inline int ib_umem_odp_get(struct ib_ucontext *context,
+				  struct ib_umem *umem)
+{
+	return -EINVAL;
+}
+
+static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+#endif /* IB_UMEM_ODP_H */
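The sequence-number protocol that ib_umem_mmu_notifier_retry() supports is the same KVM-style pattern its comment refers to: sample the notifier sequence, fault the pages in, then re-check under umem_mutex before handing translations to the hardware. A minimal sketch, assuming a driver fault path with a hypothetical demo_fault_range() wrapper; only the ib_umem_odp_* helpers and ODP_*_ALLOWED_BIT come from this header:

#include <rdma/ib_umem_odp.h>

static int demo_fault_range(struct ib_umem *umem, u64 io_virt, u64 bcnt,
			    bool write)
{
	u64 access = ODP_READ_ALLOWED_BIT |
		     (write ? ODP_WRITE_ALLOWED_BIT : 0);
	unsigned long seq;
	int npages;

	/* Sample the notifier sequence before touching the page lists. */
	seq = ACCESS_ONCE(umem->odp_data->notifiers_seq);
	smp_rmb();

	npages = ib_umem_odp_map_dma_pages(umem, io_virt, bcnt, access, seq);
	if (npages < 0)
		return npages;

	mutex_lock(&umem->odp_data->umem_mutex);
	if (!ib_umem_mmu_notifier_retry(umem, seq)) {
		/* No invalidation raced us; safe to update the device MTTs here. */
	}
	mutex_unlock(&umem->odp_data->umem_mutex);

	return npages;
}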
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 470a011d6fa4..0d74f1de99aa 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -51,6 +51,7 @@
 #include <uapi/linux/if_ether.h>
 
 #include <linux/atomic.h>
+#include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 
 extern struct workqueue_struct *ib_wq;
@@ -123,7 +124,8 @@ enum ib_device_cap_flags {
 	IB_DEVICE_MEM_WINDOW_TYPE_2A	= (1<<23),
 	IB_DEVICE_MEM_WINDOW_TYPE_2B	= (1<<24),
 	IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
-	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30)
+	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30),
+	IB_DEVICE_ON_DEMAND_PAGING	= (1<<31),
 };
 
 enum ib_signature_prot_cap {
@@ -143,6 +145,27 @@ enum ib_atomic_cap {
 	IB_ATOMIC_GLOB
 };
 
+enum ib_odp_general_cap_bits {
+	IB_ODP_SUPPORT = 1 << 0,
+};
+
+enum ib_odp_transport_cap_bits {
+	IB_ODP_SUPPORT_SEND	= 1 << 0,
+	IB_ODP_SUPPORT_RECV	= 1 << 1,
+	IB_ODP_SUPPORT_WRITE	= 1 << 2,
+	IB_ODP_SUPPORT_READ	= 1 << 3,
+	IB_ODP_SUPPORT_ATOMIC	= 1 << 4,
+};
+
+struct ib_odp_caps {
+	uint64_t general_caps;
+	struct {
+		uint32_t rc_odp_caps;
+		uint32_t uc_odp_caps;
+		uint32_t ud_odp_caps;
+	} per_transport_caps;
+};
+
 struct ib_device_attr {
 	u64			fw_ver;
 	__be64			sys_image_guid;
@@ -186,6 +209,7 @@ struct ib_device_attr {
 	u8			local_ca_ack_delay;
 	int			sig_prot_cap;
 	int			sig_guard_cap;
+	struct ib_odp_caps	odp_caps;
 };
 
 enum ib_mtu {
@@ -1073,7 +1097,8 @@ enum ib_access_flags {
 	IB_ACCESS_REMOTE_READ	= (1<<2),
 	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
 	IB_ACCESS_MW_BIND	= (1<<4),
-	IB_ZERO_BASED		= (1<<5)
+	IB_ZERO_BASED		= (1<<5),
+	IB_ACCESS_ON_DEMAND	= (1<<6),
 };
 
 struct ib_phys_buf {
@@ -1115,6 +1140,8 @@ struct ib_fmr_attr {
 	u8	page_shift;
 };
 
+struct ib_umem;
+
 struct ib_ucontext {
 	struct ib_device       *device;
 	struct list_head	pd_list;
@@ -1127,6 +1154,24 @@ struct ib_ucontext {
 	struct list_head	xrcd_list;
 	struct list_head	rule_list;
 	int			closing;
+
+	struct pid	       *tgid;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct rb_root		umem_tree;
+	/*
+	 * Protects .umem_rbroot and tree, as well as odp_mrs_count and
+	 * mmu notifiers registration.
+	 */
+	struct rw_semaphore	umem_rwsem;
+	void (*invalidate_range)(struct ib_umem *umem,
+				 unsigned long start, unsigned long end);
+
+	struct mmu_notifier	mn;
+	atomic_t		notifier_count;
+	/* A list of umems that don't have private mmu notifier counters yet. */
+	struct list_head	no_private_counters;
+	int			odp_mrs_count;
+#endif
 };
 
 struct ib_uobject {
@@ -1662,7 +1707,10 @@ static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t
 
 static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
 {
-	return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+	size_t copy_sz;
+
+	copy_sz = min_t(size_t, len, udata->outlen);
+	return copy_to_user(udata->outbuf, src, copy_sz) ? -EFAULT : 0;
 }
 
 /**
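With ODP support now advertised both as a device capability flag and through ib_odp_caps in the device attributes, a registration path can gate the new IB_ACCESS_ON_DEMAND flag on both. An illustrative sketch; the function name is hypothetical, while ib_query_device() and the attribute layout are the existing verbs API:

#include <rdma/ib_verbs.h>

static int demo_check_odp(struct ib_device *device, int access_flags)
{
	struct ib_device_attr attr;
	int ret;

	if (!(access_flags & IB_ACCESS_ON_DEMAND))
		return 0;

	ret = ib_query_device(device, &attr);
	if (ret)
		return ret;

	if (!(attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING) ||
	    !(attr.odp_caps.general_caps & IB_ODP_SUPPORT))
		return -EOPNOTSUPP;

	return 0;
}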
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 26daf55ff76e..4275b961bf60 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -90,8 +90,9 @@ enum {
 };
 
 enum {
+	IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
 	IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
-	IB_USER_VERBS_EX_CMD_DESTROY_FLOW
+	IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
 };
 
 /*
@@ -201,6 +202,32 @@ struct ib_uverbs_query_device_resp {
 	__u8  reserved[4];
 };
 
+enum {
+	IB_USER_VERBS_EX_QUERY_DEVICE_ODP = 1ULL << 0,
+};
+
+struct ib_uverbs_ex_query_device {
+	__u32 comp_mask;
+	__u32 reserved;
+};
+
+struct ib_uverbs_odp_caps {
+	__u64 general_caps;
+	struct {
+		__u32 rc_odp_caps;
+		__u32 uc_odp_caps;
+		__u32 ud_odp_caps;
+	} per_transport_caps;
+	__u32 reserved;
+};
+
+struct ib_uverbs_ex_query_device_resp {
+	struct ib_uverbs_query_device_resp base;
+	__u32 comp_mask;
+	__u32 reserved;
+	struct ib_uverbs_odp_caps odp_caps;
+};
+
 struct ib_uverbs_query_port {
 	__u64 response;
 	__u8  port_num;
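On the userspace side, the extended query-device response embeds the legacy response and appends the ODP capabilities. A minimal, illustrative sketch of consuming it, assuming the kernel flags the presence of valid odp_caps via the IB_USER_VERBS_EX_QUERY_DEVICE_ODP comp_mask bit; the surrounding command plumbing is omitted and the function name is hypothetical:

#include <rdma/ib_user_verbs.h>
#include <stdio.h>

static void demo_print_odp_caps(const struct ib_uverbs_ex_query_device_resp *resp)
{
	if (!(resp->comp_mask & IB_USER_VERBS_EX_QUERY_DEVICE_ODP)) {
		printf("kernel did not report ODP caps\n");
		return;
	}
	printf("general 0x%llx rc 0x%x uc 0x%x ud 0x%x\n",
	       (unsigned long long)resp->odp_caps.general_caps,
	       resp->odp_caps.per_transport_caps.rc_odp_caps,
	       resp->odp_caps.per_transport_caps.uc_odp_caps,
	       resp->odp_caps.per_transport_caps.ud_odp_caps);
}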