author     Haggai Eran <haggaie@mellanox.com>        2014-12-11 10:04:25 -0500
committer  Roland Dreier <roland@purestorage.com>    2014-12-15 21:19:03 -0500
commit     eab668a6d082b90b806efc6da12f9b30e03f401d (patch)
tree       c3c3323be75824d3d0c254a61fa70a824a39c80d /drivers/infiniband
parent     7bdf65d411c1715d695be0d9a555d7f48d0a7220 (diff)
IB/mlx5: Add support for RDMA read/write responder page faults
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 79
1 file changed, 79 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index bd1dbe5ebc15..936a6cd4ecc7 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -35,6 +35,8 @@
 
 #include "mlx5_ib.h"
 
+#define MAX_PREFETCH_LEN (4*1024*1024U)
+
 struct workqueue_struct *mlx5_ib_page_fault_wq;
 
 #define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \
@@ -490,6 +492,80 @@ resolve_page_fault:
 	free_page((unsigned long)buffer);
 }
 
+static int pages_in_range(u64 address, u32 length)
+{
+	return (ALIGN(address + length, PAGE_SIZE) -
+		(address & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
+					   struct mlx5_ib_pfault *pfault)
+{
+	struct mlx5_pagefault *mpfault = &pfault->mpfault;
+	u64 address;
+	u32 length;
+	u32 prefetch_len = mpfault->bytes_committed;
+	int prefetch_activated = 0;
+	u32 rkey = mpfault->rdma.r_key;
+	int ret;
+
+	/* The RDMA responder handler handles the page fault in two parts.
+	 * First it brings the necessary pages for the current packet
+	 * (and uses the pfault context), and then (after resuming the QP)
+	 * prefetches more pages. The second operation cannot use the pfault
+	 * context and therefore uses the dummy_pfault context allocated on
+	 * the stack */
+	struct mlx5_ib_pfault dummy_pfault = {};
+
+	dummy_pfault.mpfault.bytes_committed = 0;
+
+	mpfault->rdma.rdma_va += mpfault->bytes_committed;
+	mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
+					 mpfault->rdma.rdma_op_len);
+	mpfault->bytes_committed = 0;
+
+	address = mpfault->rdma.rdma_va;
+	length = mpfault->rdma.rdma_op_len;
+
+	/* For some operations, the hardware cannot tell the exact message
+	 * length, and in those cases it reports zero. Use prefetch
+	 * logic. */
+	if (length == 0) {
+		prefetch_activated = 1;
+		length = mpfault->rdma.packet_size;
+		prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
+	}
+
+	ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
+					    NULL);
+	if (ret == -EAGAIN) {
+		/* We're racing with an invalidation, don't prefetch */
+		prefetch_activated = 0;
+	} else if (ret < 0 || pages_in_range(address, length) > ret) {
+		mlx5_ib_page_fault_resume(qp, pfault, 1);
+		return;
+	}
+
+	mlx5_ib_page_fault_resume(qp, pfault, 0);
+
+	/* At this point, there might be a new pagefault already arriving in
+	 * the eq, switch to the dummy pagefault for the rest of the
+	 * processing. We're still OK with the objects being alive as the
+	 * work-queue is being fenced. */
+
+	if (prefetch_activated) {
+		ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
+						    address,
+						    prefetch_len,
+						    NULL);
+		if (ret < 0) {
+			pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
+				ret, prefetch_activated,
+				qp->ibqp.qp_num, address, prefetch_len);
+		}
+	}
+}
+
 void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
 			       struct mlx5_ib_pfault *pfault)
 {
@@ -499,6 +575,9 @@ void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
 	case MLX5_PFAULT_SUBTYPE_WQE:
 		mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
 		break;
+	case MLX5_PFAULT_SUBTYPE_RDMA:
+		mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+		break;
 	default:
 		pr_warn("Invalid page fault event subtype: 0x%x\n",
 			event_subtype);
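
For reference, two pieces of arithmetic in the patch are easy to check in isolation: pages_in_range() counts every page touched by an arbitrary byte range, and the handler consumes bytes_committed (data the HCA already wrote before faulting) by advancing rdma_va and shrinking rdma_op_len. The stand-alone user-space sketch below is illustrative only, not part of the patch; it assumes a 4 KiB page size and redefines the kernel macros locally so it compiles on its own.

/* Illustrative sketch only: user-space rework of the patch's arithmetic,
 * assuming PAGE_SIZE is 4 KiB. Macro names mirror the kernel's but are
 * redefined here so the program is self-contained. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Same computation as pages_in_range() above: round the end of the range
 * up and the start down to page boundaries, then count pages in between. */
static int pages_in_range(uint64_t address, uint32_t length)
{
	return (ALIGN(address + length, PAGE_SIZE) -
		(address & PAGE_MASK)) >> PAGE_SHIFT;
}

int main(void)
{
	/* 8 bytes straddling a page boundary touch 2 pages... */
	printf("%d\n", pages_in_range(0x1ffc, 8));	/* prints 2 */
	/* ...while a page-aligned 4 KiB range touches exactly 1. */
	printf("%d\n", pages_in_range(0x2000, 4096));	/* prints 1 */

	/* The handler also skips data the HCA already committed before the
	 * fault: rdma_va advances by bytes_committed while rdma_op_len
	 * shrinks by the same amount, clamped so it cannot underflow. */
	uint64_t rdma_va = 0x10000;
	uint32_t rdma_op_len = 8192, bytes_committed = 4096;

	rdma_va += bytes_committed;
	rdma_op_len -= bytes_committed < rdma_op_len ?
		       bytes_committed : rdma_op_len;
	printf("va=0x%llx len=%u\n",
	       (unsigned long long)rdma_va, rdma_op_len); /* va=0x11000 len=4096 */
	return 0;
}

The page count matters because pagefault_single_data_segment() returns the number of pages it mapped; comparing that return value against pages_in_range(address, length) is how the handler above decides whether the fault was fully resolved or the QP must be resumed with an error.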