 drivers/infiniband/hw/mlx5/odp.c | 79 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+), 0 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index bd1dbe5ebc15..936a6cd4ecc7 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -35,6 +35,8 @@
 
 #include "mlx5_ib.h"
 
+#define MAX_PREFETCH_LEN (4*1024*1024U)
+
 struct workqueue_struct *mlx5_ib_page_fault_wq;
 
 #define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \
@@ -490,6 +492,80 @@ resolve_page_fault:
 	free_page((unsigned long)buffer);
 }
 
+static int pages_in_range(u64 address, u32 length)
+{
+	return (ALIGN(address + length, PAGE_SIZE) -
+		(address & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
+					   struct mlx5_ib_pfault *pfault)
+{
+	struct mlx5_pagefault *mpfault = &pfault->mpfault;
+	u64 address;
+	u32 length;
+	u32 prefetch_len = mpfault->bytes_committed;
+	int prefetch_activated = 0;
+	u32 rkey = mpfault->rdma.r_key;
+	int ret;
+
+	/* The RDMA responder handler handles the page fault in two parts.
+	 * First it brings the necessary pages for the current packet
+	 * (and uses the pfault context), and then (after resuming the QP)
+	 * prefetches more pages. The second operation cannot use the pfault
+	 * context and therefore uses the dummy_pfault context allocated on
+	 * the stack */
+	struct mlx5_ib_pfault dummy_pfault = {};
+
+	dummy_pfault.mpfault.bytes_committed = 0;
+
+	mpfault->rdma.rdma_va += mpfault->bytes_committed;
+	mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
+					 mpfault->rdma.rdma_op_len);
+	mpfault->bytes_committed = 0;
+
+	address = mpfault->rdma.rdma_va;
+	length = mpfault->rdma.rdma_op_len;
+
+	/* For some operations, the hardware cannot tell the exact message
+	 * length, and in those cases it reports zero. Use prefetch
+	 * logic. */
+	if (length == 0) {
+		prefetch_activated = 1;
+		length = mpfault->rdma.packet_size;
+		prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
+	}
+
+	ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
+					    NULL);
+	if (ret == -EAGAIN) {
+		/* We're racing with an invalidation, don't prefetch */
+		prefetch_activated = 0;
+	} else if (ret < 0 || pages_in_range(address, length) > ret) {
+		mlx5_ib_page_fault_resume(qp, pfault, 1);
+		return;
+	}
+
+	mlx5_ib_page_fault_resume(qp, pfault, 0);
+
+	/* At this point, there might be a new pagefault already arriving in
+	 * the eq, switch to the dummy pagefault for the rest of the
+	 * processing. We're still OK with the objects being alive as the
+	 * work-queue is being fenced. */
+
+	if (prefetch_activated) {
+		ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
+						    address,
+						    prefetch_len,
+						    NULL);
+		if (ret < 0) {
+			pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
+				ret, prefetch_activated,
+				qp->ibqp.qp_num, address, prefetch_len);
+		}
+	}
+}
+
 void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
 			       struct mlx5_ib_pfault *pfault)
 {
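
The pages_in_range() helper added in the hunk above rounds the end of the byte range [address, address + length) up to a page boundary, rounds the start down, and shifts by PAGE_SHIFT, yielding the number of pages the range touches. The handler compares that count against the return value of pagefault_single_data_segment() to detect a partially resolved fault. Below is a standalone sketch of the arithmetic, assuming 4 KiB pages and stubbing the kernel macros locally; it is illustration only, not part of the patch.

	#include <stdio.h>
	#include <stdint.h>

	/* Local stand-ins for the kernel's page macros (4 KiB pages assumed). */
	#define PAGE_SHIFT 12
	#define PAGE_SIZE  ((uint64_t)1 << PAGE_SHIFT)
	#define PAGE_MASK  (~(PAGE_SIZE - 1))
	#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

	/* Same arithmetic as the helper in the patch. */
	static int pages_in_range(uint64_t address, uint32_t length)
	{
		return (int)((ALIGN(address + length, PAGE_SIZE) -
			      (address & PAGE_MASK)) >> PAGE_SHIFT);
	}

	int main(void)
	{
		/* 8 bytes starting at 0x1ffc straddle a page boundary. */
		printf("%d\n", pages_in_range(0x1ffc, 8));      /* prints 2 */
		/* One aligned 4 KiB page. */
		printf("%d\n", pages_in_range(0x2000, 0x1000)); /* prints 1 */
		return 0;
	}

Note that a tiny 8-byte access can still span two pages, which is why the handler checks pages_in_range(address, length) > ret rather than comparing byte counts.
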
@@ -499,6 +575,9 @@ void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
 	case MLX5_PFAULT_SUBTYPE_WQE:
 		mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
 		break;
+	case MLX5_PFAULT_SUBTYPE_RDMA:
+		mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+		break;
 	default:
 		pr_warn("Invalid page fault event subtype: 0x%x\n",
 			event_subtype);
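
Two pieces of bookkeeping in mlx5_ib_mr_rdma_pfault_handler() are easy to miss. First, the handler skips the bytes the hardware has already committed: rdma_va advances and rdma_op_len shrinks, clamped at zero by the min(). Second, when the hardware cannot tell the exact message length it reports zero, and the handler then faults in only the current packet while arming a follow-up prefetch capped at MAX_PREFETCH_LEN (4 MiB). The following standalone model reproduces just that arithmetic; the struct is a simplified stand-in whose field names mirror the patch, and the concrete numbers are made up for illustration.

	#include <stdio.h>
	#include <stdint.h>

	#define MAX_PREFETCH_LEN (4*1024*1024U)
	#define MIN(a, b) ((a) < (b) ? (a) : (b))

	/* Simplified stand-in for the driver's page-fault descriptor. */
	struct fault_model {
		uint64_t rdma_va;         /* start of the faulting RDMA range */
		uint32_t rdma_op_len;     /* total operation length, 0 if unknown */
		uint32_t packet_size;     /* length of the current packet */
		uint32_t bytes_committed; /* bytes the hardware already handled */
	};

	int main(void)
	{
		struct fault_model f = {
			.rdma_va = 0x10000,
			.rdma_op_len = 0,           /* hardware reported "unknown" */
			.packet_size = 4096,
			.bytes_committed = 8 * 1024 * 1024,
		};
		uint32_t prefetch_len = f.bytes_committed;
		int prefetch_activated = 0;

		/* Skip what the hardware already committed. */
		f.rdma_va += f.bytes_committed;
		f.rdma_op_len -= MIN(f.bytes_committed, f.rdma_op_len);
		f.bytes_committed = 0;

		uint64_t address = f.rdma_va;
		uint32_t length = f.rdma_op_len;

		/* Unknown total length: fault in the current packet only and
		 * cap the follow-up prefetch at MAX_PREFETCH_LEN. */
		if (length == 0) {
			prefetch_activated = 1;
			length = f.packet_size;
			prefetch_len = MIN(MAX_PREFETCH_LEN, prefetch_len);
		}

		printf("fault:    va=0x%llx len=%u\n",
		       (unsigned long long)address, length);
		printf("prefetch: active=%d len=%u\n",
		       prefetch_activated, prefetch_len);
		return 0;
	}

With 8 MiB already committed and an unknown total length, this prints a 4 KiB fault at the advanced address (0x810000) and a prefetch clamped from 8 MiB down to 4 MiB, mirroring what the handler would request before and after resuming the QP.
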
