diff options
Diffstat (limited to 'net/sunrpc/xprtrdma/verbs.c')
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 741 |
1 files changed, 507 insertions, 234 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8ea283ecc522..a5fef5e6c323 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
284 | switch (event->event) { | 284 | switch (event->event) { |
285 | case RDMA_CM_EVENT_ADDR_RESOLVED: | 285 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | 286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
287 | ia->ri_async_rc = 0; | ||
287 | complete(&ia->ri_done); | 288 | complete(&ia->ri_done); |
288 | break; | 289 | break; |
289 | case RDMA_CM_EVENT_ADDR_ERROR: | 290 | case RDMA_CM_EVENT_ADDR_ERROR: |
@@ -338,13 +339,32 @@ connected: | |||
338 | wake_up_all(&ep->rep_connect_wait); | 339 | wake_up_all(&ep->rep_connect_wait); |
339 | break; | 340 | break; |
340 | default: | 341 | default: |
341 | ia->ri_async_rc = -EINVAL; | 342 | dprintk("RPC: %s: unexpected CM event %d\n", |
342 | dprintk("RPC: %s: unexpected CM event %X\n", | ||
343 | __func__, event->event); | 343 | __func__, event->event); |
344 | complete(&ia->ri_done); | ||
345 | break; | 344 | break; |
346 | } | 345 | } |
347 | 346 | ||
347 | #ifdef RPC_DEBUG | ||
348 | if (connstate == 1) { | ||
349 | int ird = attr.max_dest_rd_atomic; | ||
350 | int tird = ep->rep_remote_cma.responder_resources; | ||
351 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
352 | "on %s, memreg %d slots %d ird %d%s\n", | ||
353 | NIPQUAD(addr->sin_addr.s_addr), | ||
354 | ntohs(addr->sin_port), | ||
355 | ia->ri_id->device->name, | ||
356 | ia->ri_memreg_strategy, | ||
357 | xprt->rx_buf.rb_max_requests, | ||
358 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | ||
359 | } else if (connstate < 0) { | ||
360 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
361 | "closed (%d)\n", | ||
362 | NIPQUAD(addr->sin_addr.s_addr), | ||
363 | ntohs(addr->sin_port), | ||
364 | connstate); | ||
365 | } | ||
366 | #endif | ||
367 | |||
348 | return 0; | 368 | return 0; |
349 | } | 369 | } |
350 | 370 | ||
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
355 | struct rdma_cm_id *id; | 375 | struct rdma_cm_id *id; |
356 | int rc; | 376 | int rc; |
357 | 377 | ||
378 | init_completion(&ia->ri_done); | ||
379 | |||
358 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); | 380 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); |
359 | if (IS_ERR(id)) { | 381 | if (IS_ERR(id)) { |
360 | rc = PTR_ERR(id); | 382 | rc = PTR_ERR(id); |
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
363 | return id; | 385 | return id; |
364 | } | 386 | } |
365 | 387 | ||
366 | ia->ri_async_rc = 0; | 388 | ia->ri_async_rc = -ETIMEDOUT; |
367 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); | 389 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); |
368 | if (rc) { | 390 | if (rc) { |
369 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", | 391 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", |
370 | __func__, rc); | 392 | __func__, rc); |
371 | goto out; | 393 | goto out; |
372 | } | 394 | } |
373 | wait_for_completion(&ia->ri_done); | 395 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
396 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
374 | rc = ia->ri_async_rc; | 397 | rc = ia->ri_async_rc; |
375 | if (rc) | 398 | if (rc) |
376 | goto out; | 399 | goto out; |
377 | 400 | ||
378 | ia->ri_async_rc = 0; | 401 | ia->ri_async_rc = -ETIMEDOUT; |
379 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); | 402 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); |
380 | if (rc) { | 403 | if (rc) { |
381 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", | 404 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", |
382 | __func__, rc); | 405 | __func__, rc); |
383 | goto out; | 406 | goto out; |
384 | } | 407 | } |
385 | wait_for_completion(&ia->ri_done); | 408 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
409 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
386 | rc = ia->ri_async_rc; | 410 | rc = ia->ri_async_rc; |
387 | if (rc) | 411 | if (rc) |
388 | goto out; | 412 | goto out; |
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq) | |||
423 | int | 447 | int |
424 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | 448 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) |
425 | { | 449 | { |
426 | int rc; | 450 | int rc, mem_priv; |
451 | struct ib_device_attr devattr; | ||
427 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 452 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
428 | 453 | ||
429 | init_completion(&ia->ri_done); | ||
430 | |||
431 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); | 454 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); |
432 | if (IS_ERR(ia->ri_id)) { | 455 | if (IS_ERR(ia->ri_id)) { |
433 | rc = PTR_ERR(ia->ri_id); | 456 | rc = PTR_ERR(ia->ri_id); |
@@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
443 | } | 466 | } |
444 | 467 | ||
445 | /* | 468 | /* |
469 | * Query the device to determine if the requested memory | ||
470 | * registration strategy is supported. If it isn't, set the | ||
471 | * strategy to a globally supported model. | ||
472 | */ | ||
473 | rc = ib_query_device(ia->ri_id->device, &devattr); | ||
474 | if (rc) { | ||
475 | dprintk("RPC: %s: ib_query_device failed %d\n", | ||
476 | __func__, rc); | ||
477 | goto out2; | ||
478 | } | ||
479 | |||
480 | if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { | ||
481 | ia->ri_have_dma_lkey = 1; | ||
482 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; | ||
483 | } | ||
484 | |||
485 | switch (memreg) { | ||
486 | case RPCRDMA_MEMWINDOWS: | ||
487 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
488 | if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { | ||
489 | dprintk("RPC: %s: MEMWINDOWS registration " | ||
490 | "specified but not supported by adapter, " | ||
491 | "using slower RPCRDMA_REGISTER\n", | ||
492 | __func__); | ||
493 | memreg = RPCRDMA_REGISTER; | ||
494 | } | ||
495 | break; | ||
496 | case RPCRDMA_MTHCAFMR: | ||
497 | if (!ia->ri_id->device->alloc_fmr) { | ||
498 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
499 | dprintk("RPC: %s: MTHCAFMR registration " | ||
500 | "specified but not supported by adapter, " | ||
501 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
502 | __func__); | ||
503 | memreg = RPCRDMA_ALLPHYSICAL; | ||
504 | #else | ||
505 | dprintk("RPC: %s: MTHCAFMR registration " | ||
506 | "specified but not supported by adapter, " | ||
507 | "using slower RPCRDMA_REGISTER\n", | ||
508 | __func__); | ||
509 | memreg = RPCRDMA_REGISTER; | ||
510 | #endif | ||
511 | } | ||
512 | break; | ||
513 | case RPCRDMA_FRMR: | ||
514 | /* Requires both frmr reg and local dma lkey */ | ||
515 | if ((devattr.device_cap_flags & | ||
516 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | ||
517 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
518 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
519 | dprintk("RPC: %s: FRMR registration " | ||
520 | "specified but not supported by adapter, " | ||
521 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
522 | __func__); | ||
523 | memreg = RPCRDMA_ALLPHYSICAL; | ||
524 | #else | ||
525 | dprintk("RPC: %s: FRMR registration " | ||
526 | "specified but not supported by adapter, " | ||
527 | "using slower RPCRDMA_REGISTER\n", | ||
528 | __func__); | ||
529 | memreg = RPCRDMA_REGISTER; | ||
530 | #endif | ||
531 | } | ||
532 | break; | ||
533 | } | ||
534 | |||
535 | /* | ||
446 | * Optionally obtain an underlying physical identity mapping in | 536 | * Optionally obtain an underlying physical identity mapping in |
447 | * order to do a memory window-based bind. This base registration | 537 | * order to do a memory window-based bind. This base registration |
448 | * is protected from remote access - that is enabled only by binding | 538 | * is protected from remote access - that is enabled only by binding |
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
450 | * revoked after the corresponding completion similar to a storage | 540 | * revoked after the corresponding completion similar to a storage |
451 | * adapter. | 541 | * adapter. |
452 | */ | 542 | */ |
453 | if (memreg > RPCRDMA_REGISTER) { | 543 | switch (memreg) { |
454 | int mem_priv = IB_ACCESS_LOCAL_WRITE; | 544 | case RPCRDMA_BOUNCEBUFFERS: |
455 | switch (memreg) { | 545 | case RPCRDMA_REGISTER: |
546 | case RPCRDMA_FRMR: | ||
547 | break; | ||
456 | #if RPCRDMA_PERSISTENT_REGISTRATION | 548 | #if RPCRDMA_PERSISTENT_REGISTRATION |
457 | case RPCRDMA_ALLPHYSICAL: | 549 | case RPCRDMA_ALLPHYSICAL: |
458 | mem_priv |= IB_ACCESS_REMOTE_WRITE; | 550 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
459 | mem_priv |= IB_ACCESS_REMOTE_READ; | 551 | IB_ACCESS_REMOTE_WRITE | |
460 | break; | 552 | IB_ACCESS_REMOTE_READ; |
553 | goto register_setup; | ||
461 | #endif | 554 | #endif |
462 | case RPCRDMA_MEMWINDOWS_ASYNC: | 555 | case RPCRDMA_MEMWINDOWS_ASYNC: |
463 | case RPCRDMA_MEMWINDOWS: | 556 | case RPCRDMA_MEMWINDOWS: |
464 | mem_priv |= IB_ACCESS_MW_BIND; | 557 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
465 | break; | 558 | IB_ACCESS_MW_BIND; |
466 | default: | 559 | goto register_setup; |
560 | case RPCRDMA_MTHCAFMR: | ||
561 | if (ia->ri_have_dma_lkey) | ||
467 | break; | 562 | break; |
468 | } | 563 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
564 | register_setup: | ||
469 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); | 565 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); |
470 | if (IS_ERR(ia->ri_bind_mem)) { | 566 | if (IS_ERR(ia->ri_bind_mem)) { |
471 | printk(KERN_ALERT "%s: ib_get_dma_mr for " | 567 | printk(KERN_ALERT "%s: ib_get_dma_mr for " |
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
475 | memreg = RPCRDMA_REGISTER; | 571 | memreg = RPCRDMA_REGISTER; |
476 | ia->ri_bind_mem = NULL; | 572 | ia->ri_bind_mem = NULL; |
477 | } | 573 | } |
574 | break; | ||
575 | default: | ||
576 | printk(KERN_ERR "%s: invalid memory registration mode %d\n", | ||
577 | __func__, memreg); | ||
578 | rc = -EINVAL; | ||
579 | goto out2; | ||
478 | } | 580 | } |
581 | dprintk("RPC: %s: memory registration strategy is %d\n", | ||
582 | __func__, memreg); | ||
479 | 583 | ||
480 | /* Else will do memory reg/dereg for each chunk */ | 584 | /* Else will do memory reg/dereg for each chunk */ |
481 | ia->ri_memreg_strategy = memreg; | 585 | ia->ri_memreg_strategy = memreg; |
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
483 | return 0; | 587 | return 0; |
484 | out2: | 588 | out2: |
485 | rdma_destroy_id(ia->ri_id); | 589 | rdma_destroy_id(ia->ri_id); |
590 | ia->ri_id = NULL; | ||
486 | out1: | 591 | out1: |
487 | return rc; | 592 | return rc; |
488 | } | 593 | } |
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) | |||
503 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", | 608 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", |
504 | __func__, rc); | 609 | __func__, rc); |
505 | } | 610 | } |
506 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) | 611 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
507 | rdma_destroy_qp(ia->ri_id); | 612 | if (ia->ri_id->qp) |
613 | rdma_destroy_qp(ia->ri_id); | ||
614 | rdma_destroy_id(ia->ri_id); | ||
615 | ia->ri_id = NULL; | ||
616 | } | ||
508 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { | 617 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { |
509 | rc = ib_dealloc_pd(ia->ri_pd); | 618 | rc = ib_dealloc_pd(ia->ri_pd); |
510 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", | 619 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", |
511 | __func__, rc); | 620 | __func__, rc); |
512 | } | 621 | } |
513 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) | ||
514 | rdma_destroy_id(ia->ri_id); | ||
515 | } | 622 | } |
516 | 623 | ||
517 | /* | 624 | /* |
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
541 | ep->rep_attr.srq = NULL; | 648 | ep->rep_attr.srq = NULL; |
542 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 649 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
543 | switch (ia->ri_memreg_strategy) { | 650 | switch (ia->ri_memreg_strategy) { |
651 | case RPCRDMA_FRMR: | ||
652 | /* Add room for frmr register and invalidate WRs */ | ||
653 | ep->rep_attr.cap.max_send_wr *= 3; | ||
654 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) | ||
655 | return -EINVAL; | ||
656 | break; | ||
544 | case RPCRDMA_MEMWINDOWS_ASYNC: | 657 | case RPCRDMA_MEMWINDOWS_ASYNC: |
545 | case RPCRDMA_MEMWINDOWS: | 658 | case RPCRDMA_MEMWINDOWS: |
546 | /* Add room for mw_binds+unbinds - overkill! */ | 659 | /* Add room for mw_binds+unbinds - overkill! */ |
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
617 | ep->rep_remote_cma.private_data_len = 0; | 730 | ep->rep_remote_cma.private_data_len = 0; |
618 | 731 | ||
619 | /* Client offers RDMA Read but does not initiate */ | 732 | /* Client offers RDMA Read but does not initiate */ |
620 | switch (ia->ri_memreg_strategy) { | 733 | ep->rep_remote_cma.initiator_depth = 0; |
621 | case RPCRDMA_BOUNCEBUFFERS: | 734 | if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) |
622 | ep->rep_remote_cma.responder_resources = 0; | 735 | ep->rep_remote_cma.responder_resources = 0; |
623 | break; | 736 | else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ |
624 | case RPCRDMA_MTHCAFMR: | 737 | ep->rep_remote_cma.responder_resources = 32; |
625 | case RPCRDMA_REGISTER: | 738 | else |
626 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
627 | (RPCRDMA_MAX_DATA_SEGS / 8); | ||
628 | break; | ||
629 | case RPCRDMA_MEMWINDOWS: | ||
630 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
631 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
632 | case RPCRDMA_ALLPHYSICAL: | ||
633 | #endif | ||
634 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
635 | (RPCRDMA_MAX_DATA_SEGS / 2); | ||
636 | break; | ||
637 | default: | ||
638 | break; | ||
639 | } | ||
640 | if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) | ||
641 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; | 739 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; |
642 | ep->rep_remote_cma.initiator_depth = 0; | ||
643 | 740 | ||
644 | ep->rep_remote_cma.retry_count = 7; | 741 | ep->rep_remote_cma.retry_count = 7; |
645 | ep->rep_remote_cma.flow_control = 0; | 742 | ep->rep_remote_cma.flow_control = 0; |
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
679 | if (rc) | 776 | if (rc) |
680 | dprintk("RPC: %s: rpcrdma_ep_disconnect" | 777 | dprintk("RPC: %s: rpcrdma_ep_disconnect" |
681 | " returned %i\n", __func__, rc); | 778 | " returned %i\n", __func__, rc); |
779 | rdma_destroy_qp(ia->ri_id); | ||
780 | ia->ri_id->qp = NULL; | ||
682 | } | 781 | } |
683 | 782 | ||
684 | ep->rep_func = NULL; | ||
685 | |||
686 | /* padding - could be done in rpcrdma_buffer_destroy... */ | 783 | /* padding - could be done in rpcrdma_buffer_destroy... */ |
687 | if (ep->rep_pad_mr) { | 784 | if (ep->rep_pad_mr) { |
688 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); | 785 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); |
689 | ep->rep_pad_mr = NULL; | 786 | ep->rep_pad_mr = NULL; |
690 | } | 787 | } |
691 | 788 | ||
692 | if (ia->ri_id->qp) { | ||
693 | rdma_destroy_qp(ia->ri_id); | ||
694 | ia->ri_id->qp = NULL; | ||
695 | } | ||
696 | |||
697 | rpcrdma_clean_cq(ep->rep_cq); | 789 | rpcrdma_clean_cq(ep->rep_cq); |
698 | rc = ib_destroy_cq(ep->rep_cq); | 790 | rc = ib_destroy_cq(ep->rep_cq); |
699 | if (rc) | 791 | if (rc) |
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
712 | struct rdma_cm_id *id; | 804 | struct rdma_cm_id *id; |
713 | int rc = 0; | 805 | int rc = 0; |
714 | int retry_count = 0; | 806 | int retry_count = 0; |
715 | int reconnect = (ep->rep_connected != 0); | ||
716 | 807 | ||
717 | if (reconnect) { | 808 | if (ep->rep_connected != 0) { |
718 | struct rpcrdma_xprt *xprt; | 809 | struct rpcrdma_xprt *xprt; |
719 | retry: | 810 | retry: |
720 | rc = rpcrdma_ep_disconnect(ep, ia); | 811 | rc = rpcrdma_ep_disconnect(ep, ia); |
@@ -745,6 +836,7 @@ retry: | |||
745 | goto out; | 836 | goto out; |
746 | } | 837 | } |
747 | /* END TEMP */ | 838 | /* END TEMP */ |
839 | rdma_destroy_qp(ia->ri_id); | ||
748 | rdma_destroy_id(ia->ri_id); | 840 | rdma_destroy_id(ia->ri_id); |
749 | ia->ri_id = id; | 841 | ia->ri_id = id; |
750 | } | 842 | } |
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
769 | } | 861 | } |
770 | } | 862 | } |
771 | 863 | ||
772 | /* Theoretically a client initiator_depth > 0 is not needed, | ||
773 | * but many peers fail to complete the connection unless they | ||
774 | * == responder_resources! */ | ||
775 | if (ep->rep_remote_cma.initiator_depth != | ||
776 | ep->rep_remote_cma.responder_resources) | ||
777 | ep->rep_remote_cma.initiator_depth = | ||
778 | ep->rep_remote_cma.responder_resources; | ||
779 | |||
780 | ep->rep_connected = 0; | 864 | ep->rep_connected = 0; |
781 | 865 | ||
782 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); | 866 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
786 | goto out; | 870 | goto out; |
787 | } | 871 | } |
788 | 872 | ||
789 | if (reconnect) | ||
790 | return 0; | ||
791 | |||
792 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); | 873 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
793 | 874 | ||
794 | /* | 875 | /* |
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
805 | if (ep->rep_connected <= 0) { | 886 | if (ep->rep_connected <= 0) { |
806 | /* Sometimes, the only way to reliably connect to remote | 887 | /* Sometimes, the only way to reliably connect to remote |
807 | * CMs is to use same nonzero values for ORD and IRD. */ | 888 | * CMs is to use same nonzero values for ORD and IRD. */ |
808 | ep->rep_remote_cma.initiator_depth = | 889 | if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && |
809 | ep->rep_remote_cma.responder_resources; | 890 | (ep->rep_remote_cma.responder_resources == 0 || |
810 | if (ep->rep_remote_cma.initiator_depth == 0) | 891 | ep->rep_remote_cma.initiator_depth != |
811 | ++ep->rep_remote_cma.initiator_depth; | 892 | ep->rep_remote_cma.responder_resources)) { |
812 | if (ep->rep_remote_cma.responder_resources == 0) | 893 | if (ep->rep_remote_cma.responder_resources == 0) |
813 | ++ep->rep_remote_cma.responder_resources; | 894 | ep->rep_remote_cma.responder_resources = 1; |
814 | if (retry_count++ == 0) | 895 | ep->rep_remote_cma.initiator_depth = |
896 | ep->rep_remote_cma.responder_resources; | ||
815 | goto retry; | 897 | goto retry; |
898 | } | ||
816 | rc = ep->rep_connected; | 899 | rc = ep->rep_connected; |
817 | } else { | 900 | } else { |
818 | dprintk("RPC: %s: connected\n", __func__); | 901 | dprintk("RPC: %s: connected\n", __func__); |
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
863 | char *p; | 946 | char *p; |
864 | size_t len; | 947 | size_t len; |
865 | int i, rc; | 948 | int i, rc; |
949 | struct rpcrdma_mw *r; | ||
866 | 950 | ||
867 | buf->rb_max_requests = cdata->max_requests; | 951 | buf->rb_max_requests = cdata->max_requests; |
868 | spin_lock_init(&buf->rb_lock); | 952 | spin_lock_init(&buf->rb_lock); |
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
873 | * 2. arrays of struct rpcrdma_req to fill in pointers | 957 | * 2. arrays of struct rpcrdma_req to fill in pointers |
874 | * 3. array of struct rpcrdma_rep for replies | 958 | * 3. array of struct rpcrdma_rep for replies |
875 | * 4. padding, if any | 959 | * 4. padding, if any |
876 | * 5. mw's, if any | 960 | * 5. mw's, fmr's or frmr's, if any |
877 | * Send/recv buffers in req/rep need to be registered | 961 | * Send/recv buffers in req/rep need to be registered |
878 | */ | 962 | */ |
879 | 963 | ||
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
881 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); | 965 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); |
882 | len += cdata->padding; | 966 | len += cdata->padding; |
883 | switch (ia->ri_memreg_strategy) { | 967 | switch (ia->ri_memreg_strategy) { |
968 | case RPCRDMA_FRMR: | ||
969 | len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * | ||
970 | sizeof(struct rpcrdma_mw); | ||
971 | break; | ||
884 | case RPCRDMA_MTHCAFMR: | 972 | case RPCRDMA_MTHCAFMR: |
885 | /* TBD we are perhaps overallocating here */ | 973 | /* TBD we are perhaps overallocating here */ |
886 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | 974 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * |
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
927 | * and also reduce unbind-to-bind collision. | 1015 | * and also reduce unbind-to-bind collision. |
928 | */ | 1016 | */ |
929 | INIT_LIST_HEAD(&buf->rb_mws); | 1017 | INIT_LIST_HEAD(&buf->rb_mws); |
1018 | r = (struct rpcrdma_mw *)p; | ||
930 | switch (ia->ri_memreg_strategy) { | 1019 | switch (ia->ri_memreg_strategy) { |
1020 | case RPCRDMA_FRMR: | ||
1021 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | ||
1022 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1023 | RPCRDMA_MAX_SEGS); | ||
1024 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1025 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1026 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1027 | " failed %i\n", __func__, rc); | ||
1028 | goto out; | ||
1029 | } | ||
1030 | r->r.frmr.fr_pgl = | ||
1031 | ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1032 | RPCRDMA_MAX_SEGS); | ||
1033 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1034 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1035 | dprintk("RPC: %s: " | ||
1036 | "ib_alloc_fast_reg_page_list " | ||
1037 | "failed %i\n", __func__, rc); | ||
1038 | goto out; | ||
1039 | } | ||
1040 | list_add(&r->mw_list, &buf->rb_mws); | ||
1041 | ++r; | ||
1042 | } | ||
1043 | break; | ||
931 | case RPCRDMA_MTHCAFMR: | 1044 | case RPCRDMA_MTHCAFMR: |
932 | { | ||
933 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
934 | struct ib_fmr_attr fa = { | ||
935 | RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT | ||
936 | }; | ||
937 | /* TBD we are perhaps overallocating here */ | 1045 | /* TBD we are perhaps overallocating here */ |
938 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1046 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
1047 | static struct ib_fmr_attr fa = | ||
1048 | { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT }; | ||
939 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, | 1049 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, |
940 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, | 1050 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, |
941 | &fa); | 1051 | &fa); |
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
948 | list_add(&r->mw_list, &buf->rb_mws); | 1058 | list_add(&r->mw_list, &buf->rb_mws); |
949 | ++r; | 1059 | ++r; |
950 | } | 1060 | } |
951 | } | ||
952 | break; | 1061 | break; |
953 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1062 | case RPCRDMA_MEMWINDOWS_ASYNC: |
954 | case RPCRDMA_MEMWINDOWS: | 1063 | case RPCRDMA_MEMWINDOWS: |
955 | { | ||
956 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
957 | /* Allocate one extra request's worth, for full cycling */ | 1064 | /* Allocate one extra request's worth, for full cycling */ |
958 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1065 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
959 | r->r.mw = ib_alloc_mw(ia->ri_pd); | 1066 | r->r.mw = ib_alloc_mw(ia->ri_pd); |
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
966 | list_add(&r->mw_list, &buf->rb_mws); | 1073 | list_add(&r->mw_list, &buf->rb_mws); |
967 | ++r; | 1074 | ++r; |
968 | } | 1075 | } |
969 | } | ||
970 | break; | 1076 | break; |
971 | default: | 1077 | default: |
972 | break; | 1078 | break; |
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1046 | { | 1152 | { |
1047 | int rc, i; | 1153 | int rc, i; |
1048 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | 1154 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
1155 | struct rpcrdma_mw *r; | ||
1049 | 1156 | ||
1050 | /* clean up in reverse order from create | 1157 | /* clean up in reverse order from create |
1051 | * 1. recv mr memory (mr free, then kfree) | 1158 | * 1. recv mr memory (mr free, then kfree) |
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1065 | } | 1172 | } |
1066 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { | 1173 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { |
1067 | while (!list_empty(&buf->rb_mws)) { | 1174 | while (!list_empty(&buf->rb_mws)) { |
1068 | struct rpcrdma_mw *r; | ||
1069 | r = list_entry(buf->rb_mws.next, | 1175 | r = list_entry(buf->rb_mws.next, |
1070 | struct rpcrdma_mw, mw_list); | 1176 | struct rpcrdma_mw, mw_list); |
1071 | list_del(&r->mw_list); | 1177 | list_del(&r->mw_list); |
1072 | switch (ia->ri_memreg_strategy) { | 1178 | switch (ia->ri_memreg_strategy) { |
1179 | case RPCRDMA_FRMR: | ||
1180 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1181 | if (rc) | ||
1182 | dprintk("RPC: %s:" | ||
1183 | " ib_dereg_mr" | ||
1184 | " failed %i\n", | ||
1185 | __func__, rc); | ||
1186 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1187 | break; | ||
1073 | case RPCRDMA_MTHCAFMR: | 1188 | case RPCRDMA_MTHCAFMR: |
1074 | rc = ib_dealloc_fmr(r->r.fmr); | 1189 | rc = ib_dealloc_fmr(r->r.fmr); |
1075 | if (rc) | 1190 | if (rc) |
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1115 | { | 1230 | { |
1116 | struct rpcrdma_req *req; | 1231 | struct rpcrdma_req *req; |
1117 | unsigned long flags; | 1232 | unsigned long flags; |
1233 | int i; | ||
1234 | struct rpcrdma_mw *r; | ||
1118 | 1235 | ||
1119 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1236 | spin_lock_irqsave(&buffers->rb_lock, flags); |
1120 | if (buffers->rb_send_index == buffers->rb_max_requests) { | 1237 | if (buffers->rb_send_index == buffers->rb_max_requests) { |
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1135 | } | 1252 | } |
1136 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; | 1253 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; |
1137 | if (!list_empty(&buffers->rb_mws)) { | 1254 | if (!list_empty(&buffers->rb_mws)) { |
1138 | int i = RPCRDMA_MAX_SEGS - 1; | 1255 | i = RPCRDMA_MAX_SEGS - 1; |
1139 | do { | 1256 | do { |
1140 | struct rpcrdma_mw *r; | ||
1141 | r = list_entry(buffers->rb_mws.next, | 1257 | r = list_entry(buffers->rb_mws.next, |
1142 | struct rpcrdma_mw, mw_list); | 1258 | struct rpcrdma_mw, mw_list); |
1143 | list_del(&r->mw_list); | 1259 | list_del(&r->mw_list); |
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) | |||
1171 | req->rl_reply = NULL; | 1287 | req->rl_reply = NULL; |
1172 | } | 1288 | } |
1173 | switch (ia->ri_memreg_strategy) { | 1289 | switch (ia->ri_memreg_strategy) { |
1290 | case RPCRDMA_FRMR: | ||
1174 | case RPCRDMA_MTHCAFMR: | 1291 | case RPCRDMA_MTHCAFMR: |
1175 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1292 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1176 | case RPCRDMA_MEMWINDOWS: | 1293 | case RPCRDMA_MEMWINDOWS: |
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | |||
1252 | va, len, DMA_BIDIRECTIONAL); | 1369 | va, len, DMA_BIDIRECTIONAL); |
1253 | iov->length = len; | 1370 | iov->length = len; |
1254 | 1371 | ||
1255 | if (ia->ri_bind_mem != NULL) { | 1372 | if (ia->ri_have_dma_lkey) { |
1373 | *mrp = NULL; | ||
1374 | iov->lkey = ia->ri_dma_lkey; | ||
1375 | return 0; | ||
1376 | } else if (ia->ri_bind_mem != NULL) { | ||
1256 | *mrp = NULL; | 1377 | *mrp = NULL; |
1257 | iov->lkey = ia->ri_bind_mem->lkey; | 1378 | iov->lkey = ia->ri_bind_mem->lkey; |
1258 | return 0; | 1379 | return 0; |
@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | |||
1329 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | 1450 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); |
1330 | } | 1451 | } |
1331 | 1452 | ||
1453 | static int | ||
1454 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1455 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1456 | struct rpcrdma_xprt *r_xprt) | ||
1457 | { | ||
1458 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1459 | struct ib_send_wr frmr_wr, *bad_wr; | ||
1460 | u8 key; | ||
1461 | int len, pageoff; | ||
1462 | int i, rc; | ||
1463 | |||
1464 | pageoff = offset_in_page(seg1->mr_offset); | ||
1465 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1466 | seg1->mr_len += pageoff; | ||
1467 | len = -pageoff; | ||
1468 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1469 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1470 | for (i = 0; i < *nsegs;) { | ||
1471 | rpcrdma_map_one(ia, seg, writing); | ||
1472 | seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; | ||
1473 | len += seg->mr_len; | ||
1474 | ++seg; | ||
1475 | ++i; | ||
1476 | /* Check for holes */ | ||
1477 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1478 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1479 | break; | ||
1480 | } | ||
1481 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
1482 | __func__, seg1->mr_chunk.rl_mw, i); | ||
1483 | |||
1484 | /* Bump the key */ | ||
1485 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
1486 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
1487 | |||
1488 | /* Prepare FRMR WR */ | ||
1489 | memset(&frmr_wr, 0, sizeof frmr_wr); | ||
1490 | frmr_wr.opcode = IB_WR_FAST_REG_MR; | ||
1491 | frmr_wr.send_flags = 0; /* unsignaled */ | ||
1492 | frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; | ||
1493 | frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; | ||
1494 | frmr_wr.wr.fast_reg.page_list_len = i; | ||
1495 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1496 | frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; | ||
1497 | frmr_wr.wr.fast_reg.access_flags = (writing ? | ||
1498 | IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); | ||
1499 | frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1500 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1501 | |||
1502 | rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); | ||
1503 | |||
1504 | if (rc) { | ||
1505 | dprintk("RPC: %s: failed ib_post_send for register," | ||
1506 | " status %i\n", __func__, rc); | ||
1507 | while (i--) | ||
1508 | rpcrdma_unmap_one(ia, --seg); | ||
1509 | } else { | ||
1510 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1511 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1512 | seg1->mr_nsegs = i; | ||
1513 | seg1->mr_len = len; | ||
1514 | } | ||
1515 | *nsegs = i; | ||
1516 | return rc; | ||
1517 | } | ||
1518 | |||
1519 | static int | ||
1520 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1521 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
1522 | { | ||
1523 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1524 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
1525 | int rc; | ||
1526 | |||
1527 | while (seg1->mr_nsegs--) | ||
1528 | rpcrdma_unmap_one(ia, seg++); | ||
1529 | |||
1530 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
1531 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
1532 | invalidate_wr.send_flags = 0; /* unsignaled */ | ||
1533 | invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1534 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1535 | |||
1536 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
1537 | if (rc) | ||
1538 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
1539 | " status %i\n", __func__, rc); | ||
1540 | return rc; | ||
1541 | } | ||
1542 | |||
1543 | static int | ||
1544 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
1545 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
1546 | { | ||
1547 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1548 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
1549 | int len, pageoff, i, rc; | ||
1550 | |||
1551 | pageoff = offset_in_page(seg1->mr_offset); | ||
1552 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1553 | seg1->mr_len += pageoff; | ||
1554 | len = -pageoff; | ||
1555 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1556 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1557 | for (i = 0; i < *nsegs;) { | ||
1558 | rpcrdma_map_one(ia, seg, writing); | ||
1559 | physaddrs[i] = seg->mr_dma; | ||
1560 | len += seg->mr_len; | ||
1561 | ++seg; | ||
1562 | ++i; | ||
1563 | /* Check for holes */ | ||
1564 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1565 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1566 | break; | ||
1567 | } | ||
1568 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
1569 | physaddrs, i, seg1->mr_dma); | ||
1570 | if (rc) { | ||
1571 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
1572 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
1573 | len, (unsigned long long)seg1->mr_dma, | ||
1574 | pageoff, i, rc); | ||
1575 | while (i--) | ||
1576 | rpcrdma_unmap_one(ia, --seg); | ||
1577 | } else { | ||
1578 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
1579 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1580 | seg1->mr_nsegs = i; | ||
1581 | seg1->mr_len = len; | ||
1582 | } | ||
1583 | *nsegs = i; | ||
1584 | return rc; | ||
1585 | } | ||
1586 | |||
1587 | static int | ||
1588 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
1589 | struct rpcrdma_ia *ia) | ||
1590 | { | ||
1591 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1592 | LIST_HEAD(l); | ||
1593 | int rc; | ||
1594 | |||
1595 | list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); | ||
1596 | rc = ib_unmap_fmr(&l); | ||
1597 | while (seg1->mr_nsegs--) | ||
1598 | rpcrdma_unmap_one(ia, seg++); | ||
1599 | if (rc) | ||
1600 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
1601 | " status %i\n", __func__, rc); | ||
1602 | return rc; | ||
1603 | } | ||
1604 | |||
1605 | static int | ||
1606 | rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1607 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1608 | struct rpcrdma_xprt *r_xprt) | ||
1609 | { | ||
1610 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1611 | IB_ACCESS_REMOTE_READ); | ||
1612 | struct ib_mw_bind param; | ||
1613 | int rc; | ||
1614 | |||
1615 | *nsegs = 1; | ||
1616 | rpcrdma_map_one(ia, seg, writing); | ||
1617 | param.mr = ia->ri_bind_mem; | ||
1618 | param.wr_id = 0ULL; /* no send cookie */ | ||
1619 | param.addr = seg->mr_dma; | ||
1620 | param.length = seg->mr_len; | ||
1621 | param.send_flags = 0; | ||
1622 | param.mw_access_flags = mem_priv; | ||
1623 | |||
1624 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1625 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1626 | if (rc) { | ||
1627 | dprintk("RPC: %s: failed ib_bind_mw " | ||
1628 | "%u@0x%llx status %i\n", | ||
1629 | __func__, seg->mr_len, | ||
1630 | (unsigned long long)seg->mr_dma, rc); | ||
1631 | rpcrdma_unmap_one(ia, seg); | ||
1632 | } else { | ||
1633 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
1634 | seg->mr_base = param.addr; | ||
1635 | seg->mr_nsegs = 1; | ||
1636 | } | ||
1637 | return rc; | ||
1638 | } | ||
1639 | |||
1640 | static int | ||
1641 | rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1642 | struct rpcrdma_ia *ia, | ||
1643 | struct rpcrdma_xprt *r_xprt, void **r) | ||
1644 | { | ||
1645 | struct ib_mw_bind param; | ||
1646 | LIST_HEAD(l); | ||
1647 | int rc; | ||
1648 | |||
1649 | BUG_ON(seg->mr_nsegs != 1); | ||
1650 | param.mr = ia->ri_bind_mem; | ||
1651 | param.addr = 0ULL; /* unbind */ | ||
1652 | param.length = 0; | ||
1653 | param.mw_access_flags = 0; | ||
1654 | if (*r) { | ||
1655 | param.wr_id = (u64) (unsigned long) *r; | ||
1656 | param.send_flags = IB_SEND_SIGNALED; | ||
1657 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
1658 | } else { | ||
1659 | param.wr_id = 0ULL; | ||
1660 | param.send_flags = 0; | ||
1661 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1662 | } | ||
1663 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1664 | rpcrdma_unmap_one(ia, seg); | ||
1665 | if (rc) | ||
1666 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
1667 | " status %i\n", __func__, rc); | ||
1668 | else | ||
1669 | *r = NULL; /* will upcall on completion */ | ||
1670 | return rc; | ||
1671 | } | ||
1672 | |||
1673 | static int | ||
1674 | rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, | ||
1675 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
1676 | { | ||
1677 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1678 | IB_ACCESS_REMOTE_READ); | ||
1679 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1680 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
1681 | int len, i, rc = 0; | ||
1682 | |||
1683 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1684 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1685 | for (len = 0, i = 0; i < *nsegs;) { | ||
1686 | rpcrdma_map_one(ia, seg, writing); | ||
1687 | ipb[i].addr = seg->mr_dma; | ||
1688 | ipb[i].size = seg->mr_len; | ||
1689 | len += seg->mr_len; | ||
1690 | ++seg; | ||
1691 | ++i; | ||
1692 | /* Check for holes */ | ||
1693 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1694 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1695 | break; | ||
1696 | } | ||
1697 | seg1->mr_base = seg1->mr_dma; | ||
1698 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
1699 | ipb, i, mem_priv, &seg1->mr_base); | ||
1700 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
1701 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
1702 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
1703 | "%u@0x%llx (%d)... status %i\n", | ||
1704 | __func__, len, | ||
1705 | (unsigned long long)seg1->mr_dma, i, rc); | ||
1706 | while (i--) | ||
1707 | rpcrdma_unmap_one(ia, --seg); | ||
1708 | } else { | ||
1709 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
1710 | seg1->mr_nsegs = i; | ||
1711 | seg1->mr_len = len; | ||
1712 | } | ||
1713 | *nsegs = i; | ||
1714 | return rc; | ||
1715 | } | ||
1716 | |||
1717 | static int | ||
1718 | rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, | ||
1719 | struct rpcrdma_ia *ia) | ||
1720 | { | ||
1721 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1722 | int rc; | ||
1723 | |||
1724 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | ||
1725 | seg1->mr_chunk.rl_mr = NULL; | ||
1726 | while (seg1->mr_nsegs--) | ||
1727 | rpcrdma_unmap_one(ia, seg++); | ||
1728 | if (rc) | ||
1729 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
1730 | " status %i\n", __func__, rc); | ||
1731 | return rc; | ||
1732 | } | ||
1733 | |||
1332 | int | 1734 | int |
1333 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | 1735 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, |
1334 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | 1736 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) |
1335 | { | 1737 | { |
1336 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1738 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1337 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1338 | IB_ACCESS_REMOTE_READ); | ||
1339 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1340 | int i; | ||
1341 | int rc = 0; | 1739 | int rc = 0; |
1342 | 1740 | ||
1343 | switch (ia->ri_memreg_strategy) { | 1741 | switch (ia->ri_memreg_strategy) { |
@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
1352 | break; | 1750 | break; |
1353 | #endif | 1751 | #endif |
1354 | 1752 | ||
1355 | /* Registration using fast memory registration */ | 1753 | /* Registration using frmr registration */ |
1754 | case RPCRDMA_FRMR: | ||
1755 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
1756 | break; | ||
1757 | |||
1758 | /* Registration using fmr memory registration */ | ||
1356 | case RPCRDMA_MTHCAFMR: | 1759 | case RPCRDMA_MTHCAFMR: |
1357 | { | 1760 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); |
1358 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
1359 | int len, pageoff = offset_in_page(seg->mr_offset); | ||
1360 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1361 | seg1->mr_len += pageoff; | ||
1362 | len = -pageoff; | ||
1363 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1364 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1365 | for (i = 0; i < nsegs;) { | ||
1366 | rpcrdma_map_one(ia, seg, writing); | ||
1367 | physaddrs[i] = seg->mr_dma; | ||
1368 | len += seg->mr_len; | ||
1369 | ++seg; | ||
1370 | ++i; | ||
1371 | /* Check for holes */ | ||
1372 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
1373 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1374 | break; | ||
1375 | } | ||
1376 | nsegs = i; | ||
1377 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
1378 | physaddrs, nsegs, seg1->mr_dma); | ||
1379 | if (rc) { | ||
1380 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
1381 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
1382 | len, (unsigned long long)seg1->mr_dma, | ||
1383 | pageoff, nsegs, rc); | ||
1384 | while (nsegs--) | ||
1385 | rpcrdma_unmap_one(ia, --seg); | ||
1386 | } else { | ||
1387 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
1388 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1389 | seg1->mr_nsegs = nsegs; | ||
1390 | seg1->mr_len = len; | ||
1391 | } | ||
1392 | } | ||
1393 | break; | 1761 | break; |
1394 | 1762 | ||
1395 | /* Registration using memory windows */ | 1763 | /* Registration using memory windows */ |
1396 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1764 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1397 | case RPCRDMA_MEMWINDOWS: | 1765 | case RPCRDMA_MEMWINDOWS: |
1398 | { | 1766 | rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); |
1399 | struct ib_mw_bind param; | ||
1400 | rpcrdma_map_one(ia, seg, writing); | ||
1401 | param.mr = ia->ri_bind_mem; | ||
1402 | param.wr_id = 0ULL; /* no send cookie */ | ||
1403 | param.addr = seg->mr_dma; | ||
1404 | param.length = seg->mr_len; | ||
1405 | param.send_flags = 0; | ||
1406 | param.mw_access_flags = mem_priv; | ||
1407 | |||
1408 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1409 | rc = ib_bind_mw(ia->ri_id->qp, | ||
1410 | seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1411 | if (rc) { | ||
1412 | dprintk("RPC: %s: failed ib_bind_mw " | ||
1413 | "%u@0x%llx status %i\n", | ||
1414 | __func__, seg->mr_len, | ||
1415 | (unsigned long long)seg->mr_dma, rc); | ||
1416 | rpcrdma_unmap_one(ia, seg); | ||
1417 | } else { | ||
1418 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
1419 | seg->mr_base = param.addr; | ||
1420 | seg->mr_nsegs = 1; | ||
1421 | nsegs = 1; | ||
1422 | } | ||
1423 | } | ||
1424 | break; | 1767 | break; |
1425 | 1768 | ||
1426 | /* Default registration each time */ | 1769 | /* Default registration each time */ |
1427 | default: | 1770 | default: |
1428 | { | 1771 | rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); |
1429 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
1430 | int len = 0; | ||
1431 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1432 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1433 | for (i = 0; i < nsegs;) { | ||
1434 | rpcrdma_map_one(ia, seg, writing); | ||
1435 | ipb[i].addr = seg->mr_dma; | ||
1436 | ipb[i].size = seg->mr_len; | ||
1437 | len += seg->mr_len; | ||
1438 | ++seg; | ||
1439 | ++i; | ||
1440 | /* Check for holes */ | ||
1441 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
1442 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1443 | break; | ||
1444 | } | ||
1445 | nsegs = i; | ||
1446 | seg1->mr_base = seg1->mr_dma; | ||
1447 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
1448 | ipb, nsegs, mem_priv, &seg1->mr_base); | ||
1449 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
1450 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
1451 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
1452 | "%u@0x%llx (%d)... status %i\n", | ||
1453 | __func__, len, | ||
1454 | (unsigned long long)seg1->mr_dma, nsegs, rc); | ||
1455 | while (nsegs--) | ||
1456 | rpcrdma_unmap_one(ia, --seg); | ||
1457 | } else { | ||
1458 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
1459 | seg1->mr_nsegs = nsegs; | ||
1460 | seg1->mr_len = len; | ||
1461 | } | ||
1462 | } | ||
1463 | break; | 1772 | break; |
1464 | } | 1773 | } |
1465 | if (rc) | 1774 | if (rc) |
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1473 | struct rpcrdma_xprt *r_xprt, void *r) | 1782 | struct rpcrdma_xprt *r_xprt, void *r) |
1474 | { | 1783 | { |
1475 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1784 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1476 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1477 | int nsegs = seg->mr_nsegs, rc; | 1785 | int nsegs = seg->mr_nsegs, rc; |
1478 | 1786 | ||
1479 | switch (ia->ri_memreg_strategy) { | 1787 | switch (ia->ri_memreg_strategy) { |
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1486 | break; | 1794 | break; |
1487 | #endif | 1795 | #endif |
1488 | 1796 | ||
1797 | case RPCRDMA_FRMR: | ||
1798 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
1799 | break; | ||
1800 | |||
1489 | case RPCRDMA_MTHCAFMR: | 1801 | case RPCRDMA_MTHCAFMR: |
1490 | { | 1802 | rc = rpcrdma_deregister_fmr_external(seg, ia); |
1491 | LIST_HEAD(l); | ||
1492 | list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); | ||
1493 | rc = ib_unmap_fmr(&l); | ||
1494 | while (seg1->mr_nsegs--) | ||
1495 | rpcrdma_unmap_one(ia, seg++); | ||
1496 | } | ||
1497 | if (rc) | ||
1498 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
1499 | " status %i\n", __func__, rc); | ||
1500 | break; | 1803 | break; |
1501 | 1804 | ||
1502 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1805 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1503 | case RPCRDMA_MEMWINDOWS: | 1806 | case RPCRDMA_MEMWINDOWS: |
1504 | { | 1807 | rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); |
1505 | struct ib_mw_bind param; | ||
1506 | BUG_ON(nsegs != 1); | ||
1507 | param.mr = ia->ri_bind_mem; | ||
1508 | param.addr = 0ULL; /* unbind */ | ||
1509 | param.length = 0; | ||
1510 | param.mw_access_flags = 0; | ||
1511 | if (r) { | ||
1512 | param.wr_id = (u64) (unsigned long) r; | ||
1513 | param.send_flags = IB_SEND_SIGNALED; | ||
1514 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
1515 | } else { | ||
1516 | param.wr_id = 0ULL; | ||
1517 | param.send_flags = 0; | ||
1518 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1519 | } | ||
1520 | rc = ib_bind_mw(ia->ri_id->qp, | ||
1521 | seg->mr_chunk.rl_mw->r.mw, ¶m); | ||
1522 | rpcrdma_unmap_one(ia, seg); | ||
1523 | } | ||
1524 | if (rc) | ||
1525 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
1526 | " status %i\n", __func__, rc); | ||
1527 | else | ||
1528 | r = NULL; /* will upcall on completion */ | ||
1529 | break; | 1808 | break; |
1530 | 1809 | ||
1531 | default: | 1810 | default: |
1532 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | 1811 | rc = rpcrdma_deregister_default_external(seg, ia); |
1533 | seg1->mr_chunk.rl_mr = NULL; | ||
1534 | while (seg1->mr_nsegs--) | ||
1535 | rpcrdma_unmap_one(ia, seg++); | ||
1536 | if (rc) | ||
1537 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
1538 | " status %i\n", __func__, rc); | ||
1539 | break; | 1812 | break; |
1540 | } | 1813 | } |
1541 | if (r) { | 1814 | if (r) { |