aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc/xprtrdma/verbs.c
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2008-10-15 15:54:56 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2008-10-15 15:54:56 -0400
commit6925bac120097b823fc990c143b9789c21cc60b5 (patch)
tree0f92e1b22122fba623aeff4c271c16df673244eb /net/sunrpc/xprtrdma/verbs.c
parent04ab591808565f968d4406f6435090ad671ebdab (diff)
parent011935a0a710c20bb7ae63523b78856848db1926 (diff)
Merge branch 'next'
Diffstat (limited to 'net/sunrpc/xprtrdma/verbs.c')
-rw-r--r--net/sunrpc/xprtrdma/verbs.c741
1 files changed, 507 insertions, 234 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8ea283ecc52..a5fef5e6c32 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
284 switch (event->event) { 284 switch (event->event) {
285 case RDMA_CM_EVENT_ADDR_RESOLVED: 285 case RDMA_CM_EVENT_ADDR_RESOLVED:
286 case RDMA_CM_EVENT_ROUTE_RESOLVED: 286 case RDMA_CM_EVENT_ROUTE_RESOLVED:
287 ia->ri_async_rc = 0;
287 complete(&ia->ri_done); 288 complete(&ia->ri_done);
288 break; 289 break;
289 case RDMA_CM_EVENT_ADDR_ERROR: 290 case RDMA_CM_EVENT_ADDR_ERROR:
@@ -338,13 +339,32 @@ connected:
338 wake_up_all(&ep->rep_connect_wait); 339 wake_up_all(&ep->rep_connect_wait);
339 break; 340 break;
340 default: 341 default:
341 ia->ri_async_rc = -EINVAL; 342 dprintk("RPC: %s: unexpected CM event %d\n",
342 dprintk("RPC: %s: unexpected CM event %X\n",
343 __func__, event->event); 343 __func__, event->event);
344 complete(&ia->ri_done);
345 break; 344 break;
346 } 345 }
347 346
347#ifdef RPC_DEBUG
348 if (connstate == 1) {
349 int ird = attr.max_dest_rd_atomic;
350 int tird = ep->rep_remote_cma.responder_resources;
351 printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
352 "on %s, memreg %d slots %d ird %d%s\n",
353 NIPQUAD(addr->sin_addr.s_addr),
354 ntohs(addr->sin_port),
355 ia->ri_id->device->name,
356 ia->ri_memreg_strategy,
357 xprt->rx_buf.rb_max_requests,
358 ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
359 } else if (connstate < 0) {
360 printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
361 "closed (%d)\n",
362 NIPQUAD(addr->sin_addr.s_addr),
363 ntohs(addr->sin_port),
364 connstate);
365 }
366#endif
367
348 return 0; 368 return 0;
349} 369}
350 370
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
355 struct rdma_cm_id *id; 375 struct rdma_cm_id *id;
356 int rc; 376 int rc;
357 377
378 init_completion(&ia->ri_done);
379
358 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); 380 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
359 if (IS_ERR(id)) { 381 if (IS_ERR(id)) {
360 rc = PTR_ERR(id); 382 rc = PTR_ERR(id);
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
363 return id; 385 return id;
364 } 386 }
365 387
366 ia->ri_async_rc = 0; 388 ia->ri_async_rc = -ETIMEDOUT;
367 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); 389 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
368 if (rc) { 390 if (rc) {
369 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", 391 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
370 __func__, rc); 392 __func__, rc);
371 goto out; 393 goto out;
372 } 394 }
373 wait_for_completion(&ia->ri_done); 395 wait_for_completion_interruptible_timeout(&ia->ri_done,
396 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
374 rc = ia->ri_async_rc; 397 rc = ia->ri_async_rc;
375 if (rc) 398 if (rc)
376 goto out; 399 goto out;
377 400
378 ia->ri_async_rc = 0; 401 ia->ri_async_rc = -ETIMEDOUT;
379 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); 402 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
380 if (rc) { 403 if (rc) {
381 dprintk("RPC: %s: rdma_resolve_route() failed %i\n", 404 dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
382 __func__, rc); 405 __func__, rc);
383 goto out; 406 goto out;
384 } 407 }
385 wait_for_completion(&ia->ri_done); 408 wait_for_completion_interruptible_timeout(&ia->ri_done,
409 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
386 rc = ia->ri_async_rc; 410 rc = ia->ri_async_rc;
387 if (rc) 411 if (rc)
388 goto out; 412 goto out;
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq)
423int 447int
424rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) 448rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
425{ 449{
426 int rc; 450 int rc, mem_priv;
451 struct ib_device_attr devattr;
427 struct rpcrdma_ia *ia = &xprt->rx_ia; 452 struct rpcrdma_ia *ia = &xprt->rx_ia;
428 453
429 init_completion(&ia->ri_done);
430
431 ia->ri_id = rpcrdma_create_id(xprt, ia, addr); 454 ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
432 if (IS_ERR(ia->ri_id)) { 455 if (IS_ERR(ia->ri_id)) {
433 rc = PTR_ERR(ia->ri_id); 456 rc = PTR_ERR(ia->ri_id);
@@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
443 } 466 }
444 467
445 /* 468 /*
469 * Query the device to determine if the requested memory
470 * registration strategy is supported. If it isn't, set the
471 * strategy to a globally supported model.
472 */
473 rc = ib_query_device(ia->ri_id->device, &devattr);
474 if (rc) {
475 dprintk("RPC: %s: ib_query_device failed %d\n",
476 __func__, rc);
477 goto out2;
478 }
479
480 if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
481 ia->ri_have_dma_lkey = 1;
482 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
483 }
484
485 switch (memreg) {
486 case RPCRDMA_MEMWINDOWS:
487 case RPCRDMA_MEMWINDOWS_ASYNC:
488 if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
489 dprintk("RPC: %s: MEMWINDOWS registration "
490 "specified but not supported by adapter, "
491 "using slower RPCRDMA_REGISTER\n",
492 __func__);
493 memreg = RPCRDMA_REGISTER;
494 }
495 break;
496 case RPCRDMA_MTHCAFMR:
497 if (!ia->ri_id->device->alloc_fmr) {
498#if RPCRDMA_PERSISTENT_REGISTRATION
499 dprintk("RPC: %s: MTHCAFMR registration "
500 "specified but not supported by adapter, "
501 "using riskier RPCRDMA_ALLPHYSICAL\n",
502 __func__);
503 memreg = RPCRDMA_ALLPHYSICAL;
504#else
505 dprintk("RPC: %s: MTHCAFMR registration "
506 "specified but not supported by adapter, "
507 "using slower RPCRDMA_REGISTER\n",
508 __func__);
509 memreg = RPCRDMA_REGISTER;
510#endif
511 }
512 break;
513 case RPCRDMA_FRMR:
514 /* Requires both frmr reg and local dma lkey */
515 if ((devattr.device_cap_flags &
516 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
517 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
518#if RPCRDMA_PERSISTENT_REGISTRATION
519 dprintk("RPC: %s: FRMR registration "
520 "specified but not supported by adapter, "
521 "using riskier RPCRDMA_ALLPHYSICAL\n",
522 __func__);
523 memreg = RPCRDMA_ALLPHYSICAL;
524#else
525 dprintk("RPC: %s: FRMR registration "
526 "specified but not supported by adapter, "
527 "using slower RPCRDMA_REGISTER\n",
528 __func__);
529 memreg = RPCRDMA_REGISTER;
530#endif
531 }
532 break;
533 }
534
535 /*
446 * Optionally obtain an underlying physical identity mapping in 536 * Optionally obtain an underlying physical identity mapping in
447 * order to do a memory window-based bind. This base registration 537 * order to do a memory window-based bind. This base registration
448 * is protected from remote access - that is enabled only by binding 538 * is protected from remote access - that is enabled only by binding
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
450 * revoked after the corresponding completion similar to a storage 540 * revoked after the corresponding completion similar to a storage
451 * adapter. 541 * adapter.
452 */ 542 */
453 if (memreg > RPCRDMA_REGISTER) { 543 switch (memreg) {
454 int mem_priv = IB_ACCESS_LOCAL_WRITE; 544 case RPCRDMA_BOUNCEBUFFERS:
455 switch (memreg) { 545 case RPCRDMA_REGISTER:
546 case RPCRDMA_FRMR:
547 break;
456#if RPCRDMA_PERSISTENT_REGISTRATION 548#if RPCRDMA_PERSISTENT_REGISTRATION
457 case RPCRDMA_ALLPHYSICAL: 549 case RPCRDMA_ALLPHYSICAL:
458 mem_priv |= IB_ACCESS_REMOTE_WRITE; 550 mem_priv = IB_ACCESS_LOCAL_WRITE |
459 mem_priv |= IB_ACCESS_REMOTE_READ; 551 IB_ACCESS_REMOTE_WRITE |
460 break; 552 IB_ACCESS_REMOTE_READ;
553 goto register_setup;
461#endif 554#endif
462 case RPCRDMA_MEMWINDOWS_ASYNC: 555 case RPCRDMA_MEMWINDOWS_ASYNC:
463 case RPCRDMA_MEMWINDOWS: 556 case RPCRDMA_MEMWINDOWS:
464 mem_priv |= IB_ACCESS_MW_BIND; 557 mem_priv = IB_ACCESS_LOCAL_WRITE |
465 break; 558 IB_ACCESS_MW_BIND;
466 default: 559 goto register_setup;
560 case RPCRDMA_MTHCAFMR:
561 if (ia->ri_have_dma_lkey)
467 break; 562 break;
468 } 563 mem_priv = IB_ACCESS_LOCAL_WRITE;
564 register_setup:
469 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); 565 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
470 if (IS_ERR(ia->ri_bind_mem)) { 566 if (IS_ERR(ia->ri_bind_mem)) {
471 printk(KERN_ALERT "%s: ib_get_dma_mr for " 567 printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
475 memreg = RPCRDMA_REGISTER; 571 memreg = RPCRDMA_REGISTER;
476 ia->ri_bind_mem = NULL; 572 ia->ri_bind_mem = NULL;
477 } 573 }
574 break;
575 default:
576 printk(KERN_ERR "%s: invalid memory registration mode %d\n",
577 __func__, memreg);
578 rc = -EINVAL;
579 goto out2;
478 } 580 }
581 dprintk("RPC: %s: memory registration strategy is %d\n",
582 __func__, memreg);
479 583
480 /* Else will do memory reg/dereg for each chunk */ 584 /* Else will do memory reg/dereg for each chunk */
481 ia->ri_memreg_strategy = memreg; 585 ia->ri_memreg_strategy = memreg;
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
483 return 0; 587 return 0;
484out2: 588out2:
485 rdma_destroy_id(ia->ri_id); 589 rdma_destroy_id(ia->ri_id);
590 ia->ri_id = NULL;
486out1: 591out1:
487 return rc; 592 return rc;
488} 593}
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
503 dprintk("RPC: %s: ib_dereg_mr returned %i\n", 608 dprintk("RPC: %s: ib_dereg_mr returned %i\n",
504 __func__, rc); 609 __func__, rc);
505 } 610 }
506 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) 611 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
507 rdma_destroy_qp(ia->ri_id); 612 if (ia->ri_id->qp)
613 rdma_destroy_qp(ia->ri_id);
614 rdma_destroy_id(ia->ri_id);
615 ia->ri_id = NULL;
616 }
508 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { 617 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
509 rc = ib_dealloc_pd(ia->ri_pd); 618 rc = ib_dealloc_pd(ia->ri_pd);
510 dprintk("RPC: %s: ib_dealloc_pd returned %i\n", 619 dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
511 __func__, rc); 620 __func__, rc);
512 } 621 }
513 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
514 rdma_destroy_id(ia->ri_id);
515} 622}
516 623
517/* 624/*
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
541 ep->rep_attr.srq = NULL; 648 ep->rep_attr.srq = NULL;
542 ep->rep_attr.cap.max_send_wr = cdata->max_requests; 649 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
543 switch (ia->ri_memreg_strategy) { 650 switch (ia->ri_memreg_strategy) {
651 case RPCRDMA_FRMR:
652 /* Add room for frmr register and invalidate WRs */
653 ep->rep_attr.cap.max_send_wr *= 3;
654 if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
655 return -EINVAL;
656 break;
544 case RPCRDMA_MEMWINDOWS_ASYNC: 657 case RPCRDMA_MEMWINDOWS_ASYNC:
545 case RPCRDMA_MEMWINDOWS: 658 case RPCRDMA_MEMWINDOWS:
546 /* Add room for mw_binds+unbinds - overkill! */ 659 /* Add room for mw_binds+unbinds - overkill! */
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
617 ep->rep_remote_cma.private_data_len = 0; 730 ep->rep_remote_cma.private_data_len = 0;
618 731
619 /* Client offers RDMA Read but does not initiate */ 732 /* Client offers RDMA Read but does not initiate */
620 switch (ia->ri_memreg_strategy) { 733 ep->rep_remote_cma.initiator_depth = 0;
621 case RPCRDMA_BOUNCEBUFFERS: 734 if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
622 ep->rep_remote_cma.responder_resources = 0; 735 ep->rep_remote_cma.responder_resources = 0;
623 break; 736 else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
624 case RPCRDMA_MTHCAFMR: 737 ep->rep_remote_cma.responder_resources = 32;
625 case RPCRDMA_REGISTER: 738 else
626 ep->rep_remote_cma.responder_resources = cdata->max_requests *
627 (RPCRDMA_MAX_DATA_SEGS / 8);
628 break;
629 case RPCRDMA_MEMWINDOWS:
630 case RPCRDMA_MEMWINDOWS_ASYNC:
631#if RPCRDMA_PERSISTENT_REGISTRATION
632 case RPCRDMA_ALLPHYSICAL:
633#endif
634 ep->rep_remote_cma.responder_resources = cdata->max_requests *
635 (RPCRDMA_MAX_DATA_SEGS / 2);
636 break;
637 default:
638 break;
639 }
640 if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
641 ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; 739 ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
642 ep->rep_remote_cma.initiator_depth = 0;
643 740
644 ep->rep_remote_cma.retry_count = 7; 741 ep->rep_remote_cma.retry_count = 7;
645 ep->rep_remote_cma.flow_control = 0; 742 ep->rep_remote_cma.flow_control = 0;
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
679 if (rc) 776 if (rc)
680 dprintk("RPC: %s: rpcrdma_ep_disconnect" 777 dprintk("RPC: %s: rpcrdma_ep_disconnect"
681 " returned %i\n", __func__, rc); 778 " returned %i\n", __func__, rc);
779 rdma_destroy_qp(ia->ri_id);
780 ia->ri_id->qp = NULL;
682 } 781 }
683 782
684 ep->rep_func = NULL;
685
686 /* padding - could be done in rpcrdma_buffer_destroy... */ 783 /* padding - could be done in rpcrdma_buffer_destroy... */
687 if (ep->rep_pad_mr) { 784 if (ep->rep_pad_mr) {
688 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); 785 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
689 ep->rep_pad_mr = NULL; 786 ep->rep_pad_mr = NULL;
690 } 787 }
691 788
692 if (ia->ri_id->qp) {
693 rdma_destroy_qp(ia->ri_id);
694 ia->ri_id->qp = NULL;
695 }
696
697 rpcrdma_clean_cq(ep->rep_cq); 789 rpcrdma_clean_cq(ep->rep_cq);
698 rc = ib_destroy_cq(ep->rep_cq); 790 rc = ib_destroy_cq(ep->rep_cq);
699 if (rc) 791 if (rc)
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
712 struct rdma_cm_id *id; 804 struct rdma_cm_id *id;
713 int rc = 0; 805 int rc = 0;
714 int retry_count = 0; 806 int retry_count = 0;
715 int reconnect = (ep->rep_connected != 0);
716 807
717 if (reconnect) { 808 if (ep->rep_connected != 0) {
718 struct rpcrdma_xprt *xprt; 809 struct rpcrdma_xprt *xprt;
719retry: 810retry:
720 rc = rpcrdma_ep_disconnect(ep, ia); 811 rc = rpcrdma_ep_disconnect(ep, ia);
@@ -745,6 +836,7 @@ retry:
745 goto out; 836 goto out;
746 } 837 }
747 /* END TEMP */ 838 /* END TEMP */
839 rdma_destroy_qp(ia->ri_id);
748 rdma_destroy_id(ia->ri_id); 840 rdma_destroy_id(ia->ri_id);
749 ia->ri_id = id; 841 ia->ri_id = id;
750 } 842 }
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
769 } 861 }
770} 862}
771 863
772 /* Theoretically a client initiator_depth > 0 is not needed,
773 * but many peers fail to complete the connection unless they
774 * == responder_resources! */
775 if (ep->rep_remote_cma.initiator_depth !=
776 ep->rep_remote_cma.responder_resources)
777 ep->rep_remote_cma.initiator_depth =
778 ep->rep_remote_cma.responder_resources;
779
780 ep->rep_connected = 0; 864 ep->rep_connected = 0;
781 865
782 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); 866 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
786 goto out; 870 goto out;
787 } 871 }
788 872
789 if (reconnect)
790 return 0;
791
792 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); 873 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
793 874
794 /* 875 /*
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
805 if (ep->rep_connected <= 0) { 886 if (ep->rep_connected <= 0) {
806 /* Sometimes, the only way to reliably connect to remote 887 /* Sometimes, the only way to reliably connect to remote
807 * CMs is to use same nonzero values for ORD and IRD. */ 888 * CMs is to use same nonzero values for ORD and IRD. */
808 ep->rep_remote_cma.initiator_depth = 889 if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
809 ep->rep_remote_cma.responder_resources; 890 (ep->rep_remote_cma.responder_resources == 0 ||
810 if (ep->rep_remote_cma.initiator_depth == 0) 891 ep->rep_remote_cma.initiator_depth !=
811 ++ep->rep_remote_cma.initiator_depth; 892 ep->rep_remote_cma.responder_resources)) {
812 if (ep->rep_remote_cma.responder_resources == 0) 893 if (ep->rep_remote_cma.responder_resources == 0)
813 ++ep->rep_remote_cma.responder_resources; 894 ep->rep_remote_cma.responder_resources = 1;
814 if (retry_count++ == 0) 895 ep->rep_remote_cma.initiator_depth =
896 ep->rep_remote_cma.responder_resources;
815 goto retry; 897 goto retry;
898 }
816 rc = ep->rep_connected; 899 rc = ep->rep_connected;
817 } else { 900 } else {
818 dprintk("RPC: %s: connected\n", __func__); 901 dprintk("RPC: %s: connected\n", __func__);
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
863 char *p; 946 char *p;
864 size_t len; 947 size_t len;
865 int i, rc; 948 int i, rc;
949 struct rpcrdma_mw *r;
866 950
867 buf->rb_max_requests = cdata->max_requests; 951 buf->rb_max_requests = cdata->max_requests;
868 spin_lock_init(&buf->rb_lock); 952 spin_lock_init(&buf->rb_lock);
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
873 * 2. arrays of struct rpcrdma_req to fill in pointers 957 * 2. arrays of struct rpcrdma_req to fill in pointers
874 * 3. array of struct rpcrdma_rep for replies 958 * 3. array of struct rpcrdma_rep for replies
875 * 4. padding, if any 959 * 4. padding, if any
876 * 5. mw's, if any 960 * 5. mw's, fmr's or frmr's, if any
877 * Send/recv buffers in req/rep need to be registered 961 * Send/recv buffers in req/rep need to be registered
878 */ 962 */
879 963
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
881 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); 965 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
882 len += cdata->padding; 966 len += cdata->padding;
883 switch (ia->ri_memreg_strategy) { 967 switch (ia->ri_memreg_strategy) {
968 case RPCRDMA_FRMR:
969 len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
970 sizeof(struct rpcrdma_mw);
971 break;
884 case RPCRDMA_MTHCAFMR: 972 case RPCRDMA_MTHCAFMR:
885 /* TBD we are perhaps overallocating here */ 973 /* TBD we are perhaps overallocating here */
886 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * 974 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
927 * and also reduce unbind-to-bind collision. 1015 * and also reduce unbind-to-bind collision.
928 */ 1016 */
929 INIT_LIST_HEAD(&buf->rb_mws); 1017 INIT_LIST_HEAD(&buf->rb_mws);
1018 r = (struct rpcrdma_mw *)p;
930 switch (ia->ri_memreg_strategy) { 1019 switch (ia->ri_memreg_strategy) {
1020 case RPCRDMA_FRMR:
1021 for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
1022 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1023 RPCRDMA_MAX_SEGS);
1024 if (IS_ERR(r->r.frmr.fr_mr)) {
1025 rc = PTR_ERR(r->r.frmr.fr_mr);
1026 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1027 " failed %i\n", __func__, rc);
1028 goto out;
1029 }
1030 r->r.frmr.fr_pgl =
1031 ib_alloc_fast_reg_page_list(ia->ri_id->device,
1032 RPCRDMA_MAX_SEGS);
1033 if (IS_ERR(r->r.frmr.fr_pgl)) {
1034 rc = PTR_ERR(r->r.frmr.fr_pgl);
1035 dprintk("RPC: %s: "
1036 "ib_alloc_fast_reg_page_list "
1037 "failed %i\n", __func__, rc);
1038 goto out;
1039 }
1040 list_add(&r->mw_list, &buf->rb_mws);
1041 ++r;
1042 }
1043 break;
931 case RPCRDMA_MTHCAFMR: 1044 case RPCRDMA_MTHCAFMR:
932 {
933 struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
934 struct ib_fmr_attr fa = {
935 RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
936 };
937 /* TBD we are perhaps overallocating here */ 1045 /* TBD we are perhaps overallocating here */
938 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { 1046 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
1047 static struct ib_fmr_attr fa =
1048 { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
939 r->r.fmr = ib_alloc_fmr(ia->ri_pd, 1049 r->r.fmr = ib_alloc_fmr(ia->ri_pd,
940 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, 1050 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
941 &fa); 1051 &fa);
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
948 list_add(&r->mw_list, &buf->rb_mws); 1058 list_add(&r->mw_list, &buf->rb_mws);
949 ++r; 1059 ++r;
950 } 1060 }
951 }
952 break; 1061 break;
953 case RPCRDMA_MEMWINDOWS_ASYNC: 1062 case RPCRDMA_MEMWINDOWS_ASYNC:
954 case RPCRDMA_MEMWINDOWS: 1063 case RPCRDMA_MEMWINDOWS:
955 {
956 struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
957 /* Allocate one extra request's worth, for full cycling */ 1064 /* Allocate one extra request's worth, for full cycling */
958 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { 1065 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
959 r->r.mw = ib_alloc_mw(ia->ri_pd); 1066 r->r.mw = ib_alloc_mw(ia->ri_pd);
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
966 list_add(&r->mw_list, &buf->rb_mws); 1073 list_add(&r->mw_list, &buf->rb_mws);
967 ++r; 1074 ++r;
968 } 1075 }
969 }
970 break; 1076 break;
971 default: 1077 default:
972 break; 1078 break;
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1046{ 1152{
1047 int rc, i; 1153 int rc, i;
1048 struct rpcrdma_ia *ia = rdmab_to_ia(buf); 1154 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1155 struct rpcrdma_mw *r;
1049 1156
1050 /* clean up in reverse order from create 1157 /* clean up in reverse order from create
1051 * 1. recv mr memory (mr free, then kfree) 1158 * 1. recv mr memory (mr free, then kfree)
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1065 } 1172 }
1066 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { 1173 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
1067 while (!list_empty(&buf->rb_mws)) { 1174 while (!list_empty(&buf->rb_mws)) {
1068 struct rpcrdma_mw *r;
1069 r = list_entry(buf->rb_mws.next, 1175 r = list_entry(buf->rb_mws.next,
1070 struct rpcrdma_mw, mw_list); 1176 struct rpcrdma_mw, mw_list);
1071 list_del(&r->mw_list); 1177 list_del(&r->mw_list);
1072 switch (ia->ri_memreg_strategy) { 1178 switch (ia->ri_memreg_strategy) {
1179 case RPCRDMA_FRMR:
1180 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1181 if (rc)
1182 dprintk("RPC: %s:"
1183 " ib_dereg_mr"
1184 " failed %i\n",
1185 __func__, rc);
1186 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1187 break;
1073 case RPCRDMA_MTHCAFMR: 1188 case RPCRDMA_MTHCAFMR:
1074 rc = ib_dealloc_fmr(r->r.fmr); 1189 rc = ib_dealloc_fmr(r->r.fmr);
1075 if (rc) 1190 if (rc)
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1115{ 1230{
1116 struct rpcrdma_req *req; 1231 struct rpcrdma_req *req;
1117 unsigned long flags; 1232 unsigned long flags;
1233 int i;
1234 struct rpcrdma_mw *r;
1118 1235
1119 spin_lock_irqsave(&buffers->rb_lock, flags); 1236 spin_lock_irqsave(&buffers->rb_lock, flags);
1120 if (buffers->rb_send_index == buffers->rb_max_requests) { 1237 if (buffers->rb_send_index == buffers->rb_max_requests) {
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1135 } 1252 }
1136 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; 1253 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
1137 if (!list_empty(&buffers->rb_mws)) { 1254 if (!list_empty(&buffers->rb_mws)) {
1138 int i = RPCRDMA_MAX_SEGS - 1; 1255 i = RPCRDMA_MAX_SEGS - 1;
1139 do { 1256 do {
1140 struct rpcrdma_mw *r;
1141 r = list_entry(buffers->rb_mws.next, 1257 r = list_entry(buffers->rb_mws.next,
1142 struct rpcrdma_mw, mw_list); 1258 struct rpcrdma_mw, mw_list);
1143 list_del(&r->mw_list); 1259 list_del(&r->mw_list);
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
1171 req->rl_reply = NULL; 1287 req->rl_reply = NULL;
1172 } 1288 }
1173 switch (ia->ri_memreg_strategy) { 1289 switch (ia->ri_memreg_strategy) {
1290 case RPCRDMA_FRMR:
1174 case RPCRDMA_MTHCAFMR: 1291 case RPCRDMA_MTHCAFMR:
1175 case RPCRDMA_MEMWINDOWS_ASYNC: 1292 case RPCRDMA_MEMWINDOWS_ASYNC:
1176 case RPCRDMA_MEMWINDOWS: 1293 case RPCRDMA_MEMWINDOWS:
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1252 va, len, DMA_BIDIRECTIONAL); 1369 va, len, DMA_BIDIRECTIONAL);
1253 iov->length = len; 1370 iov->length = len;
1254 1371
1255 if (ia->ri_bind_mem != NULL) { 1372 if (ia->ri_have_dma_lkey) {
1373 *mrp = NULL;
1374 iov->lkey = ia->ri_dma_lkey;
1375 return 0;
1376 } else if (ia->ri_bind_mem != NULL) {
1256 *mrp = NULL; 1377 *mrp = NULL;
1257 iov->lkey = ia->ri_bind_mem->lkey; 1378 iov->lkey = ia->ri_bind_mem->lkey;
1258 return 0; 1379 return 0;
@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1329 seg->mr_dma, seg->mr_dmalen, seg->mr_dir); 1450 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1330} 1451}
1331 1452
1453static int
1454rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1455 int *nsegs, int writing, struct rpcrdma_ia *ia,
1456 struct rpcrdma_xprt *r_xprt)
1457{
1458 struct rpcrdma_mr_seg *seg1 = seg;
1459 struct ib_send_wr frmr_wr, *bad_wr;
1460 u8 key;
1461 int len, pageoff;
1462 int i, rc;
1463
1464 pageoff = offset_in_page(seg1->mr_offset);
1465 seg1->mr_offset -= pageoff; /* start of page */
1466 seg1->mr_len += pageoff;
1467 len = -pageoff;
1468 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1469 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1470 for (i = 0; i < *nsegs;) {
1471 rpcrdma_map_one(ia, seg, writing);
1472 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
1473 len += seg->mr_len;
1474 ++seg;
1475 ++i;
1476 /* Check for holes */
1477 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1478 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1479 break;
1480 }
1481 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1482 __func__, seg1->mr_chunk.rl_mw, i);
1483
1484 /* Bump the key */
1485 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1486 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1487
1488 /* Prepare FRMR WR */
1489 memset(&frmr_wr, 0, sizeof frmr_wr);
1490 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1491 frmr_wr.send_flags = 0; /* unsignaled */
1492 frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
1493 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1494 frmr_wr.wr.fast_reg.page_list_len = i;
1495 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1496 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1497 frmr_wr.wr.fast_reg.access_flags = (writing ?
1498 IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ);
1499 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1500 DECR_CQCOUNT(&r_xprt->rx_ep);
1501
1502 rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
1503
1504 if (rc) {
1505 dprintk("RPC: %s: failed ib_post_send for register,"
1506 " status %i\n", __func__, rc);
1507 while (i--)
1508 rpcrdma_unmap_one(ia, --seg);
1509 } else {
1510 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1511 seg1->mr_base = seg1->mr_dma + pageoff;
1512 seg1->mr_nsegs = i;
1513 seg1->mr_len = len;
1514 }
1515 *nsegs = i;
1516 return rc;
1517}
1518
1519static int
1520rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1521 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1522{
1523 struct rpcrdma_mr_seg *seg1 = seg;
1524 struct ib_send_wr invalidate_wr, *bad_wr;
1525 int rc;
1526
1527 while (seg1->mr_nsegs--)
1528 rpcrdma_unmap_one(ia, seg++);
1529
1530 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1531 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1532 invalidate_wr.send_flags = 0; /* unsignaled */
1533 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1534 DECR_CQCOUNT(&r_xprt->rx_ep);
1535
1536 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1537 if (rc)
1538 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1539 " status %i\n", __func__, rc);
1540 return rc;
1541}
1542
1543static int
1544rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1545 int *nsegs, int writing, struct rpcrdma_ia *ia)
1546{
1547 struct rpcrdma_mr_seg *seg1 = seg;
1548 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1549 int len, pageoff, i, rc;
1550
1551 pageoff = offset_in_page(seg1->mr_offset);
1552 seg1->mr_offset -= pageoff; /* start of page */
1553 seg1->mr_len += pageoff;
1554 len = -pageoff;
1555 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1556 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1557 for (i = 0; i < *nsegs;) {
1558 rpcrdma_map_one(ia, seg, writing);
1559 physaddrs[i] = seg->mr_dma;
1560 len += seg->mr_len;
1561 ++seg;
1562 ++i;
1563 /* Check for holes */
1564 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1565 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1566 break;
1567 }
1568 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1569 physaddrs, i, seg1->mr_dma);
1570 if (rc) {
1571 dprintk("RPC: %s: failed ib_map_phys_fmr "
1572 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1573 len, (unsigned long long)seg1->mr_dma,
1574 pageoff, i, rc);
1575 while (i--)
1576 rpcrdma_unmap_one(ia, --seg);
1577 } else {
1578 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1579 seg1->mr_base = seg1->mr_dma + pageoff;
1580 seg1->mr_nsegs = i;
1581 seg1->mr_len = len;
1582 }
1583 *nsegs = i;
1584 return rc;
1585}
1586
1587static int
1588rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1589 struct rpcrdma_ia *ia)
1590{
1591 struct rpcrdma_mr_seg *seg1 = seg;
1592 LIST_HEAD(l);
1593 int rc;
1594
1595 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1596 rc = ib_unmap_fmr(&l);
1597 while (seg1->mr_nsegs--)
1598 rpcrdma_unmap_one(ia, seg++);
1599 if (rc)
1600 dprintk("RPC: %s: failed ib_unmap_fmr,"
1601 " status %i\n", __func__, rc);
1602 return rc;
1603}
1604
1605static int
1606rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
1607 int *nsegs, int writing, struct rpcrdma_ia *ia,
1608 struct rpcrdma_xprt *r_xprt)
1609{
1610 int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
1611 IB_ACCESS_REMOTE_READ);
1612 struct ib_mw_bind param;
1613 int rc;
1614
1615 *nsegs = 1;
1616 rpcrdma_map_one(ia, seg, writing);
1617 param.mr = ia->ri_bind_mem;
1618 param.wr_id = 0ULL; /* no send cookie */
1619 param.addr = seg->mr_dma;
1620 param.length = seg->mr_len;
1621 param.send_flags = 0;
1622 param.mw_access_flags = mem_priv;
1623
1624 DECR_CQCOUNT(&r_xprt->rx_ep);
1625 rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
1626 if (rc) {
1627 dprintk("RPC: %s: failed ib_bind_mw "
1628 "%u@0x%llx status %i\n",
1629 __func__, seg->mr_len,
1630 (unsigned long long)seg->mr_dma, rc);
1631 rpcrdma_unmap_one(ia, seg);
1632 } else {
1633 seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
1634 seg->mr_base = param.addr;
1635 seg->mr_nsegs = 1;
1636 }
1637 return rc;
1638}
1639
1640static int
1641rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
1642 struct rpcrdma_ia *ia,
1643 struct rpcrdma_xprt *r_xprt, void **r)
1644{
1645 struct ib_mw_bind param;
1646 LIST_HEAD(l);
1647 int rc;
1648
1649 BUG_ON(seg->mr_nsegs != 1);
1650 param.mr = ia->ri_bind_mem;
1651 param.addr = 0ULL; /* unbind */
1652 param.length = 0;
1653 param.mw_access_flags = 0;
1654 if (*r) {
1655 param.wr_id = (u64) (unsigned long) *r;
1656 param.send_flags = IB_SEND_SIGNALED;
1657 INIT_CQCOUNT(&r_xprt->rx_ep);
1658 } else {
1659 param.wr_id = 0ULL;
1660 param.send_flags = 0;
1661 DECR_CQCOUNT(&r_xprt->rx_ep);
1662 }
1663 rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
1664 rpcrdma_unmap_one(ia, seg);
1665 if (rc)
1666 dprintk("RPC: %s: failed ib_(un)bind_mw,"
1667 " status %i\n", __func__, rc);
1668 else
1669 *r = NULL; /* will upcall on completion */
1670 return rc;
1671}
1672
1673static int
1674rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
1675 int *nsegs, int writing, struct rpcrdma_ia *ia)
1676{
1677 int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
1678 IB_ACCESS_REMOTE_READ);
1679 struct rpcrdma_mr_seg *seg1 = seg;
1680 struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
1681 int len, i, rc = 0;
1682
1683 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1684 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1685 for (len = 0, i = 0; i < *nsegs;) {
1686 rpcrdma_map_one(ia, seg, writing);
1687 ipb[i].addr = seg->mr_dma;
1688 ipb[i].size = seg->mr_len;
1689 len += seg->mr_len;
1690 ++seg;
1691 ++i;
1692 /* Check for holes */
1693 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1694 offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
1695 break;
1696 }
1697 seg1->mr_base = seg1->mr_dma;
1698 seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
1699 ipb, i, mem_priv, &seg1->mr_base);
1700 if (IS_ERR(seg1->mr_chunk.rl_mr)) {
1701 rc = PTR_ERR(seg1->mr_chunk.rl_mr);
1702 dprintk("RPC: %s: failed ib_reg_phys_mr "
1703 "%u@0x%llx (%d)... status %i\n",
1704 __func__, len,
1705 (unsigned long long)seg1->mr_dma, i, rc);
1706 while (i--)
1707 rpcrdma_unmap_one(ia, --seg);
1708 } else {
1709 seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
1710 seg1->mr_nsegs = i;
1711 seg1->mr_len = len;
1712 }
1713 *nsegs = i;
1714 return rc;
1715}
1716
1717static int
1718rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
1719 struct rpcrdma_ia *ia)
1720{
1721 struct rpcrdma_mr_seg *seg1 = seg;
1722 int rc;
1723
1724 rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
1725 seg1->mr_chunk.rl_mr = NULL;
1726 while (seg1->mr_nsegs--)
1727 rpcrdma_unmap_one(ia, seg++);
1728 if (rc)
1729 dprintk("RPC: %s: failed ib_dereg_mr,"
1730 " status %i\n", __func__, rc);
1731 return rc;
1732}
1733
1332int 1734int
1333rpcrdma_register_external(struct rpcrdma_mr_seg *seg, 1735rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1334 int nsegs, int writing, struct rpcrdma_xprt *r_xprt) 1736 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1335{ 1737{
1336 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1738 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1337 int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
1338 IB_ACCESS_REMOTE_READ);
1339 struct rpcrdma_mr_seg *seg1 = seg;
1340 int i;
1341 int rc = 0; 1739 int rc = 0;
1342 1740
1343 switch (ia->ri_memreg_strategy) { 1741 switch (ia->ri_memreg_strategy) {
@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1352 break; 1750 break;
1353#endif 1751#endif
1354 1752
1355 /* Registration using fast memory registration */ 1753 /* Registration using frmr registration */
1754 case RPCRDMA_FRMR:
1755 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1756 break;
1757
1758 /* Registration using fmr memory registration */
1356 case RPCRDMA_MTHCAFMR: 1759 case RPCRDMA_MTHCAFMR:
1357 { 1760 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
1358 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1359 int len, pageoff = offset_in_page(seg->mr_offset);
1360 seg1->mr_offset -= pageoff; /* start of page */
1361 seg1->mr_len += pageoff;
1362 len = -pageoff;
1363 if (nsegs > RPCRDMA_MAX_DATA_SEGS)
1364 nsegs = RPCRDMA_MAX_DATA_SEGS;
1365 for (i = 0; i < nsegs;) {
1366 rpcrdma_map_one(ia, seg, writing);
1367 physaddrs[i] = seg->mr_dma;
1368 len += seg->mr_len;
1369 ++seg;
1370 ++i;
1371 /* Check for holes */
1372 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
1373 offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
1374 break;
1375 }
1376 nsegs = i;
1377 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1378 physaddrs, nsegs, seg1->mr_dma);
1379 if (rc) {
1380 dprintk("RPC: %s: failed ib_map_phys_fmr "
1381 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1382 len, (unsigned long long)seg1->mr_dma,
1383 pageoff, nsegs, rc);
1384 while (nsegs--)
1385 rpcrdma_unmap_one(ia, --seg);
1386 } else {
1387 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1388 seg1->mr_base = seg1->mr_dma + pageoff;
1389 seg1->mr_nsegs = nsegs;
1390 seg1->mr_len = len;
1391 }
1392 }
1393 break; 1761 break;
1394 1762
1395 /* Registration using memory windows */ 1763 /* Registration using memory windows */
1396 case RPCRDMA_MEMWINDOWS_ASYNC: 1764 case RPCRDMA_MEMWINDOWS_ASYNC:
1397 case RPCRDMA_MEMWINDOWS: 1765 case RPCRDMA_MEMWINDOWS:
1398 { 1766 rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
1399 struct ib_mw_bind param;
1400 rpcrdma_map_one(ia, seg, writing);
1401 param.mr = ia->ri_bind_mem;
1402 param.wr_id = 0ULL; /* no send cookie */
1403 param.addr = seg->mr_dma;
1404 param.length = seg->mr_len;
1405 param.send_flags = 0;
1406 param.mw_access_flags = mem_priv;
1407
1408 DECR_CQCOUNT(&r_xprt->rx_ep);
1409 rc = ib_bind_mw(ia->ri_id->qp,
1410 seg->mr_chunk.rl_mw->r.mw, &param);
1411 if (rc) {
1412 dprintk("RPC: %s: failed ib_bind_mw "
1413 "%u@0x%llx status %i\n",
1414 __func__, seg->mr_len,
1415 (unsigned long long)seg->mr_dma, rc);
1416 rpcrdma_unmap_one(ia, seg);
1417 } else {
1418 seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
1419 seg->mr_base = param.addr;
1420 seg->mr_nsegs = 1;
1421 nsegs = 1;
1422 }
1423 }
1424 break; 1767 break;
1425 1768
1426 /* Default registration each time */ 1769 /* Default registration each time */
1427 default: 1770 default:
1428 { 1771 rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
1429 struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
1430 int len = 0;
1431 if (nsegs > RPCRDMA_MAX_DATA_SEGS)
1432 nsegs = RPCRDMA_MAX_DATA_SEGS;
1433 for (i = 0; i < nsegs;) {
1434 rpcrdma_map_one(ia, seg, writing);
1435 ipb[i].addr = seg->mr_dma;
1436 ipb[i].size = seg->mr_len;
1437 len += seg->mr_len;
1438 ++seg;
1439 ++i;
1440 /* Check for holes */
1441 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
1442 offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
1443 break;
1444 }
1445 nsegs = i;
1446 seg1->mr_base = seg1->mr_dma;
1447 seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
1448 ipb, nsegs, mem_priv, &seg1->mr_base);
1449 if (IS_ERR(seg1->mr_chunk.rl_mr)) {
1450 rc = PTR_ERR(seg1->mr_chunk.rl_mr);
1451 dprintk("RPC: %s: failed ib_reg_phys_mr "
1452 "%u@0x%llx (%d)... status %i\n",
1453 __func__, len,
1454 (unsigned long long)seg1->mr_dma, nsegs, rc);
1455 while (nsegs--)
1456 rpcrdma_unmap_one(ia, --seg);
1457 } else {
1458 seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
1459 seg1->mr_nsegs = nsegs;
1460 seg1->mr_len = len;
1461 }
1462 }
1463 break; 1772 break;
1464 } 1773 }
1465 if (rc) 1774 if (rc)
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
1473 struct rpcrdma_xprt *r_xprt, void *r) 1782 struct rpcrdma_xprt *r_xprt, void *r)
1474{ 1783{
1475 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1784 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1476 struct rpcrdma_mr_seg *seg1 = seg;
1477 int nsegs = seg->mr_nsegs, rc; 1785 int nsegs = seg->mr_nsegs, rc;
1478 1786
1479 switch (ia->ri_memreg_strategy) { 1787 switch (ia->ri_memreg_strategy) {
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
1486 break; 1794 break;
1487#endif 1795#endif
1488 1796
1797 case RPCRDMA_FRMR:
1798 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1799 break;
1800
1489 case RPCRDMA_MTHCAFMR: 1801 case RPCRDMA_MTHCAFMR:
1490 { 1802 rc = rpcrdma_deregister_fmr_external(seg, ia);
1491 LIST_HEAD(l);
1492 list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
1493 rc = ib_unmap_fmr(&l);
1494 while (seg1->mr_nsegs--)
1495 rpcrdma_unmap_one(ia, seg++);
1496 }
1497 if (rc)
1498 dprintk("RPC: %s: failed ib_unmap_fmr,"
1499 " status %i\n", __func__, rc);
1500 break; 1803 break;
1501 1804
1502 case RPCRDMA_MEMWINDOWS_ASYNC: 1805 case RPCRDMA_MEMWINDOWS_ASYNC:
1503 case RPCRDMA_MEMWINDOWS: 1806 case RPCRDMA_MEMWINDOWS:
1504 { 1807 rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
1505 struct ib_mw_bind param;
1506 BUG_ON(nsegs != 1);
1507 param.mr = ia->ri_bind_mem;
1508 param.addr = 0ULL; /* unbind */
1509 param.length = 0;
1510 param.mw_access_flags = 0;
1511 if (r) {
1512 param.wr_id = (u64) (unsigned long) r;
1513 param.send_flags = IB_SEND_SIGNALED;
1514 INIT_CQCOUNT(&r_xprt->rx_ep);
1515 } else {
1516 param.wr_id = 0ULL;
1517 param.send_flags = 0;
1518 DECR_CQCOUNT(&r_xprt->rx_ep);
1519 }
1520 rc = ib_bind_mw(ia->ri_id->qp,
1521 seg->mr_chunk.rl_mw->r.mw, &param);
1522 rpcrdma_unmap_one(ia, seg);
1523 }
1524 if (rc)
1525 dprintk("RPC: %s: failed ib_(un)bind_mw,"
1526 " status %i\n", __func__, rc);
1527 else
1528 r = NULL; /* will upcall on completion */
1529 break; 1808 break;
1530 1809
1531 default: 1810 default:
1532 rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); 1811 rc = rpcrdma_deregister_default_external(seg, ia);
1533 seg1->mr_chunk.rl_mr = NULL;
1534 while (seg1->mr_nsegs--)
1535 rpcrdma_unmap_one(ia, seg++);
1536 if (rc)
1537 dprintk("RPC: %s: failed ib_dereg_mr,"
1538 " status %i\n", __func__, rc);
1539 break; 1812 break;
1540 } 1813 }
1541 if (r) { 1814 if (r) {