diff options
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 29 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/transport.c | 41 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 741 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 17 |
4 files changed, 578 insertions, 250 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 5c1954d28d09..14106d26bb95 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
118 | } | 118 | } |
119 | 119 | ||
120 | if (xdrbuf->tail[0].iov_len) { | 120 | if (xdrbuf->tail[0].iov_len) { |
121 | /* the rpcrdma protocol allows us to omit any trailing | ||
122 | * xdr pad bytes, saving the server an RDMA operation. */ | ||
123 | if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) | ||
124 | return n; | ||
121 | if (n == nsegs) | 125 | if (n == nsegs) |
122 | return 0; | 126 | return 0; |
123 | seg[n].mr_page = NULL; | 127 | seg[n].mr_page = NULL; |
@@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
508 | if (hdrlen == 0) | 512 | if (hdrlen == 0) |
509 | return -1; | 513 | return -1; |
510 | 514 | ||
511 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" | 515 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
512 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 516 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
513 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, | 517 | __func__, transfertypes[wtype], hdrlen, rpclen, padlen, |
514 | headerp, base, req->rl_iov.lkey); | 518 | headerp, base, req->rl_iov.lkey); |
515 | 519 | ||
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b | |||
594 | * Scatter inline received data back into provided iov's. | 598 | * Scatter inline received data back into provided iov's. |
595 | */ | 599 | */ |
596 | static void | 600 | static void |
597 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) | 601 | rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) |
598 | { | 602 | { |
599 | int i, npages, curlen, olen; | 603 | int i, npages, curlen, olen; |
600 | char *destp; | 604 | char *destp; |
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) | |||
660 | } else | 664 | } else |
661 | rqst->rq_rcv_buf.tail[0].iov_len = 0; | 665 | rqst->rq_rcv_buf.tail[0].iov_len = 0; |
662 | 666 | ||
667 | if (pad) { | ||
668 | /* implicit padding on terminal chunk */ | ||
669 | unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base; | ||
670 | while (pad--) | ||
671 | p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0; | ||
672 | } | ||
673 | |||
663 | if (copy_len) | 674 | if (copy_len) |
664 | dprintk("RPC: %s: %d bytes in" | 675 | dprintk("RPC: %s: %d bytes in" |
665 | " %d extra segments (%d lost)\n", | 676 | " %d extra segments (%d lost)\n", |
@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) | |||
681 | struct rpc_xprt *xprt = ep->rep_xprt; | 692 | struct rpc_xprt *xprt = ep->rep_xprt; |
682 | 693 | ||
683 | spin_lock_bh(&xprt->transport_lock); | 694 | spin_lock_bh(&xprt->transport_lock); |
695 | if (++xprt->connect_cookie == 0) /* maintain a reserved value */ | ||
696 | ++xprt->connect_cookie; | ||
684 | if (ep->rep_connected > 0) { | 697 | if (ep->rep_connected > 0) { |
685 | if (!xprt_test_and_set_connected(xprt)) | 698 | if (!xprt_test_and_set_connected(xprt)) |
686 | xprt_wake_pending_tasks(xprt, 0); | 699 | xprt_wake_pending_tasks(xprt, 0); |
687 | } else { | 700 | } else { |
688 | if (xprt_test_and_clear_connected(xprt)) | 701 | if (xprt_test_and_clear_connected(xprt)) |
689 | xprt_wake_pending_tasks(xprt, ep->rep_connected); | 702 | xprt_wake_pending_tasks(xprt, -ENOTCONN); |
690 | } | 703 | } |
691 | spin_unlock_bh(&xprt->transport_lock); | 704 | spin_unlock_bh(&xprt->transport_lock); |
692 | } | 705 | } |
@@ -792,14 +805,20 @@ repost: | |||
792 | ((unsigned char *)iptr - (unsigned char *)headerp); | 805 | ((unsigned char *)iptr - (unsigned char *)headerp); |
793 | status = rep->rr_len + rdmalen; | 806 | status = rep->rr_len + rdmalen; |
794 | r_xprt->rx_stats.total_rdma_reply += rdmalen; | 807 | r_xprt->rx_stats.total_rdma_reply += rdmalen; |
808 | /* special case - last chunk may omit padding */ | ||
809 | if (rdmalen &= 3) { | ||
810 | rdmalen = 4 - rdmalen; | ||
811 | status += rdmalen; | ||
812 | } | ||
795 | } else { | 813 | } else { |
796 | /* else ordinary inline */ | 814 | /* else ordinary inline */ |
815 | rdmalen = 0; | ||
797 | iptr = (__be32 *)((unsigned char *)headerp + 28); | 816 | iptr = (__be32 *)((unsigned char *)headerp + 28); |
798 | rep->rr_len -= 28; /*sizeof *headerp;*/ | 817 | rep->rr_len -= 28; /*sizeof *headerp;*/ |
799 | status = rep->rr_len; | 818 | status = rep->rr_len; |
800 | } | 819 | } |
801 | /* Fix up the rpc results for upper layer */ | 820 | /* Fix up the rpc results for upper layer */ |
802 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); | 821 | rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); |
803 | break; | 822 | break; |
804 | 823 | ||
805 | case htonl(RDMA_NOMSG): | 824 | case htonl(RDMA_NOMSG): |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a564c1a39ec5..9839c3d94145 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; | |||
70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; | 70 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; | 71 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
72 | static unsigned int xprt_rdma_inline_write_padding; | 72 | static unsigned int xprt_rdma_inline_write_padding; |
73 | #if !RPCRDMA_PERSISTENT_REGISTRATION | 73 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; |
74 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ | 74 | int xprt_rdma_pad_optimize = 0; |
75 | #else | ||
76 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; | ||
77 | #endif | ||
78 | 75 | ||
79 | #ifdef RPC_DEBUG | 76 | #ifdef RPC_DEBUG |
80 | 77 | ||
@@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = { | |||
140 | .extra2 = &max_memreg, | 137 | .extra2 = &max_memreg, |
141 | }, | 138 | }, |
142 | { | 139 | { |
140 | .ctl_name = CTL_UNNUMBERED, | ||
141 | .procname = "rdma_pad_optimize", | ||
142 | .data = &xprt_rdma_pad_optimize, | ||
143 | .maxlen = sizeof(unsigned int), | ||
144 | .mode = 0644, | ||
145 | .proc_handler = &proc_dointvec, | ||
146 | }, | ||
147 | { | ||
143 | .ctl_name = 0, | 148 | .ctl_name = 0, |
144 | }, | 149 | }, |
145 | }; | 150 | }; |
@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt) | |||
458 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 463 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
459 | 464 | ||
460 | dprintk("RPC: %s: closing\n", __func__); | 465 | dprintk("RPC: %s: closing\n", __func__); |
466 | if (r_xprt->rx_ep.rep_connected > 0) | ||
467 | xprt->reestablish_timeout = 0; | ||
461 | xprt_disconnect_done(xprt); | 468 | xprt_disconnect_done(xprt); |
462 | (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); | 469 | (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); |
463 | } | 470 | } |
@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task) | |||
485 | /* Reconnect */ | 492 | /* Reconnect */ |
486 | schedule_delayed_work(&r_xprt->rdma_connect, | 493 | schedule_delayed_work(&r_xprt->rdma_connect, |
487 | xprt->reestablish_timeout); | 494 | xprt->reestablish_timeout); |
495 | xprt->reestablish_timeout <<= 1; | ||
496 | if (xprt->reestablish_timeout > (30 * HZ)) | ||
497 | xprt->reestablish_timeout = (30 * HZ); | ||
498 | else if (xprt->reestablish_timeout < (5 * HZ)) | ||
499 | xprt->reestablish_timeout = (5 * HZ); | ||
488 | } else { | 500 | } else { |
489 | schedule_delayed_work(&r_xprt->rdma_connect, 0); | 501 | schedule_delayed_work(&r_xprt->rdma_connect, 0); |
490 | if (!RPC_IS_ASYNC(task)) | 502 | if (!RPC_IS_ASYNC(task)) |
@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
591 | } | 603 | } |
592 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); | 604 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); |
593 | out: | 605 | out: |
606 | req->rl_connect_cookie = 0; /* our reserved value */ | ||
594 | return req->rl_xdr_buf; | 607 | return req->rl_xdr_buf; |
595 | 608 | ||
596 | outfail: | 609 | outfail: |
@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
694 | req->rl_reply->rr_xprt = xprt; | 707 | req->rl_reply->rr_xprt = xprt; |
695 | } | 708 | } |
696 | 709 | ||
697 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { | 710 | /* Must suppress retransmit to maintain credits */ |
698 | xprt_disconnect_done(xprt); | 711 | if (req->rl_connect_cookie == xprt->connect_cookie) |
699 | return -ENOTCONN; /* implies disconnect */ | 712 | goto drop_connection; |
700 | } | 713 | req->rl_connect_cookie = xprt->connect_cookie; |
714 | |||
715 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | ||
716 | goto drop_connection; | ||
701 | 717 | ||
718 | task->tk_bytes_sent += rqst->rq_snd_buf.len; | ||
702 | rqst->rq_bytes_sent = 0; | 719 | rqst->rq_bytes_sent = 0; |
703 | return 0; | 720 | return 0; |
721 | |||
722 | drop_connection: | ||
723 | xprt_disconnect_done(xprt); | ||
724 | return -ENOTCONN; /* implies disconnect */ | ||
704 | } | 725 | } |
705 | 726 | ||
706 | static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | 727 | static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void) | |||
770 | { | 791 | { |
771 | int rc; | 792 | int rc; |
772 | 793 | ||
773 | dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); | 794 | dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); |
774 | #ifdef RPC_DEBUG | 795 | #ifdef RPC_DEBUG |
775 | if (sunrpc_table_header) { | 796 | if (sunrpc_table_header) { |
776 | unregister_sysctl_table(sunrpc_table_header); | 797 | unregister_sysctl_table(sunrpc_table_header); |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8ea283ecc522..a5fef5e6c323 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
284 | switch (event->event) { | 284 | switch (event->event) { |
285 | case RDMA_CM_EVENT_ADDR_RESOLVED: | 285 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | 286 | case RDMA_CM_EVENT_ROUTE_RESOLVED: |
287 | ia->ri_async_rc = 0; | ||
287 | complete(&ia->ri_done); | 288 | complete(&ia->ri_done); |
288 | break; | 289 | break; |
289 | case RDMA_CM_EVENT_ADDR_ERROR: | 290 | case RDMA_CM_EVENT_ADDR_ERROR: |
@@ -338,13 +339,32 @@ connected: | |||
338 | wake_up_all(&ep->rep_connect_wait); | 339 | wake_up_all(&ep->rep_connect_wait); |
339 | break; | 340 | break; |
340 | default: | 341 | default: |
341 | ia->ri_async_rc = -EINVAL; | 342 | dprintk("RPC: %s: unexpected CM event %d\n", |
342 | dprintk("RPC: %s: unexpected CM event %X\n", | ||
343 | __func__, event->event); | 343 | __func__, event->event); |
344 | complete(&ia->ri_done); | ||
345 | break; | 344 | break; |
346 | } | 345 | } |
347 | 346 | ||
347 | #ifdef RPC_DEBUG | ||
348 | if (connstate == 1) { | ||
349 | int ird = attr.max_dest_rd_atomic; | ||
350 | int tird = ep->rep_remote_cma.responder_resources; | ||
351 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
352 | "on %s, memreg %d slots %d ird %d%s\n", | ||
353 | NIPQUAD(addr->sin_addr.s_addr), | ||
354 | ntohs(addr->sin_port), | ||
355 | ia->ri_id->device->name, | ||
356 | ia->ri_memreg_strategy, | ||
357 | xprt->rx_buf.rb_max_requests, | ||
358 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | ||
359 | } else if (connstate < 0) { | ||
360 | printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " | ||
361 | "closed (%d)\n", | ||
362 | NIPQUAD(addr->sin_addr.s_addr), | ||
363 | ntohs(addr->sin_port), | ||
364 | connstate); | ||
365 | } | ||
366 | #endif | ||
367 | |||
348 | return 0; | 368 | return 0; |
349 | } | 369 | } |
350 | 370 | ||
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
355 | struct rdma_cm_id *id; | 375 | struct rdma_cm_id *id; |
356 | int rc; | 376 | int rc; |
357 | 377 | ||
378 | init_completion(&ia->ri_done); | ||
379 | |||
358 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); | 380 | id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); |
359 | if (IS_ERR(id)) { | 381 | if (IS_ERR(id)) { |
360 | rc = PTR_ERR(id); | 382 | rc = PTR_ERR(id); |
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, | |||
363 | return id; | 385 | return id; |
364 | } | 386 | } |
365 | 387 | ||
366 | ia->ri_async_rc = 0; | 388 | ia->ri_async_rc = -ETIMEDOUT; |
367 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); | 389 | rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); |
368 | if (rc) { | 390 | if (rc) { |
369 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", | 391 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", |
370 | __func__, rc); | 392 | __func__, rc); |
371 | goto out; | 393 | goto out; |
372 | } | 394 | } |
373 | wait_for_completion(&ia->ri_done); | 395 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
396 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
374 | rc = ia->ri_async_rc; | 397 | rc = ia->ri_async_rc; |
375 | if (rc) | 398 | if (rc) |
376 | goto out; | 399 | goto out; |
377 | 400 | ||
378 | ia->ri_async_rc = 0; | 401 | ia->ri_async_rc = -ETIMEDOUT; |
379 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); | 402 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); |
380 | if (rc) { | 403 | if (rc) { |
381 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", | 404 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", |
382 | __func__, rc); | 405 | __func__, rc); |
383 | goto out; | 406 | goto out; |
384 | } | 407 | } |
385 | wait_for_completion(&ia->ri_done); | 408 | wait_for_completion_interruptible_timeout(&ia->ri_done, |
409 | msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); | ||
386 | rc = ia->ri_async_rc; | 410 | rc = ia->ri_async_rc; |
387 | if (rc) | 411 | if (rc) |
388 | goto out; | 412 | goto out; |
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq) | |||
423 | int | 447 | int |
424 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | 448 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) |
425 | { | 449 | { |
426 | int rc; | 450 | int rc, mem_priv; |
451 | struct ib_device_attr devattr; | ||
427 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 452 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
428 | 453 | ||
429 | init_completion(&ia->ri_done); | ||
430 | |||
431 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); | 454 | ia->ri_id = rpcrdma_create_id(xprt, ia, addr); |
432 | if (IS_ERR(ia->ri_id)) { | 455 | if (IS_ERR(ia->ri_id)) { |
433 | rc = PTR_ERR(ia->ri_id); | 456 | rc = PTR_ERR(ia->ri_id); |
@@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
443 | } | 466 | } |
444 | 467 | ||
445 | /* | 468 | /* |
469 | * Query the device to determine if the requested memory | ||
470 | * registration strategy is supported. If it isn't, set the | ||
471 | * strategy to a globally supported model. | ||
472 | */ | ||
473 | rc = ib_query_device(ia->ri_id->device, &devattr); | ||
474 | if (rc) { | ||
475 | dprintk("RPC: %s: ib_query_device failed %d\n", | ||
476 | __func__, rc); | ||
477 | goto out2; | ||
478 | } | ||
479 | |||
480 | if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { | ||
481 | ia->ri_have_dma_lkey = 1; | ||
482 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; | ||
483 | } | ||
484 | |||
485 | switch (memreg) { | ||
486 | case RPCRDMA_MEMWINDOWS: | ||
487 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
488 | if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { | ||
489 | dprintk("RPC: %s: MEMWINDOWS registration " | ||
490 | "specified but not supported by adapter, " | ||
491 | "using slower RPCRDMA_REGISTER\n", | ||
492 | __func__); | ||
493 | memreg = RPCRDMA_REGISTER; | ||
494 | } | ||
495 | break; | ||
496 | case RPCRDMA_MTHCAFMR: | ||
497 | if (!ia->ri_id->device->alloc_fmr) { | ||
498 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
499 | dprintk("RPC: %s: MTHCAFMR registration " | ||
500 | "specified but not supported by adapter, " | ||
501 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
502 | __func__); | ||
503 | memreg = RPCRDMA_ALLPHYSICAL; | ||
504 | #else | ||
505 | dprintk("RPC: %s: MTHCAFMR registration " | ||
506 | "specified but not supported by adapter, " | ||
507 | "using slower RPCRDMA_REGISTER\n", | ||
508 | __func__); | ||
509 | memreg = RPCRDMA_REGISTER; | ||
510 | #endif | ||
511 | } | ||
512 | break; | ||
513 | case RPCRDMA_FRMR: | ||
514 | /* Requires both frmr reg and local dma lkey */ | ||
515 | if ((devattr.device_cap_flags & | ||
516 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | ||
517 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
518 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
519 | dprintk("RPC: %s: FRMR registration " | ||
520 | "specified but not supported by adapter, " | ||
521 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
522 | __func__); | ||
523 | memreg = RPCRDMA_ALLPHYSICAL; | ||
524 | #else | ||
525 | dprintk("RPC: %s: FRMR registration " | ||
526 | "specified but not supported by adapter, " | ||
527 | "using slower RPCRDMA_REGISTER\n", | ||
528 | __func__); | ||
529 | memreg = RPCRDMA_REGISTER; | ||
530 | #endif | ||
531 | } | ||
532 | break; | ||
533 | } | ||
534 | |||
535 | /* | ||
446 | * Optionally obtain an underlying physical identity mapping in | 536 | * Optionally obtain an underlying physical identity mapping in |
447 | * order to do a memory window-based bind. This base registration | 537 | * order to do a memory window-based bind. This base registration |
448 | * is protected from remote access - that is enabled only by binding | 538 | * is protected from remote access - that is enabled only by binding |
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
450 | * revoked after the corresponding completion similar to a storage | 540 | * revoked after the corresponding completion similar to a storage |
451 | * adapter. | 541 | * adapter. |
452 | */ | 542 | */ |
453 | if (memreg > RPCRDMA_REGISTER) { | 543 | switch (memreg) { |
454 | int mem_priv = IB_ACCESS_LOCAL_WRITE; | 544 | case RPCRDMA_BOUNCEBUFFERS: |
455 | switch (memreg) { | 545 | case RPCRDMA_REGISTER: |
546 | case RPCRDMA_FRMR: | ||
547 | break; | ||
456 | #if RPCRDMA_PERSISTENT_REGISTRATION | 548 | #if RPCRDMA_PERSISTENT_REGISTRATION |
457 | case RPCRDMA_ALLPHYSICAL: | 549 | case RPCRDMA_ALLPHYSICAL: |
458 | mem_priv |= IB_ACCESS_REMOTE_WRITE; | 550 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
459 | mem_priv |= IB_ACCESS_REMOTE_READ; | 551 | IB_ACCESS_REMOTE_WRITE | |
460 | break; | 552 | IB_ACCESS_REMOTE_READ; |
553 | goto register_setup; | ||
461 | #endif | 554 | #endif |
462 | case RPCRDMA_MEMWINDOWS_ASYNC: | 555 | case RPCRDMA_MEMWINDOWS_ASYNC: |
463 | case RPCRDMA_MEMWINDOWS: | 556 | case RPCRDMA_MEMWINDOWS: |
464 | mem_priv |= IB_ACCESS_MW_BIND; | 557 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
465 | break; | 558 | IB_ACCESS_MW_BIND; |
466 | default: | 559 | goto register_setup; |
560 | case RPCRDMA_MTHCAFMR: | ||
561 | if (ia->ri_have_dma_lkey) | ||
467 | break; | 562 | break; |
468 | } | 563 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
564 | register_setup: | ||
469 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); | 565 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); |
470 | if (IS_ERR(ia->ri_bind_mem)) { | 566 | if (IS_ERR(ia->ri_bind_mem)) { |
471 | printk(KERN_ALERT "%s: ib_get_dma_mr for " | 567 | printk(KERN_ALERT "%s: ib_get_dma_mr for " |
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
475 | memreg = RPCRDMA_REGISTER; | 571 | memreg = RPCRDMA_REGISTER; |
476 | ia->ri_bind_mem = NULL; | 572 | ia->ri_bind_mem = NULL; |
477 | } | 573 | } |
574 | break; | ||
575 | default: | ||
576 | printk(KERN_ERR "%s: invalid memory registration mode %d\n", | ||
577 | __func__, memreg); | ||
578 | rc = -EINVAL; | ||
579 | goto out2; | ||
478 | } | 580 | } |
581 | dprintk("RPC: %s: memory registration strategy is %d\n", | ||
582 | __func__, memreg); | ||
479 | 583 | ||
480 | /* Else will do memory reg/dereg for each chunk */ | 584 | /* Else will do memory reg/dereg for each chunk */ |
481 | ia->ri_memreg_strategy = memreg; | 585 | ia->ri_memreg_strategy = memreg; |
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
483 | return 0; | 587 | return 0; |
484 | out2: | 588 | out2: |
485 | rdma_destroy_id(ia->ri_id); | 589 | rdma_destroy_id(ia->ri_id); |
590 | ia->ri_id = NULL; | ||
486 | out1: | 591 | out1: |
487 | return rc; | 592 | return rc; |
488 | } | 593 | } |
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) | |||
503 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", | 608 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", |
504 | __func__, rc); | 609 | __func__, rc); |
505 | } | 610 | } |
506 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) | 611 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
507 | rdma_destroy_qp(ia->ri_id); | 612 | if (ia->ri_id->qp) |
613 | rdma_destroy_qp(ia->ri_id); | ||
614 | rdma_destroy_id(ia->ri_id); | ||
615 | ia->ri_id = NULL; | ||
616 | } | ||
508 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { | 617 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { |
509 | rc = ib_dealloc_pd(ia->ri_pd); | 618 | rc = ib_dealloc_pd(ia->ri_pd); |
510 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", | 619 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", |
511 | __func__, rc); | 620 | __func__, rc); |
512 | } | 621 | } |
513 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) | ||
514 | rdma_destroy_id(ia->ri_id); | ||
515 | } | 622 | } |
516 | 623 | ||
517 | /* | 624 | /* |
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
541 | ep->rep_attr.srq = NULL; | 648 | ep->rep_attr.srq = NULL; |
542 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 649 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
543 | switch (ia->ri_memreg_strategy) { | 650 | switch (ia->ri_memreg_strategy) { |
651 | case RPCRDMA_FRMR: | ||
652 | /* Add room for frmr register and invalidate WRs */ | ||
653 | ep->rep_attr.cap.max_send_wr *= 3; | ||
654 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) | ||
655 | return -EINVAL; | ||
656 | break; | ||
544 | case RPCRDMA_MEMWINDOWS_ASYNC: | 657 | case RPCRDMA_MEMWINDOWS_ASYNC: |
545 | case RPCRDMA_MEMWINDOWS: | 658 | case RPCRDMA_MEMWINDOWS: |
546 | /* Add room for mw_binds+unbinds - overkill! */ | 659 | /* Add room for mw_binds+unbinds - overkill! */ |
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
617 | ep->rep_remote_cma.private_data_len = 0; | 730 | ep->rep_remote_cma.private_data_len = 0; |
618 | 731 | ||
619 | /* Client offers RDMA Read but does not initiate */ | 732 | /* Client offers RDMA Read but does not initiate */ |
620 | switch (ia->ri_memreg_strategy) { | 733 | ep->rep_remote_cma.initiator_depth = 0; |
621 | case RPCRDMA_BOUNCEBUFFERS: | 734 | if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) |
622 | ep->rep_remote_cma.responder_resources = 0; | 735 | ep->rep_remote_cma.responder_resources = 0; |
623 | break; | 736 | else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ |
624 | case RPCRDMA_MTHCAFMR: | 737 | ep->rep_remote_cma.responder_resources = 32; |
625 | case RPCRDMA_REGISTER: | 738 | else |
626 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
627 | (RPCRDMA_MAX_DATA_SEGS / 8); | ||
628 | break; | ||
629 | case RPCRDMA_MEMWINDOWS: | ||
630 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
631 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
632 | case RPCRDMA_ALLPHYSICAL: | ||
633 | #endif | ||
634 | ep->rep_remote_cma.responder_resources = cdata->max_requests * | ||
635 | (RPCRDMA_MAX_DATA_SEGS / 2); | ||
636 | break; | ||
637 | default: | ||
638 | break; | ||
639 | } | ||
640 | if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) | ||
641 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; | 739 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; |
642 | ep->rep_remote_cma.initiator_depth = 0; | ||
643 | 740 | ||
644 | ep->rep_remote_cma.retry_count = 7; | 741 | ep->rep_remote_cma.retry_count = 7; |
645 | ep->rep_remote_cma.flow_control = 0; | 742 | ep->rep_remote_cma.flow_control = 0; |
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
679 | if (rc) | 776 | if (rc) |
680 | dprintk("RPC: %s: rpcrdma_ep_disconnect" | 777 | dprintk("RPC: %s: rpcrdma_ep_disconnect" |
681 | " returned %i\n", __func__, rc); | 778 | " returned %i\n", __func__, rc); |
779 | rdma_destroy_qp(ia->ri_id); | ||
780 | ia->ri_id->qp = NULL; | ||
682 | } | 781 | } |
683 | 782 | ||
684 | ep->rep_func = NULL; | ||
685 | |||
686 | /* padding - could be done in rpcrdma_buffer_destroy... */ | 783 | /* padding - could be done in rpcrdma_buffer_destroy... */ |
687 | if (ep->rep_pad_mr) { | 784 | if (ep->rep_pad_mr) { |
688 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); | 785 | rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); |
689 | ep->rep_pad_mr = NULL; | 786 | ep->rep_pad_mr = NULL; |
690 | } | 787 | } |
691 | 788 | ||
692 | if (ia->ri_id->qp) { | ||
693 | rdma_destroy_qp(ia->ri_id); | ||
694 | ia->ri_id->qp = NULL; | ||
695 | } | ||
696 | |||
697 | rpcrdma_clean_cq(ep->rep_cq); | 789 | rpcrdma_clean_cq(ep->rep_cq); |
698 | rc = ib_destroy_cq(ep->rep_cq); | 790 | rc = ib_destroy_cq(ep->rep_cq); |
699 | if (rc) | 791 | if (rc) |
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
712 | struct rdma_cm_id *id; | 804 | struct rdma_cm_id *id; |
713 | int rc = 0; | 805 | int rc = 0; |
714 | int retry_count = 0; | 806 | int retry_count = 0; |
715 | int reconnect = (ep->rep_connected != 0); | ||
716 | 807 | ||
717 | if (reconnect) { | 808 | if (ep->rep_connected != 0) { |
718 | struct rpcrdma_xprt *xprt; | 809 | struct rpcrdma_xprt *xprt; |
719 | retry: | 810 | retry: |
720 | rc = rpcrdma_ep_disconnect(ep, ia); | 811 | rc = rpcrdma_ep_disconnect(ep, ia); |
@@ -745,6 +836,7 @@ retry: | |||
745 | goto out; | 836 | goto out; |
746 | } | 837 | } |
747 | /* END TEMP */ | 838 | /* END TEMP */ |
839 | rdma_destroy_qp(ia->ri_id); | ||
748 | rdma_destroy_id(ia->ri_id); | 840 | rdma_destroy_id(ia->ri_id); |
749 | ia->ri_id = id; | 841 | ia->ri_id = id; |
750 | } | 842 | } |
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
769 | } | 861 | } |
770 | } | 862 | } |
771 | 863 | ||
772 | /* Theoretically a client initiator_depth > 0 is not needed, | ||
773 | * but many peers fail to complete the connection unless they | ||
774 | * == responder_resources! */ | ||
775 | if (ep->rep_remote_cma.initiator_depth != | ||
776 | ep->rep_remote_cma.responder_resources) | ||
777 | ep->rep_remote_cma.initiator_depth = | ||
778 | ep->rep_remote_cma.responder_resources; | ||
779 | |||
780 | ep->rep_connected = 0; | 864 | ep->rep_connected = 0; |
781 | 865 | ||
782 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); | 866 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
786 | goto out; | 870 | goto out; |
787 | } | 871 | } |
788 | 872 | ||
789 | if (reconnect) | ||
790 | return 0; | ||
791 | |||
792 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); | 873 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
793 | 874 | ||
794 | /* | 875 | /* |
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | |||
805 | if (ep->rep_connected <= 0) { | 886 | if (ep->rep_connected <= 0) { |
806 | /* Sometimes, the only way to reliably connect to remote | 887 | /* Sometimes, the only way to reliably connect to remote |
807 | * CMs is to use same nonzero values for ORD and IRD. */ | 888 | * CMs is to use same nonzero values for ORD and IRD. */ |
808 | ep->rep_remote_cma.initiator_depth = | 889 | if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && |
809 | ep->rep_remote_cma.responder_resources; | 890 | (ep->rep_remote_cma.responder_resources == 0 || |
810 | if (ep->rep_remote_cma.initiator_depth == 0) | 891 | ep->rep_remote_cma.initiator_depth != |
811 | ++ep->rep_remote_cma.initiator_depth; | 892 | ep->rep_remote_cma.responder_resources)) { |
812 | if (ep->rep_remote_cma.responder_resources == 0) | 893 | if (ep->rep_remote_cma.responder_resources == 0) |
813 | ++ep->rep_remote_cma.responder_resources; | 894 | ep->rep_remote_cma.responder_resources = 1; |
814 | if (retry_count++ == 0) | 895 | ep->rep_remote_cma.initiator_depth = |
896 | ep->rep_remote_cma.responder_resources; | ||
815 | goto retry; | 897 | goto retry; |
898 | } | ||
816 | rc = ep->rep_connected; | 899 | rc = ep->rep_connected; |
817 | } else { | 900 | } else { |
818 | dprintk("RPC: %s: connected\n", __func__); | 901 | dprintk("RPC: %s: connected\n", __func__); |
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
863 | char *p; | 946 | char *p; |
864 | size_t len; | 947 | size_t len; |
865 | int i, rc; | 948 | int i, rc; |
949 | struct rpcrdma_mw *r; | ||
866 | 950 | ||
867 | buf->rb_max_requests = cdata->max_requests; | 951 | buf->rb_max_requests = cdata->max_requests; |
868 | spin_lock_init(&buf->rb_lock); | 952 | spin_lock_init(&buf->rb_lock); |
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
873 | * 2. arrays of struct rpcrdma_req to fill in pointers | 957 | * 2. arrays of struct rpcrdma_req to fill in pointers |
874 | * 3. array of struct rpcrdma_rep for replies | 958 | * 3. array of struct rpcrdma_rep for replies |
875 | * 4. padding, if any | 959 | * 4. padding, if any |
876 | * 5. mw's, if any | 960 | * 5. mw's, fmr's or frmr's, if any |
877 | * Send/recv buffers in req/rep need to be registered | 961 | * Send/recv buffers in req/rep need to be registered |
878 | */ | 962 | */ |
879 | 963 | ||
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
881 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); | 965 | (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); |
882 | len += cdata->padding; | 966 | len += cdata->padding; |
883 | switch (ia->ri_memreg_strategy) { | 967 | switch (ia->ri_memreg_strategy) { |
968 | case RPCRDMA_FRMR: | ||
969 | len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * | ||
970 | sizeof(struct rpcrdma_mw); | ||
971 | break; | ||
884 | case RPCRDMA_MTHCAFMR: | 972 | case RPCRDMA_MTHCAFMR: |
885 | /* TBD we are perhaps overallocating here */ | 973 | /* TBD we are perhaps overallocating here */ |
886 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | 974 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * |
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
927 | * and also reduce unbind-to-bind collision. | 1015 | * and also reduce unbind-to-bind collision. |
928 | */ | 1016 | */ |
929 | INIT_LIST_HEAD(&buf->rb_mws); | 1017 | INIT_LIST_HEAD(&buf->rb_mws); |
1018 | r = (struct rpcrdma_mw *)p; | ||
930 | switch (ia->ri_memreg_strategy) { | 1019 | switch (ia->ri_memreg_strategy) { |
1020 | case RPCRDMA_FRMR: | ||
1021 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | ||
1022 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1023 | RPCRDMA_MAX_SEGS); | ||
1024 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1025 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1026 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1027 | " failed %i\n", __func__, rc); | ||
1028 | goto out; | ||
1029 | } | ||
1030 | r->r.frmr.fr_pgl = | ||
1031 | ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1032 | RPCRDMA_MAX_SEGS); | ||
1033 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1034 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1035 | dprintk("RPC: %s: " | ||
1036 | "ib_alloc_fast_reg_page_list " | ||
1037 | "failed %i\n", __func__, rc); | ||
1038 | goto out; | ||
1039 | } | ||
1040 | list_add(&r->mw_list, &buf->rb_mws); | ||
1041 | ++r; | ||
1042 | } | ||
1043 | break; | ||
931 | case RPCRDMA_MTHCAFMR: | 1044 | case RPCRDMA_MTHCAFMR: |
932 | { | ||
933 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
934 | struct ib_fmr_attr fa = { | ||
935 | RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT | ||
936 | }; | ||
937 | /* TBD we are perhaps overallocating here */ | 1045 | /* TBD we are perhaps overallocating here */ |
938 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1046 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
1047 | static struct ib_fmr_attr fa = | ||
1048 | { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT }; | ||
939 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, | 1049 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, |
940 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, | 1050 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, |
941 | &fa); | 1051 | &fa); |
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
948 | list_add(&r->mw_list, &buf->rb_mws); | 1058 | list_add(&r->mw_list, &buf->rb_mws); |
949 | ++r; | 1059 | ++r; |
950 | } | 1060 | } |
951 | } | ||
952 | break; | 1061 | break; |
953 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1062 | case RPCRDMA_MEMWINDOWS_ASYNC: |
954 | case RPCRDMA_MEMWINDOWS: | 1063 | case RPCRDMA_MEMWINDOWS: |
955 | { | ||
956 | struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; | ||
957 | /* Allocate one extra request's worth, for full cycling */ | 1064 | /* Allocate one extra request's worth, for full cycling */ |
958 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | 1065 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { |
959 | r->r.mw = ib_alloc_mw(ia->ri_pd); | 1066 | r->r.mw = ib_alloc_mw(ia->ri_pd); |
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
966 | list_add(&r->mw_list, &buf->rb_mws); | 1073 | list_add(&r->mw_list, &buf->rb_mws); |
967 | ++r; | 1074 | ++r; |
968 | } | 1075 | } |
969 | } | ||
970 | break; | 1076 | break; |
971 | default: | 1077 | default: |
972 | break; | 1078 | break; |
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1046 | { | 1152 | { |
1047 | int rc, i; | 1153 | int rc, i; |
1048 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | 1154 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); |
1155 | struct rpcrdma_mw *r; | ||
1049 | 1156 | ||
1050 | /* clean up in reverse order from create | 1157 | /* clean up in reverse order from create |
1051 | * 1. recv mr memory (mr free, then kfree) | 1158 | * 1. recv mr memory (mr free, then kfree) |
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1065 | } | 1172 | } |
1066 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { | 1173 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { |
1067 | while (!list_empty(&buf->rb_mws)) { | 1174 | while (!list_empty(&buf->rb_mws)) { |
1068 | struct rpcrdma_mw *r; | ||
1069 | r = list_entry(buf->rb_mws.next, | 1175 | r = list_entry(buf->rb_mws.next, |
1070 | struct rpcrdma_mw, mw_list); | 1176 | struct rpcrdma_mw, mw_list); |
1071 | list_del(&r->mw_list); | 1177 | list_del(&r->mw_list); |
1072 | switch (ia->ri_memreg_strategy) { | 1178 | switch (ia->ri_memreg_strategy) { |
1179 | case RPCRDMA_FRMR: | ||
1180 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1181 | if (rc) | ||
1182 | dprintk("RPC: %s:" | ||
1183 | " ib_dereg_mr" | ||
1184 | " failed %i\n", | ||
1185 | __func__, rc); | ||
1186 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1187 | break; | ||
1073 | case RPCRDMA_MTHCAFMR: | 1188 | case RPCRDMA_MTHCAFMR: |
1074 | rc = ib_dealloc_fmr(r->r.fmr); | 1189 | rc = ib_dealloc_fmr(r->r.fmr); |
1075 | if (rc) | 1190 | if (rc) |
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1115 | { | 1230 | { |
1116 | struct rpcrdma_req *req; | 1231 | struct rpcrdma_req *req; |
1117 | unsigned long flags; | 1232 | unsigned long flags; |
1233 | int i; | ||
1234 | struct rpcrdma_mw *r; | ||
1118 | 1235 | ||
1119 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1236 | spin_lock_irqsave(&buffers->rb_lock, flags); |
1120 | if (buffers->rb_send_index == buffers->rb_max_requests) { | 1237 | if (buffers->rb_send_index == buffers->rb_max_requests) { |
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1135 | } | 1252 | } |
1136 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; | 1253 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; |
1137 | if (!list_empty(&buffers->rb_mws)) { | 1254 | if (!list_empty(&buffers->rb_mws)) { |
1138 | int i = RPCRDMA_MAX_SEGS - 1; | 1255 | i = RPCRDMA_MAX_SEGS - 1; |
1139 | do { | 1256 | do { |
1140 | struct rpcrdma_mw *r; | ||
1141 | r = list_entry(buffers->rb_mws.next, | 1257 | r = list_entry(buffers->rb_mws.next, |
1142 | struct rpcrdma_mw, mw_list); | 1258 | struct rpcrdma_mw, mw_list); |
1143 | list_del(&r->mw_list); | 1259 | list_del(&r->mw_list); |
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) | |||
1171 | req->rl_reply = NULL; | 1287 | req->rl_reply = NULL; |
1172 | } | 1288 | } |
1173 | switch (ia->ri_memreg_strategy) { | 1289 | switch (ia->ri_memreg_strategy) { |
1290 | case RPCRDMA_FRMR: | ||
1174 | case RPCRDMA_MTHCAFMR: | 1291 | case RPCRDMA_MTHCAFMR: |
1175 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1292 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1176 | case RPCRDMA_MEMWINDOWS: | 1293 | case RPCRDMA_MEMWINDOWS: |
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | |||
1252 | va, len, DMA_BIDIRECTIONAL); | 1369 | va, len, DMA_BIDIRECTIONAL); |
1253 | iov->length = len; | 1370 | iov->length = len; |
1254 | 1371 | ||
1255 | if (ia->ri_bind_mem != NULL) { | 1372 | if (ia->ri_have_dma_lkey) { |
1373 | *mrp = NULL; | ||
1374 | iov->lkey = ia->ri_dma_lkey; | ||
1375 | return 0; | ||
1376 | } else if (ia->ri_bind_mem != NULL) { | ||
1256 | *mrp = NULL; | 1377 | *mrp = NULL; |
1257 | iov->lkey = ia->ri_bind_mem->lkey; | 1378 | iov->lkey = ia->ri_bind_mem->lkey; |
1258 | return 0; | 1379 | return 0; |
@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | |||
1329 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | 1450 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); |
1330 | } | 1451 | } |
1331 | 1452 | ||
1453 | static int | ||
1454 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1455 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1456 | struct rpcrdma_xprt *r_xprt) | ||
1457 | { | ||
1458 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1459 | struct ib_send_wr frmr_wr, *bad_wr; | ||
1460 | u8 key; | ||
1461 | int len, pageoff; | ||
1462 | int i, rc; | ||
1463 | |||
1464 | pageoff = offset_in_page(seg1->mr_offset); | ||
1465 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1466 | seg1->mr_len += pageoff; | ||
1467 | len = -pageoff; | ||
1468 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1469 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1470 | for (i = 0; i < *nsegs;) { | ||
1471 | rpcrdma_map_one(ia, seg, writing); | ||
1472 | seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; | ||
1473 | len += seg->mr_len; | ||
1474 | ++seg; | ||
1475 | ++i; | ||
1476 | /* Check for holes */ | ||
1477 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1478 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1479 | break; | ||
1480 | } | ||
1481 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
1482 | __func__, seg1->mr_chunk.rl_mw, i); | ||
1483 | |||
1484 | /* Bump the key */ | ||
1485 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
1486 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
1487 | |||
1488 | /* Prepare FRMR WR */ | ||
1489 | memset(&frmr_wr, 0, sizeof frmr_wr); | ||
1490 | frmr_wr.opcode = IB_WR_FAST_REG_MR; | ||
1491 | frmr_wr.send_flags = 0; /* unsignaled */ | ||
1492 | frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; | ||
1493 | frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; | ||
1494 | frmr_wr.wr.fast_reg.page_list_len = i; | ||
1495 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1496 | frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; | ||
1497 | frmr_wr.wr.fast_reg.access_flags = (writing ? | ||
1498 | IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); | ||
1499 | frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1500 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1501 | |||
1502 | rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); | ||
1503 | |||
1504 | if (rc) { | ||
1505 | dprintk("RPC: %s: failed ib_post_send for register," | ||
1506 | " status %i\n", __func__, rc); | ||
1507 | while (i--) | ||
1508 | rpcrdma_unmap_one(ia, --seg); | ||
1509 | } else { | ||
1510 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1511 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1512 | seg1->mr_nsegs = i; | ||
1513 | seg1->mr_len = len; | ||
1514 | } | ||
1515 | *nsegs = i; | ||
1516 | return rc; | ||
1517 | } | ||
1518 | |||
1519 | static int | ||
1520 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1521 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
1522 | { | ||
1523 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1524 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
1525 | int rc; | ||
1526 | |||
1527 | while (seg1->mr_nsegs--) | ||
1528 | rpcrdma_unmap_one(ia, seg++); | ||
1529 | |||
1530 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
1531 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
1532 | invalidate_wr.send_flags = 0; /* unsignaled */ | ||
1533 | invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; | ||
1534 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1535 | |||
1536 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
1537 | if (rc) | ||
1538 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
1539 | " status %i\n", __func__, rc); | ||
1540 | return rc; | ||
1541 | } | ||
1542 | |||
1543 | static int | ||
1544 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
1545 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
1546 | { | ||
1547 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1548 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
1549 | int len, pageoff, i, rc; | ||
1550 | |||
1551 | pageoff = offset_in_page(seg1->mr_offset); | ||
1552 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1553 | seg1->mr_len += pageoff; | ||
1554 | len = -pageoff; | ||
1555 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1556 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1557 | for (i = 0; i < *nsegs;) { | ||
1558 | rpcrdma_map_one(ia, seg, writing); | ||
1559 | physaddrs[i] = seg->mr_dma; | ||
1560 | len += seg->mr_len; | ||
1561 | ++seg; | ||
1562 | ++i; | ||
1563 | /* Check for holes */ | ||
1564 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1565 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1566 | break; | ||
1567 | } | ||
1568 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
1569 | physaddrs, i, seg1->mr_dma); | ||
1570 | if (rc) { | ||
1571 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
1572 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
1573 | len, (unsigned long long)seg1->mr_dma, | ||
1574 | pageoff, i, rc); | ||
1575 | while (i--) | ||
1576 | rpcrdma_unmap_one(ia, --seg); | ||
1577 | } else { | ||
1578 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
1579 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1580 | seg1->mr_nsegs = i; | ||
1581 | seg1->mr_len = len; | ||
1582 | } | ||
1583 | *nsegs = i; | ||
1584 | return rc; | ||
1585 | } | ||
1586 | |||
1587 | static int | ||
1588 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
1589 | struct rpcrdma_ia *ia) | ||
1590 | { | ||
1591 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1592 | LIST_HEAD(l); | ||
1593 | int rc; | ||
1594 | |||
1595 | list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); | ||
1596 | rc = ib_unmap_fmr(&l); | ||
1597 | while (seg1->mr_nsegs--) | ||
1598 | rpcrdma_unmap_one(ia, seg++); | ||
1599 | if (rc) | ||
1600 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
1601 | " status %i\n", __func__, rc); | ||
1602 | return rc; | ||
1603 | } | ||
1604 | |||
1605 | static int | ||
1606 | rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1607 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1608 | struct rpcrdma_xprt *r_xprt) | ||
1609 | { | ||
1610 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1611 | IB_ACCESS_REMOTE_READ); | ||
1612 | struct ib_mw_bind param; | ||
1613 | int rc; | ||
1614 | |||
1615 | *nsegs = 1; | ||
1616 | rpcrdma_map_one(ia, seg, writing); | ||
1617 | param.mr = ia->ri_bind_mem; | ||
1618 | param.wr_id = 0ULL; /* no send cookie */ | ||
1619 | param.addr = seg->mr_dma; | ||
1620 | param.length = seg->mr_len; | ||
1621 | param.send_flags = 0; | ||
1622 | param.mw_access_flags = mem_priv; | ||
1623 | |||
1624 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1625 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param); | ||
1626 | if (rc) { | ||
1627 | dprintk("RPC: %s: failed ib_bind_mw " | ||
1628 | "%u@0x%llx status %i\n", | ||
1629 | __func__, seg->mr_len, | ||
1630 | (unsigned long long)seg->mr_dma, rc); | ||
1631 | rpcrdma_unmap_one(ia, seg); | ||
1632 | } else { | ||
1633 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
1634 | seg->mr_base = param.addr; | ||
1635 | seg->mr_nsegs = 1; | ||
1636 | } | ||
1637 | return rc; | ||
1638 | } | ||
1639 | |||
1640 | static int | ||
1641 | rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, | ||
1642 | struct rpcrdma_ia *ia, | ||
1643 | struct rpcrdma_xprt *r_xprt, void **r) | ||
1644 | { | ||
1645 | struct ib_mw_bind param; | ||
1646 | LIST_HEAD(l); | ||
1647 | int rc; | ||
1648 | |||
1649 | BUG_ON(seg->mr_nsegs != 1); | ||
1650 | param.mr = ia->ri_bind_mem; | ||
1651 | param.addr = 0ULL; /* unbind */ | ||
1652 | param.length = 0; | ||
1653 | param.mw_access_flags = 0; | ||
1654 | if (*r) { | ||
1655 | param.wr_id = (u64) (unsigned long) *r; | ||
1656 | param.send_flags = IB_SEND_SIGNALED; | ||
1657 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
1658 | } else { | ||
1659 | param.wr_id = 0ULL; | ||
1660 | param.send_flags = 0; | ||
1661 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1662 | } | ||
1663 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param); | ||
1664 | rpcrdma_unmap_one(ia, seg); | ||
1665 | if (rc) | ||
1666 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
1667 | " status %i\n", __func__, rc); | ||
1668 | else | ||
1669 | *r = NULL; /* will upcall on completion */ | ||
1670 | return rc; | ||
1671 | } | ||
1672 | |||
1673 | static int | ||
1674 | rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, | ||
1675 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
1676 | { | ||
1677 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1678 | IB_ACCESS_REMOTE_READ); | ||
1679 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1680 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
1681 | int len, i, rc = 0; | ||
1682 | |||
1683 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1684 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1685 | for (len = 0, i = 0; i < *nsegs;) { | ||
1686 | rpcrdma_map_one(ia, seg, writing); | ||
1687 | ipb[i].addr = seg->mr_dma; | ||
1688 | ipb[i].size = seg->mr_len; | ||
1689 | len += seg->mr_len; | ||
1690 | ++seg; | ||
1691 | ++i; | ||
1692 | /* Check for holes */ | ||
1693 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1694 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1695 | break; | ||
1696 | } | ||
1697 | seg1->mr_base = seg1->mr_dma; | ||
1698 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
1699 | ipb, i, mem_priv, &seg1->mr_base); | ||
1700 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
1701 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
1702 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
1703 | "%u@0x%llx (%d)... status %i\n", | ||
1704 | __func__, len, | ||
1705 | (unsigned long long)seg1->mr_dma, i, rc); | ||
1706 | while (i--) | ||
1707 | rpcrdma_unmap_one(ia, --seg); | ||
1708 | } else { | ||
1709 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
1710 | seg1->mr_nsegs = i; | ||
1711 | seg1->mr_len = len; | ||
1712 | } | ||
1713 | *nsegs = i; | ||
1714 | return rc; | ||
1715 | } | ||
1716 | |||
1717 | static int | ||
1718 | rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, | ||
1719 | struct rpcrdma_ia *ia) | ||
1720 | { | ||
1721 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1722 | int rc; | ||
1723 | |||
1724 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | ||
1725 | seg1->mr_chunk.rl_mr = NULL; | ||
1726 | while (seg1->mr_nsegs--) | ||
1727 | rpcrdma_unmap_one(ia, seg++); | ||
1728 | if (rc) | ||
1729 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
1730 | " status %i\n", __func__, rc); | ||
1731 | return rc; | ||
1732 | } | ||
1733 | |||
1332 | int | 1734 | int |
1333 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | 1735 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, |
1334 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | 1736 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) |
1335 | { | 1737 | { |
1336 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1738 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1337 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
1338 | IB_ACCESS_REMOTE_READ); | ||
1339 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1340 | int i; | ||
1341 | int rc = 0; | 1739 | int rc = 0; |
1342 | 1740 | ||
1343 | switch (ia->ri_memreg_strategy) { | 1741 | switch (ia->ri_memreg_strategy) { |
@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
1352 | break; | 1750 | break; |
1353 | #endif | 1751 | #endif |
1354 | 1752 | ||
1355 | /* Registration using fast memory registration */ | 1753 | /* Registration using frmr registration */ |
1754 | case RPCRDMA_FRMR: | ||
1755 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
1756 | break; | ||
1757 | |||
1758 | /* Registration using fmr memory registration */ | ||
1356 | case RPCRDMA_MTHCAFMR: | 1759 | case RPCRDMA_MTHCAFMR: |
1357 | { | 1760 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); |
1358 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
1359 | int len, pageoff = offset_in_page(seg->mr_offset); | ||
1360 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1361 | seg1->mr_len += pageoff; | ||
1362 | len = -pageoff; | ||
1363 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1364 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1365 | for (i = 0; i < nsegs;) { | ||
1366 | rpcrdma_map_one(ia, seg, writing); | ||
1367 | physaddrs[i] = seg->mr_dma; | ||
1368 | len += seg->mr_len; | ||
1369 | ++seg; | ||
1370 | ++i; | ||
1371 | /* Check for holes */ | ||
1372 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
1373 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1374 | break; | ||
1375 | } | ||
1376 | nsegs = i; | ||
1377 | rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, | ||
1378 | physaddrs, nsegs, seg1->mr_dma); | ||
1379 | if (rc) { | ||
1380 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
1381 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
1382 | len, (unsigned long long)seg1->mr_dma, | ||
1383 | pageoff, nsegs, rc); | ||
1384 | while (nsegs--) | ||
1385 | rpcrdma_unmap_one(ia, --seg); | ||
1386 | } else { | ||
1387 | seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; | ||
1388 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1389 | seg1->mr_nsegs = nsegs; | ||
1390 | seg1->mr_len = len; | ||
1391 | } | ||
1392 | } | ||
1393 | break; | 1761 | break; |
1394 | 1762 | ||
1395 | /* Registration using memory windows */ | 1763 | /* Registration using memory windows */ |
1396 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1764 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1397 | case RPCRDMA_MEMWINDOWS: | 1765 | case RPCRDMA_MEMWINDOWS: |
1398 | { | 1766 | rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); |
1399 | struct ib_mw_bind param; | ||
1400 | rpcrdma_map_one(ia, seg, writing); | ||
1401 | param.mr = ia->ri_bind_mem; | ||
1402 | param.wr_id = 0ULL; /* no send cookie */ | ||
1403 | param.addr = seg->mr_dma; | ||
1404 | param.length = seg->mr_len; | ||
1405 | param.send_flags = 0; | ||
1406 | param.mw_access_flags = mem_priv; | ||
1407 | |||
1408 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1409 | rc = ib_bind_mw(ia->ri_id->qp, | ||
1410 | seg->mr_chunk.rl_mw->r.mw, &param); | ||
1411 | if (rc) { | ||
1412 | dprintk("RPC: %s: failed ib_bind_mw " | ||
1413 | "%u@0x%llx status %i\n", | ||
1414 | __func__, seg->mr_len, | ||
1415 | (unsigned long long)seg->mr_dma, rc); | ||
1416 | rpcrdma_unmap_one(ia, seg); | ||
1417 | } else { | ||
1418 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
1419 | seg->mr_base = param.addr; | ||
1420 | seg->mr_nsegs = 1; | ||
1421 | nsegs = 1; | ||
1422 | } | ||
1423 | } | ||
1424 | break; | 1767 | break; |
1425 | 1768 | ||
1426 | /* Default registration each time */ | 1769 | /* Default registration each time */ |
1427 | default: | 1770 | default: |
1428 | { | 1771 | rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); |
1429 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
1430 | int len = 0; | ||
1431 | if (nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
1432 | nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
1433 | for (i = 0; i < nsegs;) { | ||
1434 | rpcrdma_map_one(ia, seg, writing); | ||
1435 | ipb[i].addr = seg->mr_dma; | ||
1436 | ipb[i].size = seg->mr_len; | ||
1437 | len += seg->mr_len; | ||
1438 | ++seg; | ||
1439 | ++i; | ||
1440 | /* Check for holes */ | ||
1441 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
1442 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
1443 | break; | ||
1444 | } | ||
1445 | nsegs = i; | ||
1446 | seg1->mr_base = seg1->mr_dma; | ||
1447 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
1448 | ipb, nsegs, mem_priv, &seg1->mr_base); | ||
1449 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
1450 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
1451 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
1452 | "%u@0x%llx (%d)... status %i\n", | ||
1453 | __func__, len, | ||
1454 | (unsigned long long)seg1->mr_dma, nsegs, rc); | ||
1455 | while (nsegs--) | ||
1456 | rpcrdma_unmap_one(ia, --seg); | ||
1457 | } else { | ||
1458 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
1459 | seg1->mr_nsegs = nsegs; | ||
1460 | seg1->mr_len = len; | ||
1461 | } | ||
1462 | } | ||
1463 | break; | 1772 | break; |
1464 | } | 1773 | } |
1465 | if (rc) | 1774 | if (rc) |
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1473 | struct rpcrdma_xprt *r_xprt, void *r) | 1782 | struct rpcrdma_xprt *r_xprt, void *r) |
1474 | { | 1783 | { |
1475 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1784 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
1476 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1477 | int nsegs = seg->mr_nsegs, rc; | 1785 | int nsegs = seg->mr_nsegs, rc; |
1478 | 1786 | ||
1479 | switch (ia->ri_memreg_strategy) { | 1787 | switch (ia->ri_memreg_strategy) { |
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
1486 | break; | 1794 | break; |
1487 | #endif | 1795 | #endif |
1488 | 1796 | ||
1797 | case RPCRDMA_FRMR: | ||
1798 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
1799 | break; | ||
1800 | |||
1489 | case RPCRDMA_MTHCAFMR: | 1801 | case RPCRDMA_MTHCAFMR: |
1490 | { | 1802 | rc = rpcrdma_deregister_fmr_external(seg, ia); |
1491 | LIST_HEAD(l); | ||
1492 | list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); | ||
1493 | rc = ib_unmap_fmr(&l); | ||
1494 | while (seg1->mr_nsegs--) | ||
1495 | rpcrdma_unmap_one(ia, seg++); | ||
1496 | } | ||
1497 | if (rc) | ||
1498 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
1499 | " status %i\n", __func__, rc); | ||
1500 | break; | 1803 | break; |
1501 | 1804 | ||
1502 | case RPCRDMA_MEMWINDOWS_ASYNC: | 1805 | case RPCRDMA_MEMWINDOWS_ASYNC: |
1503 | case RPCRDMA_MEMWINDOWS: | 1806 | case RPCRDMA_MEMWINDOWS: |
1504 | { | 1807 | rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); |
1505 | struct ib_mw_bind param; | ||
1506 | BUG_ON(nsegs != 1); | ||
1507 | param.mr = ia->ri_bind_mem; | ||
1508 | param.addr = 0ULL; /* unbind */ | ||
1509 | param.length = 0; | ||
1510 | param.mw_access_flags = 0; | ||
1511 | if (r) { | ||
1512 | param.wr_id = (u64) (unsigned long) r; | ||
1513 | param.send_flags = IB_SEND_SIGNALED; | ||
1514 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
1515 | } else { | ||
1516 | param.wr_id = 0ULL; | ||
1517 | param.send_flags = 0; | ||
1518 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1519 | } | ||
1520 | rc = ib_bind_mw(ia->ri_id->qp, | ||
1521 | seg->mr_chunk.rl_mw->r.mw, &param); | ||
1522 | rpcrdma_unmap_one(ia, seg); | ||
1523 | } | ||
1524 | if (rc) | ||
1525 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
1526 | " status %i\n", __func__, rc); | ||
1527 | else | ||
1528 | r = NULL; /* will upcall on completion */ | ||
1529 | break; | 1808 | break; |
1530 | 1809 | ||
1531 | default: | 1810 | default: |
1532 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | 1811 | rc = rpcrdma_deregister_default_external(seg, ia); |
1533 | seg1->mr_chunk.rl_mr = NULL; | ||
1534 | while (seg1->mr_nsegs--) | ||
1535 | rpcrdma_unmap_one(ia, seg++); | ||
1536 | if (rc) | ||
1537 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
1538 | " status %i\n", __func__, rc); | ||
1539 | break; | 1812 | break; |
1540 | } | 1813 | } |
1541 | if (r) { | 1814 | if (r) { |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2427822f8bd4..c7a7eba991bc 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -51,6 +51,9 @@ | |||
51 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ | 51 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ |
52 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ | 52 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ |
53 | 53 | ||
54 | #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ | ||
55 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | ||
56 | |||
54 | /* | 57 | /* |
55 | * Interface Adapter -- one per transport instance | 58 | * Interface Adapter -- one per transport instance |
56 | */ | 59 | */ |
@@ -58,6 +61,8 @@ struct rpcrdma_ia { | |||
58 | struct rdma_cm_id *ri_id; | 61 | struct rdma_cm_id *ri_id; |
59 | struct ib_pd *ri_pd; | 62 | struct ib_pd *ri_pd; |
60 | struct ib_mr *ri_bind_mem; | 63 | struct ib_mr *ri_bind_mem; |
64 | u32 ri_dma_lkey; | ||
65 | int ri_have_dma_lkey; | ||
61 | struct completion ri_done; | 66 | struct completion ri_done; |
62 | int ri_async_rc; | 67 | int ri_async_rc; |
63 | enum rpcrdma_memreg ri_memreg_strategy; | 68 | enum rpcrdma_memreg ri_memreg_strategy; |
@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ | |||
156 | union { | 161 | union { |
157 | struct ib_mw *mw; | 162 | struct ib_mw *mw; |
158 | struct ib_fmr *fmr; | 163 | struct ib_fmr *fmr; |
164 | struct { | ||
165 | struct ib_fast_reg_page_list *fr_pgl; | ||
166 | struct ib_mr *fr_mr; | ||
167 | } frmr; | ||
159 | } r; | 168 | } r; |
160 | struct list_head mw_list; | 169 | struct list_head mw_list; |
161 | } *rl_mw; | 170 | } *rl_mw; |
@@ -175,6 +184,7 @@ struct rpcrdma_req { | |||
175 | size_t rl_size; /* actual length of buffer */ | 184 | size_t rl_size; /* actual length of buffer */ |
176 | unsigned int rl_niovs; /* 0, 2 or 4 */ | 185 | unsigned int rl_niovs; /* 0, 2 or 4 */ |
177 | unsigned int rl_nchunks; /* non-zero if chunks */ | 186 | unsigned int rl_nchunks; /* non-zero if chunks */ |
187 | unsigned int rl_connect_cookie; /* retry detection */ | ||
178 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | 188 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ |
179 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 189 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
180 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ | 190 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ |
@@ -198,7 +208,7 @@ struct rpcrdma_buffer { | |||
198 | atomic_t rb_credits; /* most recent server credits */ | 208 | atomic_t rb_credits; /* most recent server credits */ |
199 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ | 209 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ |
200 | int rb_max_requests;/* client max requests */ | 210 | int rb_max_requests;/* client max requests */ |
201 | struct list_head rb_mws; /* optional memory windows/fmrs */ | 211 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ |
202 | int rb_send_index; | 212 | int rb_send_index; |
203 | struct rpcrdma_req **rb_send_bufs; | 213 | struct rpcrdma_req **rb_send_bufs; |
204 | int rb_recv_index; | 214 | int rb_recv_index; |
@@ -273,6 +283,11 @@ struct rpcrdma_xprt { | |||
273 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) | 283 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) |
274 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) | 284 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) |
275 | 285 | ||
286 | /* Setting this to 0 ensures interoperability with early servers. | ||
287 | * Setting this to 1 enhances certain unaligned read/write performance. | ||
288 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ | ||
289 | extern int xprt_rdma_pad_optimize; | ||
290 | |||
276 | /* | 291 | /* |
277 | * Interface Adapter calls - xprtrdma/verbs.c | 292 | * Interface Adapter calls - xprtrdma/verbs.c |
278 | */ | 293 | */ |