 -rw-r--r--  include/linux/sunrpc/svc_rdma.h           |  27
 -rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c   | 187
 -rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c     | 255
 -rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c  | 364
 4 files changed, 710 insertions, 123 deletions
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index dc05b54bd3a3..c14fe86dac59 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
| @@ -72,6 +72,7 @@ extern atomic_t rdma_stat_sq_prod; | |||
| 72 | */ | 72 | */ |
| 73 | struct svc_rdma_op_ctxt { | 73 | struct svc_rdma_op_ctxt { |
| 74 | struct svc_rdma_op_ctxt *read_hdr; | 74 | struct svc_rdma_op_ctxt *read_hdr; |
| 75 | struct svc_rdma_fastreg_mr *frmr; | ||
| 75 | int hdr_count; | 76 | int hdr_count; |
| 76 | struct xdr_buf arg; | 77 | struct xdr_buf arg; |
| 77 | struct list_head dto_q; | 78 | struct list_head dto_q; |
| @@ -103,16 +104,30 @@ struct svc_rdma_chunk_sge { | |||
| 103 | int start; /* sge no for this chunk */ | 104 | int start; /* sge no for this chunk */ |
| 104 | int count; /* sge count for this chunk */ | 105 | int count; /* sge count for this chunk */ |
| 105 | }; | 106 | }; |
| 107 | struct svc_rdma_fastreg_mr { | ||
| 108 | struct ib_mr *mr; | ||
| 109 | void *kva; | ||
| 110 | struct ib_fast_reg_page_list *page_list; | ||
| 111 | int page_list_len; | ||
| 112 | unsigned long access_flags; | ||
| 113 | unsigned long map_len; | ||
| 114 | enum dma_data_direction direction; | ||
| 115 | struct list_head frmr_list; | ||
| 116 | }; | ||
| 106 | struct svc_rdma_req_map { | 117 | struct svc_rdma_req_map { |
| 118 | struct svc_rdma_fastreg_mr *frmr; | ||
| 107 | unsigned long count; | 119 | unsigned long count; |
| 108 | union { | 120 | union { |
| 109 | struct kvec sge[RPCSVC_MAXPAGES]; | 121 | struct kvec sge[RPCSVC_MAXPAGES]; |
| 110 | struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; | 122 | struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; |
| 111 | }; | 123 | }; |
| 112 | }; | 124 | }; |
| 113 | 125 | #define RDMACTXT_F_FAST_UNREG 1 | |
| 114 | #define RDMACTXT_F_LAST_CTXT 2 | 126 | #define RDMACTXT_F_LAST_CTXT 2 |
| 115 | 127 | ||
| 128 | #define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ | ||
| 129 | #define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */ | ||
| 130 | |||
| 116 | struct svcxprt_rdma { | 131 | struct svcxprt_rdma { |
| 117 | struct svc_xprt sc_xprt; /* SVC transport structure */ | 132 | struct svc_xprt sc_xprt; /* SVC transport structure */ |
| 118 | struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ | 133 | struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ |
| @@ -136,6 +151,11 @@ struct svcxprt_rdma { | |||
| 136 | struct ib_cq *sc_rq_cq; | 151 | struct ib_cq *sc_rq_cq; |
| 137 | struct ib_cq *sc_sq_cq; | 152 | struct ib_cq *sc_sq_cq; |
| 138 | struct ib_mr *sc_phys_mr; /* MR for server memory */ | 153 | struct ib_mr *sc_phys_mr; /* MR for server memory */ |
| 154 | u32 sc_dev_caps; /* distilled device caps */ | ||
| 155 | u32 sc_dma_lkey; /* local dma key */ | ||
| 156 | unsigned int sc_frmr_pg_list_len; | ||
| 157 | struct list_head sc_frmr_q; | ||
| 158 | spinlock_t sc_frmr_q_lock; | ||
| 139 | 159 | ||
| 140 | spinlock_t sc_lock; /* transport lock */ | 160 | spinlock_t sc_lock; /* transport lock */ |
| 141 | 161 | ||
| @@ -192,8 +212,13 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *); | |||
| 192 | extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); | 212 | extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); |
| 193 | extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); | 213 | extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); |
| 194 | extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); | 214 | extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); |
| 215 | extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); | ||
| 195 | extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); | 216 | extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); |
| 196 | extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); | 217 | extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); |
| 218 | extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); | ||
| 219 | extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); | ||
| 220 | extern void svc_rdma_put_frmr(struct svcxprt_rdma *, | ||
| 221 | struct svc_rdma_fastreg_mr *); | ||
| 197 | extern void svc_sq_reap(struct svcxprt_rdma *); | 222 | extern void svc_sq_reap(struct svcxprt_rdma *); |
| 198 | extern void svc_rq_reap(struct svcxprt_rdma *); | 223 | extern void svc_rq_reap(struct svcxprt_rdma *); |
| 199 | extern struct svc_xprt_class svc_rdma_class; | 224 | extern struct svc_xprt_class svc_rdma_class; |
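
The header additions above define the per-transport FRMR pool (sc_frmr_q) and the small API built around it: svc_rdma_get_frmr(), svc_rdma_fastreg() and svc_rdma_put_frmr(). The sketch below is illustrative only and is not part of the patch; frmr_usage_example() is a hypothetical caller showing the lifecycle these declarations imply, with the per-page DMA mapping and most error handling reduced to comments.

/* Hypothetical caller of the new FRMR API (sketch, not in the patch). */
static int frmr_usage_example(struct svcxprt_rdma *xprt, int byte_count)
{
	struct svc_rdma_fastreg_mr *frmr;
	int ret;

	/* Take an MR from sc_frmr_q, or allocate one if the queue is empty */
	frmr = svc_rdma_get_frmr(xprt);
	if (IS_ERR(frmr))
		return -ENOMEM;

	frmr->direction = DMA_FROM_DEVICE;
	frmr->access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
	frmr->map_len = byte_count;
	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
	/* ... DMA-map each page into frmr->page_list->page_list[i] here ... */

	/* Post an IB_WR_FAST_REG_MR WR to make frmr->mr->lkey usable */
	ret = svc_rdma_fastreg(xprt, frmr);
	if (ret) {
		/* Unmaps the page list and returns the MR to sc_frmr_q */
		svc_rdma_put_frmr(xprt, frmr);
		return ret;
	}
	return 0;
}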
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 74de31a06616..a4756576d687 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
| @@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
| 116 | * | 116 | * |
| 117 | * Assumptions: | 117 | * Assumptions: |
| 118 | * - chunk[0]->position points to pages[0] at an offset of 0 | 118 | * - chunk[0]->position points to pages[0] at an offset of 0 |
| 119 | * - pages[] is not physically or virtually contigous and consists of | 119 | * - pages[] is not physically or virtually contiguous and consists of |
| 120 | * PAGE_SIZE elements. | 120 | * PAGE_SIZE elements. |
| 121 | * | 121 | * |
| 122 | * Output: | 122 | * Output: |
| @@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
| 125 | * chunk in the read list | 125 | * chunk in the read list |
| 126 | * | 126 | * |
| 127 | */ | 127 | */ |
| 128 | static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, | 128 | static int map_read_chunks(struct svcxprt_rdma *xprt, |
| 129 | struct svc_rqst *rqstp, | 129 | struct svc_rqst *rqstp, |
| 130 | struct svc_rdma_op_ctxt *head, | 130 | struct svc_rdma_op_ctxt *head, |
| 131 | struct rpcrdma_msg *rmsgp, | 131 | struct rpcrdma_msg *rmsgp, |
| @@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, | |||
| 211 | return sge_no; | 211 | return sge_no; |
| 212 | } | 212 | } |
| 213 | 213 | ||
| 214 | static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, | 214 | /* Map a read-chunk-list to an XDR and fast register the page-list. |
| 215 | struct svc_rdma_op_ctxt *ctxt, | 215 | * |
| 216 | struct kvec *vec, | 216 | * Assumptions: |
| 217 | u64 *sgl_offset, | 217 | * - chunk[0] position points to pages[0] at an offset of 0 |
| 218 | int count) | 218 | * - pages[] will be made physically contiguous by creating a one-off memory |
| 219 | * region using the fastreg verb. | ||
| 220 | * - byte_count is # of bytes in read-chunk-list | ||
| 221 | * - ch_count is # of chunks in read-chunk-list | ||
| 222 | * | ||
| 223 | * Output: | ||
| 224 | * - sge array pointing into pages[] array. | ||
| 225 | * - chunk_sge array specifying sge index and count for each | ||
| 226 | * chunk in the read list | ||
| 227 | */ | ||
| 228 | static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, | ||
| 229 | struct svc_rqst *rqstp, | ||
| 230 | struct svc_rdma_op_ctxt *head, | ||
| 231 | struct rpcrdma_msg *rmsgp, | ||
| 232 | struct svc_rdma_req_map *rpl_map, | ||
| 233 | struct svc_rdma_req_map *chl_map, | ||
| 234 | int ch_count, | ||
| 235 | int byte_count) | ||
| 236 | { | ||
| 237 | int page_no; | ||
| 238 | int ch_no; | ||
| 239 | u32 offset; | ||
| 240 | struct rpcrdma_read_chunk *ch; | ||
| 241 | struct svc_rdma_fastreg_mr *frmr; | ||
| 242 | int ret = 0; | ||
| 243 | |||
| 244 | frmr = svc_rdma_get_frmr(xprt); | ||
| 245 | if (IS_ERR(frmr)) | ||
| 246 | return -ENOMEM; | ||
| 247 | |||
| 248 | head->frmr = frmr; | ||
| 249 | head->arg.head[0] = rqstp->rq_arg.head[0]; | ||
| 250 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | ||
| 251 | head->arg.pages = &head->pages[head->count]; | ||
| 252 | head->hdr_count = head->count; /* save count of hdr pages */ | ||
| 253 | head->arg.page_base = 0; | ||
| 254 | head->arg.page_len = byte_count; | ||
| 255 | head->arg.len = rqstp->rq_arg.len + byte_count; | ||
| 256 | head->arg.buflen = rqstp->rq_arg.buflen + byte_count; | ||
| 257 | |||
| 258 | /* Fast register the page list */ | ||
| 259 | frmr->kva = page_address(rqstp->rq_arg.pages[0]); | ||
| 260 | frmr->direction = DMA_FROM_DEVICE; | ||
| 261 | frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); | ||
| 262 | frmr->map_len = byte_count; | ||
| 263 | frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; | ||
| 264 | for (page_no = 0; page_no < frmr->page_list_len; page_no++) { | ||
| 265 | frmr->page_list->page_list[page_no] = | ||
| 266 | ib_dma_map_single(xprt->sc_cm_id->device, | ||
| 267 | page_address(rqstp->rq_arg.pages[page_no]), | ||
| 268 | PAGE_SIZE, DMA_TO_DEVICE); | ||
| 269 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
| 270 | frmr->page_list->page_list[page_no])) | ||
| 271 | goto fatal_err; | ||
| 272 | atomic_inc(&xprt->sc_dma_used); | ||
| 273 | head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; | ||
| 274 | } | ||
| 275 | head->count += page_no; | ||
| 276 | |||
| 277 | /* rq_respages points one past arg pages */ | ||
| 278 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; | ||
| 279 | |||
| 280 | /* Create the reply and chunk maps */ | ||
| 281 | offset = 0; | ||
| 282 | ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | ||
| 283 | for (ch_no = 0; ch_no < ch_count; ch_no++) { | ||
| 284 | rpl_map->sge[ch_no].iov_base = frmr->kva + offset; | ||
| 285 | rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length; | ||
| 286 | chl_map->ch[ch_no].count = 1; | ||
| 287 | chl_map->ch[ch_no].start = ch_no; | ||
| 288 | offset += ch->rc_target.rs_length; | ||
| 289 | ch++; | ||
| 290 | } | ||
| 291 | |||
| 292 | ret = svc_rdma_fastreg(xprt, frmr); | ||
| 293 | if (ret) | ||
| 294 | goto fatal_err; | ||
| 295 | |||
| 296 | return ch_no; | ||
| 297 | |||
| 298 | fatal_err: | ||
| 299 | printk("svcrdma: error fast registering xdr for xprt %p", xprt); | ||
| 300 | svc_rdma_put_frmr(xprt, frmr); | ||
| 301 | return -EIO; | ||
| 302 | } | ||
| 303 | |||
| 304 | static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, | ||
| 305 | struct svc_rdma_op_ctxt *ctxt, | ||
| 306 | struct svc_rdma_fastreg_mr *frmr, | ||
| 307 | struct kvec *vec, | ||
| 308 | u64 *sgl_offset, | ||
| 309 | int count) | ||
| 219 | { | 310 | { |
| 220 | int i; | 311 | int i; |
| 221 | 312 | ||
| 222 | ctxt->count = count; | 313 | ctxt->count = count; |
| 223 | ctxt->direction = DMA_FROM_DEVICE; | 314 | ctxt->direction = DMA_FROM_DEVICE; |
| 224 | for (i = 0; i < count; i++) { | 315 | for (i = 0; i < count; i++) { |
| 225 | atomic_inc(&xprt->sc_dma_used); | 316 | ctxt->sge[i].length = 0; /* in case map fails */ |
| 226 | ctxt->sge[i].addr = | 317 | if (!frmr) { |
| 227 | ib_dma_map_single(xprt->sc_cm_id->device, | 318 | ctxt->sge[i].addr = |
| 228 | vec[i].iov_base, vec[i].iov_len, | 319 | ib_dma_map_single(xprt->sc_cm_id->device, |
| 229 | DMA_FROM_DEVICE); | 320 | vec[i].iov_base, |
| 321 | vec[i].iov_len, | ||
| 322 | DMA_FROM_DEVICE); | ||
| 323 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
| 324 | ctxt->sge[i].addr)) | ||
| 325 | return -EINVAL; | ||
| 326 | ctxt->sge[i].lkey = xprt->sc_dma_lkey; | ||
| 327 | atomic_inc(&xprt->sc_dma_used); | ||
| 328 | } else { | ||
| 329 | ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; | ||
| 330 | ctxt->sge[i].lkey = frmr->mr->lkey; | ||
| 331 | } | ||
| 230 | ctxt->sge[i].length = vec[i].iov_len; | 332 | ctxt->sge[i].length = vec[i].iov_len; |
| 231 | ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey; | ||
| 232 | *sgl_offset = *sgl_offset + vec[i].iov_len; | 333 | *sgl_offset = *sgl_offset + vec[i].iov_len; |
| 233 | } | 334 | } |
| 335 | return 0; | ||
| 234 | } | 336 | } |
| 235 | 337 | ||
| 236 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) | 338 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) |
| @@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, | |||
| 278 | struct svc_rdma_op_ctxt *hdr_ctxt) | 380 | struct svc_rdma_op_ctxt *hdr_ctxt) |
| 279 | { | 381 | { |
| 280 | struct ib_send_wr read_wr; | 382 | struct ib_send_wr read_wr; |
| 383 | struct ib_send_wr inv_wr; | ||
| 281 | int err = 0; | 384 | int err = 0; |
| 282 | int ch_no; | 385 | int ch_no; |
| 283 | int ch_count; | 386 | int ch_count; |
| @@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, | |||
| 301 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); | 404 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); |
| 302 | if (ch_count > RPCSVC_MAXPAGES) | 405 | if (ch_count > RPCSVC_MAXPAGES) |
| 303 | return -EINVAL; | 406 | return -EINVAL; |
| 304 | sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, | 407 | |
| 305 | rpl_map, chl_map, | 408 | if (!xprt->sc_frmr_pg_list_len) |
| 306 | ch_count, byte_count); | 409 | sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, |
| 410 | rpl_map, chl_map, ch_count, | ||
| 411 | byte_count); | ||
| 412 | else | ||
| 413 | sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, | ||
| 414 | rpl_map, chl_map, ch_count, | ||
| 415 | byte_count); | ||
| 416 | if (sge_count < 0) { | ||
| 417 | err = -EIO; | ||
| 418 | goto out; | ||
| 419 | } | ||
| 420 | |||
| 307 | sgl_offset = 0; | 421 | sgl_offset = 0; |
| 308 | ch_no = 0; | 422 | ch_no = 0; |
| 309 | 423 | ||
| @@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, | |||
| 312 | next_sge: | 426 | next_sge: |
| 313 | ctxt = svc_rdma_get_context(xprt); | 427 | ctxt = svc_rdma_get_context(xprt); |
| 314 | ctxt->direction = DMA_FROM_DEVICE; | 428 | ctxt->direction = DMA_FROM_DEVICE; |
| 429 | ctxt->frmr = hdr_ctxt->frmr; | ||
| 430 | ctxt->read_hdr = NULL; | ||
| 315 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | 431 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); |
| 432 | clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
| 316 | 433 | ||
| 317 | /* Prepare READ WR */ | 434 | /* Prepare READ WR */ |
| 318 | memset(&read_wr, 0, sizeof read_wr); | 435 | memset(&read_wr, 0, sizeof read_wr); |
| 319 | ctxt->wr_op = IB_WR_RDMA_READ; | ||
| 320 | read_wr.wr_id = (unsigned long)ctxt; | 436 | read_wr.wr_id = (unsigned long)ctxt; |
| 321 | read_wr.opcode = IB_WR_RDMA_READ; | 437 | read_wr.opcode = IB_WR_RDMA_READ; |
| 438 | ctxt->wr_op = read_wr.opcode; | ||
| 322 | read_wr.send_flags = IB_SEND_SIGNALED; | 439 | read_wr.send_flags = IB_SEND_SIGNALED; |
| 323 | read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; | 440 | read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; |
| 324 | read_wr.wr.rdma.remote_addr = | 441 | read_wr.wr.rdma.remote_addr = |
| @@ -327,10 +444,15 @@ next_sge: | |||
| 327 | read_wr.sg_list = ctxt->sge; | 444 | read_wr.sg_list = ctxt->sge; |
| 328 | read_wr.num_sge = | 445 | read_wr.num_sge = |
| 329 | rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); | 446 | rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); |
| 330 | rdma_set_ctxt_sge(xprt, ctxt, | 447 | err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, |
| 331 | &rpl_map->sge[chl_map->ch[ch_no].start], | 448 | &rpl_map->sge[chl_map->ch[ch_no].start], |
| 332 | &sgl_offset, | 449 | &sgl_offset, |
| 333 | read_wr.num_sge); | 450 | read_wr.num_sge); |
| 451 | if (err) { | ||
| 452 | svc_rdma_unmap_dma(ctxt); | ||
| 453 | svc_rdma_put_context(ctxt, 0); | ||
| 454 | goto out; | ||
| 455 | } | ||
| 334 | if (((ch+1)->rc_discrim == 0) && | 456 | if (((ch+1)->rc_discrim == 0) && |
| 335 | (read_wr.num_sge == chl_map->ch[ch_no].count)) { | 457 | (read_wr.num_sge == chl_map->ch[ch_no].count)) { |
| 336 | /* | 458 | /* |
| @@ -339,6 +461,29 @@ next_sge: | |||
| 339 | * the client and the RPC needs to be enqueued. | 461 | * the client and the RPC needs to be enqueued. |
| 340 | */ | 462 | */ |
| 341 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | 463 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); |
| 464 | if (hdr_ctxt->frmr) { | ||
| 465 | set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
| 466 | /* | ||
| 467 | * Invalidate the local MR used to map the data | ||
| 468 | * sink. | ||
| 469 | */ | ||
| 470 | if (xprt->sc_dev_caps & | ||
| 471 | SVCRDMA_DEVCAP_READ_W_INV) { | ||
| 472 | read_wr.opcode = | ||
| 473 | IB_WR_RDMA_READ_WITH_INV; | ||
| 474 | ctxt->wr_op = read_wr.opcode; | ||
| 475 | read_wr.ex.invalidate_rkey = | ||
| 476 | ctxt->frmr->mr->lkey; | ||
| 477 | } else { | ||
| 478 | /* Prepare INVALIDATE WR */ | ||
| 479 | memset(&inv_wr, 0, sizeof inv_wr); | ||
| 480 | inv_wr.opcode = IB_WR_LOCAL_INV; | ||
| 481 | inv_wr.send_flags = IB_SEND_SIGNALED; | ||
| 482 | inv_wr.ex.invalidate_rkey = | ||
| 483 | hdr_ctxt->frmr->mr->lkey; | ||
| 484 | read_wr.next = &inv_wr; | ||
| 485 | } | ||
| 486 | } | ||
| 342 | ctxt->read_hdr = hdr_ctxt; | 487 | ctxt->read_hdr = hdr_ctxt; |
| 343 | } | 488 | } |
| 344 | /* Post the read */ | 489 | /* Post the read */ |
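
A note on the invalidation logic added above: when the data sink was mapped with an FRMR, the last RDMA_READ for the chunk list must also invalidate that MR. The helper below is a hypothetical restatement of that branch (it does not appear in the patch), assuming read_wr has already been initialized as in rdma_read_xdr() and inv_wr points at caller-provided storage that stays valid until ib_post_send() returns.

/* Sketch: pair the final RDMA_READ with MR invalidation (not in the patch). */
static void pair_read_with_invalidate(struct svcxprt_rdma *xprt,
				      struct svc_rdma_fastreg_mr *frmr,
				      struct ib_send_wr *read_wr,
				      struct ib_send_wr *inv_wr)
{
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
		/* The device can invalidate the local MR in the read itself */
		read_wr->opcode = IB_WR_RDMA_READ_WITH_INV;
		read_wr->ex.invalidate_rkey = frmr->mr->lkey;
	} else {
		/* Otherwise chain an explicit LOCAL_INV WR behind the read */
		memset(inv_wr, 0, sizeof(*inv_wr));
		inv_wr->opcode = IB_WR_LOCAL_INV;
		inv_wr->send_flags = IB_SEND_SIGNALED;
		inv_wr->ex.invalidate_rkey = frmr->mr->lkey;
		read_wr->next = inv_wr;
	}
}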
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 84d328329d98..9a7a8e7ae038 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
| @@ -69,9 +69,127 @@ | |||
| 69 | * array is only concerned with the reply we are assured that we have | 69 | * array is only concerned with the reply we are assured that we have |
| 70 | * on extra page for the RPCRMDA header. | 70 | * on extra page for the RPCRMDA header. |
| 71 | */ | 71 | */ |
| 72 | static void xdr_to_sge(struct svcxprt_rdma *xprt, | 72 | int fast_reg_xdr(struct svcxprt_rdma *xprt, |
| 73 | struct xdr_buf *xdr, | 73 | struct xdr_buf *xdr, |
| 74 | struct svc_rdma_req_map *vec) | 74 | struct svc_rdma_req_map *vec) |
| 75 | { | ||
| 76 | int sge_no; | ||
| 77 | u32 sge_bytes; | ||
| 78 | u32 page_bytes; | ||
| 79 | u32 page_off; | ||
| 80 | int page_no = 0; | ||
| 81 | u8 *frva; | ||
| 82 | struct svc_rdma_fastreg_mr *frmr; | ||
| 83 | |||
| 84 | frmr = svc_rdma_get_frmr(xprt); | ||
| 85 | if (IS_ERR(frmr)) | ||
| 86 | return -ENOMEM; | ||
| 87 | vec->frmr = frmr; | ||
| 88 | |||
| 89 | /* Skip the RPCRDMA header */ | ||
| 90 | sge_no = 1; | ||
| 91 | |||
| 92 | /* Map the head. */ | ||
| 93 | frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); | ||
| 94 | vec->sge[sge_no].iov_base = xdr->head[0].iov_base; | ||
| 95 | vec->sge[sge_no].iov_len = xdr->head[0].iov_len; | ||
| 96 | vec->count = 2; | ||
| 97 | sge_no++; | ||
| 98 | |||
| 99 | /* Build the FRMR */ | ||
| 100 | frmr->kva = frva; | ||
| 101 | frmr->direction = DMA_TO_DEVICE; | ||
| 102 | frmr->access_flags = 0; | ||
| 103 | frmr->map_len = PAGE_SIZE; | ||
| 104 | frmr->page_list_len = 1; | ||
| 105 | frmr->page_list->page_list[page_no] = | ||
| 106 | ib_dma_map_single(xprt->sc_cm_id->device, | ||
| 107 | (void *)xdr->head[0].iov_base, | ||
| 108 | PAGE_SIZE, DMA_TO_DEVICE); | ||
| 109 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
| 110 | frmr->page_list->page_list[page_no])) | ||
| 111 | goto fatal_err; | ||
| 112 | atomic_inc(&xprt->sc_dma_used); | ||
| 113 | |||
| 114 | page_off = xdr->page_base; | ||
| 115 | page_bytes = xdr->page_len + page_off; | ||
| 116 | if (!page_bytes) | ||
| 117 | goto encode_tail; | ||
| 118 | |||
| 119 | /* Map the pages */ | ||
| 120 | vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; | ||
| 121 | vec->sge[sge_no].iov_len = page_bytes; | ||
| 122 | sge_no++; | ||
| 123 | while (page_bytes) { | ||
| 124 | struct page *page; | ||
| 125 | |||
| 126 | page = xdr->pages[page_no++]; | ||
| 127 | sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); | ||
| 128 | page_bytes -= sge_bytes; | ||
| 129 | |||
| 130 | frmr->page_list->page_list[page_no] = | ||
| 131 | ib_dma_map_page(xprt->sc_cm_id->device, page, 0, | ||
| 132 | PAGE_SIZE, DMA_TO_DEVICE); | ||
| 133 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
| 134 | frmr->page_list->page_list[page_no])) | ||
| 135 | goto fatal_err; | ||
| 136 | |||
| 137 | atomic_inc(&xprt->sc_dma_used); | ||
| 138 | page_off = 0; /* reset for next time through loop */ | ||
| 139 | frmr->map_len += PAGE_SIZE; | ||
| 140 | frmr->page_list_len++; | ||
| 141 | } | ||
| 142 | vec->count++; | ||
| 143 | |||
| 144 | encode_tail: | ||
| 145 | /* Map tail */ | ||
| 146 | if (0 == xdr->tail[0].iov_len) | ||
| 147 | goto done; | ||
| 148 | |||
| 149 | vec->count++; | ||
| 150 | vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; | ||
| 151 | |||
| 152 | if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == | ||
| 153 | ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { | ||
| 154 | /* | ||
| 155 | * If head and tail use the same page, we don't need | ||
| 156 | * to map it again. | ||
| 157 | */ | ||
| 158 | vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; | ||
| 159 | } else { | ||
| 160 | void *va; | ||
| 161 | |||
| 162 | /* Map another page for the tail */ | ||
| 163 | page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; | ||
| 164 | va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); | ||
| 165 | vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; | ||
| 166 | |||
| 167 | frmr->page_list->page_list[page_no] = | ||
| 168 | ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, | ||
| 169 | DMA_TO_DEVICE); | ||
| 170 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
| 171 | frmr->page_list->page_list[page_no])) | ||
| 172 | goto fatal_err; | ||
| 173 | atomic_inc(&xprt->sc_dma_used); | ||
| 174 | frmr->map_len += PAGE_SIZE; | ||
| 175 | frmr->page_list_len++; | ||
| 176 | } | ||
| 177 | |||
| 178 | done: | ||
| 179 | if (svc_rdma_fastreg(xprt, frmr)) | ||
| 180 | goto fatal_err; | ||
| 181 | |||
| 182 | return 0; | ||
| 183 | |||
| 184 | fatal_err: | ||
| 185 | printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); | ||
| 186 | svc_rdma_put_frmr(xprt, frmr); | ||
| 187 | return -EIO; | ||
| 188 | } | ||
| 189 | |||
| 190 | static int map_xdr(struct svcxprt_rdma *xprt, | ||
| 191 | struct xdr_buf *xdr, | ||
| 192 | struct svc_rdma_req_map *vec) | ||
| 75 | { | 193 | { |
| 76 | int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; | 194 | int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; |
| 77 | int sge_no; | 195 | int sge_no; |
| @@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt, | |||
| 83 | BUG_ON(xdr->len != | 201 | BUG_ON(xdr->len != |
| 84 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); | 202 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); |
| 85 | 203 | ||
| 204 | if (xprt->sc_frmr_pg_list_len) | ||
| 205 | return fast_reg_xdr(xprt, xdr, vec); | ||
| 206 | |||
| 86 | /* Skip the first sge, this is for the RPCRDMA header */ | 207 | /* Skip the first sge, this is for the RPCRDMA header */ |
| 87 | sge_no = 1; | 208 | sge_no = 1; |
| 88 | 209 | ||
| @@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt, | |||
| 116 | 237 | ||
| 117 | BUG_ON(sge_no > sge_max); | 238 | BUG_ON(sge_no > sge_max); |
| 118 | vec->count = sge_no; | 239 | vec->count = sge_no; |
| 240 | return 0; | ||
| 119 | } | 241 | } |
| 120 | 242 | ||
| 121 | /* Assumptions: | 243 | /* Assumptions: |
| 244 | * - We are using FRMR | ||
| 245 | * - or - | ||
| 122 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE | 246 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE |
| 123 | */ | 247 | */ |
| 124 | static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | 248 | static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, |
| @@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | |||
| 158 | sge_no = 0; | 282 | sge_no = 0; |
| 159 | 283 | ||
| 160 | /* Copy the remaining SGE */ | 284 | /* Copy the remaining SGE */ |
| 161 | while (bc != 0 && xdr_sge_no < vec->count) { | 285 | while (bc != 0) { |
| 162 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | 286 | sge_bytes = min_t(size_t, |
| 163 | sge_bytes = min((size_t)bc, | 287 | bc, vec->sge[xdr_sge_no].iov_len-sge_off); |
| 164 | (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off)); | ||
| 165 | sge[sge_no].length = sge_bytes; | 288 | sge[sge_no].length = sge_bytes; |
| 166 | atomic_inc(&xprt->sc_dma_used); | 289 | if (!vec->frmr) { |
| 167 | sge[sge_no].addr = | 290 | sge[sge_no].addr = |
| 168 | ib_dma_map_single(xprt->sc_cm_id->device, | 291 | ib_dma_map_single(xprt->sc_cm_id->device, |
| 169 | (void *) | 292 | (void *) |
| 170 | vec->sge[xdr_sge_no].iov_base + sge_off, | 293 | vec->sge[xdr_sge_no].iov_base + sge_off, |
| 171 | sge_bytes, DMA_TO_DEVICE); | 294 | sge_bytes, DMA_TO_DEVICE); |
| 172 | if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, | 295 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, |
| 173 | sge[sge_no].addr)) | 296 | sge[sge_no].addr)) |
| 174 | goto err; | 297 | goto err; |
| 298 | atomic_inc(&xprt->sc_dma_used); | ||
| 299 | sge[sge_no].lkey = xprt->sc_dma_lkey; | ||
| 300 | } else { | ||
| 301 | sge[sge_no].addr = (unsigned long) | ||
| 302 | vec->sge[xdr_sge_no].iov_base + sge_off; | ||
| 303 | sge[sge_no].lkey = vec->frmr->mr->lkey; | ||
| 304 | } | ||
| 305 | ctxt->count++; | ||
| 306 | ctxt->frmr = vec->frmr; | ||
| 175 | sge_off = 0; | 307 | sge_off = 0; |
| 176 | sge_no++; | 308 | sge_no++; |
| 177 | ctxt->count++; | ||
| 178 | xdr_sge_no++; | 309 | xdr_sge_no++; |
| 310 | BUG_ON(xdr_sge_no > vec->count); | ||
| 179 | bc -= sge_bytes; | 311 | bc -= sge_bytes; |
| 180 | } | 312 | } |
| 181 | 313 | ||
| 182 | BUG_ON(bc != 0); | ||
| 183 | BUG_ON(xdr_sge_no > vec->count); | ||
| 184 | |||
| 185 | /* Prepare WRITE WR */ | 314 | /* Prepare WRITE WR */ |
| 186 | memset(&write_wr, 0, sizeof write_wr); | 315 | memset(&write_wr, 0, sizeof write_wr); |
| 187 | ctxt->wr_op = IB_WR_RDMA_WRITE; | 316 | ctxt->wr_op = IB_WR_RDMA_WRITE; |
| @@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, | |||
| 226 | res_ary = (struct rpcrdma_write_array *) | 355 | res_ary = (struct rpcrdma_write_array *) |
| 227 | &rdma_resp->rm_body.rm_chunks[1]; | 356 | &rdma_resp->rm_body.rm_chunks[1]; |
| 228 | 357 | ||
| 229 | max_write = xprt->sc_max_sge * PAGE_SIZE; | 358 | if (vec->frmr) |
| 359 | max_write = vec->frmr->map_len; | ||
| 360 | else | ||
| 361 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
| 230 | 362 | ||
| 231 | /* Write chunks start at the pagelist */ | 363 | /* Write chunks start at the pagelist */ |
| 232 | for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; | 364 | for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; |
| @@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, | |||
| 297 | res_ary = (struct rpcrdma_write_array *) | 429 | res_ary = (struct rpcrdma_write_array *) |
| 298 | &rdma_resp->rm_body.rm_chunks[2]; | 430 | &rdma_resp->rm_body.rm_chunks[2]; |
| 299 | 431 | ||
| 300 | max_write = xprt->sc_max_sge * PAGE_SIZE; | 432 | if (vec->frmr) |
| 433 | max_write = vec->frmr->map_len; | ||
| 434 | else | ||
| 435 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
| 301 | 436 | ||
| 302 | /* xdr offset starts at RPC message */ | 437 | /* xdr offset starts at RPC message */ |
| 303 | for (xdr_off = 0, chunk_no = 0; | 438 | for (xdr_off = 0, chunk_no = 0; |
| @@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, | |||
| 307 | ch = &arg_ary->wc_array[chunk_no].wc_target; | 442 | ch = &arg_ary->wc_array[chunk_no].wc_target; |
| 308 | write_len = min(xfer_len, ch->rs_length); | 443 | write_len = min(xfer_len, ch->rs_length); |
| 309 | 444 | ||
| 310 | |||
| 311 | /* Prepare the reply chunk given the length actually | 445 | /* Prepare the reply chunk given the length actually |
| 312 | * written */ | 446 | * written */ |
| 313 | rs_offset = get_unaligned(&(ch->rs_offset)); | 447 | rs_offset = get_unaligned(&(ch->rs_offset)); |
| @@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
| 366 | int byte_count) | 500 | int byte_count) |
| 367 | { | 501 | { |
| 368 | struct ib_send_wr send_wr; | 502 | struct ib_send_wr send_wr; |
| 503 | struct ib_send_wr inv_wr; | ||
| 369 | int sge_no; | 504 | int sge_no; |
| 370 | int sge_bytes; | 505 | int sge_bytes; |
| 371 | int page_no; | 506 | int page_no; |
| @@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
| 385 | /* Prepare the context */ | 520 | /* Prepare the context */ |
| 386 | ctxt->pages[0] = page; | 521 | ctxt->pages[0] = page; |
| 387 | ctxt->count = 1; | 522 | ctxt->count = 1; |
| 523 | ctxt->frmr = vec->frmr; | ||
| 524 | if (vec->frmr) | ||
| 525 | set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
| 526 | else | ||
| 527 | clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
| 388 | 528 | ||
| 389 | /* Prepare the SGE for the RPCRDMA Header */ | 529 | /* Prepare the SGE for the RPCRDMA Header */ |
| 390 | atomic_inc(&rdma->sc_dma_used); | ||
| 391 | ctxt->sge[0].addr = | 530 | ctxt->sge[0].addr = |
| 392 | ib_dma_map_page(rdma->sc_cm_id->device, | 531 | ib_dma_map_page(rdma->sc_cm_id->device, |
| 393 | page, 0, PAGE_SIZE, DMA_TO_DEVICE); | 532 | page, 0, PAGE_SIZE, DMA_TO_DEVICE); |
| 533 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) | ||
| 534 | goto err; | ||
| 535 | atomic_inc(&rdma->sc_dma_used); | ||
| 536 | |||
| 394 | ctxt->direction = DMA_TO_DEVICE; | 537 | ctxt->direction = DMA_TO_DEVICE; |
| 538 | |||
| 395 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); | 539 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); |
| 396 | ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; | 540 | ctxt->sge[0].lkey = rdma->sc_dma_lkey; |
| 397 | 541 | ||
| 398 | /* Determine how many of our SGE are to be transmitted */ | 542 | /* Determine how many of our SGE are to be transmitted */ |
| 399 | for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { | 543 | for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { |
| 400 | sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); | 544 | sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); |
| 401 | byte_count -= sge_bytes; | 545 | byte_count -= sge_bytes; |
| 402 | atomic_inc(&rdma->sc_dma_used); | 546 | if (!vec->frmr) { |
| 403 | ctxt->sge[sge_no].addr = | 547 | ctxt->sge[sge_no].addr = |
| 404 | ib_dma_map_single(rdma->sc_cm_id->device, | 548 | ib_dma_map_single(rdma->sc_cm_id->device, |
| 405 | vec->sge[sge_no].iov_base, | 549 | vec->sge[sge_no].iov_base, |
| 406 | sge_bytes, DMA_TO_DEVICE); | 550 | sge_bytes, DMA_TO_DEVICE); |
| 551 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, | ||
| 552 | ctxt->sge[sge_no].addr)) | ||
| 553 | goto err; | ||
| 554 | atomic_inc(&rdma->sc_dma_used); | ||
| 555 | ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; | ||
| 556 | } else { | ||
| 557 | ctxt->sge[sge_no].addr = (unsigned long) | ||
| 558 | vec->sge[sge_no].iov_base; | ||
| 559 | ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; | ||
| 560 | } | ||
| 407 | ctxt->sge[sge_no].length = sge_bytes; | 561 | ctxt->sge[sge_no].length = sge_bytes; |
| 408 | ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey; | ||
| 409 | } | 562 | } |
| 410 | BUG_ON(byte_count != 0); | 563 | BUG_ON(byte_count != 0); |
| 411 | 564 | ||
| @@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
| 417 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; | 570 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; |
| 418 | ctxt->count++; | 571 | ctxt->count++; |
| 419 | rqstp->rq_respages[page_no] = NULL; | 572 | rqstp->rq_respages[page_no] = NULL; |
| 420 | /* If there are more pages than SGE, terminate SGE list */ | 573 | /* |
| 574 | * If there are more pages than SGE, terminate SGE | ||
| 575 | * list so that svc_rdma_unmap_dma doesn't attempt to | ||
| 576 | * unmap garbage. | ||
| 577 | */ | ||
| 421 | if (page_no+1 >= sge_no) | 578 | if (page_no+1 >= sge_no) |
| 422 | ctxt->sge[page_no+1].length = 0; | 579 | ctxt->sge[page_no+1].length = 0; |
| 423 | } | 580 | } |
| 424 | BUG_ON(sge_no > rdma->sc_max_sge); | 581 | BUG_ON(sge_no > rdma->sc_max_sge); |
| 582 | BUG_ON(sge_no > ctxt->count); | ||
| 425 | memset(&send_wr, 0, sizeof send_wr); | 583 | memset(&send_wr, 0, sizeof send_wr); |
| 426 | ctxt->wr_op = IB_WR_SEND; | 584 | ctxt->wr_op = IB_WR_SEND; |
| 427 | send_wr.wr_id = (unsigned long)ctxt; | 585 | send_wr.wr_id = (unsigned long)ctxt; |
| @@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
| 429 | send_wr.num_sge = sge_no; | 587 | send_wr.num_sge = sge_no; |
| 430 | send_wr.opcode = IB_WR_SEND; | 588 | send_wr.opcode = IB_WR_SEND; |
| 431 | send_wr.send_flags = IB_SEND_SIGNALED; | 589 | send_wr.send_flags = IB_SEND_SIGNALED; |
| 590 | if (vec->frmr) { | ||
| 591 | /* Prepare INVALIDATE WR */ | ||
| 592 | memset(&inv_wr, 0, sizeof inv_wr); | ||
| 593 | inv_wr.opcode = IB_WR_LOCAL_INV; | ||
| 594 | inv_wr.send_flags = IB_SEND_SIGNALED; | ||
| 595 | inv_wr.ex.invalidate_rkey = | ||
| 596 | vec->frmr->mr->lkey; | ||
| 597 | send_wr.next = &inv_wr; | ||
| 598 | } | ||
| 432 | 599 | ||
| 433 | ret = svc_rdma_send(rdma, &send_wr); | 600 | ret = svc_rdma_send(rdma, &send_wr); |
| 434 | if (ret) | 601 | if (ret) |
| 435 | svc_rdma_put_context(ctxt, 1); | 602 | goto err; |
| 436 | 603 | ||
| 437 | return ret; | 604 | return 0; |
| 605 | |||
| 606 | err: | ||
| 607 | svc_rdma_put_frmr(rdma, vec->frmr); | ||
| 608 | svc_rdma_put_context(ctxt, 1); | ||
| 609 | return -EIO; | ||
| 438 | } | 610 | } |
| 439 | 611 | ||
| 440 | void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) | 612 | void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) |
| @@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
| 477 | ctxt = svc_rdma_get_context(rdma); | 649 | ctxt = svc_rdma_get_context(rdma); |
| 478 | ctxt->direction = DMA_TO_DEVICE; | 650 | ctxt->direction = DMA_TO_DEVICE; |
| 479 | vec = svc_rdma_get_req_map(); | 651 | vec = svc_rdma_get_req_map(); |
| 480 | xdr_to_sge(rdma, &rqstp->rq_res, vec); | 652 | ret = map_xdr(rdma, &rqstp->rq_res, vec); |
| 481 | 653 | if (ret) | |
| 654 | goto err0; | ||
| 482 | inline_bytes = rqstp->rq_res.len; | 655 | inline_bytes = rqstp->rq_res.len; |
| 483 | 656 | ||
| 484 | /* Create the RDMA response header */ | 657 | /* Create the RDMA response header */ |
| @@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
| 498 | if (ret < 0) { | 671 | if (ret < 0) { |
| 499 | printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", | 672 | printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", |
| 500 | ret); | 673 | ret); |
| 501 | goto error; | 674 | goto err1; |
| 502 | } | 675 | } |
| 503 | inline_bytes -= ret; | 676 | inline_bytes -= ret; |
| 504 | 677 | ||
| @@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
| 508 | if (ret < 0) { | 681 | if (ret < 0) { |
| 509 | printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", | 682 | printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", |
| 510 | ret); | 683 | ret); |
| 511 | goto error; | 684 | goto err1; |
| 512 | } | 685 | } |
| 513 | inline_bytes -= ret; | 686 | inline_bytes -= ret; |
| 514 | 687 | ||
| @@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
| 517 | svc_rdma_put_req_map(vec); | 690 | svc_rdma_put_req_map(vec); |
| 518 | dprintk("svcrdma: send_reply returns %d\n", ret); | 691 | dprintk("svcrdma: send_reply returns %d\n", ret); |
| 519 | return ret; | 692 | return ret; |
| 520 | error: | 693 | |
| 694 | err1: | ||
| 695 | put_page(res_page); | ||
| 696 | err0: | ||
| 521 | svc_rdma_put_req_map(vec); | 697 | svc_rdma_put_req_map(vec); |
| 522 | svc_rdma_put_context(ctxt, 0); | 698 | svc_rdma_put_context(ctxt, 0); |
| 523 | put_page(res_page); | ||
| 524 | return ret; | 699 | return ret; |
| 525 | } | 700 | } |
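
The sendto changes follow one pattern for every SGE they build: if the reply was mapped through an FRMR, the SGE carries the kernel virtual address and the FRMR's lkey; otherwise the buffer is DMA-mapped on the spot and the SGE uses sc_dma_lkey. The helper below is a hypothetical condensation of that pattern from send_write() and send_reply(), not code in the patch.

/* Sketch of the per-SGE lkey selection used above (not in the patch). */
static int fill_send_sge(struct svcxprt_rdma *xprt,
			 struct svc_rdma_req_map *vec,
			 struct ib_sge *sge, void *base, u32 len)
{
	if (vec->frmr) {
		/* Address is a kva inside the fast-registered region */
		sge->addr = (unsigned long)base;
		sge->lkey = vec->frmr->mr->lkey;
	} else {
		sge->addr = ib_dma_map_single(xprt->sc_cm_id->device,
					      base, len, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge->addr))
			return -EINVAL;
		atomic_inc(&xprt->sc_dma_used);
		sge->lkey = xprt->sc_dma_lkey;
	}
	sge->length = len;
	return 0;
}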
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 900cb69728c6..6fb493cbd29f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
| @@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) | |||
| 100 | ctxt->xprt = xprt; | 100 | ctxt->xprt = xprt; |
| 101 | INIT_LIST_HEAD(&ctxt->dto_q); | 101 | INIT_LIST_HEAD(&ctxt->dto_q); |
| 102 | ctxt->count = 0; | 102 | ctxt->count = 0; |
| 103 | ctxt->frmr = NULL; | ||
| 103 | atomic_inc(&xprt->sc_ctxt_used); | 104 | atomic_inc(&xprt->sc_ctxt_used); |
| 104 | return ctxt; | 105 | return ctxt; |
| 105 | } | 106 | } |
| 106 | 107 | ||
| 107 | static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) | 108 | void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) |
| 108 | { | 109 | { |
| 109 | struct svcxprt_rdma *xprt = ctxt->xprt; | 110 | struct svcxprt_rdma *xprt = ctxt->xprt; |
| 110 | int i; | 111 | int i; |
| 111 | for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { | 112 | for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { |
| 112 | atomic_dec(&xprt->sc_dma_used); | 113 | /* |
| 113 | ib_dma_unmap_single(xprt->sc_cm_id->device, | 114 | * Unmap the DMA addr in the SGE if the lkey matches |
| 114 | ctxt->sge[i].addr, | 115 | * the sc_dma_lkey, otherwise, ignore it since it is |
| 115 | ctxt->sge[i].length, | 116 | * an FRMR lkey and will be unmapped later when the |
| 116 | ctxt->direction); | 117 | * last WR that uses it completes. |
| 118 | */ | ||
| 119 | if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { | ||
| 120 | atomic_dec(&xprt->sc_dma_used); | ||
| 121 | ib_dma_unmap_single(xprt->sc_cm_id->device, | ||
| 122 | ctxt->sge[i].addr, | ||
| 123 | ctxt->sge[i].length, | ||
| 124 | ctxt->direction); | ||
| 125 | } | ||
| 117 | } | 126 | } |
| 118 | } | 127 | } |
| 119 | 128 | ||
| @@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) | |||
| 150 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | 159 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); |
| 151 | } | 160 | } |
| 152 | map->count = 0; | 161 | map->count = 0; |
| 162 | map->frmr = NULL; | ||
| 153 | return map; | 163 | return map; |
| 154 | } | 164 | } |
| 155 | 165 | ||
| @@ -316,6 +326,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) | |||
| 316 | } | 326 | } |
| 317 | 327 | ||
| 318 | /* | 328 | /* |
| 329 | * Processs a completion context | ||
| 330 | */ | ||
| 331 | static void process_context(struct svcxprt_rdma *xprt, | ||
| 332 | struct svc_rdma_op_ctxt *ctxt) | ||
| 333 | { | ||
| 334 | svc_rdma_unmap_dma(ctxt); | ||
| 335 | |||
| 336 | switch (ctxt->wr_op) { | ||
| 337 | case IB_WR_SEND: | ||
| 338 | if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) | ||
| 339 | svc_rdma_put_frmr(xprt, ctxt->frmr); | ||
| 340 | svc_rdma_put_context(ctxt, 1); | ||
| 341 | break; | ||
| 342 | |||
| 343 | case IB_WR_RDMA_WRITE: | ||
| 344 | svc_rdma_put_context(ctxt, 0); | ||
| 345 | break; | ||
| 346 | |||
| 347 | case IB_WR_RDMA_READ: | ||
| 348 | case IB_WR_RDMA_READ_WITH_INV: | ||
| 349 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | ||
| 350 | struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; | ||
| 351 | BUG_ON(!read_hdr); | ||
| 352 | if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) | ||
| 353 | svc_rdma_put_frmr(xprt, ctxt->frmr); | ||
| 354 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
| 355 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
| 356 | list_add_tail(&read_hdr->dto_q, | ||
| 357 | &xprt->sc_read_complete_q); | ||
| 358 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
| 359 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
| 360 | } | ||
| 361 | svc_rdma_put_context(ctxt, 0); | ||
| 362 | break; | ||
| 363 | |||
| 364 | default: | ||
| 365 | printk(KERN_ERR "svcrdma: unexpected completion type, " | ||
| 366 | "opcode=%d\n", | ||
| 367 | ctxt->wr_op); | ||
| 368 | break; | ||
| 369 | } | ||
| 370 | } | ||
| 371 | |||
| 372 | /* | ||
| 319 | * Send Queue Completion Handler - potentially called on interrupt context. | 373 | * Send Queue Completion Handler - potentially called on interrupt context. |
| 320 | * | 374 | * |
| 321 | * Note that caller must hold a transport reference. | 375 | * Note that caller must hold a transport reference. |
| @@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) | |||
| 327 | struct ib_cq *cq = xprt->sc_sq_cq; | 381 | struct ib_cq *cq = xprt->sc_sq_cq; |
| 328 | int ret; | 382 | int ret; |
| 329 | 383 | ||
| 330 | |||
| 331 | if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) | 384 | if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) |
| 332 | return; | 385 | return; |
| 333 | 386 | ||
| 334 | ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); | 387 | ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); |
| 335 | atomic_inc(&rdma_stat_sq_poll); | 388 | atomic_inc(&rdma_stat_sq_poll); |
| 336 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { | 389 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { |
| 337 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; | ||
| 338 | xprt = ctxt->xprt; | ||
| 339 | |||
| 340 | svc_rdma_unmap_dma(ctxt); | ||
| 341 | if (wc.status != IB_WC_SUCCESS) | 390 | if (wc.status != IB_WC_SUCCESS) |
| 342 | /* Close the transport */ | 391 | /* Close the transport */ |
| 343 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | 392 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); |
| @@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) | |||
| 346 | atomic_dec(&xprt->sc_sq_count); | 395 | atomic_dec(&xprt->sc_sq_count); |
| 347 | wake_up(&xprt->sc_send_wait); | 396 | wake_up(&xprt->sc_send_wait); |
| 348 | 397 | ||
| 349 | switch (ctxt->wr_op) { | 398 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; |
| 350 | case IB_WR_SEND: | 399 | if (ctxt) |
| 351 | svc_rdma_put_context(ctxt, 1); | 400 | process_context(xprt, ctxt); |
| 352 | break; | ||
| 353 | |||
| 354 | case IB_WR_RDMA_WRITE: | ||
| 355 | svc_rdma_put_context(ctxt, 0); | ||
| 356 | break; | ||
| 357 | |||
| 358 | case IB_WR_RDMA_READ: | ||
| 359 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | ||
| 360 | struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; | ||
| 361 | BUG_ON(!read_hdr); | ||
| 362 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
| 363 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
| 364 | list_add_tail(&read_hdr->dto_q, | ||
| 365 | &xprt->sc_read_complete_q); | ||
| 366 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
| 367 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
| 368 | } | ||
| 369 | svc_rdma_put_context(ctxt, 0); | ||
| 370 | break; | ||
| 371 | 401 | ||
| 372 | default: | ||
| 373 | printk(KERN_ERR "svcrdma: unexpected completion type, " | ||
| 374 | "opcode=%d, status=%d\n", | ||
| 375 | wc.opcode, wc.status); | ||
| 376 | break; | ||
| 377 | } | ||
| 378 | svc_xprt_put(&xprt->sc_xprt); | 402 | svc_xprt_put(&xprt->sc_xprt); |
| 379 | } | 403 | } |
| 380 | 404 | ||
| @@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, | |||
| 425 | INIT_LIST_HEAD(&cma_xprt->sc_dto_q); | 449 | INIT_LIST_HEAD(&cma_xprt->sc_dto_q); |
| 426 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); | 450 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); |
| 427 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); | 451 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); |
| 452 | INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); | ||
| 428 | init_waitqueue_head(&cma_xprt->sc_send_wait); | 453 | init_waitqueue_head(&cma_xprt->sc_send_wait); |
| 429 | 454 | ||
| 430 | spin_lock_init(&cma_xprt->sc_lock); | 455 | spin_lock_init(&cma_xprt->sc_lock); |
| 431 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); | 456 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); |
| 457 | spin_lock_init(&cma_xprt->sc_frmr_q_lock); | ||
| 432 | 458 | ||
| 433 | cma_xprt->sc_ord = svcrdma_ord; | 459 | cma_xprt->sc_ord = svcrdma_ord; |
| 434 | 460 | ||
| @@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
| 462 | struct ib_recv_wr recv_wr, *bad_recv_wr; | 488 | struct ib_recv_wr recv_wr, *bad_recv_wr; |
| 463 | struct svc_rdma_op_ctxt *ctxt; | 489 | struct svc_rdma_op_ctxt *ctxt; |
| 464 | struct page *page; | 490 | struct page *page; |
| 465 | unsigned long pa; | 491 | dma_addr_t pa; |
| 466 | int sge_no; | 492 | int sge_no; |
| 467 | int buflen; | 493 | int buflen; |
| 468 | int ret; | 494 | int ret; |
| @@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
| 474 | BUG_ON(sge_no >= xprt->sc_max_sge); | 500 | BUG_ON(sge_no >= xprt->sc_max_sge); |
| 475 | page = svc_rdma_get_page(); | 501 | page = svc_rdma_get_page(); |
| 476 | ctxt->pages[sge_no] = page; | 502 | ctxt->pages[sge_no] = page; |
| 477 | atomic_inc(&xprt->sc_dma_used); | ||
| 478 | pa = ib_dma_map_page(xprt->sc_cm_id->device, | 503 | pa = ib_dma_map_page(xprt->sc_cm_id->device, |
| 479 | page, 0, PAGE_SIZE, | 504 | page, 0, PAGE_SIZE, |
| 480 | DMA_FROM_DEVICE); | 505 | DMA_FROM_DEVICE); |
| 506 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) | ||
| 507 | goto err_put_ctxt; | ||
| 508 | atomic_inc(&xprt->sc_dma_used); | ||
| 481 | ctxt->sge[sge_no].addr = pa; | 509 | ctxt->sge[sge_no].addr = pa; |
| 482 | ctxt->sge[sge_no].length = PAGE_SIZE; | 510 | ctxt->sge[sge_no].length = PAGE_SIZE; |
| 483 | ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | 511 | ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; |
| 484 | buflen += PAGE_SIZE; | 512 | buflen += PAGE_SIZE; |
| 485 | } | 513 | } |
| 486 | ctxt->count = sge_no; | 514 | ctxt->count = sge_no; |
| @@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
| 496 | svc_rdma_put_context(ctxt, 1); | 524 | svc_rdma_put_context(ctxt, 1); |
| 497 | } | 525 | } |
| 498 | return ret; | 526 | return ret; |
| 527 | |||
| 528 | err_put_ctxt: | ||
| 529 | svc_rdma_put_context(ctxt, 1); | ||
| 530 | return -ENOMEM; | ||
| 499 | } | 531 | } |
| 500 | 532 | ||
| 501 | /* | 533 | /* |
| @@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, | |||
| 566 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " | 598 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " |
| 567 | "event=%d\n", cma_id, cma_id->context, event->event); | 599 | "event=%d\n", cma_id, cma_id->context, event->event); |
| 568 | handle_connect_req(cma_id, | 600 | handle_connect_req(cma_id, |
| 569 | event->param.conn.responder_resources); | 601 | event->param.conn.initiator_depth); |
| 570 | break; | 602 | break; |
| 571 | 603 | ||
| 572 | case RDMA_CM_EVENT_ESTABLISHED: | 604 | case RDMA_CM_EVENT_ESTABLISHED: |
| @@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | |||
| 686 | return ERR_PTR(ret); | 718 | return ERR_PTR(ret); |
| 687 | } | 719 | } |
| 688 | 720 | ||
| 721 | static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) | ||
| 722 | { | ||
| 723 | struct ib_mr *mr; | ||
| 724 | struct ib_fast_reg_page_list *pl; | ||
| 725 | struct svc_rdma_fastreg_mr *frmr; | ||
| 726 | |||
| 727 | frmr = kmalloc(sizeof(*frmr), GFP_KERNEL); | ||
| 728 | if (!frmr) | ||
| 729 | goto err; | ||
| 730 | |||
| 731 | mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); | ||
| 732 | if (!mr) | ||
| 733 | goto err_free_frmr; | ||
| 734 | |||
| 735 | pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, | ||
| 736 | RPCSVC_MAXPAGES); | ||
| 737 | if (!pl) | ||
| 738 | goto err_free_mr; | ||
| 739 | |||
| 740 | frmr->mr = mr; | ||
| 741 | frmr->page_list = pl; | ||
| 742 | INIT_LIST_HEAD(&frmr->frmr_list); | ||
| 743 | return frmr; | ||
| 744 | |||
| 745 | err_free_mr: | ||
| 746 | ib_dereg_mr(mr); | ||
| 747 | err_free_frmr: | ||
| 748 | kfree(frmr); | ||
| 749 | err: | ||
| 750 | return ERR_PTR(-ENOMEM); | ||
| 751 | } | ||
| 752 | |||
| 753 | static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) | ||
| 754 | { | ||
| 755 | struct svc_rdma_fastreg_mr *frmr; | ||
| 756 | |||
| 757 | while (!list_empty(&xprt->sc_frmr_q)) { | ||
| 758 | frmr = list_entry(xprt->sc_frmr_q.next, | ||
| 759 | struct svc_rdma_fastreg_mr, frmr_list); | ||
| 760 | list_del_init(&frmr->frmr_list); | ||
| 761 | ib_dereg_mr(frmr->mr); | ||
| 762 | ib_free_fast_reg_page_list(frmr->page_list); | ||
| 763 | kfree(frmr); | ||
| 764 | } | ||
| 765 | } | ||
| 766 | |||
| 767 | struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) | ||
| 768 | { | ||
| 769 | struct svc_rdma_fastreg_mr *frmr = NULL; | ||
| 770 | |||
| 771 | spin_lock_bh(&rdma->sc_frmr_q_lock); | ||
| 772 | if (!list_empty(&rdma->sc_frmr_q)) { | ||
| 773 | frmr = list_entry(rdma->sc_frmr_q.next, | ||
| 774 | struct svc_rdma_fastreg_mr, frmr_list); | ||
| 775 | list_del_init(&frmr->frmr_list); | ||
| 776 | frmr->map_len = 0; | ||
| 777 | frmr->page_list_len = 0; | ||
| 778 | } | ||
| 779 | spin_unlock_bh(&rdma->sc_frmr_q_lock); | ||
| 780 | if (frmr) | ||
| 781 | return frmr; | ||
| 782 | |||
| 783 | return rdma_alloc_frmr(rdma); | ||
| 784 | } | ||
| 785 | |||
| 786 | static void frmr_unmap_dma(struct svcxprt_rdma *xprt, | ||
| 787 | struct svc_rdma_fastreg_mr *frmr) | ||
| 788 | { | ||
| 789 | int page_no; | ||
| 790 | for (page_no = 0; page_no < frmr->page_list_len; page_no++) { | ||
| 791 | dma_addr_t addr = frmr->page_list->page_list[page_no]; | ||
| 792 | if (ib_dma_mapping_error(frmr->mr->device, addr)) | ||
| 793 | continue; | ||
| 794 | atomic_dec(&xprt->sc_dma_used); | ||
| 795 | ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, | ||
| 796 | frmr->direction); | ||
| 797 | } | ||
| 798 | } | ||
| 799 | |||
| 800 | void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, | ||
| 801 | struct svc_rdma_fastreg_mr *frmr) | ||
| 802 | { | ||
| 803 | if (frmr) { | ||
| 804 | frmr_unmap_dma(rdma, frmr); | ||
| 805 | spin_lock_bh(&rdma->sc_frmr_q_lock); | ||
| 806 | BUG_ON(!list_empty(&frmr->frmr_list)); | ||
| 807 | list_add(&frmr->frmr_list, &rdma->sc_frmr_q); | ||
| 808 | spin_unlock_bh(&rdma->sc_frmr_q_lock); | ||
| 809 | } | ||
| 810 | } | ||
| 811 | |||
| 689 | /* | 812 | /* |
| 690 | * This is the xpo_recvfrom function for listening endpoints. Its | 813 | * This is the xpo_recvfrom function for listening endpoints. Its |
| 691 | * purpose is to accept incoming connections. The CMA callback handler | 814 | * purpose is to accept incoming connections. The CMA callback handler |
| @@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
| 704 | struct rdma_conn_param conn_param; | 827 | struct rdma_conn_param conn_param; |
| 705 | struct ib_qp_init_attr qp_attr; | 828 | struct ib_qp_init_attr qp_attr; |
| 706 | struct ib_device_attr devattr; | 829 | struct ib_device_attr devattr; |
| 830 | int dma_mr_acc; | ||
| 831 | int need_dma_mr; | ||
| 707 | int ret; | 832 | int ret; |
| 708 | int i; | 833 | int i; |
| 709 | 834 | ||
| @@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
| 819 | } | 944 | } |
| 820 | newxprt->sc_qp = newxprt->sc_cm_id->qp; | 945 | newxprt->sc_qp = newxprt->sc_cm_id->qp; |
| 821 | 946 | ||
| 822 | /* Register all of physical memory */ | 947 | /* |
| 823 | newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, | 948 | * Use the most secure set of MR resources based on the |
| 824 | IB_ACCESS_LOCAL_WRITE | | 949 | * transport type and available memory management features in |
| 825 | IB_ACCESS_REMOTE_WRITE); | 950 | * the device. Here's the table implemented below: |
| 826 | if (IS_ERR(newxprt->sc_phys_mr)) { | 951 | * |
| 827 | dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); | 952 | * Fast Global DMA Remote WR |
| 953 | * Reg LKEY MR Access | ||
| 954 | * Sup'd Sup'd Needed Needed | ||
| 955 | * | ||
| 956 | * IWARP N N Y Y | ||
| 957 | * N Y Y Y | ||
| 958 | * Y N Y N | ||
| 959 | * Y Y N - | ||
| 960 | * | ||
| 961 | * IB N N Y N | ||
| 962 | * N Y N - | ||
| 963 | * Y N Y N | ||
| 964 | * Y Y N - | ||
| 965 | * | ||
| 966 | * NB: iWARP requires remote write access for the data sink | ||
| 967 | * of an RDMA_READ. IB does not. | ||
| 968 | */ | ||
| 969 | if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { | ||
| 970 | newxprt->sc_frmr_pg_list_len = | ||
| 971 | devattr.max_fast_reg_page_list_len; | ||
| 972 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; | ||
| 973 | } | ||
| 974 | |||
| 975 | /* | ||
| 976 | * Determine if a DMA MR is required and if so, what privs are required | ||
| 977 | */ | ||
| 978 | switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) { | ||
| 979 | case RDMA_TRANSPORT_IWARP: | ||
| 980 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; | ||
| 981 | if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { | ||
| 982 | need_dma_mr = 1; | ||
| 983 | dma_mr_acc = | ||
| 984 | (IB_ACCESS_LOCAL_WRITE | | ||
| 985 | IB_ACCESS_REMOTE_WRITE); | ||
| 986 | } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
| 987 | need_dma_mr = 1; | ||
| 988 | dma_mr_acc = IB_ACCESS_LOCAL_WRITE; | ||
| 989 | } else | ||
| 990 | need_dma_mr = 0; | ||
| 991 | break; | ||
| 992 | case RDMA_TRANSPORT_IB: | ||
| 993 | if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
| 994 | need_dma_mr = 1; | ||
| 995 | dma_mr_acc = IB_ACCESS_LOCAL_WRITE; | ||
| 996 | } else | ||
| 997 | need_dma_mr = 0; | ||
| 998 | break; | ||
| 999 | default: | ||
| 828 | goto errout; | 1000 | goto errout; |
| 829 | } | 1001 | } |
| 830 | 1002 | ||
| 1003 | /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ | ||
| 1004 | if (need_dma_mr) { | ||
| 1005 | /* Register all of physical memory */ | ||
| 1006 | newxprt->sc_phys_mr = | ||
| 1007 | ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc); | ||
| 1008 | if (IS_ERR(newxprt->sc_phys_mr)) { | ||
| 1009 | dprintk("svcrdma: Failed to create DMA MR ret=%d\n", | ||
| 1010 | ret); | ||
| 1011 | goto errout; | ||
| 1012 | } | ||
| 1013 | newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey; | ||
| 1014 | } else | ||
| 1015 | newxprt->sc_dma_lkey = | ||
| 1016 | newxprt->sc_cm_id->device->local_dma_lkey; | ||
| 1017 | |||
| 831 | /* Post receive buffers */ | 1018 | /* Post receive buffers */ |
| 832 | for (i = 0; i < newxprt->sc_max_requests; i++) { | 1019 | for (i = 0; i < newxprt->sc_max_requests; i++) { |
| 833 | ret = svc_rdma_post_recv(newxprt); | 1020 | ret = svc_rdma_post_recv(newxprt); |
| @@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work) | |||
| 961 | WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); | 1148 | WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); |
| 962 | WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); | 1149 | WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); |
| 963 | 1150 | ||
| 1151 | /* De-allocate fastreg mr */ | ||
| 1152 | rdma_dealloc_frmr_q(rdma); | ||
| 1153 | |||
| 964 | /* Destroy the QP if present (not a listener) */ | 1154 | /* Destroy the QP if present (not a listener) */ |
| 965 | if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) | 1155 | if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) |
| 966 | ib_destroy_qp(rdma->sc_qp); | 1156 | ib_destroy_qp(rdma->sc_qp); |
| @@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) | |||
| 1014 | return 1; | 1204 | return 1; |
| 1015 | } | 1205 | } |
| 1016 | 1206 | ||
| 1207 | /* | ||
| 1208 | * Attempt to register the kvec representing the RPC memory with the | ||
| 1209 | * device. | ||
| 1210 | * | ||
| 1211 | * Returns: | ||
| 1212 | * NULL : The device does not support fastreg or there were no more | ||
| 1213 | * fastreg mr. | ||
| 1214 | * frmr : The kvec register request was successfully posted. | ||
| 1215 | * <0 : An error was encountered attempting to register the kvec. | ||
| 1216 | */ | ||
| 1217 | int svc_rdma_fastreg(struct svcxprt_rdma *xprt, | ||
| 1218 | struct svc_rdma_fastreg_mr *frmr) | ||
| 1219 | { | ||
| 1220 | struct ib_send_wr fastreg_wr; | ||
| 1221 | u8 key; | ||
| 1222 | |||
| 1223 | /* Bump the key */ | ||
| 1224 | key = (u8)(frmr->mr->lkey & 0x000000FF); | ||
| 1225 | ib_update_fast_reg_key(frmr->mr, ++key); | ||
| 1226 | |||
| 1227 | /* Prepare FASTREG WR */ | ||
| 1228 | memset(&fastreg_wr, 0, sizeof fastreg_wr); | ||
| 1229 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
| 1230 | fastreg_wr.send_flags = IB_SEND_SIGNALED; | ||
| 1231 | fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; | ||
| 1232 | fastreg_wr.wr.fast_reg.page_list = frmr->page_list; | ||
| 1233 | fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; | ||
| 1234 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
| 1235 | fastreg_wr.wr.fast_reg.length = frmr->map_len; | ||
| 1236 | fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; | ||
| 1237 | fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; | ||
| 1238 | return svc_rdma_send(xprt, &fastreg_wr); | ||
| 1239 | } | ||
| 1240 | |||
| 1017 | int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) | 1241 | int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) |
| 1018 | { | 1242 | { |
| 1019 | struct ib_send_wr *bad_wr; | 1243 | struct ib_send_wr *bad_wr, *n_wr; |
| 1244 | int wr_count; | ||
| 1245 | int i; | ||
| 1020 | int ret; | 1246 | int ret; |
| 1021 | 1247 | ||
| 1022 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) | 1248 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) |
| 1023 | return -ENOTCONN; | 1249 | return -ENOTCONN; |
| 1024 | 1250 | ||
| 1025 | BUG_ON(wr->send_flags != IB_SEND_SIGNALED); | 1251 | BUG_ON(wr->send_flags != IB_SEND_SIGNALED); |
| 1026 | BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != | 1252 | wr_count = 1; |
| 1027 | wr->opcode); | 1253 | for (n_wr = wr->next; n_wr; n_wr = n_wr->next) |
| 1254 | wr_count++; | ||
| 1255 | |||
| 1028 | /* If the SQ is full, wait until an SQ entry is available */ | 1256 | /* If the SQ is full, wait until an SQ entry is available */ |
| 1029 | while (1) { | 1257 | while (1) { |
| 1030 | spin_lock_bh(&xprt->sc_lock); | 1258 | spin_lock_bh(&xprt->sc_lock); |
| 1031 | if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { | 1259 | if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) { |
| 1032 | spin_unlock_bh(&xprt->sc_lock); | 1260 | spin_unlock_bh(&xprt->sc_lock); |
| 1033 | atomic_inc(&rdma_stat_sq_starve); | 1261 | atomic_inc(&rdma_stat_sq_starve); |
| 1034 | 1262 | ||
| @@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) | |||
| 1043 | return 0; | 1271 | return 0; |
| 1044 | continue; | 1272 | continue; |
| 1045 | } | 1273 | } |
| 1046 | /* Bumped used SQ WR count and post */ | 1274 | /* Take a transport ref for each WR posted */ |
| 1047 | svc_xprt_get(&xprt->sc_xprt); | 1275 | for (i = 0; i < wr_count; i++) |
| 1276 | svc_xprt_get(&xprt->sc_xprt); | ||
| 1277 | |||
| 1278 | /* Bump used SQ WR count and post */ | ||
| 1279 | atomic_add(wr_count, &xprt->sc_sq_count); | ||
| 1048 | ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); | 1280 | ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); |
| 1049 | if (!ret) | 1281 | if (ret) { |
| 1050 | atomic_inc(&xprt->sc_sq_count); | 1282 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); |
| 1051 | else { | 1283 | atomic_sub(wr_count, &xprt->sc_sq_count); |
| 1052 | svc_xprt_put(&xprt->sc_xprt); | 1284 | for (i = 0; i < wr_count; i ++) |
| 1285 | svc_xprt_put(&xprt->sc_xprt); | ||
| 1053 | dprintk("svcrdma: failed to post SQ WR rc=%d, " | 1286 | dprintk("svcrdma: failed to post SQ WR rc=%d, " |
| 1054 | "sc_sq_count=%d, sc_sq_depth=%d\n", | 1287 | "sc_sq_count=%d, sc_sq_depth=%d\n", |
| 1055 | ret, atomic_read(&xprt->sc_sq_count), | 1288 | ret, atomic_read(&xprt->sc_sq_count), |
| 1056 | xprt->sc_sq_depth); | 1289 | xprt->sc_sq_depth); |
| 1057 | } | 1290 | } |
| 1058 | spin_unlock_bh(&xprt->sc_lock); | 1291 | spin_unlock_bh(&xprt->sc_lock); |
| 1292 | if (ret) | ||
| 1293 | wake_up(&xprt->sc_send_wait); | ||
| 1059 | break; | 1294 | break; |
| 1060 | } | 1295 | } |
| 1061 | return ret; | 1296 | return ret; |
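
With the accounting change above, svc_rdma_send() now reserves one SQ slot and takes one transport reference per WR in the chain it is handed, so callers can chain several WRs and post them in a single call. The fragment below is an illustrative caller (not in the patch) modelled on send_reply(): a SEND chained to a LOCAL_INV counts as two WRs in one svc_rdma_send().

/* Sketch: post a SEND chained to a LOCAL_INV with one call (not in the patch). */
static int post_send_with_inv(struct svcxprt_rdma *xprt,
			      struct svc_rdma_fastreg_mr *frmr,
			      struct ib_send_wr *send_wr)
{
	struct ib_send_wr inv_wr;

	memset(&inv_wr, 0, sizeof(inv_wr));
	inv_wr.opcode = IB_WR_LOCAL_INV;
	inv_wr.send_flags = IB_SEND_SIGNALED;
	inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
	send_wr->next = &inv_wr;

	/* wr_count == 2: two SQ slots, two transport references */
	return svc_rdma_send(xprt, send_wr);
}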
| @@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | |||
| 1079 | length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); | 1314 | length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); |
| 1080 | 1315 | ||
| 1081 | /* Prepare SGE for local address */ | 1316 | /* Prepare SGE for local address */ |
| 1082 | atomic_inc(&xprt->sc_dma_used); | ||
| 1083 | sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, | 1317 | sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, |
| 1084 | p, 0, PAGE_SIZE, DMA_FROM_DEVICE); | 1318 | p, 0, PAGE_SIZE, DMA_FROM_DEVICE); |
| 1085 | sge.lkey = xprt->sc_phys_mr->lkey; | 1319 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { |
| 1320 | put_page(p); | ||
| 1321 | return; | ||
| 1322 | } | ||
| 1323 | atomic_inc(&xprt->sc_dma_used); | ||
| 1324 | sge.lkey = xprt->sc_dma_lkey; | ||
| 1086 | sge.length = length; | 1325 | sge.length = length; |
| 1087 | 1326 | ||
| 1088 | ctxt = svc_rdma_get_context(xprt); | 1327 | ctxt = svc_rdma_get_context(xprt); |
| @@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | |||
| 1103 | if (ret) { | 1342 | if (ret) { |
| 1104 | dprintk("svcrdma: Error %d posting send for protocol error\n", | 1343 | dprintk("svcrdma: Error %d posting send for protocol error\n", |
| 1105 | ret); | 1344 | ret); |
| 1345 | ib_dma_unmap_page(xprt->sc_cm_id->device, | ||
| 1346 | sge.addr, PAGE_SIZE, | ||
| 1347 | DMA_FROM_DEVICE); | ||
| 1106 | svc_rdma_put_context(ctxt, 1); | 1348 | svc_rdma_put_context(ctxt, 1); |
| 1107 | } | 1349 | } |
| 1108 | } | 1350 | } |
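
The MR-policy table in svc_rdma_accept() above can be read as a small decision function. The sketch below is a restatement for clarity only, not code from the patch; choose_mr_policy() is a hypothetical name, and its inputs mirror what the accept path already has in hand.

/* Sketch of the DMA-MR policy table in svc_rdma_accept() (not in the patch). */
static void choose_mr_policy(struct ib_device_attr *devattr,
			     enum rdma_transport_type transport,
			     int fast_reg_supported,
			     int *need_dma_mr, int *dma_mr_acc)
{
	*need_dma_mr = 0;
	*dma_mr_acc = 0;

	if (transport == RDMA_TRANSPORT_IWARP && !fast_reg_supported) {
		/* iWARP read sink needs remote write, and no FRMR can supply it */
		*need_dma_mr = 1;
		*dma_mr_acc = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
	} else if (!(devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
		/* No global DMA lkey: register local memory for local writes */
		*need_dma_mr = 1;
		*dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
	}
	/* Otherwise the device's local_dma_lkey is used and no DMA MR is needed */
}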
