author		Andy Grover <andy.grover@oracle.com>	2010-01-12 17:13:15 -0500
committer	Andy Grover <andy.grover@oracle.com>	2010-09-08 21:11:38 -0400
commit		ff87e97a9d70c9ae133d3d3d7792b26ab85f4297 (patch)
tree		32de73cdf5e4353e89b3351eaae695f69faa868b /net
parent		21f79afa5fda2820671a8f64c3d0e43bb118053b (diff)
RDS: make m_rdma_op a member of rds_message
This eliminates a separate memory alloc, although it is now
necessary to add an "r_active" flag, since it is no longer
possible to use the m_rdma_op pointer as an indicator of
whether an rdma op is present.

rdma SGs are now allocated from the rm's sg pool.

rds_rm_size also gets bigger. It's a little inefficient to
run through the CMSGs twice, but it makes later steps a lot smoother.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
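
The core idea, as a minimal user-space sketch (the struct and function names below are simplified stand-ins for illustration, not the real net/rds types): the rdma op is embedded in the message instead of being a separately allocated object hung off a pointer, so an explicit r_active flag replaces the old "pointer is non-NULL" test, and "freeing" the op just resets it in place.

/* Simplified, hypothetical types -- not the actual net/rds structures. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct rdma_op {
	bool r_active;	/* replaces the "m_rdma_op != NULL" check */
	int r_key;
};

struct message {
	struct rdma_op rdma;	/* embedded: no separate allocation to manage */
};

/* Before this change: kfree() the op. Now: reset the embedded op. */
static void free_op(struct rdma_op *op)
{
	memset(op, 0, sizeof(*op));	/* clears r_active as a side effect */
}

int main(void)
{
	struct message m = { 0 };	/* op starts out inactive */

	m.rdma.r_key = 42;
	m.rdma.r_active = true;		/* mark the op as present */

	if (m.rdma.r_active)		/* was: if (m->rdma_op) */
		printf("rdma op present, r_key=%d\n", m.rdma.r_key);

	free_op(&m.rdma);
	printf("active after free: %d\n", m.rdma.r_active);
	return 0;
}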
Diffstat (limited to 'net')
-rw-r--r--	net/rds/ib_send.c	 20
-rw-r--r--	net/rds/iw_send.c	 16
-rw-r--r--	net/rds/message.c	  9
-rw-r--r--	net/rds/rdma.c		113
-rw-r--r--	net/rds/rds.h		  2
-rw-r--r--	net/rds/send.c		 59
6 files changed, 129 insertions, 90 deletions
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 575fce463c65..f0edfdb2866c 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -85,8 +85,8 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
		     rm->data.m_sg, rm->data.m_nents,
		     DMA_TO_DEVICE);

-	if (rm->rdma.m_rdma_op) {
-		rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
+	if (rm->rdma.m_rdma_op.r_active) {
+		rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);

 		/* If the user asked for a completion notification on this
 		 * message, we can implement three different semantics:
@@ -110,10 +110,10 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
 		 */
 		rds_ib_send_rdma_complete(rm, wc_status);

-		if (rm->rdma.m_rdma_op->r_write)
-			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
+		if (rm->rdma.m_rdma_op.r_write)
+			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 		else
-			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
+			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 	}

 	/* If anyone waited for this message to get flushed out, wake
@@ -243,8 +243,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)

 			rm = rds_send_get_message(conn, send->s_op);
 			if (rm) {
-				if (rm->rdma.m_rdma_op)
-					rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
+				if (rm->rdma.m_rdma_op.r_active)
+					rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
 				rds_ib_send_rdma_complete(rm, wc.status);
 				rds_message_put(rm);
 			}
@@ -560,10 +560,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,

 	/* If it has a RDMA op, tell the peer we did it. This is
 	 * used by the peer to release use-once RDMA MRs. */
-	if (rm->rdma.m_rdma_op) {
+	if (rm->rdma.m_rdma_op.r_active) {
 		struct rds_ext_header_rdma ext_hdr;

-		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
+		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op.r_key);
 		rds_message_add_extension(&rm->m_inc.i_hdr,
 				RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
 	}
@@ -601,7 +601,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 	 * or when requested by the user. Right now, we let
 	 * the application choose.
 	 */
-	if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
+	if (rm->rdma.m_rdma_op.r_active && rm->rdma.m_rdma_op.r_fence)
 		send_flags = IB_SEND_FENCE;

 	/*
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 62234b804d93..9b79a1b10445 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -85,8 +85,8 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
		     rm->data.m_sg, rm->data.m_nents,
		     DMA_TO_DEVICE);

-	if (rm->rdma.m_rdma_op) {
-		rds_iw_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
+	if (rm->rdma.m_rdma_op.r_active) {
+		rds_iw_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);

 		/* If the user asked for a completion notification on this
 		 * message, we can implement three different semantics:
@@ -110,10 +110,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
 		 */
 		rds_iw_send_rdma_complete(rm, wc_status);

-		if (rm->rdma.m_rdma_op->r_write)
-			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
+		if (rm->rdma.m_rdma_op.r_write)
+			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 		else
-			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
+			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 	}

 	/* If anyone waited for this message to get flushed out, wake
@@ -591,10 +591,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,

 	/* If it has a RDMA op, tell the peer we did it. This is
 	 * used by the peer to release use-once RDMA MRs. */
-	if (rm->rdma.m_rdma_op) {
+	if (rm->rdma.m_rdma_op.r_active) {
 		struct rds_ext_header_rdma ext_hdr;

-		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
+		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op.r_key);
 		rds_message_add_extension(&rm->m_inc.i_hdr,
 				RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
 	}
@@ -632,7 +632,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 	 * or when requested by the user. Right now, we let
 	 * the application choose.
 	 */
-	if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
+	if (rm->rdma.m_rdma_op.r_active && rm->rdma.m_rdma_op.r_fence)
 		send_flags = IB_SEND_FENCE;

 	/*
diff --git a/net/rds/message.c b/net/rds/message.c
index fb382fbb5b6f..4352ce79b376 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -69,8 +69,8 @@ static void rds_message_purge(struct rds_message *rm)
 	}
 	rm->data.m_nents = 0;

-	if (rm->rdma.m_rdma_op)
-		rds_rdma_free_op(rm->rdma.m_rdma_op);
+	if (rm->rdma.m_rdma_op.r_active)
+		rds_rdma_free_op(&rm->rdma.m_rdma_op);
 	if (rm->rdma.m_rdma_mr)
 		rds_mr_put(rm->rdma.m_rdma_mr);
 }
@@ -259,14 +259,17 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 {
 	struct rds_message *rm;
 	unsigned int i;
+	int num_sgs = ceil(total_len, PAGE_SIZE);
+	int extra_bytes = num_sgs * sizeof(struct scatterlist);

-	rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
+	rm = rds_message_alloc(extra_bytes, GFP_KERNEL);
 	if (!rm)
 		return ERR_PTR(-ENOMEM);

 	set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
 	rm->data.m_nents = ceil(total_len, PAGE_SIZE);
+	rm->data.m_sg = rds_message_alloc_sgs(rm, num_sgs);

 	for (i = 0; i < rm->data.m_nents; ++i) {
 		sg_set_page(&rm->data.m_sg[i],
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index a21edad33950..7ff3379bab14 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -458,26 +458,60 @@ void rds_rdma_free_op(struct rds_rdma_op *ro)
 	}

 	kfree(ro->r_notifier);
-	kfree(ro);
+	ro->r_notifier = NULL;
+	ro->r_active = 0;
+}
+
+/*
+ * Count the number of pages needed to describe an incoming iovec.
+ */
+static int rds_rdma_pages(struct rds_rdma_args *args)
+{
+	struct rds_iovec vec;
+	struct rds_iovec __user *local_vec;
+	unsigned int tot_pages = 0;
+	unsigned int nr_pages;
+	unsigned int i;
+
+	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+
+	/* figure out the number of pages in the vector */
+	for (i = 0; i < args->nr_local; i++) {
+		if (copy_from_user(&vec, &local_vec[i],
+				   sizeof(struct rds_iovec)))
+			return -EFAULT;
+
+		nr_pages = rds_pages_in_vec(&vec);
+		if (nr_pages == 0)
+			return -EINVAL;
+
+		tot_pages += nr_pages;
+	}
+
+	return tot_pages;
+}
+
+int rds_rdma_extra_size(struct rds_rdma_args *args)
+{
+	return rds_rdma_pages(args) * sizeof(struct scatterlist);
 }

 /*
  * args is a pointer to an in-kernel copy in the sendmsg cmsg.
  */
-static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
-					    struct rds_rdma_args *args)
+static int rds_rdma_prepare(struct rds_message *rm,
+			    struct rds_sock *rs,
+			    struct rds_rdma_args *args)
 {
 	struct rds_iovec vec;
-	struct rds_rdma_op *op = NULL;
+	struct rds_rdma_op *op = &rm->rdma.m_rdma_op;
 	unsigned int nr_pages;
-	unsigned int max_pages;
 	unsigned int nr_bytes;
 	struct page **pages = NULL;
 	struct rds_iovec __user *local_vec;
-	struct scatterlist *sg;
 	unsigned int nr;
 	unsigned int i, j;
-	int ret;
+	int ret = 0;


 	if (rs->rs_bound_addr == 0) {
@@ -490,44 +524,21 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		goto out;
 	}

-	nr_pages = 0;
-	max_pages = 0;
-
-	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
-
-	/* figure out the number of pages in the vector */
-	for (i = 0; i < args->nr_local; i++) {
-		if (copy_from_user(&vec, &local_vec[i],
-				   sizeof(struct rds_iovec))) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		nr = rds_pages_in_vec(&vec);
-		if (nr == 0) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		max_pages = max(nr, max_pages);
-		nr_pages += nr;
-	}
-
-	pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
-	if (!pages) {
-		ret = -ENOMEM;
+	nr_pages = rds_rdma_pages(args);
+	if (nr_pages < 0)
 		goto out;
-	}

-	op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL);
-	if (!op) {
+	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages) {
 		ret = -ENOMEM;
 		goto out;
 	}

+	op->r_sg = rds_message_alloc_sgs(rm, nr_pages);
 	op->r_write = !!(args->flags & RDS_RDMA_READWRITE);
 	op->r_fence = !!(args->flags & RDS_RDMA_FENCE);
 	op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+	op->r_active = 1;
 	op->r_recverr = rs->rs_recverr;
 	WARN_ON(!nr_pages);
 	sg_init_table(op->r_sg, nr_pages);
@@ -564,6 +575,8 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		       (unsigned long long)args->remote_vec.addr,
 		       op->r_key);

+	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+
 	for (i = 0; i < args->nr_local; i++) {
 		if (copy_from_user(&vec, &local_vec[i],
 				   sizeof(struct rds_iovec))) {
@@ -580,11 +593,6 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		rs->rs_user_addr = vec.addr;
 		rs->rs_user_bytes = vec.bytes;

-		/* did the user change the vec under us? */
-		if (nr > max_pages || op->r_nents + nr > nr_pages) {
-			ret = -EINVAL;
-			goto out;
-		}
 		/* If it's a WRITE operation, we want to pin the pages for reading.
 		 * If it's a READ operation, we need to pin the pages for writing.
 		 */
@@ -599,6 +607,7 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,

 		for (j = 0; j < nr; j++) {
 			unsigned int offset = vec.addr & ~PAGE_MASK;
+			struct scatterlist *sg;

 			sg = &op->r_sg[op->r_nents + j];
 			sg_set_page(sg, pages[j],
@@ -628,12 +637,10 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 	ret = 0;
 out:
 	kfree(pages);
-	if (ret) {
-		if (op)
-			rds_rdma_free_op(op);
-		op = ERR_PTR(ret);
-	}
-	return op;
+	if (ret)
+		rds_rdma_free_op(op);
+
+	return ret;
 }

 /*
@@ -643,17 +650,17 @@ out:
 int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 		       struct cmsghdr *cmsg)
 {
-	struct rds_rdma_op *op;
+	int ret;

 	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
-	    rm->rdma.m_rdma_op)
+	    rm->rdma.m_rdma_op.r_active)
 		return -EINVAL;

-	op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
-	if (IS_ERR(op))
-		return PTR_ERR(op);
+	ret = rds_rdma_prepare(rm, rs, CMSG_DATA(cmsg));
+	if (ret)
+		return ret;
+
 	rds_stats_inc(s_send_rdma);
-	rm->rdma.m_rdma_op = op;
 	return 0;
 }

diff --git a/net/rds/rds.h b/net/rds/rds.h
index 7c4adbe8c284..0bb4957e0cfc 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -316,7 +316,7 @@ struct rds_message {
 	rds_rdma_cookie_t m_rdma_cookie;
 	struct {
 		struct {
-			struct rds_rdma_op *m_rdma_op;
+			struct rds_rdma_op m_rdma_op;
 			struct rds_mr *m_rdma_mr;
 		} rdma;
 		struct {
diff --git a/net/rds/send.c b/net/rds/send.c
index 89e26ffdc812..72dbe7fc4f54 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -235,7 +235,7 @@ int rds_send_xmit(struct rds_connection *conn)
 		 * connection.
 		 * Therefore, we never retransmit messages with RDMA ops.
 		 */
-		if (rm->rdma.m_rdma_op &&
+		if (rm->rdma.m_rdma_op.r_active &&
 		    test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
 			spin_lock_irqsave(&conn->c_lock, flags);
 			if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
@@ -267,8 +267,8 @@ int rds_send_xmit(struct rds_connection *conn)
 		 * keep this simple and require that the transport either
 		 * send the whole rdma or none of it.
 		 */
-		if (rm->rdma.m_rdma_op && !conn->c_xmit_rdma_sent) {
-			ret = conn->c_trans->xmit_rdma(conn, rm->rdma.m_rdma_op);
+		if (rm->rdma.m_rdma_op.r_active && !conn->c_xmit_rdma_sent) {
+			ret = conn->c_trans->xmit_rdma(conn, &rm->rdma.m_rdma_op);
 			if (ret)
 				break;
 			conn->c_xmit_rdma_sent = 1;
@@ -418,9 +418,9 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)

 	spin_lock_irqsave(&rm->m_rs_lock, flags);

-	ro = rm->rdma.m_rdma_op;
+	ro = &rm->rdma.m_rdma_op;
 	if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
-	    ro && ro->r_notify && ro->r_notifier) {
+	    ro->r_active && ro->r_notify && ro->r_notifier) {
 		notifier = ro->r_notifier;
 		rs = rm->m_rs;
 		sock_hold(rds_rs_to_sk(rs));
@@ -452,8 +452,8 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status
 {
 	struct rds_rdma_op *ro;

-	ro = rm->rdma.m_rdma_op;
-	if (ro && ro->r_notify && ro->r_notifier) {
+	ro = &rm->rdma.m_rdma_op;
+	if (ro->r_active && ro->r_notify && ro->r_notifier) {
 		ro->r_notifier->n_status = status;
 		list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue);
 		ro->r_notifier = NULL;
@@ -476,7 +476,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
 	spin_lock_irqsave(&conn->c_lock, flags);

 	list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
-		if (rm->rdma.m_rdma_op == op) {
+		if (&rm->rdma.m_rdma_op == op) {
 			atomic_inc(&rm->m_refcount);
 			found = rm;
 			goto out;
@@ -484,7 +484,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
 	}

 	list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
-		if (rm->rdma.m_rdma_op == op) {
+		if (&rm->rdma.m_rdma_op == op) {
 			atomic_inc(&rm->m_refcount);
 			found = rm;
 			break;
@@ -544,19 +544,20 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
 		spin_lock(&rs->rs_lock);

 		if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
-			struct rds_rdma_op *ro = rm->rdma.m_rdma_op;
+			struct rds_rdma_op *ro = &rm->rdma.m_rdma_op;
 			struct rds_notifier *notifier;

 			list_del_init(&rm->m_sock_item);
 			rds_send_sndbuf_remove(rs, rm);

-			if (ro && ro->r_notifier && (status || ro->r_notify)) {
+			if (ro->r_active && ro->r_notifier &&
+			    (status || ro->r_notify)) {
 				notifier = ro->r_notifier;
 				list_add_tail(&notifier->n_list,
 						&rs->rs_notify_queue);
 				if (!notifier->n_status)
 					notifier->n_status = status;
-				rm->rdma.m_rdma_op->r_notifier = NULL;
+				rm->rdma.m_rdma_op.r_notifier = NULL;
 			}
 			was_on_sock = 1;
 			rm->m_rs = NULL;
@@ -763,9 +764,37 @@ out:
  */
 static int rds_rm_size(struct msghdr *msg, int data_len)
 {
+	struct cmsghdr *cmsg;
 	int size = 0;
+	int retval;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+
+		if (cmsg->cmsg_level != SOL_RDS)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case RDS_CMSG_RDMA_ARGS:
+			retval = rds_rdma_extra_size(CMSG_DATA(cmsg));
+			if (retval < 0)
+				return retval;
+			size += retval;
+			break;
+
+		case RDS_CMSG_RDMA_DEST:
+		case RDS_CMSG_RDMA_MAP:
+			/* these are valid but do not add any size */
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+	}

 	size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);

 	return size;
 }
@@ -896,11 +925,11 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	if (ret)
 		goto out;

-	if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op) &&
+	if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op.r_active) &&
 	    !conn->c_trans->xmit_rdma) {
 		if (printk_ratelimit())
 			printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
-			       rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma);
+			       &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma);
 		ret = -EOPNOTSUPP;
 		goto out;
 	}