author	Andy Grover <andy.grover@oracle.com>	2010-01-12 17:13:15 -0500
committer	Andy Grover <andy.grover@oracle.com>	2010-09-08 21:11:38 -0400
commit	ff87e97a9d70c9ae133d3d3d7792b26ab85f4297 (patch)
tree	32de73cdf5e4353e89b3351eaae695f69faa868b /net
parent	21f79afa5fda2820671a8f64c3d0e43bb118053b (diff)
RDS: make m_rdma_op a member of rds_message
This eliminates a separate memory alloc, although it is now necessary
to add an "r_active" flag, since it is no longer possible to use the
m_rdma_op pointer as an indicator of whether an rdma op is present.

rdma SGs are now allocated from the rm sg pool. rds_rm_size also gets
bigger.

It's a little inefficient to run through CMSGs twice, but it makes
later steps a lot smoother.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
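In outline, the patch swaps a separately kzalloc'd op for one embedded in
struct rds_message, with a flag standing in for the old NULL-pointer test.
A minimal before/after sketch of the idiom, condensed from the hunks below
rather than quoted verbatim:

	/* Before: op presence was signalled by a non-NULL pointer, and
	 * the op (with its scatterlist) was a second, separate alloc. */
	struct rds_rdma_op *m_rdma_op;

	if (rm->rdma.m_rdma_op)
		rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);

	/* After: the op lives inside rds_message, so an explicit
	 * r_active flag marks it in use, and its scatterlist is carved
	 * out of the message's own sg pool instead of being allocated
	 * on its own. */
	struct rds_rdma_op m_rdma_op;

	if (rm->rdma.m_rdma_op.r_active)
		rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);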
Diffstat (limited to 'net')
-rw-r--r--	net/rds/ib_send.c	20
-rw-r--r--	net/rds/iw_send.c	16
-rw-r--r--	net/rds/message.c	9
-rw-r--r--	net/rds/rdma.c	113
-rw-r--r--	net/rds/rds.h	2
-rw-r--r--	net/rds/send.c	59
6 files changed, 129 insertions(+), 90 deletions(-)
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 575fce463c65..f0edfdb2866c 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -85,8 +85,8 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
 			    rm->data.m_sg, rm->data.m_nents,
 			    DMA_TO_DEVICE);
 
-	if (rm->rdma.m_rdma_op) {
-		rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
+	if (rm->rdma.m_rdma_op.r_active) {
+		rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
 
 		/* If the user asked for a completion notification on this
 		 * message, we can implement three different semantics:
@@ -110,10 +110,10 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
 		 */
 		rds_ib_send_rdma_complete(rm, wc_status);
 
-		if (rm->rdma.m_rdma_op->r_write)
-			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
+		if (rm->rdma.m_rdma_op.r_write)
+			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 		else
-			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
+			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 	}
 
 	/* If anyone waited for this message to get flushed out, wake
@@ -243,8 +243,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 
 			rm = rds_send_get_message(conn, send->s_op);
 			if (rm) {
-				if (rm->rdma.m_rdma_op)
-					rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
+				if (rm->rdma.m_rdma_op.r_active)
+					rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
 				rds_ib_send_rdma_complete(rm, wc.status);
 				rds_message_put(rm);
 			}
@@ -560,10 +560,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 
 	/* If it has a RDMA op, tell the peer we did it. This is
 	 * used by the peer to release use-once RDMA MRs. */
-	if (rm->rdma.m_rdma_op) {
+	if (rm->rdma.m_rdma_op.r_active) {
 		struct rds_ext_header_rdma ext_hdr;
 
-		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
+		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op.r_key);
 		rds_message_add_extension(&rm->m_inc.i_hdr,
 				RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
 	}
@@ -601,7 +601,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 	 * or when requested by the user. Right now, we let
 	 * the application choose.
 	 */
-	if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
+	if (rm->rdma.m_rdma_op.r_active && rm->rdma.m_rdma_op.r_fence)
 		send_flags = IB_SEND_FENCE;
 
 	/*
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 62234b804d93..9b79a1b10445 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -85,8 +85,8 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
 			    rm->data.m_sg, rm->data.m_nents,
 			    DMA_TO_DEVICE);
 
-	if (rm->rdma.m_rdma_op) {
-		rds_iw_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
+	if (rm->rdma.m_rdma_op.r_active) {
+		rds_iw_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
 
 		/* If the user asked for a completion notification on this
 		 * message, we can implement three different semantics:
@@ -110,10 +110,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
 		 */
 		rds_iw_send_rdma_complete(rm, wc_status);
 
-		if (rm->rdma.m_rdma_op->r_write)
-			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
+		if (rm->rdma.m_rdma_op.r_write)
+			rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 		else
-			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
+			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 	}
 
 	/* If anyone waited for this message to get flushed out, wake
@@ -591,10 +591,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 
 	/* If it has a RDMA op, tell the peer we did it. This is
 	 * used by the peer to release use-once RDMA MRs. */
-	if (rm->rdma.m_rdma_op) {
+	if (rm->rdma.m_rdma_op.r_active) {
 		struct rds_ext_header_rdma ext_hdr;
 
-		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
+		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op.r_key);
 		rds_message_add_extension(&rm->m_inc.i_hdr,
 				RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
 	}
@@ -632,7 +632,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 	 * or when requested by the user. Right now, we let
 	 * the application choose.
 	 */
-	if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
+	if (rm->rdma.m_rdma_op.r_active && rm->rdma.m_rdma_op.r_fence)
 		send_flags = IB_SEND_FENCE;
 
 	/*
diff --git a/net/rds/message.c b/net/rds/message.c
index fb382fbb5b6f..4352ce79b376 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -69,8 +69,8 @@ static void rds_message_purge(struct rds_message *rm)
 	}
 	rm->data.m_nents = 0;
 
-	if (rm->rdma.m_rdma_op)
-		rds_rdma_free_op(rm->rdma.m_rdma_op);
+	if (rm->rdma.m_rdma_op.r_active)
+		rds_rdma_free_op(&rm->rdma.m_rdma_op);
 	if (rm->rdma.m_rdma_mr)
 		rds_mr_put(rm->rdma.m_rdma_mr);
 }
@@ -259,14 +259,17 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 {
 	struct rds_message *rm;
 	unsigned int i;
+	int num_sgs = ceil(total_len, PAGE_SIZE);
+	int extra_bytes = num_sgs * sizeof(struct scatterlist);
 
-	rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
+	rm = rds_message_alloc(extra_bytes, GFP_KERNEL);
 	if (!rm)
 		return ERR_PTR(-ENOMEM);
 
 	set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
 	rm->data.m_nents = ceil(total_len, PAGE_SIZE);
+	rm->data.m_sg = rds_message_alloc_sgs(rm, num_sgs);
 
 	for (i = 0; i < rm->data.m_nents; ++i) {
 		sg_set_page(&rm->data.m_sg[i],
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index a21edad33950..7ff3379bab14 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -458,26 +458,60 @@ void rds_rdma_free_op(struct rds_rdma_op *ro)
 	}
 
 	kfree(ro->r_notifier);
-	kfree(ro);
+	ro->r_notifier = NULL;
+	ro->r_active = 0;
+}
+
+/*
+ * Count the number of pages needed to describe an incoming iovec.
+ */
+static int rds_rdma_pages(struct rds_rdma_args *args)
+{
+	struct rds_iovec vec;
+	struct rds_iovec __user *local_vec;
+	unsigned int tot_pages = 0;
+	unsigned int nr_pages;
+	unsigned int i;
+
+	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+
+	/* figure out the number of pages in the vector */
+	for (i = 0; i < args->nr_local; i++) {
+		if (copy_from_user(&vec, &local_vec[i],
+				   sizeof(struct rds_iovec)))
+			return -EFAULT;
+
+		nr_pages = rds_pages_in_vec(&vec);
+		if (nr_pages == 0)
+			return -EINVAL;
+
+		tot_pages += nr_pages;
+	}
+
+	return tot_pages;
+}
+
+int rds_rdma_extra_size(struct rds_rdma_args *args)
+{
+	return rds_rdma_pages(args) * sizeof(struct scatterlist);
 }
 
 /*
  * args is a pointer to an in-kernel copy in the sendmsg cmsg.
  */
-static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
-					    struct rds_rdma_args *args)
+static int rds_rdma_prepare(struct rds_message *rm,
+			    struct rds_sock *rs,
+			    struct rds_rdma_args *args)
 {
 	struct rds_iovec vec;
-	struct rds_rdma_op *op = NULL;
+	struct rds_rdma_op *op = &rm->rdma.m_rdma_op;
 	unsigned int nr_pages;
-	unsigned int max_pages;
 	unsigned int nr_bytes;
 	struct page **pages = NULL;
 	struct rds_iovec __user *local_vec;
-	struct scatterlist *sg;
 	unsigned int nr;
 	unsigned int i, j;
-	int ret;
+	int ret = 0;
 
 
 	if (rs->rs_bound_addr == 0) {
@@ -490,44 +490,21 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		goto out;
 	}
 
-	nr_pages = 0;
-	max_pages = 0;
-
-	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
-
-	/* figure out the number of pages in the vector */
-	for (i = 0; i < args->nr_local; i++) {
-		if (copy_from_user(&vec, &local_vec[i],
-				   sizeof(struct rds_iovec))) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		nr = rds_pages_in_vec(&vec);
-		if (nr == 0) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		max_pages = max(nr, max_pages);
-		nr_pages += nr;
-	}
-
-	pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
-	if (!pages) {
-		ret = -ENOMEM;
+	nr_pages = rds_rdma_pages(args);
+	if (nr_pages < 0)
 		goto out;
-	}
 
-	op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL);
-	if (!op) {
+	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
+	op->r_sg = rds_message_alloc_sgs(rm, nr_pages);
 	op->r_write = !!(args->flags & RDS_RDMA_READWRITE);
 	op->r_fence = !!(args->flags & RDS_RDMA_FENCE);
 	op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+	op->r_active = 1;
 	op->r_recverr = rs->rs_recverr;
 	WARN_ON(!nr_pages);
 	sg_init_table(op->r_sg, nr_pages);
@@ -564,6 +575,8 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		       (unsigned long long)args->remote_vec.addr,
 		       op->r_key);
 
+	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+
 	for (i = 0; i < args->nr_local; i++) {
 		if (copy_from_user(&vec, &local_vec[i],
 				   sizeof(struct rds_iovec))) {
@@ -580,11 +593,6 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		rs->rs_user_addr = vec.addr;
 		rs->rs_user_bytes = vec.bytes;
 
-		/* did the user change the vec under us? */
-		if (nr > max_pages || op->r_nents + nr > nr_pages) {
-			ret = -EINVAL;
-			goto out;
-		}
 		/* If it's a WRITE operation, we want to pin the pages for reading.
 		 * If it's a READ operation, we need to pin the pages for writing.
 		 */
@@ -599,6 +607,7 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 
 		for (j = 0; j < nr; j++) {
 			unsigned int offset = vec.addr & ~PAGE_MASK;
+			struct scatterlist *sg;
 
 			sg = &op->r_sg[op->r_nents + j];
 			sg_set_page(sg, pages[j],
@@ -628,12 +637,10 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 	ret = 0;
 out:
 	kfree(pages);
-	if (ret) {
-		if (op)
-			rds_rdma_free_op(op);
-		op = ERR_PTR(ret);
-	}
-	return op;
+	if (ret)
+		rds_rdma_free_op(op);
+
+	return ret;
 }
 
 /*
@@ -643,17 +650,17 @@ out:
 int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 		       struct cmsghdr *cmsg)
 {
-	struct rds_rdma_op *op;
+	int ret;
 
 	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
-	    rm->rdma.m_rdma_op)
+	    rm->rdma.m_rdma_op.r_active)
 		return -EINVAL;
 
-	op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
-	if (IS_ERR(op))
-		return PTR_ERR(op);
+	ret = rds_rdma_prepare(rm, rs, CMSG_DATA(cmsg));
+	if (ret)
+		return ret;
+
 	rds_stats_inc(s_send_rdma);
-	rm->rdma.m_rdma_op = op;
 	return 0;
 }
 
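Note the matching change to teardown: because the op is no longer
heap-allocated, rds_rdma_free_op() now resets the embedded op rather than
kfree()ing it. A sketch of the new shape, with explanatory comments added:

	kfree(ro->r_notifier);
	ro->r_notifier = NULL;	/* the notifier is still its own allocation */
	ro->r_active = 0;	/* mark the embedded op unused */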
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 7c4adbe8c284..0bb4957e0cfc 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -316,7 +316,7 @@ struct rds_message {
 	rds_rdma_cookie_t m_rdma_cookie;
 	struct {
 		struct {
-			struct rds_rdma_op *m_rdma_op;
+			struct rds_rdma_op m_rdma_op;
 			struct rds_mr *m_rdma_mr;
 		} rdma;
 		struct {
diff --git a/net/rds/send.c b/net/rds/send.c
index 89e26ffdc812..72dbe7fc4f54 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -235,7 +235,7 @@ int rds_send_xmit(struct rds_connection *conn)
 		 * connection.
 		 * Therefore, we never retransmit messages with RDMA ops.
 		 */
-		if (rm->rdma.m_rdma_op &&
+		if (rm->rdma.m_rdma_op.r_active &&
 		    test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
 			spin_lock_irqsave(&conn->c_lock, flags);
 			if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
@@ -267,8 +267,8 @@ int rds_send_xmit(struct rds_connection *conn)
 		 * keep this simple and require that the transport either
 		 * send the whole rdma or none of it.
 		 */
-		if (rm->rdma.m_rdma_op && !conn->c_xmit_rdma_sent) {
-			ret = conn->c_trans->xmit_rdma(conn, rm->rdma.m_rdma_op);
+		if (rm->rdma.m_rdma_op.r_active && !conn->c_xmit_rdma_sent) {
+			ret = conn->c_trans->xmit_rdma(conn, &rm->rdma.m_rdma_op);
 			if (ret)
 				break;
 			conn->c_xmit_rdma_sent = 1;
@@ -418,9 +418,9 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
 
 	spin_lock_irqsave(&rm->m_rs_lock, flags);
 
-	ro = rm->rdma.m_rdma_op;
+	ro = &rm->rdma.m_rdma_op;
 	if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
-	    ro && ro->r_notify && ro->r_notifier) {
+	    ro->r_active && ro->r_notify && ro->r_notifier) {
 		notifier = ro->r_notifier;
 		rs = rm->m_rs;
 		sock_hold(rds_rs_to_sk(rs));
@@ -452,8 +452,8 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status
 {
 	struct rds_rdma_op *ro;
 
-	ro = rm->rdma.m_rdma_op;
-	if (ro && ro->r_notify && ro->r_notifier) {
+	ro = &rm->rdma.m_rdma_op;
+	if (ro->r_active && ro->r_notify && ro->r_notifier) {
 		ro->r_notifier->n_status = status;
 		list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue);
 		ro->r_notifier = NULL;
@@ -476,7 +476,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
 	spin_lock_irqsave(&conn->c_lock, flags);
 
 	list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
-		if (rm->rdma.m_rdma_op == op) {
+		if (&rm->rdma.m_rdma_op == op) {
 			atomic_inc(&rm->m_refcount);
 			found = rm;
 			goto out;
@@ -484,7 +484,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
 	}
 
 	list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
-		if (rm->rdma.m_rdma_op == op) {
+		if (&rm->rdma.m_rdma_op == op) {
 			atomic_inc(&rm->m_refcount);
 			found = rm;
 			break;
@@ -544,19 +544,20 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
 		spin_lock(&rs->rs_lock);
 
 		if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
-			struct rds_rdma_op *ro = rm->rdma.m_rdma_op;
+			struct rds_rdma_op *ro = &rm->rdma.m_rdma_op;
 			struct rds_notifier *notifier;
 
 			list_del_init(&rm->m_sock_item);
 			rds_send_sndbuf_remove(rs, rm);
 
-			if (ro && ro->r_notifier && (status || ro->r_notify)) {
+			if (ro->r_active && ro->r_notifier &&
+			    (status || ro->r_notify)) {
 				notifier = ro->r_notifier;
 				list_add_tail(&notifier->n_list,
 						&rs->rs_notify_queue);
 				if (!notifier->n_status)
 					notifier->n_status = status;
-				rm->rdma.m_rdma_op->r_notifier = NULL;
+				rm->rdma.m_rdma_op.r_notifier = NULL;
 			}
 			was_on_sock = 1;
 			rm->m_rs = NULL;
@@ -763,9 +764,37 @@ out:
  */
 static int rds_rm_size(struct msghdr *msg, int data_len)
 {
+	struct cmsghdr *cmsg;
 	int size = 0;
+	int retval;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+
+		if (cmsg->cmsg_level != SOL_RDS)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case RDS_CMSG_RDMA_ARGS:
+			retval = rds_rdma_extra_size(CMSG_DATA(cmsg));
+			if (retval < 0)
+				return retval;
+			size += retval;
+			break;
+
+		case RDS_CMSG_RDMA_DEST:
+		case RDS_CMSG_RDMA_MAP:
+			/* these are valid but do not add any size */
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+	}
 
 	size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
 
 	return size;
 }
@@ -896,11 +925,11 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	if (ret)
 		goto out;
 
-	if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op) &&
+	if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op.r_active) &&
 	    !conn->c_trans->xmit_rdma) {
 		if (printk_ratelimit())
 			printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
-			       rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma);
+			       &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma);
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
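Taken together, the rdma.c and send.c hunks implement the two CMSG passes
the commit message mentions. A condensed sketch of the sendmsg flow,
assembled from the hunks above rather than quoted verbatim:

	/* Pass 1, rds_rm_size(): before allocating the message, walk the
	 * control messages and total the scatterlist space they will
	 * need, so rds_message_alloc() can reserve it all at once. */
	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg))
		if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS)
			size += rds_rdma_extra_size(CMSG_DATA(cmsg));
	size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);

	/* Pass 2, rds_cmsg_rdma_args() -> rds_rdma_prepare(): build the
	 * embedded op, carving its scatterlist out of the space reserved
	 * above and marking the op live. */
	op->r_sg = rds_message_alloc_sgs(rm, nr_pages);
	op->r_active = 1;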