diff options
author | Andy Grover <andy.grover@oracle.com> | 2010-01-12 17:13:15 -0500 |
---|---|---|
committer | Andy Grover <andy.grover@oracle.com> | 2010-09-08 21:11:38 -0400 |
commit | ff87e97a9d70c9ae133d3d3d7792b26ab85f4297 (patch) | |
tree | 32de73cdf5e4353e89b3351eaae695f69faa868b /net/rds/rdma.c | |
parent | 21f79afa5fda2820671a8f64c3d0e43bb118053b (diff) |
RDS: make m_rdma_op a member of rds_message
This eliminates a separate memory alloc, although
it is now necessary to add an "r_active" flag, since
it is no longer possible to use the m_rdma_op pointer as an
indicator of whether an rdma op is present.
RDMA SGs are now allocated from the rm sg pool.
rds_rm_size also gets bigger. It's a little inefficient to
run through CMSGs twice, but it makes later steps a lot smoother.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Diffstat (limited to 'net/rds/rdma.c')
-rw-r--r-- | net/rds/rdma.c | 113 |
1 files changed, 60 insertions, 53 deletions
diff --git a/net/rds/rdma.c b/net/rds/rdma.c index a21edad33950..7ff3379bab14 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c | |||
@@ -458,26 +458,60 @@ void rds_rdma_free_op(struct rds_rdma_op *ro) | |||
458 | } | 458 | } |
459 | 459 | ||
460 | kfree(ro->r_notifier); | 460 | kfree(ro->r_notifier); |
461 | kfree(ro); | 461 | ro->r_notifier = NULL; |
462 | ro->r_active = 0; | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * Count the number of pages needed to describe an incoming iovec. | ||
467 | */ | ||
468 | static int rds_rdma_pages(struct rds_rdma_args *args) | ||
469 | { | ||
470 | struct rds_iovec vec; | ||
471 | struct rds_iovec __user *local_vec; | ||
472 | unsigned int tot_pages = 0; | ||
473 | unsigned int nr_pages; | ||
474 | unsigned int i; | ||
475 | |||
476 | local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; | ||
477 | |||
478 | /* figure out the number of pages in the vector */ | ||
479 | for (i = 0; i < args->nr_local; i++) { | ||
480 | if (copy_from_user(&vec, &local_vec[i], | ||
481 | sizeof(struct rds_iovec))) | ||
482 | return -EFAULT; | ||
483 | |||
484 | nr_pages = rds_pages_in_vec(&vec); | ||
485 | if (nr_pages == 0) | ||
486 | return -EINVAL; | ||
487 | |||
488 | tot_pages += nr_pages; | ||
489 | } | ||
490 | |||
491 | return tot_pages; | ||
492 | } | ||
493 | |||
494 | int rds_rdma_extra_size(struct rds_rdma_args *args) | ||
495 | { | ||
496 | return rds_rdma_pages(args) * sizeof(struct scatterlist); | ||
462 | } | 497 | } |
463 | 498 | ||
464 | /* | 499 | /* |
465 | * args is a pointer to an in-kernel copy in the sendmsg cmsg. | 500 | * args is a pointer to an in-kernel copy in the sendmsg cmsg. |
466 | */ | 501 | */ |
467 | static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, | 502 | static int rds_rdma_prepare(struct rds_message *rm, |
468 | struct rds_rdma_args *args) | 503 | struct rds_sock *rs, |
504 | struct rds_rdma_args *args) | ||
469 | { | 505 | { |
470 | struct rds_iovec vec; | 506 | struct rds_iovec vec; |
471 | struct rds_rdma_op *op = NULL; | 507 | struct rds_rdma_op *op = &rm->rdma.m_rdma_op; |
472 | unsigned int nr_pages; | 508 | unsigned int nr_pages; |
473 | unsigned int max_pages; | ||
474 | unsigned int nr_bytes; | 509 | unsigned int nr_bytes; |
475 | struct page **pages = NULL; | 510 | struct page **pages = NULL; |
476 | struct rds_iovec __user *local_vec; | 511 | struct rds_iovec __user *local_vec; |
477 | struct scatterlist *sg; | ||
478 | unsigned int nr; | 512 | unsigned int nr; |
479 | unsigned int i, j; | 513 | unsigned int i, j; |
480 | int ret; | 514 | int ret = 0; |
481 | 515 | ||
482 | 516 | ||
483 | if (rs->rs_bound_addr == 0) { | 517 | if (rs->rs_bound_addr == 0) { |
@@ -490,44 +524,21 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, | |||
490 | goto out; | 524 | goto out; |
491 | } | 525 | } |
492 | 526 | ||
493 | nr_pages = 0; | 527 | nr_pages = rds_rdma_pages(args); |
494 | max_pages = 0; | 528 | if (nr_pages < 0) |
495 | |||
496 | local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; | ||
497 | |||
498 | /* figure out the number of pages in the vector */ | ||
499 | for (i = 0; i < args->nr_local; i++) { | ||
500 | if (copy_from_user(&vec, &local_vec[i], | ||
501 | sizeof(struct rds_iovec))) { | ||
502 | ret = -EFAULT; | ||
503 | goto out; | ||
504 | } | ||
505 | |||
506 | nr = rds_pages_in_vec(&vec); | ||
507 | if (nr == 0) { | ||
508 | ret = -EINVAL; | ||
509 | goto out; | ||
510 | } | ||
511 | |||
512 | max_pages = max(nr, max_pages); | ||
513 | nr_pages += nr; | ||
514 | } | ||
515 | |||
516 | pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL); | ||
517 | if (!pages) { | ||
518 | ret = -ENOMEM; | ||
519 | goto out; | 529 | goto out; |
520 | } | ||
521 | 530 | ||
522 | op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL); | 531 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); |
523 | if (!op) { | 532 | if (!pages) { |
524 | ret = -ENOMEM; | 533 | ret = -ENOMEM; |
525 | goto out; | 534 | goto out; |
526 | } | 535 | } |
527 | 536 | ||
537 | op->r_sg = rds_message_alloc_sgs(rm, nr_pages); | ||
528 | op->r_write = !!(args->flags & RDS_RDMA_READWRITE); | 538 | op->r_write = !!(args->flags & RDS_RDMA_READWRITE); |
529 | op->r_fence = !!(args->flags & RDS_RDMA_FENCE); | 539 | op->r_fence = !!(args->flags & RDS_RDMA_FENCE); |
530 | op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); | 540 | op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); |
541 | op->r_active = 1; | ||
531 | op->r_recverr = rs->rs_recverr; | 542 | op->r_recverr = rs->rs_recverr; |
532 | WARN_ON(!nr_pages); | 543 | WARN_ON(!nr_pages); |
533 | sg_init_table(op->r_sg, nr_pages); | 544 | sg_init_table(op->r_sg, nr_pages); |
@@ -564,6 +575,8 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, | |||
564 | (unsigned long long)args->remote_vec.addr, | 575 | (unsigned long long)args->remote_vec.addr, |
565 | op->r_key); | 576 | op->r_key); |
566 | 577 | ||
578 | local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; | ||
579 | |||
567 | for (i = 0; i < args->nr_local; i++) { | 580 | for (i = 0; i < args->nr_local; i++) { |
568 | if (copy_from_user(&vec, &local_vec[i], | 581 | if (copy_from_user(&vec, &local_vec[i], |
569 | sizeof(struct rds_iovec))) { | 582 | sizeof(struct rds_iovec))) { |
@@ -580,11 +593,6 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, | |||
580 | rs->rs_user_addr = vec.addr; | 593 | rs->rs_user_addr = vec.addr; |
581 | rs->rs_user_bytes = vec.bytes; | 594 | rs->rs_user_bytes = vec.bytes; |
582 | 595 | ||
583 | /* did the user change the vec under us? */ | ||
584 | if (nr > max_pages || op->r_nents + nr > nr_pages) { | ||
585 | ret = -EINVAL; | ||
586 | goto out; | ||
587 | } | ||
588 | /* If it's a WRITE operation, we want to pin the pages for reading. | 596 | /* If it's a WRITE operation, we want to pin the pages for reading. |
589 | * If it's a READ operation, we need to pin the pages for writing. | 597 | * If it's a READ operation, we need to pin the pages for writing. |
590 | */ | 598 | */ |
@@ -599,6 +607,7 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, | |||
599 | 607 | ||
600 | for (j = 0; j < nr; j++) { | 608 | for (j = 0; j < nr; j++) { |
601 | unsigned int offset = vec.addr & ~PAGE_MASK; | 609 | unsigned int offset = vec.addr & ~PAGE_MASK; |
610 | struct scatterlist *sg; | ||
602 | 611 | ||
603 | sg = &op->r_sg[op->r_nents + j]; | 612 | sg = &op->r_sg[op->r_nents + j]; |
604 | sg_set_page(sg, pages[j], | 613 | sg_set_page(sg, pages[j], |
@@ -628,12 +637,10 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, | |||
628 | ret = 0; | 637 | ret = 0; |
629 | out: | 638 | out: |
630 | kfree(pages); | 639 | kfree(pages); |
631 | if (ret) { | 640 | if (ret) |
632 | if (op) | 641 | rds_rdma_free_op(op); |
633 | rds_rdma_free_op(op); | 642 | |
634 | op = ERR_PTR(ret); | 643 | return ret; |
635 | } | ||
636 | return op; | ||
637 | } | 644 | } |
638 | 645 | ||
639 | /* | 646 | /* |
@@ -643,17 +650,17 @@ out: | |||
643 | int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | 650 | int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, |
644 | struct cmsghdr *cmsg) | 651 | struct cmsghdr *cmsg) |
645 | { | 652 | { |
646 | struct rds_rdma_op *op; | 653 | int ret; |
647 | 654 | ||
648 | if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) || | 655 | if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) || |
649 | rm->rdma.m_rdma_op) | 656 | rm->rdma.m_rdma_op.r_active) |
650 | return -EINVAL; | 657 | return -EINVAL; |
651 | 658 | ||
652 | op = rds_rdma_prepare(rs, CMSG_DATA(cmsg)); | 659 | ret = rds_rdma_prepare(rm, rs, CMSG_DATA(cmsg)); |
653 | if (IS_ERR(op)) | 660 | if (ret) |
654 | return PTR_ERR(op); | 661 | return ret; |
662 | |||
655 | rds_stats_inc(s_send_rdma); | 663 | rds_stats_inc(s_send_rdma); |
656 | rm->rdma.m_rdma_op = op; | ||
657 | return 0; | 664 | return 0; |
658 | } | 665 | } |
659 | 666 | ||