diff options
Diffstat (limited to 'net/rds/rdma.c')
-rw-r--r-- | net/rds/rdma.c | 126 |
1 files changed, 84 insertions, 42 deletions
diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 1a41debca1ce..8920f2a83327 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c | |||
@@ -479,13 +479,38 @@ void rds_atomic_free_op(struct rm_atomic_op *ao) | |||
479 | 479 | ||
480 | 480 | ||
481 | /* | 481 | /* |
482 | * Count the number of pages needed to describe an incoming iovec. | 482 | * Count the number of pages needed to describe an incoming iovec array. |
483 | */ | 483 | */ |
484 | static int rds_rdma_pages(struct rds_rdma_args *args) | 484 | static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs) |
485 | { | ||
486 | int tot_pages = 0; | ||
487 | unsigned int nr_pages; | ||
488 | unsigned int i; | ||
489 | |||
490 | /* figure out the number of pages in the vector */ | ||
491 | for (i = 0; i < nr_iovecs; i++) { | ||
492 | nr_pages = rds_pages_in_vec(&iov[i]); | ||
493 | if (nr_pages == 0) | ||
494 | return -EINVAL; | ||
495 | |||
496 | tot_pages += nr_pages; | ||
497 | |||
498 | /* | ||
499 | * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1, | ||
500 | * so tot_pages cannot overflow without first going negative. | ||
501 | */ | ||
502 | if (tot_pages < 0) | ||
503 | return -EINVAL; | ||
504 | } | ||
505 | |||
506 | return tot_pages; | ||
507 | } | ||
508 | |||
509 | int rds_rdma_extra_size(struct rds_rdma_args *args) | ||
485 | { | 510 | { |
486 | struct rds_iovec vec; | 511 | struct rds_iovec vec; |
487 | struct rds_iovec __user *local_vec; | 512 | struct rds_iovec __user *local_vec; |
488 | unsigned int tot_pages = 0; | 513 | int tot_pages = 0; |
489 | unsigned int nr_pages; | 514 | unsigned int nr_pages; |
490 | unsigned int i; | 515 | unsigned int i; |
491 | 516 | ||
@@ -502,14 +527,16 @@ static int rds_rdma_pages(struct rds_rdma_args *args) | |||
502 | return -EINVAL; | 527 | return -EINVAL; |
503 | 528 | ||
504 | tot_pages += nr_pages; | 529 | tot_pages += nr_pages; |
505 | } | ||
506 | 530 | ||
507 | return tot_pages; | 531 | /* |
508 | } | 532 | * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1, |
533 | * so tot_pages cannot overflow without first going negative. | ||
534 | */ | ||
535 | if (tot_pages < 0) | ||
536 | return -EINVAL; | ||
537 | } | ||
509 | 538 | ||
510 | int rds_rdma_extra_size(struct rds_rdma_args *args) | 539 | return tot_pages * sizeof(struct scatterlist); |
511 | { | ||
512 | return rds_rdma_pages(args) * sizeof(struct scatterlist); | ||
513 | } | 540 | } |
514 | 541 | ||
515 | /* | 542 | /* |
@@ -520,13 +547,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
520 | struct cmsghdr *cmsg) | 547 | struct cmsghdr *cmsg) |
521 | { | 548 | { |
522 | struct rds_rdma_args *args; | 549 | struct rds_rdma_args *args; |
523 | struct rds_iovec vec; | ||
524 | struct rm_rdma_op *op = &rm->rdma; | 550 | struct rm_rdma_op *op = &rm->rdma; |
525 | int nr_pages; | 551 | int nr_pages; |
526 | unsigned int nr_bytes; | 552 | unsigned int nr_bytes; |
527 | struct page **pages = NULL; | 553 | struct page **pages = NULL; |
528 | struct rds_iovec __user *local_vec; | 554 | struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack; |
529 | unsigned int nr; | 555 | int iov_size; |
530 | unsigned int i, j; | 556 | unsigned int i, j; |
531 | int ret = 0; | 557 | int ret = 0; |
532 | 558 | ||
@@ -546,9 +572,26 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
546 | goto out; | 572 | goto out; |
547 | } | 573 | } |
548 | 574 | ||
549 | nr_pages = rds_rdma_pages(args); | 575 | /* Check whether to allocate the iovec area */ |
550 | if (nr_pages < 0) | 576 | iov_size = args->nr_local * sizeof(struct rds_iovec); |
577 | if (args->nr_local > UIO_FASTIOV) { | ||
578 | iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL); | ||
579 | if (!iovs) { | ||
580 | ret = -ENOMEM; | ||
581 | goto out; | ||
582 | } | ||
583 | } | ||
584 | |||
585 | if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) { | ||
586 | ret = -EFAULT; | ||
587 | goto out; | ||
588 | } | ||
589 | |||
590 | nr_pages = rds_rdma_pages(iovs, args->nr_local); | ||
591 | if (nr_pages < 0) { | ||
592 | ret = -EINVAL; | ||
551 | goto out; | 593 | goto out; |
594 | } | ||
552 | 595 | ||
553 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); | 596 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); |
554 | if (!pages) { | 597 | if (!pages) { |
@@ -564,6 +607,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
564 | op->op_recverr = rs->rs_recverr; | 607 | op->op_recverr = rs->rs_recverr; |
565 | WARN_ON(!nr_pages); | 608 | WARN_ON(!nr_pages); |
566 | op->op_sg = rds_message_alloc_sgs(rm, nr_pages); | 609 | op->op_sg = rds_message_alloc_sgs(rm, nr_pages); |
610 | if (!op->op_sg) { | ||
611 | ret = -ENOMEM; | ||
612 | goto out; | ||
613 | } | ||
567 | 614 | ||
568 | if (op->op_notify || op->op_recverr) { | 615 | if (op->op_notify || op->op_recverr) { |
569 | /* We allocate an uninitialized notifier here, because | 616 | /* We allocate an uninitialized notifier here, because |
@@ -597,50 +644,40 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
597 | (unsigned long long)args->remote_vec.addr, | 644 | (unsigned long long)args->remote_vec.addr, |
598 | op->op_rkey); | 645 | op->op_rkey); |
599 | 646 | ||
600 | local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; | ||
601 | |||
602 | for (i = 0; i < args->nr_local; i++) { | 647 | for (i = 0; i < args->nr_local; i++) { |
603 | if (copy_from_user(&vec, &local_vec[i], | 648 | struct rds_iovec *iov = &iovs[i]; |
604 | sizeof(struct rds_iovec))) { | 649 | /* don't need to check, rds_rdma_pages() verified nr will be +nonzero */ |
605 | ret = -EFAULT; | 650 | unsigned int nr = rds_pages_in_vec(iov); |
606 | goto out; | ||
607 | } | ||
608 | |||
609 | nr = rds_pages_in_vec(&vec); | ||
610 | if (nr == 0) { | ||
611 | ret = -EINVAL; | ||
612 | goto out; | ||
613 | } | ||
614 | 651 | ||
615 | rs->rs_user_addr = vec.addr; | 652 | rs->rs_user_addr = iov->addr; |
616 | rs->rs_user_bytes = vec.bytes; | 653 | rs->rs_user_bytes = iov->bytes; |
617 | 654 | ||
618 | /* If it's a WRITE operation, we want to pin the pages for reading. | 655 | /* If it's a WRITE operation, we want to pin the pages for reading. |
619 | * If it's a READ operation, we need to pin the pages for writing. | 656 | * If it's a READ operation, we need to pin the pages for writing. |
620 | */ | 657 | */ |
621 | ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write); | 658 | ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write); |
622 | if (ret < 0) | 659 | if (ret < 0) |
623 | goto out; | 660 | goto out; |
624 | 661 | ||
625 | rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n", | 662 | rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n", |
626 | nr_bytes, nr, vec.bytes, vec.addr); | 663 | nr_bytes, nr, iov->bytes, iov->addr); |
627 | 664 | ||
628 | nr_bytes += vec.bytes; | 665 | nr_bytes += iov->bytes; |
629 | 666 | ||
630 | for (j = 0; j < nr; j++) { | 667 | for (j = 0; j < nr; j++) { |
631 | unsigned int offset = vec.addr & ~PAGE_MASK; | 668 | unsigned int offset = iov->addr & ~PAGE_MASK; |
632 | struct scatterlist *sg; | 669 | struct scatterlist *sg; |
633 | 670 | ||
634 | sg = &op->op_sg[op->op_nents + j]; | 671 | sg = &op->op_sg[op->op_nents + j]; |
635 | sg_set_page(sg, pages[j], | 672 | sg_set_page(sg, pages[j], |
636 | min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), | 673 | min_t(unsigned int, iov->bytes, PAGE_SIZE - offset), |
637 | offset); | 674 | offset); |
638 | 675 | ||
639 | rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n", | 676 | rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n", |
640 | sg->offset, sg->length, vec.addr, vec.bytes); | 677 | sg->offset, sg->length, iov->addr, iov->bytes); |
641 | 678 | ||
642 | vec.addr += sg->length; | 679 | iov->addr += sg->length; |
643 | vec.bytes -= sg->length; | 680 | iov->bytes -= sg->length; |
644 | } | 681 | } |
645 | 682 | ||
646 | op->op_nents += nr; | 683 | op->op_nents += nr; |
@@ -655,13 +692,14 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
655 | } | 692 | } |
656 | op->op_bytes = nr_bytes; | 693 | op->op_bytes = nr_bytes; |
657 | 694 | ||
658 | ret = 0; | ||
659 | out: | 695 | out: |
696 | if (iovs != iovstack) | ||
697 | sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size); | ||
660 | kfree(pages); | 698 | kfree(pages); |
661 | if (ret) | 699 | if (ret) |
662 | rds_rdma_free_op(op); | 700 | rds_rdma_free_op(op); |
663 | 701 | else | |
664 | rds_stats_inc(s_send_rdma); | 702 | rds_stats_inc(s_send_rdma); |
665 | 703 | ||
666 | return ret; | 704 | return ret; |
667 | } | 705 | } |
@@ -773,6 +811,10 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, | |||
773 | rm->atomic.op_active = 1; | 811 | rm->atomic.op_active = 1; |
774 | rm->atomic.op_recverr = rs->rs_recverr; | 812 | rm->atomic.op_recverr = rs->rs_recverr; |
775 | rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); | 813 | rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); |
814 | if (!rm->atomic.op_sg) { | ||
815 | ret = -ENOMEM; | ||
816 | goto err; | ||
817 | } | ||
776 | 818 | ||
777 | /* verify 8 byte-aligned */ | 819 | /* verify 8 byte-aligned */ |
778 | if (args->local_addr & 0x7) { | 820 | if (args->local_addr & 0x7) { |