diff options
| -rw-r--r-- | net/rds/rdma.c | 104 |
1 files changed, 65 insertions, 39 deletions
diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 334acdd32ab6..caa4d9866d92 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c | |||
| @@ -479,13 +479,38 @@ void rds_atomic_free_op(struct rm_atomic_op *ao) | |||
| 479 | 479 | ||
| 480 | 480 | ||
| 481 | /* | 481 | /* |
| 482 | * Count the number of pages needed to describe an incoming iovec. | 482 | * Count the number of pages needed to describe an incoming iovec array. |
| 483 | */ | 483 | */ |
| 484 | static int rds_rdma_pages(struct rds_rdma_args *args) | 484 | static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs) |
| 485 | { | ||
| 486 | int tot_pages = 0; | ||
| 487 | unsigned int nr_pages; | ||
| 488 | unsigned int i; | ||
| 489 | |||
| 490 | /* figure out the number of pages in the vector */ | ||
| 491 | for (i = 0; i < nr_iovecs; i++) { | ||
| 492 | nr_pages = rds_pages_in_vec(&iov[i]); | ||
| 493 | if (nr_pages == 0) | ||
| 494 | return -EINVAL; | ||
| 495 | |||
| 496 | tot_pages += nr_pages; | ||
| 497 | |||
| 498 | /* | ||
| 499 | * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1, | ||
| 500 | * so tot_pages cannot overflow without first going negative. | ||
| 501 | */ | ||
| 502 | if (tot_pages < 0) | ||
| 503 | return -EINVAL; | ||
| 504 | } | ||
| 505 | |||
| 506 | return tot_pages; | ||
| 507 | } | ||
| 508 | |||
| 509 | int rds_rdma_extra_size(struct rds_rdma_args *args) | ||
| 485 | { | 510 | { |
| 486 | struct rds_iovec vec; | 511 | struct rds_iovec vec; |
| 487 | struct rds_iovec __user *local_vec; | 512 | struct rds_iovec __user *local_vec; |
| 488 | unsigned int tot_pages = 0; | 513 | int tot_pages = 0; |
| 489 | unsigned int nr_pages; | 514 | unsigned int nr_pages; |
| 490 | unsigned int i; | 515 | unsigned int i; |
| 491 | 516 | ||
| @@ -507,16 +532,11 @@ static int rds_rdma_pages(struct rds_rdma_args *args) | |||
| 507 | * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1, | 532 | * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1, |
| 508 | * so tot_pages cannot overflow without first going negative. | 533 | * so tot_pages cannot overflow without first going negative. |
| 509 | */ | 534 | */ |
| 510 | if ((int)tot_pages < 0) | 535 | if (tot_pages < 0) |
| 511 | return -EINVAL; | 536 | return -EINVAL; |
| 512 | } | 537 | } |
| 513 | 538 | ||
| 514 | return tot_pages; | 539 | return tot_pages * sizeof(struct scatterlist); |
| 515 | } | ||
| 516 | |||
| 517 | int rds_rdma_extra_size(struct rds_rdma_args *args) | ||
| 518 | { | ||
| 519 | return rds_rdma_pages(args) * sizeof(struct scatterlist); | ||
| 520 | } | 540 | } |
| 521 | 541 | ||
| 522 | /* | 542 | /* |
| @@ -527,13 +547,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
| 527 | struct cmsghdr *cmsg) | 547 | struct cmsghdr *cmsg) |
| 528 | { | 548 | { |
| 529 | struct rds_rdma_args *args; | 549 | struct rds_rdma_args *args; |
| 530 | struct rds_iovec vec; | ||
| 531 | struct rm_rdma_op *op = &rm->rdma; | 550 | struct rm_rdma_op *op = &rm->rdma; |
| 532 | int nr_pages; | 551 | int nr_pages; |
| 533 | unsigned int nr_bytes; | 552 | unsigned int nr_bytes; |
| 534 | struct page **pages = NULL; | 553 | struct page **pages = NULL; |
| 535 | struct rds_iovec __user *local_vec; | 554 | struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack; |
| 536 | unsigned int nr; | 555 | int iov_size; |
| 537 | unsigned int i, j; | 556 | unsigned int i, j; |
| 538 | int ret = 0; | 557 | int ret = 0; |
| 539 | 558 | ||
| @@ -553,7 +572,22 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
| 553 | goto out; | 572 | goto out; |
| 554 | } | 573 | } |
| 555 | 574 | ||
| 556 | nr_pages = rds_rdma_pages(args); | 575 | /* Check whether to allocate the iovec area */ |
| 576 | iov_size = args->nr_local * sizeof(struct rds_iovec); | ||
| 577 | if (args->nr_local > UIO_FASTIOV) { | ||
| 578 | iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL); | ||
| 579 | if (!iovs) { | ||
| 580 | ret = -ENOMEM; | ||
| 581 | goto out; | ||
| 582 | } | ||
| 583 | } | ||
| 584 | |||
| 585 | if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) { | ||
| 586 | ret = -EFAULT; | ||
| 587 | goto out; | ||
| 588 | } | ||
| 589 | |||
| 590 | nr_pages = rds_rdma_pages(iovs, args->nr_local); | ||
| 557 | if (nr_pages < 0) { | 591 | if (nr_pages < 0) { |
| 558 | ret = -EINVAL; | 592 | ret = -EINVAL; |
| 559 | goto out; | 593 | goto out; |
| @@ -606,50 +640,40 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
| 606 | (unsigned long long)args->remote_vec.addr, | 640 | (unsigned long long)args->remote_vec.addr, |
| 607 | op->op_rkey); | 641 | op->op_rkey); |
| 608 | 642 | ||
| 609 | local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; | ||
| 610 | |||
| 611 | for (i = 0; i < args->nr_local; i++) { | 643 | for (i = 0; i < args->nr_local; i++) { |
| 612 | if (copy_from_user(&vec, &local_vec[i], | 644 | struct rds_iovec *iov = &iovs[i]; |
| 613 | sizeof(struct rds_iovec))) { | 645 | /* don't need to check, rds_rdma_pages() verified nr will be +nonzero */ |
| 614 | ret = -EFAULT; | 646 | unsigned int nr = rds_pages_in_vec(iov); |
| 615 | goto out; | ||
| 616 | } | ||
| 617 | |||
| 618 | nr = rds_pages_in_vec(&vec); | ||
| 619 | if (nr == 0) { | ||
| 620 | ret = -EINVAL; | ||
| 621 | goto out; | ||
| 622 | } | ||
| 623 | 647 | ||
| 624 | rs->rs_user_addr = vec.addr; | 648 | rs->rs_user_addr = iov->addr; |
| 625 | rs->rs_user_bytes = vec.bytes; | 649 | rs->rs_user_bytes = iov->bytes; |
| 626 | 650 | ||
| 627 | /* If it's a WRITE operation, we want to pin the pages for reading. | 651 | /* If it's a WRITE operation, we want to pin the pages for reading. |
| 628 | * If it's a READ operation, we need to pin the pages for writing. | 652 | * If it's a READ operation, we need to pin the pages for writing. |
| 629 | */ | 653 | */ |
| 630 | ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write); | 654 | ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write); |
| 631 | if (ret < 0) | 655 | if (ret < 0) |
| 632 | goto out; | 656 | goto out; |
| 633 | 657 | ||
| 634 | rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n", | 658 | rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n", |
| 635 | nr_bytes, nr, vec.bytes, vec.addr); | 659 | nr_bytes, nr, iov->bytes, iov->addr); |
| 636 | 660 | ||
| 637 | nr_bytes += vec.bytes; | 661 | nr_bytes += iov->bytes; |
| 638 | 662 | ||
| 639 | for (j = 0; j < nr; j++) { | 663 | for (j = 0; j < nr; j++) { |
| 640 | unsigned int offset = vec.addr & ~PAGE_MASK; | 664 | unsigned int offset = iov->addr & ~PAGE_MASK; |
| 641 | struct scatterlist *sg; | 665 | struct scatterlist *sg; |
| 642 | 666 | ||
| 643 | sg = &op->op_sg[op->op_nents + j]; | 667 | sg = &op->op_sg[op->op_nents + j]; |
| 644 | sg_set_page(sg, pages[j], | 668 | sg_set_page(sg, pages[j], |
| 645 | min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), | 669 | min_t(unsigned int, iov->bytes, PAGE_SIZE - offset), |
| 646 | offset); | 670 | offset); |
| 647 | 671 | ||
| 648 | rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n", | 672 | rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n", |
| 649 | sg->offset, sg->length, vec.addr, vec.bytes); | 673 | sg->offset, sg->length, iov->addr, iov->bytes); |
| 650 | 674 | ||
| 651 | vec.addr += sg->length; | 675 | iov->addr += sg->length; |
| 652 | vec.bytes -= sg->length; | 676 | iov->bytes -= sg->length; |
| 653 | } | 677 | } |
| 654 | 678 | ||
| 655 | op->op_nents += nr; | 679 | op->op_nents += nr; |
| @@ -665,6 +689,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
| 665 | op->op_bytes = nr_bytes; | 689 | op->op_bytes = nr_bytes; |
| 666 | 690 | ||
| 667 | out: | 691 | out: |
| 692 | if (iovs != iovstack) | ||
| 693 | sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size); | ||
| 668 | kfree(pages); | 694 | kfree(pages); |
| 669 | if (ret) | 695 | if (ret) |
| 670 | rds_rdma_free_op(op); | 696 | rds_rdma_free_op(op); |
