about summary refs log tree commit diff stats
path: root/net
diff options
context:
space:
mode:
authorAndy Grover <andy.grover@oracle.com>2010-10-28 11:40:58 -0400
committerDavid S. Miller <davem@davemloft.net>2010-10-30 19:34:17 -0400
commitfc8162e3c034af743d8def435fda6396603d321f (patch)
treeb003a652740eb0de1fe71c634618b6666e9bae3c /net
parentf4a3fc03c1d73753879fb655b8cd628b29f6706b (diff)
RDS: Copy rds_iovecs into kernel memory instead of rereading from userspace
Change rds_rdma_pages to take a passed-in rds_iovec array instead of doing copy_from_user itself. Change rds_cmsg_rdma_args to copy rds_iovec array once only. This eliminates the possibility of userspace changing it after our sanity checks.

Implement stack-based storage for small numbers of iovecs, based on net/socket.c, to save an alloc in the extremely common case.

Although this patch reduces iovec copies in cmsg_rdma_args to 1, we still do another one in rds_rdma_extra_size. Getting rid of that one will be trickier, so it'll be a separate patch.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/rds/rdma.c104
1 file changed, 65 insertions, 39 deletions
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 334acdd32ab6..caa4d9866d92 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -479,13 +479,38 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
479 479
480 480
481/* 481/*
482 * Count the number of pages needed to describe an incoming iovec. 482 * Count the number of pages needed to describe an incoming iovec array.
483 */ 483 */
484static int rds_rdma_pages(struct rds_rdma_args *args) 484static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs)
485{
486 int tot_pages = 0;
487 unsigned int nr_pages;
488 unsigned int i;
489
490 /* figure out the number of pages in the vector */
491 for (i = 0; i < nr_iovecs; i++) {
492 nr_pages = rds_pages_in_vec(&iov[i]);
493 if (nr_pages == 0)
494 return -EINVAL;
495
496 tot_pages += nr_pages;
497
498 /*
499 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
500 * so tot_pages cannot overflow without first going negative.
501 */
502 if (tot_pages < 0)
503 return -EINVAL;
504 }
505
506 return tot_pages;
507}
508
509int rds_rdma_extra_size(struct rds_rdma_args *args)
485{ 510{
486 struct rds_iovec vec; 511 struct rds_iovec vec;
487 struct rds_iovec __user *local_vec; 512 struct rds_iovec __user *local_vec;
488 unsigned int tot_pages = 0; 513 int tot_pages = 0;
489 unsigned int nr_pages; 514 unsigned int nr_pages;
490 unsigned int i; 515 unsigned int i;
491 516
@@ -507,16 +532,11 @@ static int rds_rdma_pages(struct rds_rdma_args *args)
507 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1, 532 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
508 * so tot_pages cannot overflow without first going negative. 533 * so tot_pages cannot overflow without first going negative.
509 */ 534 */
510 if ((int)tot_pages < 0) 535 if (tot_pages < 0)
511 return -EINVAL; 536 return -EINVAL;
512 } 537 }
513 538
514 return tot_pages; 539 return tot_pages * sizeof(struct scatterlist);
515}
516
517int rds_rdma_extra_size(struct rds_rdma_args *args)
518{
519 return rds_rdma_pages(args) * sizeof(struct scatterlist);
520} 540}
521 541
522/* 542/*
@@ -527,13 +547,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
527 struct cmsghdr *cmsg) 547 struct cmsghdr *cmsg)
528{ 548{
529 struct rds_rdma_args *args; 549 struct rds_rdma_args *args;
530 struct rds_iovec vec;
531 struct rm_rdma_op *op = &rm->rdma; 550 struct rm_rdma_op *op = &rm->rdma;
532 int nr_pages; 551 int nr_pages;
533 unsigned int nr_bytes; 552 unsigned int nr_bytes;
534 struct page **pages = NULL; 553 struct page **pages = NULL;
535 struct rds_iovec __user *local_vec; 554 struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
536 unsigned int nr; 555 int iov_size;
537 unsigned int i, j; 556 unsigned int i, j;
538 int ret = 0; 557 int ret = 0;
539 558
@@ -553,7 +572,22 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
553 goto out; 572 goto out;
554 } 573 }
555 574
556 nr_pages = rds_rdma_pages(args); 575 /* Check whether to allocate the iovec area */
576 iov_size = args->nr_local * sizeof(struct rds_iovec);
577 if (args->nr_local > UIO_FASTIOV) {
578 iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
579 if (!iovs) {
580 ret = -ENOMEM;
581 goto out;
582 }
583 }
584
585 if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
586 ret = -EFAULT;
587 goto out;
588 }
589
590 nr_pages = rds_rdma_pages(iovs, args->nr_local);
557 if (nr_pages < 0) { 591 if (nr_pages < 0) {
558 ret = -EINVAL; 592 ret = -EINVAL;
559 goto out; 593 goto out;
@@ -606,50 +640,40 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
606 (unsigned long long)args->remote_vec.addr, 640 (unsigned long long)args->remote_vec.addr,
607 op->op_rkey); 641 op->op_rkey);
608 642
609 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
610
611 for (i = 0; i < args->nr_local; i++) { 643 for (i = 0; i < args->nr_local; i++) {
612 if (copy_from_user(&vec, &local_vec[i], 644 struct rds_iovec *iov = &iovs[i];
613 sizeof(struct rds_iovec))) { 645 /* don't need to check, rds_rdma_pages() verified nr will be +nonzero */
614 ret = -EFAULT; 646 unsigned int nr = rds_pages_in_vec(iov);
615 goto out;
616 }
617
618 nr = rds_pages_in_vec(&vec);
619 if (nr == 0) {
620 ret = -EINVAL;
621 goto out;
622 }
623 647
624 rs->rs_user_addr = vec.addr; 648 rs->rs_user_addr = iov->addr;
625 rs->rs_user_bytes = vec.bytes; 649 rs->rs_user_bytes = iov->bytes;
626 650
627 /* If it's a WRITE operation, we want to pin the pages for reading. 651 /* If it's a WRITE operation, we want to pin the pages for reading.
628 * If it's a READ operation, we need to pin the pages for writing. 652 * If it's a READ operation, we need to pin the pages for writing.
629 */ 653 */
630 ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write); 654 ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
631 if (ret < 0) 655 if (ret < 0)
632 goto out; 656 goto out;
633 657
634 rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n", 658 rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
635 nr_bytes, nr, vec.bytes, vec.addr); 659 nr_bytes, nr, iov->bytes, iov->addr);
636 660
637 nr_bytes += vec.bytes; 661 nr_bytes += iov->bytes;
638 662
639 for (j = 0; j < nr; j++) { 663 for (j = 0; j < nr; j++) {
640 unsigned int offset = vec.addr & ~PAGE_MASK; 664 unsigned int offset = iov->addr & ~PAGE_MASK;
641 struct scatterlist *sg; 665 struct scatterlist *sg;
642 666
643 sg = &op->op_sg[op->op_nents + j]; 667 sg = &op->op_sg[op->op_nents + j];
644 sg_set_page(sg, pages[j], 668 sg_set_page(sg, pages[j],
645 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), 669 min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
646 offset); 670 offset);
647 671
648 rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n", 672 rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
649 sg->offset, sg->length, vec.addr, vec.bytes); 673 sg->offset, sg->length, iov->addr, iov->bytes);
650 674
651 vec.addr += sg->length; 675 iov->addr += sg->length;
652 vec.bytes -= sg->length; 676 iov->bytes -= sg->length;
653 } 677 }
654 678
655 op->op_nents += nr; 679 op->op_nents += nr;
@@ -665,6 +689,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
665 op->op_bytes = nr_bytes; 689 op->op_bytes = nr_bytes;
666 690
667out: 691out:
692 if (iovs != iovstack)
693 sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
668 kfree(pages); 694 kfree(pages);
669 if (ret) 695 if (ret)
670 rds_rdma_free_op(op); 696 rds_rdma_free_op(op);