Diffstat (limited to 'net/rds/rdma.c'):

 net/rds/rdma.c | 339 ++++++++++++++++++++++++++++-----------------
 1 file changed, 226 insertions(+), 113 deletions(-)

diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 75fd13bb631b..48064673fc76 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -35,7 +35,7 @@
 #include <linux/rbtree.h>
 #include <linux/dma-mapping.h> /* for DMA_*_DEVICE */
 
-#include "rdma.h"
+#include "rds.h"
 
 /*
  * XXX
@@ -130,14 +130,22 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
 {
        struct rds_mr *mr;
        struct rb_node *node;
+       unsigned long flags;
 
        /* Release any MRs associated with this socket */
+       spin_lock_irqsave(&rs->rs_rdma_lock, flags);
        while ((node = rb_first(&rs->rs_rdma_keys))) {
                mr = container_of(node, struct rds_mr, r_rb_node);
                if (mr->r_trans == rs->rs_transport)
                        mr->r_invalidate = 0;
+               rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
+               RB_CLEAR_NODE(&mr->r_rb_node);
+               spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
+               rds_destroy_mr(mr);
                rds_mr_put(mr);
+               spin_lock_irqsave(&rs->rs_rdma_lock, flags);
        }
+       spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 
        if (rs->rs_transport && rs->rs_transport->flush_mrs)
                rs->rs_transport->flush_mrs();
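
The drop/re-take of rs_rdma_lock above is deliberate: rds_destroy_mr() takes
rs_rdma_lock itself to unlink the MR if it is still on the tree, so calling
it with the lock held would self-deadlock on the spinlock. A minimal sketch
of the shape assumed here (not part of this diff; hedged from the
surrounding rdma.c, which also handles an RDS_MR_DEAD state bit):

        static void rds_destroy_mr(struct rds_mr *mr)
        {
                struct rds_sock *rs = mr->r_sock;
                void *trans_private = NULL;
                unsigned long flags;

                /* Same lock the caller in rds_rdma_drop_keys() just dropped. */
                spin_lock_irqsave(&rs->rs_rdma_lock, flags);
                /* No-op if the caller already did RB_CLEAR_NODE(). */
                if (!RB_EMPTY_NODE(&mr->r_rb_node))
                        rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
                trans_private = mr->r_trans_private;
                mr->r_trans_private = NULL;
                spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);

                if (trans_private)
                        mr->r_trans->free_mr(trans_private, mr->r_invalidate);
        }

Restarting the walk from rb_first() after re-taking the lock keeps the
iteration valid even though the tree changed while the lock was dropped.
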
@@ -181,7 +189,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
                goto out;
        }
 
-       if (rs->rs_transport->get_mr == NULL) {
+       if (!rs->rs_transport->get_mr) {
                ret = -EOPNOTSUPP;
                goto out;
        }
@@ -197,13 +205,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 
        /* XXX clamp nr_pages to limit the size of this alloc? */
        pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
-       if (pages == NULL) {
+       if (!pages) {
                ret = -ENOMEM;
                goto out;
        }
 
        mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL);
-       if (mr == NULL) {
+       if (!mr) {
                ret = -ENOMEM;
                goto out;
        }
@@ -230,13 +238,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
         * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to
         * the zero page.
         */
-       ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1);
+       ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
        if (ret < 0)
                goto out;
 
        nents = ret;
        sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
-       if (sg == NULL) {
+       if (!sg) {
                ret = -ENOMEM;
                goto out;
        }
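
This hunk and the one at -569 below drop the explicit "& PAGE_MASK" from
the rds_pin_pages() callers; the assumption is that the helper owns the
alignment now, since get_user_pages_fast() pins the page containing the
first byte regardless of sub-page offset. A sketch of the helper under
that assumption, hedged from the rds_pin_pages() of this era:

        static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
                                 struct page **pages, int write)
        {
                int ret;

                /* Pins the pages spanning [user_addr, user_addr + ...);
                 * no caller-side page alignment required. */
                ret = get_user_pages_fast(user_addr, nr_pages, write, pages);

                /* A partial pin is useless to the callers: release and fail. */
                if (ret >= 0 && ret < nr_pages) {
                        while (ret--)
                                put_page(pages[ret]);
                        ret = -EFAULT;
                }

                return ret;
        }
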
@@ -406,68 +414,127 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
 
        spin_lock_irqsave(&rs->rs_rdma_lock, flags);
        mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
-       if (mr && (mr->r_use_once || force)) {
+       if (!mr) {
+               printk(KERN_ERR "rds: trying to unuse MR with unknown r_key %u!\n", r_key);
+               spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
+               return;
+       }
+
+       if (mr->r_use_once || force) {
                rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
                RB_CLEAR_NODE(&mr->r_rb_node);
                zot_me = 1;
-       } else if (mr)
-               atomic_inc(&mr->r_refcount);
+       }
        spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 
        /* May have to issue a dma_sync on this memory region.
         * Note we could avoid this if the operation was a RDMA READ,
         * but at this point we can't tell. */
-       if (mr != NULL) {
-               if (mr->r_trans->sync_mr)
-                       mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
-
-               /* If the MR was marked as invalidate, this will
-                * trigger an async flush. */
-               if (zot_me)
-                       rds_destroy_mr(mr);
-               rds_mr_put(mr);
-       }
+       if (mr->r_trans->sync_mr)
+               mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
+
+       /* If the MR was marked as invalidate, this will
+        * trigger an async flush. */
+       if (zot_me)
+               rds_destroy_mr(mr);
+       rds_mr_put(mr);
 }
 
-void rds_rdma_free_op(struct rds_rdma_op *ro)
+void rds_rdma_free_op(struct rm_rdma_op *ro)
 {
        unsigned int i;
 
-       for (i = 0; i < ro->r_nents; i++) {
-               struct page *page = sg_page(&ro->r_sg[i]);
+       for (i = 0; i < ro->op_nents; i++) {
+               struct page *page = sg_page(&ro->op_sg[i]);
 
                /* Mark page dirty if it was possibly modified, which
                 * is the case for a RDMA_READ which copies from remote
                 * to local memory */
-               if (!ro->r_write) {
-                       BUG_ON(in_interrupt());
+               if (!ro->op_write) {
+                       BUG_ON(irqs_disabled());
                        set_page_dirty(page);
                }
                put_page(page);
        }
 
-       kfree(ro->r_notifier);
-       kfree(ro);
+       kfree(ro->op_notifier);
+       ro->op_notifier = NULL;
+       ro->op_active = 0;
+}
+
+void rds_atomic_free_op(struct rm_atomic_op *ao)
+{
+       struct page *page = sg_page(ao->op_sg);
+
+       /* Mark page dirty if it was possibly modified, which
+        * is the case for a RDMA_READ which copies from remote
+        * to local memory */
+       set_page_dirty(page);
+       put_page(page);
+
+       kfree(ao->op_notifier);
+       ao->op_notifier = NULL;
+       ao->op_active = 0;
+}
+
+
+/*
+ * Count the number of pages needed to describe an incoming iovec.
+ */
+static int rds_rdma_pages(struct rds_rdma_args *args)
+{
+       struct rds_iovec vec;
+       struct rds_iovec __user *local_vec;
+       unsigned int tot_pages = 0;
+       unsigned int nr_pages;
+       unsigned int i;
+
+       local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+
+       /* figure out the number of pages in the vector */
+       for (i = 0; i < args->nr_local; i++) {
+               if (copy_from_user(&vec, &local_vec[i],
+                                  sizeof(struct rds_iovec)))
+                       return -EFAULT;
+
+               nr_pages = rds_pages_in_vec(&vec);
+               if (nr_pages == 0)
+                       return -EINVAL;
+
+               tot_pages += nr_pages;
+       }
+
+       return tot_pages;
+}
+
+int rds_rdma_extra_size(struct rds_rdma_args *args)
+{
+       return rds_rdma_pages(args) * sizeof(struct scatterlist);
 }
 
 /*
- * args is a pointer to an in-kernel copy in the sendmsg cmsg.
+ * The application asks for a RDMA transfer.
+ * Extract all arguments and set up the rdma_op
  */
-static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
-                                           struct rds_rdma_args *args)
+int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
+                      struct cmsghdr *cmsg)
 {
+       struct rds_rdma_args *args;
        struct rds_iovec vec;
-       struct rds_rdma_op *op = NULL;
+       struct rm_rdma_op *op = &rm->rdma;
        unsigned int nr_pages;
-       unsigned int max_pages;
        unsigned int nr_bytes;
        struct page **pages = NULL;
        struct rds_iovec __user *local_vec;
-       struct scatterlist *sg;
        unsigned int nr;
        unsigned int i, j;
-       int ret;
+       int ret = 0;
+
+       if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
+           || rm->rdma.op_active)
+               return -EINVAL;
 
+       args = CMSG_DATA(cmsg);
 
        if (rs->rs_bound_addr == 0) {
                ret = -ENOTCONN; /* XXX not a great errno */
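
rds_rdma_pages() above reuses rds_pages_in_vec() (untouched by this patch)
to turn one user iovec into a page count. For reference, a sketch of what
that helper computes, assuming the usual RDS overflow checks:

        static unsigned int rds_pages_in_vec(struct rds_iovec *vec)
        {
                /* Reject address wrap-around and byte counts too large
                 * to express the result in an unsigned int. */
                if ((vec->addr + vec->bytes <= vec->addr) ||
                    (vec->bytes > (u64)UINT_MAX))
                        return 0;

                /* Pages spanned = index of the page past the last byte
                 * minus index of the first page. */
                return ((vec->addr + vec->bytes + PAGE_SIZE - 1) >> PAGE_SHIFT) -
                       (vec->addr >> PAGE_SHIFT);
        }

One caveat worth flagging for review: rds_rdma_pages() returns -EFAULT and
-EINVAL through an int, but the caller below stores the result in the
unsigned "nr_pages" before testing "nr_pages < 0", so that error check can
never fire as written.
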
@@ -479,61 +546,38 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
                goto out;
        }
 
-       nr_pages = 0;
-       max_pages = 0;
-
-       local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
-
-       /* figure out the number of pages in the vector */
-       for (i = 0; i < args->nr_local; i++) {
-               if (copy_from_user(&vec, &local_vec[i],
-                                  sizeof(struct rds_iovec))) {
-                       ret = -EFAULT;
-                       goto out;
-               }
-
-               nr = rds_pages_in_vec(&vec);
-               if (nr == 0) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-
-               max_pages = max(nr, max_pages);
-               nr_pages += nr;
-       }
-
-       pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
-       if (pages == NULL) {
-               ret = -ENOMEM;
+       nr_pages = rds_rdma_pages(args);
+       if (nr_pages < 0)
                goto out;
-       }
 
-       op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL);
-       if (op == NULL) {
+       pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+       if (!pages) {
                ret = -ENOMEM;
                goto out;
        }
 
-       op->r_write = !!(args->flags & RDS_RDMA_READWRITE);
-       op->r_fence = !!(args->flags & RDS_RDMA_FENCE);
-       op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
-       op->r_recverr = rs->rs_recverr;
+       op->op_write = !!(args->flags & RDS_RDMA_READWRITE);
+       op->op_fence = !!(args->flags & RDS_RDMA_FENCE);
+       op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+       op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
+       op->op_active = 1;
+       op->op_recverr = rs->rs_recverr;
        WARN_ON(!nr_pages);
-       sg_init_table(op->r_sg, nr_pages);
+       op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
 
-       if (op->r_notify || op->r_recverr) {
+       if (op->op_notify || op->op_recverr) {
                /* We allocate an uninitialized notifier here, because
                 * we don't want to do that in the completion handler. We
                 * would have to use GFP_ATOMIC there, and don't want to deal
                 * with failed allocations.
                 */
-               op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
-               if (!op->r_notifier) {
+               op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
+               if (!op->op_notifier) {
                        ret = -ENOMEM;
                        goto out;
                }
-               op->r_notifier->n_user_token = args->user_token;
-               op->r_notifier->n_status = RDS_RDMA_SUCCESS;
+               op->op_notifier->n_user_token = args->user_token;
+               op->op_notifier->n_status = RDS_RDMA_SUCCESS;
        }
 
        /* The cookie contains the R_Key of the remote memory region, and
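
The per-op sg_init_table() is gone above: scatterlists are now carved out
of a pool preallocated alongside the rds_message, sized up front via
rds_rdma_extra_size(). A sketch of rds_message_alloc_sgs() under that
assumption, hedged from the net/rds/message.c side of this series:

        struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
        {
                /* The sg pool lives immediately after struct rds_message. */
                struct scatterlist *sg_first = (struct scatterlist *)&rm[1];
                struct scatterlist *sg_ret;

                WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);

                sg_ret = &sg_first[rm->m_used_sgs];
                sg_init_table(sg_ret, nents);
                rm->m_used_sgs += nents;

                return sg_ret;
        }

This is why rds_cmsg_rdma_args() can fill op in place instead of
allocating and handing back a standalone rds_rdma_op.
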
@@ -543,15 +587,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
         * destination address (which is really an offset into the MR)
         * FIXME: We may want to move this into ib_rdma.c
         */
-       op->r_key = rds_rdma_cookie_key(args->cookie);
-       op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
+       op->op_rkey = rds_rdma_cookie_key(args->cookie);
+       op->op_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
 
        nr_bytes = 0;
 
        rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n",
                 (unsigned long long)args->nr_local,
                 (unsigned long long)args->remote_vec.addr,
-                op->r_key);
+                op->op_rkey);
+
+       local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
 
        for (i = 0; i < args->nr_local; i++) {
                if (copy_from_user(&vec, &local_vec[i],
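
The cookie helpers used here come from rds.h; a sketch of the encoding they
assume (R_Key in the low 32 bits, byte offset into the MR in the high 32):

        typedef u64 rds_rdma_cookie_t;

        static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
        {
                return r_key | (u64)offset << 32;       /* key low, offset high */
        }

        static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
        {
                return cookie;                          /* truncates to low 32 bits */
        }

        static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
        {
                return cookie >> 32;
        }
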
@@ -569,15 +615,10 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
                rs->rs_user_addr = vec.addr;
                rs->rs_user_bytes = vec.bytes;
 
-               /* did the user change the vec under us? */
-               if (nr > max_pages || op->r_nents + nr > nr_pages) {
-                       ret = -EINVAL;
-                       goto out;
-               }
                /* If it's a WRITE operation, we want to pin the pages for reading.
                 * If it's a READ operation, we need to pin the pages for writing.
                 */
-               ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write);
+               ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
                if (ret < 0)
                        goto out;
 
@@ -588,8 +629,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 
                for (j = 0; j < nr; j++) {
                        unsigned int offset = vec.addr & ~PAGE_MASK;
+                       struct scatterlist *sg;
 
-                       sg = &op->r_sg[op->r_nents + j];
+                       sg = &op->op_sg[op->op_nents + j];
                        sg_set_page(sg, pages[j],
                                    min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
                                    offset);
@@ -601,10 +643,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
                        vec.bytes -= sg->length;
                }
 
-               op->r_nents += nr;
+               op->op_nents += nr;
        }
 
-
        if (nr_bytes > args->remote_vec.bytes) {
                rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n",
                         nr_bytes,
@@ -612,38 +653,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
                ret = -EINVAL;
                goto out;
        }
-       op->r_bytes = nr_bytes;
+       op->op_bytes = nr_bytes;
 
        ret = 0;
 out:
        kfree(pages);
-       if (ret) {
-               if (op)
-                       rds_rdma_free_op(op);
-               op = ERR_PTR(ret);
-       }
-       return op;
-}
-
-/*
- * The application asks for a RDMA transfer.
- * Extract all arguments and set up the rdma_op
- */
-int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
-                      struct cmsghdr *cmsg)
-{
-       struct rds_rdma_op *op;
-
-       if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
-           rm->m_rdma_op != NULL)
-               return -EINVAL;
+       if (ret)
+               rds_rdma_free_op(op);
 
-       op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
-       if (IS_ERR(op))
-               return PTR_ERR(op);
        rds_stats_inc(s_send_rdma);
-       rm->m_rdma_op = op;
-       return 0;
+
+       return ret;
 }
 
 /*
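
For orientation, this is what the merged entry point consumes from
userspace. A minimal sketch (assuming the uapi definitions from
linux/rds.h in this series: SOL_RDS, RDS_CMSG_RDMA_ARGS, struct
rds_rdma_args and struct rds_iovec) of issuing an RDMA write on a bound,
connected PF_RDS socket:

        #include <linux/rds.h>
        #include <netinet/in.h>
        #include <stdint.h>
        #include <string.h>
        #include <sys/socket.h>

        /* Ask the kernel to RDMA-write buf into the remote MR named by
         * cookie. A real caller would typically also attach a payload
         * via msg_iov so the peer learns the transfer happened. */
        static int rds_send_rdma_write(int fd, struct sockaddr_in *dest,
                                       uint64_t cookie, uint64_t remote_addr,
                                       void *buf, size_t len, uint64_t token)
        {
                struct rds_iovec iov = {
                        .addr  = (uint64_t)(uintptr_t)buf,
                        .bytes = len,
                };
                struct rds_rdma_args args = {
                        .cookie         = cookie,
                        .remote_vec     = { .addr = remote_addr, .bytes = len },
                        .local_vec_addr = (uint64_t)(uintptr_t)&iov,
                        .nr_local       = 1,
                        .flags          = RDS_RDMA_READWRITE | RDS_RDMA_NOTIFY_ME,
                        .user_token     = token,
                };
                char cbuf[CMSG_SPACE(sizeof(args))];
                struct msghdr msg = {
                        .msg_name       = dest,
                        .msg_namelen    = sizeof(*dest),
                        .msg_control    = cbuf,
                        .msg_controllen = sizeof(cbuf),
                };
                struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

                memset(cbuf, 0, sizeof(cbuf));
                cmsg->cmsg_level = SOL_RDS;
                cmsg->cmsg_type  = RDS_CMSG_RDMA_ARGS;
                cmsg->cmsg_len   = CMSG_LEN(sizeof(args));
                memcpy(CMSG_DATA(cmsg), &args, sizeof(args));

                return sendmsg(fd, &msg, 0);
        }

With RDS_RDMA_NOTIFY_ME set, completion status comes back on a later
recvmsg() as an RDS_CMSG_RDMA_STATUS control message carrying user_token.
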
@@ -673,7 +693,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
 
        spin_lock_irqsave(&rs->rs_rdma_lock, flags);
        mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
-       if (mr == NULL)
+       if (!mr)
                err = -EINVAL;  /* invalid r_key */
        else
                atomic_inc(&mr->r_refcount);
@@ -681,7 +701,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
 
        if (mr) {
                mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
-               rm->m_rdma_mr = mr;
+               rm->rdma.op_rdma_mr = mr;
        }
        return err;
 }
@@ -699,5 +719,98 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
            rm->m_rdma_cookie != 0)
                return -EINVAL;
 
-       return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr);
+       return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr);
+}
+
+/*
+ * Fill in rds_message for an atomic request.
+ */
+int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
+                   struct cmsghdr *cmsg)
+{
+       struct page *page = NULL;
+       struct rds_atomic_args *args;
+       int ret = 0;
+
+       if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
+           || rm->atomic.op_active)
+               return -EINVAL;
+
+       args = CMSG_DATA(cmsg);
+
+       /* Nonmasked & masked cmsg ops converted to masked hw ops */
+       switch (cmsg->cmsg_type) {
+       case RDS_CMSG_ATOMIC_FADD:
+               rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
+               rm->atomic.op_m_fadd.add = args->fadd.add;
+               rm->atomic.op_m_fadd.nocarry_mask = 0;
+               break;
+       case RDS_CMSG_MASKED_ATOMIC_FADD:
+               rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
+               rm->atomic.op_m_fadd.add = args->m_fadd.add;
+               rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask;
+               break;
+       case RDS_CMSG_ATOMIC_CSWP:
+               rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
+               rm->atomic.op_m_cswp.compare = args->cswp.compare;
+               rm->atomic.op_m_cswp.swap = args->cswp.swap;
+               rm->atomic.op_m_cswp.compare_mask = ~0;
+               rm->atomic.op_m_cswp.swap_mask = ~0;
+               break;
+       case RDS_CMSG_MASKED_ATOMIC_CSWP:
+               rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
+               rm->atomic.op_m_cswp.compare = args->m_cswp.compare;
+               rm->atomic.op_m_cswp.swap = args->m_cswp.swap;
+               rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask;
+               rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask;
+               break;
+       default:
+               BUG(); /* should never happen */
+       }
+
+       rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+       rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
+       rm->atomic.op_active = 1;
+       rm->atomic.op_recverr = rs->rs_recverr;
+       rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+
+       /* verify 8 byte-aligned */
+       if (args->local_addr & 0x7) {
+               ret = -EFAULT;
+               goto err;
+       }
+
+       ret = rds_pin_pages(args->local_addr, 1, &page, 1);
+       if (ret != 1)
+               goto err;
+       ret = 0;
+
+       sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
+
+       if (rm->atomic.op_notify || rm->atomic.op_recverr) {
+               /* We allocate an uninitialized notifier here, because
+                * we don't want to do that in the completion handler. We
+                * would have to use GFP_ATOMIC there, and don't want to deal
+                * with failed allocations.
+                */
+               rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
+               if (!rm->atomic.op_notifier) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               rm->atomic.op_notifier->n_user_token = args->user_token;
+               rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
+       }
+
+       rm->atomic.op_rkey = rds_rdma_cookie_key(args->cookie);
+       rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
+
+       return ret;
+err:
+       if (page)
+               put_page(page);
+       kfree(rm->atomic.op_notifier);
+
+       return ret;
 }
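
And the matching userspace side of the new atomic path. A minimal sketch,
assuming the uapi additions from this series (RDS_CMSG_ATOMIC_FADD and
struct rds_atomic_args with its anonymous op union, per the field accesses
in the switch above):

        #include <linux/rds.h>
        #include <netinet/in.h>
        #include <stdint.h>
        #include <string.h>
        #include <sys/socket.h>

        /* Remote fetch-and-add on the 64-bit word named by (cookie,
         * remote_addr). The old value lands in *local, which must be
         * 8-byte aligned or the kernel returns -EFAULT (see the
         * alignment check above). */
        static int rds_atomic_fadd(int fd, struct sockaddr_in *dest,
                                   uint64_t cookie, uint64_t remote_addr,
                                   uint64_t *local, uint64_t add,
                                   uint64_t token)
        {
                struct rds_atomic_args args = {
                        .cookie      = cookie,
                        .local_addr  = (uint64_t)(uintptr_t)local,
                        .remote_addr = remote_addr,
                        .fadd        = { .add = add },
                        .flags       = RDS_RDMA_NOTIFY_ME,
                        .user_token  = token,
                };
                char cbuf[CMSG_SPACE(sizeof(args))];
                struct msghdr msg = {
                        .msg_name       = dest,
                        .msg_namelen    = sizeof(*dest),
                        .msg_control    = cbuf,
                        .msg_controllen = sizeof(cbuf),
                };
                struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

                memset(cbuf, 0, sizeof(cbuf));
                cmsg->cmsg_level = SOL_RDS;
                cmsg->cmsg_type  = RDS_CMSG_ATOMIC_FADD;
                cmsg->cmsg_len   = CMSG_LEN(sizeof(args));
                memcpy(CMSG_DATA(cmsg), &args, sizeof(args));

                return sendmsg(fd, &msg, 0);
        }

Per the conversion switch above, the kernel lowers this to a masked
hardware FADD with nocarry_mask = 0, so plain and masked requests share
one execution path.
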
