Diffstat (limited to 'net/rds/rdma.c')

 net/rds/rdma.c | 339 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 226 insertions(+), 113 deletions(-)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 75fd13bb631b..48064673fc76 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -35,7 +35,7 @@
 #include <linux/rbtree.h>
 #include <linux/dma-mapping.h> /* for DMA_*_DEVICE */
 
-#include "rdma.h"
+#include "rds.h"
 
 /*
  * XXX
@@ -130,14 +130,22 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
 {
 	struct rds_mr *mr;
 	struct rb_node *node;
+	unsigned long flags;
 
 	/* Release any MRs associated with this socket */
+	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	while ((node = rb_first(&rs->rs_rdma_keys))) {
 		mr = container_of(node, struct rds_mr, r_rb_node);
 		if (mr->r_trans == rs->rs_transport)
 			mr->r_invalidate = 0;
+		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
+		RB_CLEAR_NODE(&mr->r_rb_node);
+		spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
+		rds_destroy_mr(mr);
 		rds_mr_put(mr);
+		spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	}
+	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 
 	if (rs->rs_transport && rs->rs_transport->flush_mrs)
 		rs->rs_transport->flush_mrs();
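The reworked rds_rdma_drop_keys() walk above follows a common kernel pattern: detach the rbtree node while the spinlock is held, drop the lock before running teardown that may block, and re-acquire it before touching the tree again. A minimal sketch of that pattern with hypothetical names (struct item, item_teardown), not RDS code:

	spin_lock_irqsave(&lock, flags);
	while ((node = rb_first(&tree))) {
		item = container_of(node, struct item, i_rb_node);
		rb_erase(node, &tree);
		RB_CLEAR_NODE(node);
		spin_unlock_irqrestore(&lock, flags);
		item_teardown(item);	/* may sleep; must not hold the lock */
		spin_lock_irqsave(&lock, flags);
	}
	spin_unlock_irqrestore(&lock, flags);

Because rb_first() is re-evaluated on every iteration, the walk stays valid across each unlocked window.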
@@ -181,7 +189,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 		goto out;
 	}
 
-	if (rs->rs_transport->get_mr == NULL) {
+	if (!rs->rs_transport->get_mr) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
@@ -197,13 +205,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 
 	/* XXX clamp nr_pages to limit the size of this alloc? */
 	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
-	if (pages == NULL) {
+	if (!pages) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
 	mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL);
-	if (mr == NULL) {
+	if (!mr) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -230,13 +238,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 	 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to
 	 * the zero page.
 	 */
-	ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1);
+	ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
 	if (ret < 0)
 		goto out;
 
 	nents = ret;
 	sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
-	if (sg == NULL) {
+	if (!sg) {
 		ret = -ENOMEM;
 		goto out;
 	}
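Both this call site and the one in rds_cmsg_rdma_args() below stop masking the address with PAGE_MASK before pinning, which suggests the alignment handling now lives inside rds_pin_pages() itself. The page count for an unaligned vector is the usual span arithmetic; a sketch consistent with how rds_pages_in_vec() is used in this file (helper name hypothetical):

	/* Sketch: number of pages spanned by [addr, addr + bytes). */
	static unsigned int pages_spanned(u64 addr, u64 bytes)
	{
		return ((addr + bytes + PAGE_SIZE - 1) >> PAGE_SHIFT) -
		       (addr >> PAGE_SHIFT);
	}

For example, addr = 0x1ff0 with bytes = 0x20 on 4 KiB pages spans two pages, even though bytes is far smaller than PAGE_SIZE.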
@@ -406,68 +414,127 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
 
 	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
-	if (mr && (mr->r_use_once || force)) {
+	if (!mr) {
+		printk(KERN_ERR "rds: trying to unuse MR with unknown r_key %u!\n", r_key);
+		spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
+		return;
+	}
+
+	if (mr->r_use_once || force) {
 		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
 		RB_CLEAR_NODE(&mr->r_rb_node);
 		zot_me = 1;
-	} else if (mr)
-		atomic_inc(&mr->r_refcount);
+	}
 	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 
 	/* May have to issue a dma_sync on this memory region.
 	 * Note we could avoid this if the operation was a RDMA READ,
 	 * but at this point we can't tell. */
-	if (mr != NULL) {
-		if (mr->r_trans->sync_mr)
-			mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
-
-		/* If the MR was marked as invalidate, this will
-		 * trigger an async flush. */
-		if (zot_me)
-			rds_destroy_mr(mr);
-		rds_mr_put(mr);
-	}
+	if (mr->r_trans->sync_mr)
+		mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
+
+	/* If the MR was marked as invalidate, this will
+	 * trigger an async flush. */
+	if (zot_me)
+		rds_destroy_mr(mr);
+	rds_mr_put(mr);
 }
 
-void rds_rdma_free_op(struct rds_rdma_op *ro)
+void rds_rdma_free_op(struct rm_rdma_op *ro)
 {
 	unsigned int i;
 
-	for (i = 0; i < ro->r_nents; i++) {
-		struct page *page = sg_page(&ro->r_sg[i]);
+	for (i = 0; i < ro->op_nents; i++) {
+		struct page *page = sg_page(&ro->op_sg[i]);
 
 		/* Mark page dirty if it was possibly modified, which
 		 * is the case for a RDMA_READ which copies from remote
 		 * to local memory */
-		if (!ro->r_write) {
-			BUG_ON(in_interrupt());
+		if (!ro->op_write) {
+			BUG_ON(irqs_disabled());
 			set_page_dirty(page);
 		}
 		put_page(page);
 	}
 
-	kfree(ro->r_notifier);
-	kfree(ro);
+	kfree(ro->op_notifier);
+	ro->op_notifier = NULL;
+	ro->op_active = 0;
+}
+
+void rds_atomic_free_op(struct rm_atomic_op *ao)
+{
+	struct page *page = sg_page(ao->op_sg);
+
+	/* Mark page dirty if it was possibly modified, which
+	 * is the case for a RDMA_READ which copies from remote
+	 * to local memory */
+	set_page_dirty(page);
+	put_page(page);
+
+	kfree(ao->op_notifier);
+	ao->op_notifier = NULL;
+	ao->op_active = 0;
+}
+
+
+/*
+ * Count the number of pages needed to describe an incoming iovec.
+ */
+static int rds_rdma_pages(struct rds_rdma_args *args)
+{
+	struct rds_iovec vec;
+	struct rds_iovec __user *local_vec;
+	unsigned int tot_pages = 0;
+	unsigned int nr_pages;
+	unsigned int i;
+
+	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+
+	/* figure out the number of pages in the vector */
+	for (i = 0; i < args->nr_local; i++) {
+		if (copy_from_user(&vec, &local_vec[i],
+				   sizeof(struct rds_iovec)))
+			return -EFAULT;
+
+		nr_pages = rds_pages_in_vec(&vec);
+		if (nr_pages == 0)
+			return -EINVAL;
+
+		tot_pages += nr_pages;
+	}
+
+	return tot_pages;
+}
+
+int rds_rdma_extra_size(struct rds_rdma_args *args)
+{
+	return rds_rdma_pages(args) * sizeof(struct scatterlist);
 }
 
 /*
- * args is a pointer to an in-kernel copy in the sendmsg cmsg.
+ * The application asks for a RDMA transfer.
+ * Extract all arguments and set up the rdma_op
  */
-static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
-					    struct rds_rdma_args *args)
+int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
+		       struct cmsghdr *cmsg)
 {
+	struct rds_rdma_args *args;
 	struct rds_iovec vec;
-	struct rds_rdma_op *op = NULL;
+	struct rm_rdma_op *op = &rm->rdma;
 	unsigned int nr_pages;
-	unsigned int max_pages;
 	unsigned int nr_bytes;
 	struct page **pages = NULL;
 	struct rds_iovec __user *local_vec;
-	struct scatterlist *sg;
 	unsigned int nr;
 	unsigned int i, j;
-	int ret;
+	int ret = 0;
+
+	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
+	    || rm->rdma.op_active)
+		return -EINVAL;
 
+	args = CMSG_DATA(cmsg);
 
 	if (rs->rs_bound_addr == 0) {
 		ret = -ENOTCONN; /* XXX not a great errno */
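The new rds_cmsg_rdma_args() above is the kernel half of the sendmsg() control-message interface. A hypothetical userspace sketch of the matching sender side, assuming the uapi rds_rdma_args layout and with msghdr/iovec setup omitted:

	struct rds_rdma_args args = {
		.cookie          = cookie,	/* from a prior RDS_CMSG_RDMA_MAP */
		.local_vec_addr  = (uint64_t)(unsigned long)iov,
		.nr_local        = 1,
		.remote_vec      = { .addr = 0, .bytes = len },
		.flags           = RDS_RDMA_READWRITE | RDS_RDMA_NOTIFY_ME,
		.user_token      = my_token,
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_RDS;
	cmsg->cmsg_type  = RDS_CMSG_RDMA_ARGS;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(args));
	memcpy(CMSG_DATA(cmsg), &args, sizeof(args));
	if (sendmsg(fd, &msg, 0) < 0)
		perror("sendmsg");	/* op failures arrive as notifications */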
@@ -479,61 +546,38 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		goto out;
 	}
 
-	nr_pages = 0;
-	max_pages = 0;
-
-	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
-
-	/* figure out the number of pages in the vector */
-	for (i = 0; i < args->nr_local; i++) {
-		if (copy_from_user(&vec, &local_vec[i],
-				   sizeof(struct rds_iovec))) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		nr = rds_pages_in_vec(&vec);
-		if (nr == 0) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		max_pages = max(nr, max_pages);
-		nr_pages += nr;
-	}
-
-	pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
-	if (pages == NULL) {
-		ret = -ENOMEM;
+	nr_pages = rds_rdma_pages(args);
+	if (nr_pages < 0)
 		goto out;
-	}
 
-	op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL);
-	if (op == NULL) {
+	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	op->r_write = !!(args->flags & RDS_RDMA_READWRITE);
-	op->r_fence = !!(args->flags & RDS_RDMA_FENCE);
-	op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
-	op->r_recverr = rs->rs_recverr;
+	op->op_write = !!(args->flags & RDS_RDMA_READWRITE);
+	op->op_fence = !!(args->flags & RDS_RDMA_FENCE);
+	op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+	op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
+	op->op_active = 1;
+	op->op_recverr = rs->rs_recverr;
 	WARN_ON(!nr_pages);
-	sg_init_table(op->r_sg, nr_pages);
+	op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
 
-	if (op->r_notify || op->r_recverr) {
+	if (op->op_notify || op->op_recverr) {
 		/* We allocate an uninitialized notifier here, because
 		 * we don't want to do that in the completion handler. We
 		 * would have to use GFP_ATOMIC there, and don't want to deal
 		 * with failed allocations.
 		 */
-		op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
-		if (!op->r_notifier) {
+		op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
+		if (!op->op_notifier) {
 			ret = -ENOMEM;
 			goto out;
 		}
-		op->r_notifier->n_user_token = args->user_token;
-		op->r_notifier->n_status = RDS_RDMA_SUCCESS;
+		op->op_notifier->n_user_token = args->user_token;
+		op->op_notifier->n_status = RDS_RDMA_SUCCESS;
 	}
 
 	/* The cookie contains the R_Key of the remote memory region, and
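op->op_sg now comes from rds_message_alloc_sgs() rather than a private kzalloc(); rds_rdma_extra_size() above tells the message allocator how many scatterlist entries to reserve up front. A sketch of the allocator side, assuming the reserve-and-carve scheme implied here (the real definition and the m_used_sgs/m_total_sgs names live in message.c/rds.h):

	struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
	{
		/* sg pool is reserved immediately after the rds_message */
		struct scatterlist *sg_first = (struct scatterlist *)&rm[1];
		struct scatterlist *sg_ret = &sg_first[rm->m_used_sgs];

		WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
		sg_init_table(sg_ret, nents);
		rm->m_used_sgs += nents;
		return sg_ret;
	}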
@@ -543,15 +587,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 	 * destination address (which is really an offset into the MR)
 	 * FIXME: We may want to move this into ib_rdma.c
 	 */
-	op->r_key = rds_rdma_cookie_key(args->cookie);
-	op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
+	op->op_rkey = rds_rdma_cookie_key(args->cookie);
+	op->op_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
 
 	nr_bytes = 0;
 
 	rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n",
 		 (unsigned long long)args->nr_local,
 		 (unsigned long long)args->remote_vec.addr,
-		 op->r_key);
+		 op->op_rkey);
+
+	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
 
 	for (i = 0; i < args->nr_local; i++) {
 		if (copy_from_user(&vec, &local_vec[i],
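op_rkey and op_remote_addr are both unpacked from the 64-bit cookie handed back at MR registration time. A sketch of the encoding these inline helpers assume, with the R_Key in the low 32 bits and the MR offset in the high 32 bits (the real definitions live in the RDS headers):

	static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
	{
		return r_key | (u64)offset << 32;
	}

	static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
	{
		return (u32)cookie;
	}

	static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
	{
		return cookie >> 32;
	}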
@@ -569,15 +615,10 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		rs->rs_user_addr = vec.addr;
 		rs->rs_user_bytes = vec.bytes;
 
-		/* did the user change the vec under us? */
-		if (nr > max_pages || op->r_nents + nr > nr_pages) {
-			ret = -EINVAL;
-			goto out;
-		}
 		/* If it's a WRITE operation, we want to pin the pages for reading.
 		 * If it's a READ operation, we need to pin the pages for writing.
 		 */
-		ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write);
+		ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
 		if (ret < 0)
 			goto out;
 
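The pin direction flips with the transfer direction: an RDMA WRITE reads local memory, an RDMA READ lands in it, hence the !op->op_write. rds_pin_pages() itself is defined earlier in this file; a sketch of its likely get_user_pages_fast() wrapper form (an assumption, shown here for context):

	static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
				 struct page **pages, int write)
	{
		int ret = get_user_pages_fast(user_addr, nr_pages, write, pages);

		if (ret >= 0 && ret < nr_pages) {
			/* a partial pin is useless: undo it and fail */
			while (ret--)
				put_page(pages[ret]);
			ret = -EFAULT;
		}
		return ret;
	}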
@@ -588,8 +629,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 
 		for (j = 0; j < nr; j++) {
 			unsigned int offset = vec.addr & ~PAGE_MASK;
+			struct scatterlist *sg;
 
-			sg = &op->r_sg[op->r_nents + j];
+			sg = &op->op_sg[op->op_nents + j];
 			sg_set_page(sg, pages[j],
 				    min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
 				    offset);
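Worked example (numbers illustrative, not from the source): with vec.addr = 0x1ff0 and vec.bytes = 32 on 4 KiB pages, the first iteration sets an sg entry of length min(32, 4096 - 0xff0) = 16 at page offset 0xff0; after addr and bytes are advanced, the second iteration maps the remaining 16 bytes at offset 0 of the next page.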
@@ -601,10 +643,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 			vec.bytes -= sg->length;
 		}
 
-		op->r_nents += nr;
+		op->op_nents += nr;
 	}
 
-
 	if (nr_bytes > args->remote_vec.bytes) {
 		rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n",
 			 nr_bytes,
@@ -612,38 +653,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		ret = -EINVAL;
 		goto out;
 	}
-	op->r_bytes = nr_bytes;
+	op->op_bytes = nr_bytes;
 
 	ret = 0;
 out:
 	kfree(pages);
-	if (ret) {
-		if (op)
-			rds_rdma_free_op(op);
-		op = ERR_PTR(ret);
-	}
-	return op;
-}
-
-/*
- * The application asks for a RDMA transfer.
- * Extract all arguments and set up the rdma_op
- */
-int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
-		       struct cmsghdr *cmsg)
-{
-	struct rds_rdma_op *op;
-
-	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
-	    rm->m_rdma_op != NULL)
-		return -EINVAL;
+	if (ret)
+		rds_rdma_free_op(op);
 
-	op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
-	if (IS_ERR(op))
-		return PTR_ERR(op);
 	rds_stats_inc(s_send_rdma);
-	rm->m_rdma_op = op;
-	return 0;
+
+	return ret;
 }
 
 /*
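The simplified error path works because the op is no longer a standalone allocation: rds_rdma_free_op() now resets the embedded struct instead of freeing it, and the merged function returns a plain errno rather than an ERR_PTR. The rm->rdma and rm->atomic references throughout this diff imply rds_message embeds the ops roughly like this (a sketch; see rds.h for the real layout):

	struct rds_message {
		/* ... */
		struct rm_atomic_op atomic;
		struct rm_rdma_op rdma;
		/* scatterlist pool follows, sized via rds_rdma_extra_size() */
	};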
@@ -673,7 +693,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
 
 	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
-	if (mr == NULL)
+	if (!mr)
 		err = -EINVAL;	/* invalid r_key */
 	else
 		atomic_inc(&mr->r_refcount);
@@ -681,7 +701,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
 
 	if (mr) {
 		mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
-		rm->m_rdma_mr = mr;
+		rm->rdma.op_rdma_mr = mr;
 	}
 	return err;
 }
@@ -699,5 +719,98 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
 	    rm->m_rdma_cookie != 0)
 		return -EINVAL;
 
-	return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr);
+	return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr);
+}
+
+/*
+ * Fill in rds_message for an atomic request.
+ */
+int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
+		    struct cmsghdr *cmsg)
+{
+	struct page *page = NULL;
+	struct rds_atomic_args *args;
+	int ret = 0;
+
+	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
+	    || rm->atomic.op_active)
+		return -EINVAL;
+
+	args = CMSG_DATA(cmsg);
+
+	/* Nonmasked & masked cmsg ops converted to masked hw ops */
+	switch (cmsg->cmsg_type) {
+	case RDS_CMSG_ATOMIC_FADD:
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
+		rm->atomic.op_m_fadd.add = args->fadd.add;
+		rm->atomic.op_m_fadd.nocarry_mask = 0;
+		break;
+	case RDS_CMSG_MASKED_ATOMIC_FADD:
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
+		rm->atomic.op_m_fadd.add = args->m_fadd.add;
+		rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask;
+		break;
+	case RDS_CMSG_ATOMIC_CSWP:
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
+		rm->atomic.op_m_cswp.compare = args->cswp.compare;
+		rm->atomic.op_m_cswp.swap = args->cswp.swap;
+		rm->atomic.op_m_cswp.compare_mask = ~0;
+		rm->atomic.op_m_cswp.swap_mask = ~0;
+		break;
+	case RDS_CMSG_MASKED_ATOMIC_CSWP:
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
+		rm->atomic.op_m_cswp.compare = args->m_cswp.compare;
+		rm->atomic.op_m_cswp.swap = args->m_cswp.swap;
+		rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask;
+		rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask;
+		break;
+	default:
+		BUG(); /* should never happen */
+	}
+
+	rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+	rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
+	rm->atomic.op_active = 1;
+	rm->atomic.op_recverr = rs->rs_recverr;
+	rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+
+	/* verify 8 byte-aligned */
+	if (args->local_addr & 0x7) {
+		ret = -EFAULT;
+		goto err;
+	}
+
+	ret = rds_pin_pages(args->local_addr, 1, &page, 1);
+	if (ret != 1)
+		goto err;
+	ret = 0;
+
+	sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
+
+	if (rm->atomic.op_notify || rm->atomic.op_recverr) {
+		/* We allocate an uninitialized notifier here, because
+		 * we don't want to do that in the completion handler. We
+		 * would have to use GFP_ATOMIC there, and don't want to deal
+		 * with failed allocations.
+		 */
+		rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
+		if (!rm->atomic.op_notifier) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		rm->atomic.op_notifier->n_user_token = args->user_token;
+		rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
+	}
+
+	rm->atomic.op_rkey = rds_rdma_cookie_key(args->cookie);
+	rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
+
+	return ret;
+err:
+	if (page)
+		put_page(page);
+	kfree(rm->atomic.op_notifier);
+
+	return ret;
 }
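All four cmsg types funnel into the two masked hardware ops; the unmasked variants simply use degenerate masks (compare_mask = swap_mask = ~0, nocarry_mask = 0). A sketch of the masked compare-and-swap semantics the fields describe, as such hardware atomics are commonly defined (illustrative helper, not kernel code):

	/* Only bits under compare_mask take part in the compare, and only
	 * bits under swap_mask are replaced on a successful compare. */
	static uint64_t masked_cswp(uint64_t val, uint64_t compare, uint64_t swap,
				    uint64_t compare_mask, uint64_t swap_mask)
	{
		if ((val & compare_mask) == (compare & compare_mask))
			return (val & ~swap_mask) | (swap & swap_mask);
		return val;
	}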