diff options
author | Andy Grover <andy.grover@oracle.com> | 2010-01-20 00:25:26 -0500 |
---|---|---|
committer | Andy Grover <andy.grover@oracle.com> | 2010-09-08 21:11:55 -0400 |
commit | 241eef3e2f51fe4ad50abacd7f79c4e2d468197e (patch) | |
tree | 020170cb9c3bea79d767b19cd7362a51b8446667 /net/rds/ib_send.c | |
parent | d37c9359056f4f07b37e59810f0ece1031e280b2 (diff) |
RDS: Implement silent atomics
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Diffstat (limited to 'net/rds/ib_send.c')
-rw-r--r-- | net/rds/ib_send.c | 62 |
1 files changed, 32 insertions, 30 deletions
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index d839b403d46b..e6745d827c3a 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -225,15 +225,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) | |||
225 | /* In the error case, wc.opcode sometimes contains garbage */ | 225 | /* In the error case, wc.opcode sometimes contains garbage */ |
226 | switch (send->s_wr.opcode) { | 226 | switch (send->s_wr.opcode) { |
227 | case IB_WR_SEND: | 227 | case IB_WR_SEND: |
228 | if (send->s_rm) | ||
229 | rds_ib_send_unmap_rm(ic, send, wc.status); | ||
230 | break; | ||
231 | case IB_WR_RDMA_WRITE: | 228 | case IB_WR_RDMA_WRITE: |
232 | case IB_WR_RDMA_READ: | 229 | case IB_WR_RDMA_READ: |
233 | case IB_WR_ATOMIC_FETCH_AND_ADD: | 230 | case IB_WR_ATOMIC_FETCH_AND_ADD: |
234 | case IB_WR_ATOMIC_CMP_AND_SWP: | 231 | case IB_WR_ATOMIC_CMP_AND_SWP: |
235 | /* Nothing to be done - the SG list will be unmapped | 232 | if (send->s_rm) |
236 | * when the SEND completes. */ | 233 | rds_ib_send_unmap_rm(ic, send, wc.status); |
237 | break; | 234 | break; |
238 | default: | 235 | default: |
239 | if (printk_ratelimit()) | 236 | if (printk_ratelimit()) |
@@ -425,6 +422,21 @@ void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted) | |||
425 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | 422 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); |
426 | } | 423 | } |
427 | 424 | ||
425 | static inline void rds_ib_set_wr_signal_state(struct rds_ib_connection *ic, | ||
426 | struct rds_ib_send_work *send, | ||
427 | bool notify) | ||
428 | { | ||
429 | /* | ||
430 | * We want to delay signaling completions just enough to get | ||
431 | * the batching benefits but not so much that we create dead time | ||
432 | * on the wire. | ||
433 | */ | ||
434 | if (ic->i_unsignaled_wrs-- == 0 || notify) { | ||
435 | ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; | ||
436 | send->s_wr.send_flags |= IB_SEND_SIGNALED; | ||
437 | } | ||
438 | } | ||
439 | |||
428 | /* | 440 | /* |
429 | * This can be called multiple times for a given message. The first time | 441 | * This can be called multiple times for a given message. The first time |
430 | * we see a message we map its scatterlist into the IB device so that | 442 | * we see a message we map its scatterlist into the IB device so that |
@@ -517,7 +529,6 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, | |||
517 | rm->data.m_count = 0; | 529 | rm->data.m_count = 0; |
518 | } | 530 | } |
519 | 531 | ||
520 | ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; | ||
521 | rds_message_addref(rm); | 532 | rds_message_addref(rm); |
522 | ic->i_rm = rm; | 533 | ic->i_rm = rm; |
523 | 534 | ||
@@ -608,15 +619,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, | |||
608 | } | 619 | } |
609 | } | 620 | } |
610 | 621 | ||
611 | /* | 622 | rds_ib_set_wr_signal_state(ic, send, 0); |
612 | * We want to delay signaling completions just enough to get | ||
613 | * the batching benefits but not so much that we create dead time | ||
614 | * on the wire. | ||
615 | */ | ||
616 | if (ic->i_unsignaled_wrs-- == 0) { | ||
617 | ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; | ||
618 | send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
619 | } | ||
620 | 623 | ||
621 | /* | 624 | /* |
622 | * Always signal the last one if we're stopping due to flow control. | 625 | * Always signal the last one if we're stopping due to flow control. |
@@ -656,7 +659,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, | |||
656 | /* if we finished the message then send completion owns it */ | 659 | /* if we finished the message then send completion owns it */ |
657 | if (scat == &rm->data.m_sg[rm->data.m_count]) { | 660 | if (scat == &rm->data.m_sg[rm->data.m_count]) { |
658 | prev->s_rm = ic->i_rm; | 661 | prev->s_rm = ic->i_rm; |
659 | prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | 662 | prev->s_wr.send_flags |= IB_SEND_SOLICITED; |
660 | ic->i_rm = NULL; | 663 | ic->i_rm = NULL; |
661 | } | 664 | } |
662 | 665 | ||
@@ -698,9 +701,10 @@ out: | |||
698 | * A simplified version of the rdma case, we always map 1 SG, and | 701 | * A simplified version of the rdma case, we always map 1 SG, and |
699 | * only 8 bytes, for the return value from the atomic operation. | 702 | * only 8 bytes, for the return value from the atomic operation. |
700 | */ | 703 | */ |
701 | int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) | 704 | int rds_ib_xmit_atomic(struct rds_connection *conn, struct rds_message *rm) |
702 | { | 705 | { |
703 | struct rds_ib_connection *ic = conn->c_transport_data; | 706 | struct rds_ib_connection *ic = conn->c_transport_data; |
707 | struct rm_atomic_op *op = &rm->atomic; | ||
704 | struct rds_ib_send_work *send = NULL; | 708 | struct rds_ib_send_work *send = NULL; |
705 | struct ib_send_wr *failed_wr; | 709 | struct ib_send_wr *failed_wr; |
706 | struct rds_ib_device *rds_ibdev; | 710 | struct rds_ib_device *rds_ibdev; |
@@ -731,12 +735,20 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) | |||
731 | send->s_wr.wr.atomic.compare_add = op->op_swap_add; | 735 | send->s_wr.wr.atomic.compare_add = op->op_swap_add; |
732 | send->s_wr.wr.atomic.swap = 0; | 736 | send->s_wr.wr.atomic.swap = 0; |
733 | } | 737 | } |
734 | send->s_wr.send_flags = IB_SEND_SIGNALED; | 738 | rds_ib_set_wr_signal_state(ic, send, op->op_notify); |
735 | send->s_wr.num_sge = 1; | 739 | send->s_wr.num_sge = 1; |
736 | send->s_wr.next = NULL; | 740 | send->s_wr.next = NULL; |
737 | send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; | 741 | send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; |
738 | send->s_wr.wr.atomic.rkey = op->op_rkey; | 742 | send->s_wr.wr.atomic.rkey = op->op_rkey; |
739 | 743 | ||
744 | /* | ||
745 | * If there is no data or rdma ops in the message, then | ||
746 | * we must fill in s_rm ourselves, so we properly clean up | ||
747 | * on completion. | ||
748 | */ | ||
749 | if (!rm->rdma.m_rdma_op.r_active && !rm->data.op_active) | ||
750 | send->s_rm = rm; | ||
751 | |||
740 | /* map 8 byte retval buffer to the device */ | 752 | /* map 8 byte retval buffer to the device */ |
741 | ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE); | 753 | ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE); |
742 | rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret); | 754 | rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret); |
@@ -836,14 +848,8 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) | |||
836 | for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { | 848 | for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { |
837 | send->s_wr.send_flags = 0; | 849 | send->s_wr.send_flags = 0; |
838 | send->s_queued = jiffies; | 850 | send->s_queued = jiffies; |
839 | /* | 851 | |
840 | * We want to delay signaling completions just enough to get | 852 | rds_ib_set_wr_signal_state(ic, send, op->r_notify); |
841 | * the batching benefits but not so much that we create dead time on the wire. | ||
842 | */ | ||
843 | if (ic->i_unsignaled_wrs-- == 0) { | ||
844 | ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; | ||
845 | send->s_wr.send_flags = IB_SEND_SIGNALED; | ||
846 | } | ||
847 | 853 | ||
848 | send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; | 854 | send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; |
849 | send->s_wr.wr.rdma.remote_addr = remote_addr; | 855 | send->s_wr.wr.rdma.remote_addr = remote_addr; |
@@ -884,10 +890,6 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) | |||
884 | send = ic->i_sends; | 890 | send = ic->i_sends; |
885 | } | 891 | } |
886 | 892 | ||
887 | /* if we finished the message then send completion owns it */ | ||
888 | if (scat == &op->r_sg[op->r_count]) | ||
889 | prev->s_wr.send_flags = IB_SEND_SIGNALED; | ||
890 | |||
891 | if (i < work_alloc) { | 893 | if (i < work_alloc) { |
892 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); | 894 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); |
893 | work_alloc = i; | 895 | work_alloc = i; |