author     Andy Grover <andy.grover@oracle.com>    2010-01-12 17:33:38 -0500
committer  Andy Grover <andy.grover@oracle.com>    2010-09-08 21:11:41 -0400
commit     15133f6e67d8d646d0744336b4daa3135452cb0d
tree       e5675d5a3ab240edc9a66af6b891dd75fa9eabae  /net/rds/ib_send.c
parent     a63273d4992603979ddb181b6a8f07082839b39f
RDS: Implement atomic operations
Implement a CMSG-based interface to do FADD and CSWP ops.
Alter send routines to handle atomic ops.
Add atomic counters to stats.
Add xmit_atomic() to struct rds_transport
Inline rds_ib_send_unmap_rdma into unmap_rm
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Diffstat (limited to 'net/rds/ib_send.c')
 net/rds/ib_send.c | 140
 1 file changed, 124 insertions(+), 16 deletions(-)
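The `rds_ib_xmit_atomic()` function added below reuses the same `wr.atomic` fields of the IB work request for both operation types: `compare_add` carries the compare value for CSWP but the addend for FADD, and the single mapped 8-byte SGE receives the old value of the remote word. As a purely illustrative sketch (not part of this patch), the semantics the remote HCA applies atomically to the 8-byte word at `wr.atomic.remote_addr` look roughly like this:

```c
#include <stdint.h>

/*
 * Illustrative sketch only: what the remote HCA does atomically to the
 * 8-byte target word. The previous value is returned into the single
 * 8-byte buffer that rds_ib_xmit_atomic() maps and that
 * rds_ib_send_unmap_rm() later unmaps.
 */

/* IB_WR_ATOMIC_CMP_AND_SWP: compare_add = op_compare, swap = op_swap_add */
static uint64_t atomic_cswp(uint64_t *target, uint64_t compare_add, uint64_t swap)
{
	uint64_t old = *target;

	if (old == compare_add)
		*target = swap;
	return old;	/* delivered into the sender's return buffer */
}

/* IB_WR_ATOMIC_FETCH_AND_ADD: compare_add = op_swap_add, swap is ignored */
static uint64_t atomic_fadd(uint64_t *target, uint64_t compare_add)
{
	uint64_t old = *target;

	*target += compare_add;
	return old;
}
```

This field reuse is why the FADD branch in the new xmit path stores the addend in `compare_add` and zeroes `swap`.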
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index f0edfdb2866c..b2bd164434ad 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -62,15 +62,17 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm,
 	rds_rdma_send_complete(rm, notify_status);
 }
 
-static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
-				   struct rds_rdma_op *op)
+static void rds_ib_send_atomic_complete(struct rds_message *rm,
+					int wc_status)
 {
-	if (op->r_mapped) {
-		ib_dma_unmap_sg(ic->i_cm_id->device,
-				op->r_sg, op->r_nents,
-				op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		op->r_mapped = 0;
-	}
+	int notify_status;
+
+	if (wc_status != IB_WC_SUCCESS)
+		notify_status = RDS_RDMA_OTHER_ERROR;
+	else
+		notify_status = RDS_RDMA_SUCCESS;
+
+	rds_atomic_send_complete(rm, notify_status);
 }
 
 static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
@@ -86,7 +88,14 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
 			DMA_TO_DEVICE);
 
 	if (rm->rdma.m_rdma_op.r_active) {
-		rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
+		struct rds_rdma_op *op = &rm->rdma.m_rdma_op;
+
+		if (op->r_mapped) {
+			ib_dma_unmap_sg(ic->i_cm_id->device,
+					op->r_sg, op->r_nents,
+					op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			op->r_mapped = 0;
+		}
 
 		/* If the user asked for a completion notification on this
 		 * message, we can implement three different semantics:
@@ -116,6 +125,24 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
 			rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 	}
 
+	if (rm->atomic.op_active) {
+		struct rm_atomic_op *op = &rm->atomic;
+
+		/* unmap atomic recvbuf */
+		if (op->op_mapped) {
+			ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
+					DMA_FROM_DEVICE);
+			op->op_mapped = 0;
+		}
+
+		rds_ib_send_atomic_complete(rm, wc_status);
+
+		if (rm->atomic.op_type == RDS_ATOMIC_TYPE_CSWP)
+			rds_stats_inc(s_atomic_cswp);
+		else
+			rds_stats_inc(s_atomic_fadd);
+	}
+
 	/* If anyone waited for this message to get flushed out, wake
 	 * them up now */
 	rds_message_unmapped(rm);
@@ -158,12 +185,9 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
 	u32 i;
 
 	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
-		if (send->s_wr.opcode == 0xdead)
+		if (!send->s_rm || send->s_wr.opcode == 0xdead)
 			continue;
-		if (send->s_rm)
-			rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
-		if (send->s_op)
-			rds_ib_send_unmap_rdma(ic, send->s_op);
+		rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
 	}
 }
 
@@ -218,6 +242,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 				break;
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_READ:
+			case IB_WR_ATOMIC_FETCH_AND_ADD:
+			case IB_WR_ATOMIC_CMP_AND_SWP:
 				/* Nothing to be done - the SG list will be unmapped
 				 * when the SEND completes. */
 				break;
@@ -243,8 +269,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 
 			rm = rds_send_get_message(conn, send->s_op);
 			if (rm) {
-				if (rm->rdma.m_rdma_op.r_active)
-					rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
+				rds_ib_send_unmap_rm(ic, send, wc.status);
 				rds_ib_send_rdma_complete(rm, wc.status);
 				rds_message_put(rm);
 			}
@@ -736,6 +761,89 @@ out:
 	return ret;
 }
 
+/*
+ * Issue atomic operation.
+ * A simplified version of the rdma case, we always map 1 SG, and
+ * only 8 bytes, for the return value from the atomic operation.
+ */
+int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
+{
+	struct rds_ib_connection *ic = conn->c_transport_data;
+	struct rds_ib_send_work *send = NULL;
+	struct ib_send_wr *failed_wr;
+	struct rds_ib_device *rds_ibdev;
+	u32 pos;
+	u32 work_alloc;
+	int ret;
+
+	rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
+
+	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
+	if (work_alloc != 1) {
+		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
+		rds_ib_stats_inc(s_ib_tx_ring_full);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* address of send request in ring */
+	send = &ic->i_sends[pos];
+	send->s_queued = jiffies;
+
+	if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
+		send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP;
+		send->s_wr.wr.atomic.compare_add = op->op_compare;
+		send->s_wr.wr.atomic.swap = op->op_swap_add;
+	} else { /* FADD */
+		send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD;
+		send->s_wr.wr.atomic.compare_add = op->op_swap_add;
+		send->s_wr.wr.atomic.swap = 0;
+	}
+	send->s_wr.send_flags = IB_SEND_SIGNALED;
+	send->s_wr.num_sge = 1;
+	send->s_wr.next = NULL;
+	send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
+	send->s_wr.wr.atomic.rkey = op->op_rkey;
+
+	/* map 8 byte retval buffer to the device */
+	ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
+	rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
+	if (ret != 1) {
+		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
+		rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
+		ret = -ENOMEM; /* XXX ? */
+		goto out;
+	}
+
+	/* Convert our struct scatterlist to struct ib_sge */
+	send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
+	send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
+	send->s_sge[0].lkey = ic->i_mr->lkey;
+
+	rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
+		 send->s_sge[0].addr, send->s_sge[0].length);
+
+	failed_wr = &send->s_wr;
+	ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
+	rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
+		 send, &send->s_wr, ret, failed_wr);
+	BUG_ON(failed_wr != &send->s_wr);
+	if (ret) {
+		printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
+		       "returned %d\n", &conn->c_faddr, ret);
+		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
+		goto out;
+	}
+
+	if (unlikely(failed_wr != &send->s_wr)) {
+		printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
+		BUG_ON(failed_wr != &send->s_wr);
+	}
+
+out:
+	return ret;
+}
+
 int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 {
 	struct rds_ib_connection *ic = conn->c_transport_data;