commit     15133f6e67d8d646d0744336b4daa3135452cb0d
author     Andy Grover <andy.grover@oracle.com>   2010-01-12 17:33:38 -0500
committer  Andy Grover <andy.grover@oracle.com>   2010-09-08 21:11:41 -0400
tree       e5675d5a3ab240edc9a66af6b891dd75fa9eabae
parent     a63273d4992603979ddb181b6a8f07082839b39f
RDS: Implement atomic operations

Implement a CMSG-based interface to do FADD (fetch-and-add) and CSWP
(compare-and-swap) ops.
Alter send routines to handle atomic ops.
Add atomic counters to stats.
Add xmit_atomic() to struct rds_transport
Inline rds_ib_send_unmap_rdma into unmap_rm
Signed-off-by: Andy Grover <andy.grover@oracle.com>
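
For orientation, a minimal userspace sketch of driving the new CMSG
interface follows; it is not part of the patch. It assumes an established
PF_RDS socket, a destination sockaddr, and an rdma cookie for the remote
memory obtained via the existing RDS_CMSG_RDMA_MAP path and exchanged out
of band; the helper name and token value are illustrative. A FADD request
is identical except that it fills args.fadd.add and sends
RDS_CMSG_ATOMIC_FADD.

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/rds.h>

static ssize_t rds_cswp(int sock, struct sockaddr_in *dest,
			rds_rdma_cookie_t cookie, uint64_t remote_addr,
			uint64_t *result,	/* must be 8-byte aligned */
			uint64_t expect, uint64_t newval)
{
	struct rds_atomic_args args;
	char ctl[CMSG_SPACE(sizeof(args))];
	struct msghdr msg;
	struct cmsghdr *cmsg;

	memset(&args, 0, sizeof(args));
	args.cookie = cookie;
	args.local_addr = (uint64_t)(unsigned long)result; /* old value lands here */
	args.remote_addr = remote_addr;
	args.cswp.compare = expect;
	args.cswp.swap = newval;
	args.flags = RDS_RDMA_NOTIFY_ME;	/* queue a completion notifier */
	args.user_token = 42;			/* echoed back in that notifier */

	memset(ctl, 0, sizeof(ctl));
	memset(&msg, 0, sizeof(msg));
	msg.msg_name = dest;
	msg.msg_namelen = sizeof(*dest);
	msg.msg_control = ctl;
	msg.msg_controllen = sizeof(ctl);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_RDS;
	cmsg->cmsg_type = RDS_CMSG_ATOMIC_CSWP;
	cmsg->cmsg_len = CMSG_LEN(sizeof(args));
	memcpy(CMSG_DATA(cmsg), &args, sizeof(args));

	return sendmsg(sock, &msg, 0);
}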
-rw-r--r--   include/linux/rds.h |  19
-rw-r--r--   net/rds/ib.c        |   1
-rw-r--r--   net/rds/ib.h        |   1
-rw-r--r--   net/rds/ib_rdma.c   |   4
-rw-r--r--   net/rds/ib_send.c   | 140
-rw-r--r--   net/rds/rdma.c      |  73
-rw-r--r--   net/rds/rds.h       |  33
-rw-r--r--   net/rds/send.c      |  71
-rw-r--r--   net/rds/stats.c     |   2

9 files changed, 321 insertions, 23 deletions
diff --git a/include/linux/rds.h b/include/linux/rds.h
index 7f3971d9fc5c..9239152abf7a 100644
--- a/include/linux/rds.h
+++ b/include/linux/rds.h
@@ -73,6 +73,8 @@
 #define RDS_CMSG_RDMA_MAP		3
 #define RDS_CMSG_RDMA_STATUS		4
 #define RDS_CMSG_CONG_UPDATE		5
+#define RDS_CMSG_ATOMIC_FADD		6
+#define RDS_CMSG_ATOMIC_CSWP		7
 
 #define RDS_INFO_FIRST			10000
 #define RDS_INFO_COUNTERS		10000
@@ -237,6 +239,23 @@ struct rds_rdma_args {
 	u_int64_t user_token;
 };
 
+struct rds_atomic_args {
+	rds_rdma_cookie_t cookie;
+	uint64_t local_addr;
+	uint64_t remote_addr;
+	union {
+		struct {
+			uint64_t compare;
+			uint64_t swap;
+		} cswp;
+		struct {
+			uint64_t add;
+		} fadd;
+	};
+	uint64_t flags;
+	uint64_t user_token;
+};
+
 struct rds_rdma_notify {
 	u_int64_t user_token;
 	int32_t status;
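
As a reading aid (not part of the patch): the union above mirrors the two
verbs-level operations. The remote HCA behaves as if it executed the
following atomically on the u64 at remote_addr, with the returned old
value fetched back into the 8-byte buffer at local_addr:

#include <stdint.h>

/* Illustrative semantics only; the HCA performs these atomically. */
static uint64_t cswp(uint64_t *remote, uint64_t compare, uint64_t swap)
{
	uint64_t old = *remote;

	if (old == compare)
		*remote = swap;
	return old;		/* written to local_addr */
}

static uint64_t fadd(uint64_t *remote, uint64_t add)
{
	uint64_t old = *remote;

	*remote += add;
	return old;		/* written to local_addr */
}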
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 8f2d6dd7700a..f0d29656baff 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -264,6 +264,7 @@ struct rds_transport rds_ib_transport = {
 	.xmit			= rds_ib_xmit,
 	.xmit_cong_map		= NULL,
 	.xmit_rdma		= rds_ib_xmit_rdma,
+	.xmit_atomic		= rds_ib_xmit_atomic,
 	.recv			= rds_ib_recv,
 	.conn_alloc		= rds_ib_conn_alloc,
 	.conn_free		= rds_ib_conn_free,
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 64df4e79b29f..d2fd0aa4fde7 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -336,6 +336,7 @@ void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
 void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
 int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
 			     u32 *adv_credits, int need_posted, int max_posted);
+int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);
 
 /* ib_stats.c */
 DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 0f3b5a2f3fe0..242231f09464 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -298,7 +298,9 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
 	ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
 			(IB_ACCESS_LOCAL_WRITE |
 			 IB_ACCESS_REMOTE_READ |
-			 IB_ACCESS_REMOTE_WRITE),
+			 IB_ACCESS_REMOTE_WRITE|
+			 IB_ACCESS_REMOTE_ATOMIC),
+
 			&pool->fmr_attr);
 	if (IS_ERR(ibmr->fmr)) {
 		err = PTR_ERR(ibmr->fmr);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index f0edfdb2866c..b2bd164434ad 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -62,15 +62,17 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm,
 	rds_rdma_send_complete(rm, notify_status);
 }
 
-static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
-				   struct rds_rdma_op *op)
+static void rds_ib_send_atomic_complete(struct rds_message *rm,
+					int wc_status)
 {
-	if (op->r_mapped) {
-		ib_dma_unmap_sg(ic->i_cm_id->device,
-			op->r_sg, op->r_nents,
-			op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		op->r_mapped = 0;
-	}
+	int notify_status;
+
+	if (wc_status != IB_WC_SUCCESS)
+		notify_status = RDS_RDMA_OTHER_ERROR;
+	else
+		notify_status = RDS_RDMA_SUCCESS;
+
+	rds_atomic_send_complete(rm, notify_status);
 }
 
 static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
@@ -86,7 +88,14 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
 			DMA_TO_DEVICE);
 
 	if (rm->rdma.m_rdma_op.r_active) {
-		rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
+		struct rds_rdma_op *op = &rm->rdma.m_rdma_op;
+
+		if (op->r_mapped) {
+			ib_dma_unmap_sg(ic->i_cm_id->device,
+				op->r_sg, op->r_nents,
+				op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			op->r_mapped = 0;
+		}
 
 		/* If the user asked for a completion notification on this
 		 * message, we can implement three different semantics:
@@ -116,6 +125,24 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
 		rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op.r_bytes);
 	}
 
+	if (rm->atomic.op_active) {
+		struct rm_atomic_op *op = &rm->atomic;
+
+		/* unmap atomic recvbuf */
+		if (op->op_mapped) {
+			ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
+					DMA_FROM_DEVICE);
+			op->op_mapped = 0;
+		}
+
+		rds_ib_send_atomic_complete(rm, wc_status);
+
+		if (rm->atomic.op_type == RDS_ATOMIC_TYPE_CSWP)
+			rds_stats_inc(s_atomic_cswp);
+		else
+			rds_stats_inc(s_atomic_fadd);
+	}
+
 	/* If anyone waited for this message to get flushed out, wake
 	 * them up now */
 	rds_message_unmapped(rm);
@@ -158,12 +185,9 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
 	u32 i;
 
 	for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
-		if (send->s_wr.opcode == 0xdead)
+		if (!send->s_rm || send->s_wr.opcode == 0xdead)
 			continue;
-		if (send->s_rm)
-			rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
-		if (send->s_op)
-			rds_ib_send_unmap_rdma(ic, send->s_op);
+		rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
 	}
 }
 
@@ -218,6 +242,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 				break;
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_READ:
+			case IB_WR_ATOMIC_FETCH_AND_ADD:
+			case IB_WR_ATOMIC_CMP_AND_SWP:
 				/* Nothing to be done - the SG list will be unmapped
 				 * when the SEND completes. */
 				break;
@@ -243,8 +269,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 
 			rm = rds_send_get_message(conn, send->s_op);
 			if (rm) {
-				if (rm->rdma.m_rdma_op.r_active)
-					rds_ib_send_unmap_rdma(ic, &rm->rdma.m_rdma_op);
+				rds_ib_send_unmap_rm(ic, send, wc.status);
 				rds_ib_send_rdma_complete(rm, wc.status);
 				rds_message_put(rm);
 			}
@@ -736,6 +761,89 @@ out:
 	return ret;
 }
 
+/*
+ * Issue atomic operation.
+ * A simplified version of the rdma case, we always map 1 SG, and
+ * only 8 bytes, for the return value from the atomic operation.
+ */
+int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
+{
+	struct rds_ib_connection *ic = conn->c_transport_data;
+	struct rds_ib_send_work *send = NULL;
+	struct ib_send_wr *failed_wr;
+	struct rds_ib_device *rds_ibdev;
+	u32 pos;
+	u32 work_alloc;
+	int ret;
+
+	rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
+
+	work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
+	if (work_alloc != 1) {
+		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
+		rds_ib_stats_inc(s_ib_tx_ring_full);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* address of send request in ring */
+	send = &ic->i_sends[pos];
+	send->s_queued = jiffies;
+
+	if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
+		send->s_wr.opcode = IB_WR_ATOMIC_CMP_AND_SWP;
+		send->s_wr.wr.atomic.compare_add = op->op_compare;
+		send->s_wr.wr.atomic.swap = op->op_swap_add;
+	} else { /* FADD */
+		send->s_wr.opcode = IB_WR_ATOMIC_FETCH_AND_ADD;
+		send->s_wr.wr.atomic.compare_add = op->op_swap_add;
+		send->s_wr.wr.atomic.swap = 0;
+	}
+	send->s_wr.send_flags = IB_SEND_SIGNALED;
+	send->s_wr.num_sge = 1;
+	send->s_wr.next = NULL;
+	send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
+	send->s_wr.wr.atomic.rkey = op->op_rkey;
+
+	/* map 8 byte retval buffer to the device */
+	ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
+	rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
+	if (ret != 1) {
+		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
+		rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
+		ret = -ENOMEM; /* XXX ? */
+		goto out;
+	}
+
+	/* Convert our struct scatterlist to struct ib_sge */
+	send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
+	send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
+	send->s_sge[0].lkey = ic->i_mr->lkey;
+
+	rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
+		 send->s_sge[0].addr, send->s_sge[0].length);
+
+	failed_wr = &send->s_wr;
+	ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
+	rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
+		 send, &send->s_wr, ret, failed_wr);
+	BUG_ON(failed_wr != &send->s_wr);
+	if (ret) {
+		printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
+		       "returned %d\n", &conn->c_faddr, ret);
+		rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
+		goto out;
+	}
+
+	if (unlikely(failed_wr != &send->s_wr)) {
+		printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
+		BUG_ON(failed_wr != &send->s_wr);
+	}
+
+out:
+	return ret;
+}
+
 int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 {
 	struct rds_ib_connection *ic = conn->c_transport_data;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 4fda33045598..a7019df38c70 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -719,3 +719,76 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
 
 	return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr);
 }
+
+/*
+ * Fill in rds_message for an atomic request.
+ */
+int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
+		    struct cmsghdr *cmsg)
+{
+	struct page *page = NULL;
+	struct rds_atomic_args *args;
+	int ret = 0;
+
+	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
+	    || rm->atomic.op_active)
+		return -EINVAL;
+
+	args = CMSG_DATA(cmsg);
+
+	if (cmsg->cmsg_type == RDS_CMSG_ATOMIC_CSWP) {
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
+		rm->atomic.op_swap_add = args->cswp.swap;
+		rm->atomic.op_compare = args->cswp.compare;
+	} else {
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
+		rm->atomic.op_swap_add = args->fadd.add;
+	}
+
+	rm->m_rdma_cookie = args->cookie;
+	rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+	rm->atomic.op_recverr = rs->rs_recverr;
+	rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+
+	/* verify 8 byte-aligned */
+	if (args->local_addr & 0x7) {
+		ret = -EFAULT;
+		goto err;
+	}
+
+	ret = rds_pin_pages(args->local_addr, 1, &page, 1);
+	if (ret != 1)
+		goto err;
+	ret = 0;
+
+	sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
+
+	if (rm->atomic.op_notify || rm->atomic.op_recverr) {
+		/* We allocate an uninitialized notifier here, because
+		 * we don't want to do that in the completion handler. We
+		 * would have to use GFP_ATOMIC there, and don't want to deal
+		 * with failed allocations.
+		 */
+		rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
+		if (!rm->atomic.op_notifier) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		rm->atomic.op_notifier->n_user_token = args->user_token;
+		rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
+	}
+
+	rm->atomic.op_rkey = rds_rdma_cookie_key(rm->m_rdma_cookie);
+	rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
+
+	rm->atomic.op_active = 1;
+
+	return ret;
+err:
+	if (page)
+		put_page(page);
+	kfree(rm->atomic.op_notifier);
+
+	return ret;
+}
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0bb4957e0cfc..830e2bbb3332 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -97,6 +97,7 @@ struct rds_connection {
 	unsigned int		c_xmit_hdr_off;
 	unsigned int		c_xmit_data_off;
 	unsigned int		c_xmit_rdma_sent;
+	unsigned int		c_xmit_atomic_sent;
 
 	spinlock_t		c_lock;		/* protect msg queues */
 	u64			c_next_tx_seq;
@@ -260,6 +261,10 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
 	return cookie >> 32;
 }
 
+/* atomic operation types */
+#define RDS_ATOMIC_TYPE_CSWP		0
+#define RDS_ATOMIC_TYPE_FADD		1
+
 /*
  * m_sock_item and m_conn_item are on lists that are serialized under
  * conn->c_lock.  m_sock_item has additional meaning in that once it is empty
@@ -315,11 +320,27 @@ struct rds_message {
 	struct rds_sock		*m_rs;
 	rds_rdma_cookie_t	m_rdma_cookie;
 	struct {
-		struct {
+		struct rm_atomic_op {
+			int			op_type;
+			uint64_t		op_swap_add;
+			uint64_t		op_compare;
+
+			u32			op_rkey;
+			u64			op_remote_addr;
+			unsigned int		op_notify:1;
+			unsigned int		op_recverr:1;
+			unsigned int		op_mapped:1;
+			unsigned int		op_active:1;
+			struct rds_notifier	*op_notifier;
+			struct scatterlist	*op_sg;
+
+			struct rds_mr		*op_rdma_mr;
+		} atomic;
+		struct rm_rdma_op {
 			struct rds_rdma_op	m_rdma_op;
 			struct rds_mr		*m_rdma_mr;
 		} rdma;
-		struct {
+		struct rm_data_op {
 			unsigned int		m_nents;
 			unsigned int		m_count;
 			struct scatterlist	*m_sg;
@@ -397,6 +418,7 @@ struct rds_transport {
 	int (*xmit_cong_map)(struct rds_connection *conn,
 			     struct rds_cong_map *map, unsigned long offset);
 	int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
+	int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
 	int (*recv)(struct rds_connection *conn);
 	int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
 				size_t size);
@@ -546,6 +568,8 @@ struct rds_statistics {
 	uint64_t	s_cong_update_received;
 	uint64_t	s_cong_send_error;
 	uint64_t	s_cong_send_blocked;
+	uint64_t	s_atomic_cswp;
+	uint64_t	s_atomic_fadd;
 };
 
 /* af_rds.c */
@@ -722,7 +746,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
 int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
 		      struct cmsghdr *cmsg);
 void rds_rdma_free_op(struct rds_rdma_op *ro);
-void rds_rdma_send_complete(struct rds_message *rm, int);
+void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
+void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
+int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
+		    struct cmsghdr *cmsg);
 
 extern void __rds_put_mr_final(struct rds_mr *mr);
 static inline void rds_mr_put(struct rds_mr *mr)
diff --git a/net/rds/send.c b/net/rds/send.c
index b751a8e77c41..f3f4e79274bf 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -73,6 +73,7 @@ void rds_send_reset(struct rds_connection *conn)
 	conn->c_xmit_hdr_off = 0;
 	conn->c_xmit_data_off = 0;
 	conn->c_xmit_rdma_sent = 0;
+	conn->c_xmit_atomic_sent = 0;
 
 	conn->c_map_queued = 0;
 
@@ -171,6 +172,7 @@ int rds_send_xmit(struct rds_connection *conn)
 			conn->c_xmit_hdr_off = 0;
 			conn->c_xmit_data_off = 0;
 			conn->c_xmit_rdma_sent = 0;
+			conn->c_xmit_atomic_sent = 0;
 
 			/* Release the reference to the previous message. */
 			rds_message_put(rm);
@@ -262,6 +264,17 @@ int rds_send_xmit(struct rds_connection *conn)
 			conn->c_xmit_rm = rm;
 		}
 
+
+		if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) {
+			ret = conn->c_trans->xmit_atomic(conn, &rm->atomic);
+			if (ret)
+				break;
+			conn->c_xmit_atomic_sent = 1;
+			/* The transport owns the mapped memory for now.
+			 * You can't unmap it while it's on the send queue */
+			set_bit(RDS_MSG_MAPPED, &rm->m_flags);
+		}
+
 		/*
 		 * Try and send an rdma message.  Let's see if we can
 		 * keep this simple and require that the transport either
@@ -443,6 +456,41 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
 EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
 
 /*
+ * Just like above, except looks at atomic op
+ */
+void rds_atomic_send_complete(struct rds_message *rm, int status)
+{
+	struct rds_sock *rs = NULL;
+	struct rm_atomic_op *ao;
+	struct rds_notifier *notifier;
+
+	spin_lock(&rm->m_rs_lock);
+
+	ao = &rm->atomic;
+	if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
+	    && ao->op_active && ao->op_notify && ao->op_notifier) {
+		notifier = ao->op_notifier;
+		rs = rm->m_rs;
+		sock_hold(rds_rs_to_sk(rs));
+
+		notifier->n_status = status;
+		spin_lock(&rs->rs_lock);
+		list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
+		spin_unlock(&rs->rs_lock);
+
+		ao->op_notifier = NULL;
+	}
+
+	spin_unlock(&rm->m_rs_lock);
+
+	if (rs) {
+		rds_wake_sk_sleep(rs);
+		sock_put(rds_rs_to_sk(rs));
+	}
+}
+EXPORT_SYMBOL_GPL(rds_atomic_send_complete);
+
+/*
  * This is the same as rds_rdma_send_complete except we
  * don't do any locking - we have all the ingredients (message,
  * socket, socket lock) and can just move the notifier.
@@ -788,6 +836,11 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 			/* these are valid but do no add any size */
 			break;
 
+		case RDS_CMSG_ATOMIC_CSWP:
+		case RDS_CMSG_ATOMIC_FADD:
+			size += sizeof(struct scatterlist);
+			break;
+
 		default:
 			return -EINVAL;
 		}
@@ -813,7 +866,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
 			continue;
 
 		/* As a side effect, RDMA_DEST and RDMA_MAP will set
-		 * rm->m_rdma_cookie and rm->m_rdma_mr.
+		 * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr.
 		 */
 		switch (cmsg->cmsg_type) {
 		case RDS_CMSG_RDMA_ARGS:
@@ -829,6 +882,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
 			if (!ret)
 				*allocated_mr = 1;
 			break;
+		case RDS_CMSG_ATOMIC_CSWP:
+		case RDS_CMSG_ATOMIC_FADD:
+			ret = rds_cmsg_atomic(rs, rm, cmsg);
+			break;
 
 		default:
 			return -EINVAL;
@@ -926,10 +983,18 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		goto out;
 
 	if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op.r_active) &&
 	    !conn->c_trans->xmit_rdma) {
 		if (printk_ratelimit())
 			printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
 			       &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma);
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) {
+		if (printk_ratelimit())
+			printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n",
+			       &rm->atomic, conn->c_trans->xmit_atomic);
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
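
The notifiers queued by rds_atomic_send_complete() above reuse the same
rs_notify_queue that RDMA completions already use. Assuming that existing
delivery path is unchanged, userspace would collect them as
RDS_CMSG_RDMA_STATUS control messages carrying a struct rds_rdma_notify
on a later recvmsg(), roughly like this (illustrative sketch):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/rds.h>

static void drain_atomic_notifications(int sock)
{
	char data[128];
	char ctl[CMSG_SPACE(sizeof(struct rds_rdma_notify))];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = ctl,
		.msg_controllen = sizeof(ctl),
	};
	struct cmsghdr *cmsg;
	struct rds_rdma_notify notify;

	if (recvmsg(sock, &msg, MSG_DONTWAIT) < 0)
		return;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level != SOL_RDS ||
		    cmsg->cmsg_type != RDS_CMSG_RDMA_STATUS)
			continue;
		memcpy(&notify, CMSG_DATA(cmsg), sizeof(notify));
		printf("op token %llu completed, status %d\n",
		       (unsigned long long)notify.user_token,
		       (int)notify.status);
	}
}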
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 7598eb07cfb1..c66d95d9c262 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -75,6 +75,8 @@ static const char *const rds_stat_names[] = {
 	"cong_update_received",
 	"cong_send_error",
 	"cong_send_blocked",
+	"s_atomic_cswp",
+	"s_atomic_fadd",
 };
 
 void rds_stats_info_copy(struct rds_info_iterator *iter,