author    Long Li <longli@microsoft.com>  2017-11-22 19:38:42 -0500
committer Steve French <smfrench@gmail.com>  2018-01-24 20:49:06 -0500
commit    d649e1bba3caee93bb000ff5ac6a65dfc115f8c2 (patch)
tree      57f250f91444a9f74fd66508290feeac98779b2a /fs/cifs/smbdirect.c
parent    2fef137a2e6a2e5a7984f991e6b9546ddd93c6f2 (diff)
CIFS: SMBD: Implement function to send data via RDMA send
The transport doesn't maintain send buffers or a send queue for
transferring payload via RDMA send. There is no data copy in the
transport on send.

Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
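For orientation, a minimal caller-side sketch (the buffer names are
hypothetical and not part of this patch). Note that smbd_send() expects
rq_iov[0] to still carry the 4-byte RFC1002 length prefix used on the
TCP path; the transport strips it before posting:

	struct kvec iov;
	struct smb_rqst rqst = { .rq_iov = &iov, .rq_nvec = 1 };

	/* hypothetical buffer holding an SMB2 packet preceded by its
	 * 4-byte RFC1002 length prefix, as prepared for TCP */
	iov.iov_base = packet_with_rfc1002_prefix;
	iov.iov_len = packet_len;

	rc = smbd_send(info, &rqst);	/* 0 on success */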
Diffstat (limited to 'fs/cifs/smbdirect.c')
-rw-r--r--  fs/cifs/smbdirect.c  246
1 file changed, 246 insertions(+), 0 deletions(-)
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index d8c5fea3707c..3351873db93f 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -41,6 +41,12 @@ static int smbd_post_recv(
 		struct smbd_response *response);
 
 static int smbd_post_send_empty(struct smbd_connection *info);
+static int smbd_post_send_data(
+		struct smbd_connection *info,
+		struct kvec *iov, int n_vec, int remaining_data_length);
+static int smbd_post_send_page(struct smbd_connection *info,
+		struct page *page, unsigned long offset,
+		size_t size, int remaining_data_length);
 
 /* SMBD version number */
 #define SMBD_V1	0x0100
@@ -177,6 +183,10 @@ static void smbd_destroy_rdma_work(struct work_struct *work)
 	log_rdma_event(INFO, "cancelling send immediate work\n");
 	cancel_delayed_work_sync(&info->send_immediate_work);
 
+	log_rdma_event(INFO, "wait for all send to finish\n");
+	wait_event(info->wait_smbd_send_pending,
+		info->smbd_send_pending == 0);
+
 	log_rdma_event(INFO, "wait for all recv to finish\n");
 	wake_up_interruptible(&info->wait_reassembly_queue);
 	wait_event(info->wait_smbd_recv_pending,
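The wait added above pairs with reference counting in smbd_send().
Condensed to its essentials, the idiom is (a restatement of code found
elsewhere in this patch, not new API):

	/* sender side, in smbd_send() */
	info->smbd_send_pending++;
	/* ... post all the sends ... */
	info->smbd_send_pending--;
	wake_up(&info->wait_smbd_send_pending);

	/* teardown side sleeps until every sender has left smbd_send() */
	wait_event(info->wait_smbd_send_pending,
		info->smbd_send_pending == 0);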
@@ -1078,6 +1088,24 @@ dma_mapping_failure:
 }
 
 /*
+ * Send a page
+ * page: the page to send
+ * offset: offset in the page to send
+ * size: length in the page to send
+ * remaining_data_length: remaining data to send in this payload
+ */
+static int smbd_post_send_page(struct smbd_connection *info, struct page *page,
+		unsigned long offset, size_t size, int remaining_data_length)
+{
+	struct scatterlist sgl;
+
+	sg_init_table(&sgl, 1);
+	sg_set_page(&sgl, page, size, offset);
+
+	return smbd_post_send_sgl(info, &sgl, size, remaining_data_length);
+}
+
+/*
  * Send an empty message
  * Empty message is used to extend credits to peer for keep alive
  * while there is no upper layer payload to send at the time
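Because sg_set_page() only records the page, offset and length in the
scatterlist, the adapter later DMAs straight out of the caller's page;
this is where the "no data copy on send" property comes from. A minimal
usage sketch, assuming a fragment that fits in one send (smbd_send()
below splits larger pages into multiple calls):

	/* send the first payload page in place; nothing is memcpy'd */
	rc = smbd_post_send_page(info, rqst->rq_pages[0], 0 /* offset */,
				 rqst->rq_pagesz, remaining_data_length);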
@@ -1089,6 +1117,35 @@ static int smbd_post_send_empty(struct smbd_connection *info)
 }
 
 /*
+ * Send a data buffer
+ * iov: the iov array describing the data buffers
+ * n_vec: number of entries in the iov array
+ * remaining_data_length: remaining data to send following this packet
+ *	in segmented SMBD packet
+ */
+static int smbd_post_send_data(
+	struct smbd_connection *info, struct kvec *iov, int n_vec,
+	int remaining_data_length)
+{
+	int i;
+	u32 data_length = 0;
+	struct scatterlist sgl[SMBDIRECT_MAX_SGE];
+
+	if (n_vec > SMBDIRECT_MAX_SGE) {
+		cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
+		return -ENOMEM;
+	}
+
+	sg_init_table(sgl, n_vec);
+	for (i = 0; i < n_vec; i++) {
+		data_length += iov[i].iov_len;
+		sg_set_buf(&sgl[i], iov[i].iov_base, iov[i].iov_len);
+	}
+
+	return smbd_post_send_sgl(info, sgl, data_length, remaining_data_length);
+}
+
+/*
  * Post a receive request to the transport
  * The remote peer can only send data when a receive request is posted
  * The interaction is controlled by send/receive credit system
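Likewise, the kvec buffers are described to the scatterlist in place. A
minimal call sketch (hdr/body are hypothetical buffers) sending two
fragments as one SMBDirect data packet, with remaining_data_length set
to 0 because nothing follows:

	struct kvec iov[2] = {
		{ .iov_base = hdr,  .iov_len = hdr_len },
		{ .iov_base = body, .iov_len = body_len },
	};

	rc = smbd_post_send_data(info, iov, 2, 0 /* remaining_data_length */);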
@@ -1652,6 +1709,9 @@ struct smbd_connection *_smbd_get_connection(
 	queue_delayed_work(info->workqueue, &info->idle_timer_work,
 		info->keep_alive_interval*HZ);
 
+	init_waitqueue_head(&info->wait_smbd_send_pending);
+	info->smbd_send_pending = 0;
+
 	init_waitqueue_head(&info->wait_smbd_recv_pending);
 	info->smbd_recv_pending = 0;
 
@@ -1943,3 +2003,189 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 	msg->msg_iter.count = 0;
 	return rc;
 }
+
+/*
+ * Send data to transport
+ * Each rqst is transported as a SMBDirect payload
+ * rqst: the data to write
+ * return value: 0 on success, otherwise error code
+ */
+int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
+{
+	struct kvec vec;
+	int nvecs;
+	int size;
+	int buflen = 0, remaining_data_length;
+	int start, i, j;
+	int max_iov_size =
+		info->max_send_size - sizeof(struct smbd_data_transfer);
+	struct kvec iov[SMBDIRECT_MAX_SGE];
+	int rc;
+
+	info->smbd_send_pending++;
+	if (info->transport_status != SMBD_CONNECTED) {
+		rc = -ENODEV;
+		goto done;
+	}
+
+	/*
+	 * This usually means a configuration error
+	 * We use RDMA read/write for packet size > rdma_readwrite_threshold
+	 * as long as it's properly configured we should never get into this
+	 * situation
+	 */
+	if (rqst->rq_nvec + rqst->rq_npages > SMBDIRECT_MAX_SGE) {
+		log_write(ERR, "maximum send segment %x exceeding %x\n",
+			 rqst->rq_nvec + rqst->rq_npages, SMBDIRECT_MAX_SGE);
+		rc = -EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Remove the RFC1002 length defined in MS-SMB2 section 2.1
+	 * It is used only for TCP transport
+	 * In future we may want to add a transport layer under protocol
+	 * layer so this will only be issued to TCP transport
+	 */
+	iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4;
+	iov[0].iov_len = rqst->rq_iov[0].iov_len - 4;
+	buflen += iov[0].iov_len;
+
+	/* total up iov array first */
+	for (i = 1; i < rqst->rq_nvec; i++) {
+		iov[i].iov_base = rqst->rq_iov[i].iov_base;
+		iov[i].iov_len = rqst->rq_iov[i].iov_len;
+		buflen += iov[i].iov_len;
+	}
+
+	/* add in the page array if there is one */
+	if (rqst->rq_npages) {
+		buflen += rqst->rq_pagesz * (rqst->rq_npages - 1);
+		buflen += rqst->rq_tailsz;
+	}
+
+	if (buflen + sizeof(struct smbd_data_transfer) >
+		info->max_fragmented_send_size) {
+		log_write(ERR, "payload size %d > max size %d\n",
+			buflen, info->max_fragmented_send_size);
+		rc = -EINVAL;
+		goto done;
+	}
+
+	remaining_data_length = buflen;
+
+	log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
+		"rq_tailsz=%d buflen=%d\n",
+		rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
+		rqst->rq_tailsz, buflen);
+
+	start = i = iov[0].iov_len ? 0 : 1;
+	buflen = 0;
+	while (true) {
+		buflen += iov[i].iov_len;
+		if (buflen > max_iov_size) {
+			if (i > start) {
+				remaining_data_length -=
+					(buflen-iov[i].iov_len);
+				log_write(INFO, "sending iov[] from start=%d "
+					"i=%d nvecs=%d "
+					"remaining_data_length=%d\n",
+					start, i, i-start,
+					remaining_data_length);
+				rc = smbd_post_send_data(
+					info, &iov[start], i-start,
+					remaining_data_length);
+				if (rc)
+					goto done;
+			} else {
+				/* iov[start] is too big, break it */
+				nvecs = (buflen+max_iov_size-1)/max_iov_size;
+				log_write(INFO, "iov[%d] iov_base=%p buflen=%d"
+					" break to %d vectors\n",
+					start, iov[start].iov_base,
+					buflen, nvecs);
+				for (j = 0; j < nvecs; j++) {
+					vec.iov_base =
+						(char *)iov[start].iov_base +
+						j*max_iov_size;
+					vec.iov_len = max_iov_size;
+					if (j == nvecs-1)
+						vec.iov_len =
+							buflen -
+							max_iov_size*(nvecs-1);
+					remaining_data_length -= vec.iov_len;
+					log_write(INFO,
+						"sending vec j=%d iov_base=%p"
+						" iov_len=%zu "
+						"remaining_data_length=%d\n",
+						j, vec.iov_base, vec.iov_len,
+						remaining_data_length);
+					rc = smbd_post_send_data(
+						info, &vec, 1,
+						remaining_data_length);
+					if (rc)
+						goto done;
+				}
+				i++;
+			}
+			start = i;
+			buflen = 0;
+		} else {
+			i++;
+			if (i == rqst->rq_nvec) {
+				/* send out all remaining vecs */
+				remaining_data_length -= buflen;
+				log_write(INFO,
+					"sending iov[] from start=%d i=%d "
+					"nvecs=%d remaining_data_length=%d\n",
+					start, i, i-start,
+					remaining_data_length);
+				rc = smbd_post_send_data(info, &iov[start],
+					i-start, remaining_data_length);
+				if (rc)
+					goto done;
+				break;
+			}
+		}
+		log_write(INFO, "looping i=%d buflen=%d\n", i, buflen);
+	}
+
+	/* now sending pages if there are any */
+	for (i = 0; i < rqst->rq_npages; i++) {
+		buflen = (i == rqst->rq_npages-1) ?
+			rqst->rq_tailsz : rqst->rq_pagesz;
+		nvecs = (buflen + max_iov_size - 1) / max_iov_size;
+		log_write(INFO, "sending pages buflen=%d nvecs=%d\n",
+			buflen, nvecs);
+		for (j = 0; j < nvecs; j++) {
+			size = max_iov_size;
+			if (j == nvecs-1)
+				size = buflen - j*max_iov_size;
+			remaining_data_length -= size;
+			log_write(INFO, "sending pages i=%d offset=%d size=%d"
+				" remaining_data_length=%d\n",
+				i, j*max_iov_size, size, remaining_data_length);
+			rc = smbd_post_send_page(
+				info, rqst->rq_pages[i], j*max_iov_size,
+				size, remaining_data_length);
+			if (rc)
+				goto done;
+		}
+	}
+
+done:
+	/*
+	 * As an optimization, we don't wait for individual I/O to finish
+	 * before sending the next one.
+	 * Send them all and wait for pending send count to get to 0
+	 * that means all the I/Os have gone out and we are good to return
+	 */
+
+	wait_event(info->wait_send_payload_pending,
+		atomic_read(&info->send_payload_pending) == 0);
+
+	info->smbd_send_pending--;
+	wake_up(&info->wait_smbd_send_pending);
+
+	return rc;
+}