diff options
author | Tom Tucker <tom@opengridcomputing.com> | 2007-12-12 17:13:27 -0500 |
---|---|---|
committer | J. Bruce Fields <bfields@citi.umich.edu> | 2008-02-01 16:42:14 -0500 |
commit | ef1eac0a3fa86b06aa2d87021f157d13abc1903f (patch) | |
tree | a91fab9fc27b3318747fa70e8b2021765ef801df | |
parent | c06b540a54ad01d2fda8cfb5d8823b9b3d8d1cb2 (diff) |
rdma: ONCRPC RDMA protocol marshalling
This logic parses the ONCRDMA protocol headers that
precede the actual RPC header. It is placed in a separate
file to keep all protocol aware code in a single place.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_marshal.c | 412 |
1 files changed, 412 insertions, 0 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c new file mode 100644 index 000000000000..9530ef2d40dc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c | |||
@@ -0,0 +1,412 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #include <linux/sunrpc/xdr.h> | ||
43 | #include <linux/sunrpc/debug.h> | ||
44 | #include <asm/unaligned.h> | ||
45 | #include <linux/sunrpc/rpc_rdma.h> | ||
46 | #include <linux/sunrpc/svc_rdma.h> | ||
47 | |||
48 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
49 | |||
50 | /* | ||
51 | * Decodes a read chunk list. The expected format is as follows: | ||
52 | * descrim : xdr_one | ||
53 | * position : u32 offset into XDR stream | ||
54 | * handle : u32 RKEY | ||
55 | * . . . | ||
56 | * end-of-list: xdr_zero | ||
57 | */ | ||
58 | static u32 *decode_read_list(u32 *va, u32 *vaend) | ||
59 | { | ||
60 | struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; | ||
61 | |||
62 | while (ch->rc_discrim != xdr_zero) { | ||
63 | u64 ch_offset; | ||
64 | |||
65 | if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > | ||
66 | (unsigned long)vaend) { | ||
67 | dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
71 | ch->rc_discrim = ntohl(ch->rc_discrim); | ||
72 | ch->rc_position = ntohl(ch->rc_position); | ||
73 | ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle); | ||
74 | ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length); | ||
75 | va = (u32 *)&ch->rc_target.rs_offset; | ||
76 | xdr_decode_hyper(va, &ch_offset); | ||
77 | put_unaligned(ch_offset, (u64 *)va); | ||
78 | ch++; | ||
79 | } | ||
80 | return (u32 *)&ch->rc_position; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Determine number of chunks and total bytes in chunk list. The chunk | ||
85 | * list has already been verified to fit within the RPCRDMA header. | ||
86 | */ | ||
87 | void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, | ||
88 | int *ch_count, int *byte_count) | ||
89 | { | ||
90 | /* compute the number of bytes represented by read chunks */ | ||
91 | *byte_count = 0; | ||
92 | *ch_count = 0; | ||
93 | for (; ch->rc_discrim != 0; ch++) { | ||
94 | *byte_count = *byte_count + ch->rc_target.rs_length; | ||
95 | *ch_count = *ch_count + 1; | ||
96 | } | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Decodes a write chunk list. The expected format is as follows: | ||
101 | * descrim : xdr_one | ||
102 | * nchunks : <count> | ||
103 | * handle : u32 RKEY ---+ | ||
104 | * length : u32 <len of segment> | | ||
105 | * offset : remove va + <count> | ||
106 | * . . . | | ||
107 | * ---+ | ||
108 | */ | ||
109 | static u32 *decode_write_list(u32 *va, u32 *vaend) | ||
110 | { | ||
111 | int ch_no; | ||
112 | struct rpcrdma_write_array *ary = | ||
113 | (struct rpcrdma_write_array *)va; | ||
114 | |||
115 | /* Check for not write-array */ | ||
116 | if (ary->wc_discrim == xdr_zero) | ||
117 | return (u32 *)&ary->wc_nchunks; | ||
118 | |||
119 | if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > | ||
120 | (unsigned long)vaend) { | ||
121 | dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); | ||
122 | return NULL; | ||
123 | } | ||
124 | ary->wc_discrim = ntohl(ary->wc_discrim); | ||
125 | ary->wc_nchunks = ntohl(ary->wc_nchunks); | ||
126 | if (((unsigned long)&ary->wc_array[0] + | ||
127 | (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > | ||
128 | (unsigned long)vaend) { | ||
129 | dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", | ||
130 | ary, ary->wc_nchunks, vaend); | ||
131 | return NULL; | ||
132 | } | ||
133 | for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { | ||
134 | u64 ch_offset; | ||
135 | |||
136 | ary->wc_array[ch_no].wc_target.rs_handle = | ||
137 | ntohl(ary->wc_array[ch_no].wc_target.rs_handle); | ||
138 | ary->wc_array[ch_no].wc_target.rs_length = | ||
139 | ntohl(ary->wc_array[ch_no].wc_target.rs_length); | ||
140 | va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; | ||
141 | xdr_decode_hyper(va, &ch_offset); | ||
142 | put_unaligned(ch_offset, (u64 *)va); | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * rs_length is the 2nd 4B field in wc_target and taking its | ||
147 | * address skips the list terminator | ||
148 | */ | ||
149 | return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length; | ||
150 | } | ||
151 | |||
152 | static u32 *decode_reply_array(u32 *va, u32 *vaend) | ||
153 | { | ||
154 | int ch_no; | ||
155 | struct rpcrdma_write_array *ary = | ||
156 | (struct rpcrdma_write_array *)va; | ||
157 | |||
158 | /* Check for no reply-array */ | ||
159 | if (ary->wc_discrim == xdr_zero) | ||
160 | return (u32 *)&ary->wc_nchunks; | ||
161 | |||
162 | if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > | ||
163 | (unsigned long)vaend) { | ||
164 | dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); | ||
165 | return NULL; | ||
166 | } | ||
167 | ary->wc_discrim = ntohl(ary->wc_discrim); | ||
168 | ary->wc_nchunks = ntohl(ary->wc_nchunks); | ||
169 | if (((unsigned long)&ary->wc_array[0] + | ||
170 | (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > | ||
171 | (unsigned long)vaend) { | ||
172 | dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", | ||
173 | ary, ary->wc_nchunks, vaend); | ||
174 | return NULL; | ||
175 | } | ||
176 | for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { | ||
177 | u64 ch_offset; | ||
178 | |||
179 | ary->wc_array[ch_no].wc_target.rs_handle = | ||
180 | ntohl(ary->wc_array[ch_no].wc_target.rs_handle); | ||
181 | ary->wc_array[ch_no].wc_target.rs_length = | ||
182 | ntohl(ary->wc_array[ch_no].wc_target.rs_length); | ||
183 | va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; | ||
184 | xdr_decode_hyper(va, &ch_offset); | ||
185 | put_unaligned(ch_offset, (u64 *)va); | ||
186 | } | ||
187 | |||
188 | return (u32 *)&ary->wc_array[ch_no]; | ||
189 | } | ||
190 | |||
191 | int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, | ||
192 | struct svc_rqst *rqstp) | ||
193 | { | ||
194 | struct rpcrdma_msg *rmsgp = NULL; | ||
195 | u32 *va; | ||
196 | u32 *vaend; | ||
197 | u32 hdr_len; | ||
198 | |||
199 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | ||
200 | |||
201 | /* Verify that there's enough bytes for header + something */ | ||
202 | if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { | ||
203 | dprintk("svcrdma: header too short = %d\n", | ||
204 | rqstp->rq_arg.len); | ||
205 | return -EINVAL; | ||
206 | } | ||
207 | |||
208 | /* Decode the header */ | ||
209 | rmsgp->rm_xid = ntohl(rmsgp->rm_xid); | ||
210 | rmsgp->rm_vers = ntohl(rmsgp->rm_vers); | ||
211 | rmsgp->rm_credit = ntohl(rmsgp->rm_credit); | ||
212 | rmsgp->rm_type = ntohl(rmsgp->rm_type); | ||
213 | |||
214 | if (rmsgp->rm_vers != RPCRDMA_VERSION) | ||
215 | return -ENOSYS; | ||
216 | |||
217 | /* Pull in the extra for the padded case and bump our pointer */ | ||
218 | if (rmsgp->rm_type == RDMA_MSGP) { | ||
219 | int hdrlen; | ||
220 | rmsgp->rm_body.rm_padded.rm_align = | ||
221 | ntohl(rmsgp->rm_body.rm_padded.rm_align); | ||
222 | rmsgp->rm_body.rm_padded.rm_thresh = | ||
223 | ntohl(rmsgp->rm_body.rm_padded.rm_thresh); | ||
224 | |||
225 | va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; | ||
226 | rqstp->rq_arg.head[0].iov_base = va; | ||
227 | hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); | ||
228 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
229 | if (hdrlen > rqstp->rq_arg.len) | ||
230 | return -EINVAL; | ||
231 | return hdrlen; | ||
232 | } | ||
233 | |||
234 | /* The chunk list may contain either a read chunk list or a write | ||
235 | * chunk list and a reply chunk list. | ||
236 | */ | ||
237 | va = &rmsgp->rm_body.rm_chunks[0]; | ||
238 | vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); | ||
239 | va = decode_read_list(va, vaend); | ||
240 | if (!va) | ||
241 | return -EINVAL; | ||
242 | va = decode_write_list(va, vaend); | ||
243 | if (!va) | ||
244 | return -EINVAL; | ||
245 | va = decode_reply_array(va, vaend); | ||
246 | if (!va) | ||
247 | return -EINVAL; | ||
248 | |||
249 | rqstp->rq_arg.head[0].iov_base = va; | ||
250 | hdr_len = (unsigned long)va - (unsigned long)rmsgp; | ||
251 | rqstp->rq_arg.head[0].iov_len -= hdr_len; | ||
252 | |||
253 | *rdma_req = rmsgp; | ||
254 | return hdr_len; | ||
255 | } | ||
256 | |||
257 | int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp) | ||
258 | { | ||
259 | struct rpcrdma_msg *rmsgp = NULL; | ||
260 | struct rpcrdma_read_chunk *ch; | ||
261 | struct rpcrdma_write_array *ary; | ||
262 | u32 *va; | ||
263 | u32 hdrlen; | ||
264 | |||
265 | dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n", | ||
266 | rqstp); | ||
267 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | ||
268 | |||
269 | /* Pull in the extra for the padded case and bump our pointer */ | ||
270 | if (rmsgp->rm_type == RDMA_MSGP) { | ||
271 | va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; | ||
272 | rqstp->rq_arg.head[0].iov_base = va; | ||
273 | hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); | ||
274 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
275 | return hdrlen; | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * Skip all chunks to find RPC msg. These were previously processed | ||
280 | */ | ||
281 | va = &rmsgp->rm_body.rm_chunks[0]; | ||
282 | |||
283 | /* Skip read-list */ | ||
284 | for (ch = (struct rpcrdma_read_chunk *)va; | ||
285 | ch->rc_discrim != xdr_zero; ch++); | ||
286 | va = (u32 *)&ch->rc_position; | ||
287 | |||
288 | /* Skip write-list */ | ||
289 | ary = (struct rpcrdma_write_array *)va; | ||
290 | if (ary->wc_discrim == xdr_zero) | ||
291 | va = (u32 *)&ary->wc_nchunks; | ||
292 | else | ||
293 | /* | ||
294 | * rs_length is the 2nd 4B field in wc_target and taking its | ||
295 | * address skips the list terminator | ||
296 | */ | ||
297 | va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length; | ||
298 | |||
299 | /* Skip reply-array */ | ||
300 | ary = (struct rpcrdma_write_array *)va; | ||
301 | if (ary->wc_discrim == xdr_zero) | ||
302 | va = (u32 *)&ary->wc_nchunks; | ||
303 | else | ||
304 | va = (u32 *)&ary->wc_array[ary->wc_nchunks]; | ||
305 | |||
306 | rqstp->rq_arg.head[0].iov_base = va; | ||
307 | hdrlen = (unsigned long)va - (unsigned long)rmsgp; | ||
308 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
309 | |||
310 | return hdrlen; | ||
311 | } | ||
312 | |||
313 | int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, | ||
314 | struct rpcrdma_msg *rmsgp, | ||
315 | enum rpcrdma_errcode err, u32 *va) | ||
316 | { | ||
317 | u32 *startp = va; | ||
318 | |||
319 | *va++ = htonl(rmsgp->rm_xid); | ||
320 | *va++ = htonl(rmsgp->rm_vers); | ||
321 | *va++ = htonl(xprt->sc_max_requests); | ||
322 | *va++ = htonl(RDMA_ERROR); | ||
323 | *va++ = htonl(err); | ||
324 | if (err == ERR_VERS) { | ||
325 | *va++ = htonl(RPCRDMA_VERSION); | ||
326 | *va++ = htonl(RPCRDMA_VERSION); | ||
327 | } | ||
328 | |||
329 | return (int)((unsigned long)va - (unsigned long)startp); | ||
330 | } | ||
331 | |||
332 | int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) | ||
333 | { | ||
334 | struct rpcrdma_write_array *wr_ary; | ||
335 | |||
336 | /* There is no read-list in a reply */ | ||
337 | |||
338 | /* skip write list */ | ||
339 | wr_ary = (struct rpcrdma_write_array *) | ||
340 | &rmsgp->rm_body.rm_chunks[1]; | ||
341 | if (wr_ary->wc_discrim) | ||
342 | wr_ary = (struct rpcrdma_write_array *) | ||
343 | &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]. | ||
344 | wc_target.rs_length; | ||
345 | else | ||
346 | wr_ary = (struct rpcrdma_write_array *) | ||
347 | &wr_ary->wc_nchunks; | ||
348 | |||
349 | /* skip reply array */ | ||
350 | if (wr_ary->wc_discrim) | ||
351 | wr_ary = (struct rpcrdma_write_array *) | ||
352 | &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]; | ||
353 | else | ||
354 | wr_ary = (struct rpcrdma_write_array *) | ||
355 | &wr_ary->wc_nchunks; | ||
356 | |||
357 | return (unsigned long) wr_ary - (unsigned long) rmsgp; | ||
358 | } | ||
359 | |||
360 | void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) | ||
361 | { | ||
362 | struct rpcrdma_write_array *ary; | ||
363 | |||
364 | /* no read-list */ | ||
365 | rmsgp->rm_body.rm_chunks[0] = xdr_zero; | ||
366 | |||
367 | /* write-array discrim */ | ||
368 | ary = (struct rpcrdma_write_array *) | ||
369 | &rmsgp->rm_body.rm_chunks[1]; | ||
370 | ary->wc_discrim = xdr_one; | ||
371 | ary->wc_nchunks = htonl(chunks); | ||
372 | |||
373 | /* write-list terminator */ | ||
374 | ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; | ||
375 | |||
376 | /* reply-array discriminator */ | ||
377 | ary->wc_array[chunks].wc_target.rs_length = xdr_zero; | ||
378 | } | ||
379 | |||
380 | void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary, | ||
381 | int chunks) | ||
382 | { | ||
383 | ary->wc_discrim = xdr_one; | ||
384 | ary->wc_nchunks = htonl(chunks); | ||
385 | } | ||
386 | |||
387 | void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, | ||
388 | int chunk_no, | ||
389 | u32 rs_handle, u64 rs_offset, | ||
390 | u32 write_len) | ||
391 | { | ||
392 | struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; | ||
393 | seg->rs_handle = htonl(rs_handle); | ||
394 | seg->rs_length = htonl(write_len); | ||
395 | xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset); | ||
396 | } | ||
397 | |||
398 | void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, | ||
399 | struct rpcrdma_msg *rdma_argp, | ||
400 | struct rpcrdma_msg *rdma_resp, | ||
401 | enum rpcrdma_proc rdma_type) | ||
402 | { | ||
403 | rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); | ||
404 | rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); | ||
405 | rdma_resp->rm_credit = htonl(xprt->sc_max_requests); | ||
406 | rdma_resp->rm_type = htonl(rdma_type); | ||
407 | |||
408 | /* Encode <nul> chunks lists */ | ||
409 | rdma_resp->rm_body.rm_chunks[0] = xdr_zero; | ||
410 | rdma_resp->rm_body.rm_chunks[1] = xdr_zero; | ||
411 | rdma_resp->rm_body.rm_chunks[2] = xdr_zero; | ||
412 | } | ||