diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-14 15:31:14 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-14 15:31:14 -0400 |
commit | 8acd3a60bcca17c6d89c73cee3ad6057eb83ba1e (patch) | |
tree | d610c8d39246c33c499ee9d92d302d3ca9e89ae3 /net | |
parent | c269bc00fcb876ae3b85f178f1e34601185c8ccc (diff) | |
parent | 107e0008dfb8bd6366bc8827f5bbbc0c1f795d2d (diff) |
Merge branch 'for-2.6.28' of git://linux-nfs.org/~bfields/linux
* 'for-2.6.28' of git://linux-nfs.org/~bfields/linux: (59 commits)
svcrdma: Fix IRD/ORD polarity
svcrdma: Update svc_rdma_send_error to use DMA LKEY
svcrdma: Modify the RPC reply path to use FRMR when available
svcrdma: Modify the RPC recv path to use FRMR when available
svcrdma: Add support to svc_rdma_send to handle chained WR
svcrdma: Modify post recv path to use local dma key
svcrdma: Add a service to register a Fast Reg MR with the device
svcrdma: Query device for Fast Reg support during connection setup
svcrdma: Add FRMR get/put services
NLM: Remove unused argument from svc_addsock() function
NLM: Remove "proto" argument from lockd_up()
NLM: Always start both UDP and TCP listeners
lockd: Remove unused fields in the nlm_reboot structure
lockd: Add helper to sanity check incoming NOTIFY requests
lockd: change nlmclnt_grant() to take a "struct sockaddr *"
lockd: Adjust nlmsvc_lookup_host() to accomodate AF_INET6 addresses
lockd: Adjust nlmclnt_lookup_host() signature to accomodate non-AF_INET
lockd: Support non-AF_INET addresses in nlm_lookup_host()
NLM: Convert nlm_lookup_host() to use a single argument
svcrdma: Add Fast Reg MR Data Types
...
Diffstat (limited to 'net')
-rw-r--r-- | net/sunrpc/clnt.c | 2 | ||||
-rw-r--r-- | net/sunrpc/rpcb_clnt.c | 81 | ||||
-rw-r--r-- | net/sunrpc/svc.c | 251 | ||||
-rw-r--r-- | net/sunrpc/svc_xprt.c | 39 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 17 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 187 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_sendto.c | 255 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 364 |
8 files changed, 989 insertions, 207 deletions
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 76739e928d0d..da0789fa1b88 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru | |||
174 | clnt->cl_procinfo = version->procs; | 174 | clnt->cl_procinfo = version->procs; |
175 | clnt->cl_maxproc = version->nrprocs; | 175 | clnt->cl_maxproc = version->nrprocs; |
176 | clnt->cl_protname = program->name; | 176 | clnt->cl_protname = program->name; |
177 | clnt->cl_prog = program->number; | 177 | clnt->cl_prog = args->prognumber ? : program->number; |
178 | clnt->cl_vers = version->number; | 178 | clnt->cl_vers = version->number; |
179 | clnt->cl_stats = program->stats; | 179 | clnt->cl_stats = program->stats; |
180 | clnt->cl_metrics = rpc_alloc_iostats(clnt); | 180 | clnt->cl_metrics = rpc_alloc_iostats(clnt); |
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 24db2b4d12d3..34abc91058d8 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/in6.h> | 20 | #include <linux/in6.h> |
21 | #include <linux/kernel.h> | 21 | #include <linux/kernel.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <net/ipv6.h> | ||
23 | 24 | ||
24 | #include <linux/sunrpc/clnt.h> | 25 | #include <linux/sunrpc/clnt.h> |
25 | #include <linux/sunrpc/sched.h> | 26 | #include <linux/sunrpc/sched.h> |
@@ -176,13 +177,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, | |||
176 | } | 177 | } |
177 | 178 | ||
178 | static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | 179 | static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, |
179 | u32 version, struct rpc_message *msg, | 180 | u32 version, struct rpc_message *msg) |
180 | int *result) | ||
181 | { | 181 | { |
182 | struct rpc_clnt *rpcb_clnt; | 182 | struct rpc_clnt *rpcb_clnt; |
183 | int error = 0; | 183 | int result, error = 0; |
184 | 184 | ||
185 | *result = 0; | 185 | msg->rpc_resp = &result; |
186 | 186 | ||
187 | rpcb_clnt = rpcb_create_local(addr, addrlen, version); | 187 | rpcb_clnt = rpcb_create_local(addr, addrlen, version); |
188 | if (!IS_ERR(rpcb_clnt)) { | 188 | if (!IS_ERR(rpcb_clnt)) { |
@@ -191,12 +191,15 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | |||
191 | } else | 191 | } else |
192 | error = PTR_ERR(rpcb_clnt); | 192 | error = PTR_ERR(rpcb_clnt); |
193 | 193 | ||
194 | if (error < 0) | 194 | if (error < 0) { |
195 | printk(KERN_WARNING "RPC: failed to contact local rpcbind " | 195 | printk(KERN_WARNING "RPC: failed to contact local rpcbind " |
196 | "server (errno %d).\n", -error); | 196 | "server (errno %d).\n", -error); |
197 | dprintk("RPC: registration status %d/%d\n", error, *result); | 197 | return error; |
198 | } | ||
198 | 199 | ||
199 | return error; | 200 | if (!result) |
201 | return -EACCES; | ||
202 | return 0; | ||
200 | } | 203 | } |
201 | 204 | ||
202 | /** | 205 | /** |
@@ -205,7 +208,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | |||
205 | * @vers: RPC version number to bind | 208 | * @vers: RPC version number to bind |
206 | * @prot: transport protocol to register | 209 | * @prot: transport protocol to register |
207 | * @port: port value to register | 210 | * @port: port value to register |
208 | * @okay: OUT: result code | 211 | * |
212 | * Returns zero if the registration request was dispatched successfully | ||
213 | * and the rpcbind daemon returned success. Otherwise, returns an errno | ||
214 | * value that reflects the nature of the error (request could not be | ||
215 | * dispatched, timed out, or rpcbind returned an error). | ||
209 | * | 216 | * |
210 | * RPC services invoke this function to advertise their contact | 217 | * RPC services invoke this function to advertise their contact |
211 | * information via the system's rpcbind daemon. RPC services | 218 | * information via the system's rpcbind daemon. RPC services |
@@ -217,15 +224,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | |||
217 | * all registered transports for [program, version] from the local | 224 | * all registered transports for [program, version] from the local |
218 | * rpcbind database. | 225 | * rpcbind database. |
219 | * | 226 | * |
220 | * Returns zero if the registration request was dispatched | ||
221 | * successfully and a reply was received. The rpcbind daemon's | ||
222 | * boolean result code is stored in *okay. | ||
223 | * | ||
224 | * Returns an errno value and sets *result to zero if there was | ||
225 | * some problem that prevented the rpcbind request from being | ||
226 | * dispatched, or if the rpcbind daemon did not respond within | ||
227 | * the timeout. | ||
228 | * | ||
229 | * This function uses rpcbind protocol version 2 to contact the | 227 | * This function uses rpcbind protocol version 2 to contact the |
230 | * local rpcbind daemon. | 228 | * local rpcbind daemon. |
231 | * | 229 | * |
@@ -236,7 +234,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, | |||
236 | * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 | 234 | * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 |
237 | * addresses). | 235 | * addresses). |
238 | */ | 236 | */ |
239 | int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | 237 | int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) |
240 | { | 238 | { |
241 | struct rpcbind_args map = { | 239 | struct rpcbind_args map = { |
242 | .r_prog = prog, | 240 | .r_prog = prog, |
@@ -246,7 +244,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
246 | }; | 244 | }; |
247 | struct rpc_message msg = { | 245 | struct rpc_message msg = { |
248 | .rpc_argp = &map, | 246 | .rpc_argp = &map, |
249 | .rpc_resp = okay, | ||
250 | }; | 247 | }; |
251 | 248 | ||
252 | dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " | 249 | dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " |
@@ -259,7 +256,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
259 | 256 | ||
260 | return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, | 257 | return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, |
261 | sizeof(rpcb_inaddr_loopback), | 258 | sizeof(rpcb_inaddr_loopback), |
262 | RPCBVERS_2, &msg, okay); | 259 | RPCBVERS_2, &msg); |
263 | } | 260 | } |
264 | 261 | ||
265 | /* | 262 | /* |
@@ -290,7 +287,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register, | |||
290 | 287 | ||
291 | return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, | 288 | return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, |
292 | sizeof(rpcb_inaddr_loopback), | 289 | sizeof(rpcb_inaddr_loopback), |
293 | RPCBVERS_4, msg, msg->rpc_resp); | 290 | RPCBVERS_4, msg); |
294 | } | 291 | } |
295 | 292 | ||
296 | /* | 293 | /* |
@@ -304,10 +301,13 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
304 | char buf[64]; | 301 | char buf[64]; |
305 | 302 | ||
306 | /* Construct AF_INET6 universal address */ | 303 | /* Construct AF_INET6 universal address */ |
307 | snprintf(buf, sizeof(buf), | 304 | if (ipv6_addr_any(&address_to_register->sin6_addr)) |
308 | NIP6_FMT".%u.%u", | 305 | snprintf(buf, sizeof(buf), "::.%u.%u", |
309 | NIP6(address_to_register->sin6_addr), | 306 | port >> 8, port & 0xff); |
310 | port >> 8, port & 0xff); | 307 | else |
308 | snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u", | ||
309 | NIP6(address_to_register->sin6_addr), | ||
310 | port >> 8, port & 0xff); | ||
311 | map->r_addr = buf; | 311 | map->r_addr = buf; |
312 | 312 | ||
313 | dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " | 313 | dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " |
@@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
321 | 321 | ||
322 | return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, | 322 | return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, |
323 | sizeof(rpcb_in6addr_loopback), | 323 | sizeof(rpcb_in6addr_loopback), |
324 | RPCBVERS_4, msg, msg->rpc_resp); | 324 | RPCBVERS_4, msg); |
325 | } | 325 | } |
326 | 326 | ||
327 | /** | 327 | /** |
@@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
330 | * @version: RPC version number of service to (un)register | 330 | * @version: RPC version number of service to (un)register |
331 | * @address: address family, IP address, and port to (un)register | 331 | * @address: address family, IP address, and port to (un)register |
332 | * @netid: netid of transport protocol to (un)register | 332 | * @netid: netid of transport protocol to (un)register |
333 | * @result: result code from rpcbind RPC call | 333 | * |
334 | * Returns zero if the registration request was dispatched successfully | ||
335 | * and the rpcbind daemon returned success. Otherwise, returns an errno | ||
336 | * value that reflects the nature of the error (request could not be | ||
337 | * dispatched, timed out, or rpcbind returned an error). | ||
334 | * | 338 | * |
335 | * RPC services invoke this function to advertise their contact | 339 | * RPC services invoke this function to advertise their contact |
336 | * information via the system's rpcbind daemon. RPC services | 340 | * information via the system's rpcbind daemon. RPC services |
@@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
342 | * to zero. Callers pass a netid of "" to unregister all | 346 | * to zero. Callers pass a netid of "" to unregister all |
343 | * transport netids associated with [program, version, address]. | 347 | * transport netids associated with [program, version, address]. |
344 | * | 348 | * |
345 | * Returns zero if the registration request was dispatched | ||
346 | * successfully and a reply was received. The rpcbind daemon's | ||
347 | * result code is stored in *result. | ||
348 | * | ||
349 | * Returns an errno value and sets *result to zero if there was | ||
350 | * some problem that prevented the rpcbind request from being | ||
351 | * dispatched, or if the rpcbind daemon did not respond within | ||
352 | * the timeout. | ||
353 | * | ||
354 | * This function uses rpcbind protocol version 4 to contact the | 349 | * This function uses rpcbind protocol version 4 to contact the |
355 | * local rpcbind daemon. The local rpcbind daemon must support | 350 | * local rpcbind daemon. The local rpcbind daemon must support |
356 | * version 4 of the rpcbind protocol in order for these functions | 351 | * version 4 of the rpcbind protocol in order for these functions |
@@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, | |||
372 | * advertises the service on all IPv4 and IPv6 addresses. | 367 | * advertises the service on all IPv4 and IPv6 addresses. |
373 | */ | 368 | */ |
374 | int rpcb_v4_register(const u32 program, const u32 version, | 369 | int rpcb_v4_register(const u32 program, const u32 version, |
375 | const struct sockaddr *address, const char *netid, | 370 | const struct sockaddr *address, const char *netid) |
376 | int *result) | ||
377 | { | 371 | { |
378 | struct rpcbind_args map = { | 372 | struct rpcbind_args map = { |
379 | .r_prog = program, | 373 | .r_prog = program, |
@@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version, | |||
383 | }; | 377 | }; |
384 | struct rpc_message msg = { | 378 | struct rpc_message msg = { |
385 | .rpc_argp = &map, | 379 | .rpc_argp = &map, |
386 | .rpc_resp = result, | ||
387 | }; | 380 | }; |
388 | 381 | ||
389 | *result = 0; | ||
390 | |||
391 | switch (address->sa_family) { | 382 | switch (address->sa_family) { |
392 | case AF_INET: | 383 | case AF_INET: |
393 | return rpcb_register_netid4((struct sockaddr_in *)address, | 384 | return rpcb_register_netid4((struct sockaddr_in *)address, |
@@ -633,7 +624,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) | |||
633 | static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, | 624 | static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, |
634 | struct rpcbind_args *rpcb) | 625 | struct rpcbind_args *rpcb) |
635 | { | 626 | { |
636 | dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n", | 627 | dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n", |
637 | rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); | 628 | rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); |
638 | *p++ = htonl(rpcb->r_prog); | 629 | *p++ = htonl(rpcb->r_prog); |
639 | *p++ = htonl(rpcb->r_vers); | 630 | *p++ = htonl(rpcb->r_vers); |
@@ -648,7 +639,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, | |||
648 | unsigned short *portp) | 639 | unsigned short *portp) |
649 | { | 640 | { |
650 | *portp = (unsigned short) ntohl(*p++); | 641 | *portp = (unsigned short) ntohl(*p++); |
651 | dprintk("RPC: rpcb_decode_getport result %u\n", | 642 | dprintk("RPC: rpcb getport result: %u\n", |
652 | *portp); | 643 | *portp); |
653 | return 0; | 644 | return 0; |
654 | } | 645 | } |
@@ -657,7 +648,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, | |||
657 | unsigned int *boolp) | 648 | unsigned int *boolp) |
658 | { | 649 | { |
659 | *boolp = (unsigned int) ntohl(*p++); | 650 | *boolp = (unsigned int) ntohl(*p++); |
660 | dprintk("RPC: rpcb_decode_set: call %s\n", | 651 | dprintk("RPC: rpcb set/unset call %s\n", |
661 | (*boolp ? "succeeded" : "failed")); | 652 | (*boolp ? "succeeded" : "failed")); |
662 | return 0; | 653 | return 0; |
663 | } | 654 | } |
@@ -665,7 +656,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, | |||
665 | static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, | 656 | static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, |
666 | struct rpcbind_args *rpcb) | 657 | struct rpcbind_args *rpcb) |
667 | { | 658 | { |
668 | dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n", | 659 | dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", |
669 | rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); | 660 | rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); |
670 | *p++ = htonl(rpcb->r_prog); | 661 | *p++ = htonl(rpcb->r_prog); |
671 | *p++ = htonl(rpcb->r_vers); | 662 | *p++ = htonl(rpcb->r_vers); |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5a32cb7c4bb4..54c98d876847 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -28,6 +28,8 @@ | |||
28 | 28 | ||
29 | #define RPCDBG_FACILITY RPCDBG_SVCDSP | 29 | #define RPCDBG_FACILITY RPCDBG_SVCDSP |
30 | 30 | ||
31 | static void svc_unregister(const struct svc_serv *serv); | ||
32 | |||
31 | #define svc_serv_is_pooled(serv) ((serv)->sv_function) | 33 | #define svc_serv_is_pooled(serv) ((serv)->sv_function) |
32 | 34 | ||
33 | /* | 35 | /* |
@@ -357,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) | |||
357 | */ | 359 | */ |
358 | static struct svc_serv * | 360 | static struct svc_serv * |
359 | __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | 361 | __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, |
360 | void (*shutdown)(struct svc_serv *serv)) | 362 | sa_family_t family, void (*shutdown)(struct svc_serv *serv)) |
361 | { | 363 | { |
362 | struct svc_serv *serv; | 364 | struct svc_serv *serv; |
363 | unsigned int vers; | 365 | unsigned int vers; |
@@ -366,6 +368,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
366 | 368 | ||
367 | if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) | 369 | if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) |
368 | return NULL; | 370 | return NULL; |
371 | serv->sv_family = family; | ||
369 | serv->sv_name = prog->pg_name; | 372 | serv->sv_name = prog->pg_name; |
370 | serv->sv_program = prog; | 373 | serv->sv_program = prog; |
371 | serv->sv_nrthreads = 1; | 374 | serv->sv_nrthreads = 1; |
@@ -416,30 +419,29 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
416 | spin_lock_init(&pool->sp_lock); | 419 | spin_lock_init(&pool->sp_lock); |
417 | } | 420 | } |
418 | 421 | ||
419 | |||
420 | /* Remove any stale portmap registrations */ | 422 | /* Remove any stale portmap registrations */ |
421 | svc_register(serv, 0, 0); | 423 | svc_unregister(serv); |
422 | 424 | ||
423 | return serv; | 425 | return serv; |
424 | } | 426 | } |
425 | 427 | ||
426 | struct svc_serv * | 428 | struct svc_serv * |
427 | svc_create(struct svc_program *prog, unsigned int bufsize, | 429 | svc_create(struct svc_program *prog, unsigned int bufsize, |
428 | void (*shutdown)(struct svc_serv *serv)) | 430 | sa_family_t family, void (*shutdown)(struct svc_serv *serv)) |
429 | { | 431 | { |
430 | return __svc_create(prog, bufsize, /*npools*/1, shutdown); | 432 | return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); |
431 | } | 433 | } |
432 | EXPORT_SYMBOL(svc_create); | 434 | EXPORT_SYMBOL(svc_create); |
433 | 435 | ||
434 | struct svc_serv * | 436 | struct svc_serv * |
435 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | 437 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, |
436 | void (*shutdown)(struct svc_serv *serv), | 438 | sa_family_t family, void (*shutdown)(struct svc_serv *serv), |
437 | svc_thread_fn func, struct module *mod) | 439 | svc_thread_fn func, struct module *mod) |
438 | { | 440 | { |
439 | struct svc_serv *serv; | 441 | struct svc_serv *serv; |
440 | unsigned int npools = svc_pool_map_get(); | 442 | unsigned int npools = svc_pool_map_get(); |
441 | 443 | ||
442 | serv = __svc_create(prog, bufsize, npools, shutdown); | 444 | serv = __svc_create(prog, bufsize, npools, family, shutdown); |
443 | 445 | ||
444 | if (serv != NULL) { | 446 | if (serv != NULL) { |
445 | serv->sv_function = func; | 447 | serv->sv_function = func; |
@@ -486,8 +488,7 @@ svc_destroy(struct svc_serv *serv) | |||
486 | if (svc_serv_is_pooled(serv)) | 488 | if (svc_serv_is_pooled(serv)) |
487 | svc_pool_map_put(); | 489 | svc_pool_map_put(); |
488 | 490 | ||
489 | /* Unregister service with the portmapper */ | 491 | svc_unregister(serv); |
490 | svc_register(serv, 0, 0); | ||
491 | kfree(serv->sv_pools); | 492 | kfree(serv->sv_pools); |
492 | kfree(serv); | 493 | kfree(serv); |
493 | } | 494 | } |
@@ -718,55 +719,245 @@ svc_exit_thread(struct svc_rqst *rqstp) | |||
718 | } | 719 | } |
719 | EXPORT_SYMBOL(svc_exit_thread); | 720 | EXPORT_SYMBOL(svc_exit_thread); |
720 | 721 | ||
722 | #ifdef CONFIG_SUNRPC_REGISTER_V4 | ||
723 | |||
721 | /* | 724 | /* |
722 | * Register an RPC service with the local portmapper. | 725 | * Register an "inet" protocol family netid with the local |
723 | * To unregister a service, call this routine with | 726 | * rpcbind daemon via an rpcbind v4 SET request. |
724 | * proto and port == 0. | 727 | * |
728 | * No netconfig infrastructure is available in the kernel, so | ||
729 | * we map IP_ protocol numbers to netids by hand. | ||
730 | * | ||
731 | * Returns zero on success; a negative errno value is returned | ||
732 | * if any error occurs. | ||
725 | */ | 733 | */ |
726 | int | 734 | static int __svc_rpcb_register4(const u32 program, const u32 version, |
727 | svc_register(struct svc_serv *serv, int proto, unsigned short port) | 735 | const unsigned short protocol, |
736 | const unsigned short port) | ||
737 | { | ||
738 | struct sockaddr_in sin = { | ||
739 | .sin_family = AF_INET, | ||
740 | .sin_addr.s_addr = htonl(INADDR_ANY), | ||
741 | .sin_port = htons(port), | ||
742 | }; | ||
743 | char *netid; | ||
744 | |||
745 | switch (protocol) { | ||
746 | case IPPROTO_UDP: | ||
747 | netid = RPCBIND_NETID_UDP; | ||
748 | break; | ||
749 | case IPPROTO_TCP: | ||
750 | netid = RPCBIND_NETID_TCP; | ||
751 | break; | ||
752 | default: | ||
753 | return -EPROTONOSUPPORT; | ||
754 | } | ||
755 | |||
756 | return rpcb_v4_register(program, version, | ||
757 | (struct sockaddr *)&sin, netid); | ||
758 | } | ||
759 | |||
760 | /* | ||
761 | * Register an "inet6" protocol family netid with the local | ||
762 | * rpcbind daemon via an rpcbind v4 SET request. | ||
763 | * | ||
764 | * No netconfig infrastructure is available in the kernel, so | ||
765 | * we map IP_ protocol numbers to netids by hand. | ||
766 | * | ||
767 | * Returns zero on success; a negative errno value is returned | ||
768 | * if any error occurs. | ||
769 | */ | ||
770 | static int __svc_rpcb_register6(const u32 program, const u32 version, | ||
771 | const unsigned short protocol, | ||
772 | const unsigned short port) | ||
773 | { | ||
774 | struct sockaddr_in6 sin6 = { | ||
775 | .sin6_family = AF_INET6, | ||
776 | .sin6_addr = IN6ADDR_ANY_INIT, | ||
777 | .sin6_port = htons(port), | ||
778 | }; | ||
779 | char *netid; | ||
780 | |||
781 | switch (protocol) { | ||
782 | case IPPROTO_UDP: | ||
783 | netid = RPCBIND_NETID_UDP6; | ||
784 | break; | ||
785 | case IPPROTO_TCP: | ||
786 | netid = RPCBIND_NETID_TCP6; | ||
787 | break; | ||
788 | default: | ||
789 | return -EPROTONOSUPPORT; | ||
790 | } | ||
791 | |||
792 | return rpcb_v4_register(program, version, | ||
793 | (struct sockaddr *)&sin6, netid); | ||
794 | } | ||
795 | |||
796 | /* | ||
797 | * Register a kernel RPC service via rpcbind version 4. | ||
798 | * | ||
799 | * Returns zero on success; a negative errno value is returned | ||
800 | * if any error occurs. | ||
801 | */ | ||
802 | static int __svc_register(const u32 program, const u32 version, | ||
803 | const sa_family_t family, | ||
804 | const unsigned short protocol, | ||
805 | const unsigned short port) | ||
806 | { | ||
807 | int error; | ||
808 | |||
809 | switch (family) { | ||
810 | case AF_INET: | ||
811 | return __svc_rpcb_register4(program, version, | ||
812 | protocol, port); | ||
813 | case AF_INET6: | ||
814 | error = __svc_rpcb_register6(program, version, | ||
815 | protocol, port); | ||
816 | if (error < 0) | ||
817 | return error; | ||
818 | |||
819 | /* | ||
820 | * Work around bug in some versions of Linux rpcbind | ||
821 | * which don't allow registration of both inet and | ||
822 | * inet6 netids. | ||
823 | * | ||
824 | * Error return ignored for now. | ||
825 | */ | ||
826 | __svc_rpcb_register4(program, version, | ||
827 | protocol, port); | ||
828 | return 0; | ||
829 | } | ||
830 | |||
831 | return -EAFNOSUPPORT; | ||
832 | } | ||
833 | |||
834 | #else /* CONFIG_SUNRPC_REGISTER_V4 */ | ||
835 | |||
836 | /* | ||
837 | * Register a kernel RPC service via rpcbind version 2. | ||
838 | * | ||
839 | * Returns zero on success; a negative errno value is returned | ||
840 | * if any error occurs. | ||
841 | */ | ||
842 | static int __svc_register(const u32 program, const u32 version, | ||
843 | sa_family_t family, | ||
844 | const unsigned short protocol, | ||
845 | const unsigned short port) | ||
846 | { | ||
847 | if (family != AF_INET) | ||
848 | return -EAFNOSUPPORT; | ||
849 | |||
850 | return rpcb_register(program, version, protocol, port); | ||
851 | } | ||
852 | |||
853 | #endif /* CONFIG_SUNRPC_REGISTER_V4 */ | ||
854 | |||
855 | /** | ||
856 | * svc_register - register an RPC service with the local portmapper | ||
857 | * @serv: svc_serv struct for the service to register | ||
858 | * @proto: transport protocol number to advertise | ||
859 | * @port: port to advertise | ||
860 | * | ||
861 | * Service is registered for any address in serv's address family | ||
862 | */ | ||
863 | int svc_register(const struct svc_serv *serv, const unsigned short proto, | ||
864 | const unsigned short port) | ||
728 | { | 865 | { |
729 | struct svc_program *progp; | 866 | struct svc_program *progp; |
730 | unsigned long flags; | ||
731 | unsigned int i; | 867 | unsigned int i; |
732 | int error = 0, dummy; | 868 | int error = 0; |
733 | 869 | ||
734 | if (!port) | 870 | BUG_ON(proto == 0 && port == 0); |
735 | clear_thread_flag(TIF_SIGPENDING); | ||
736 | 871 | ||
737 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { | 872 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { |
738 | for (i = 0; i < progp->pg_nvers; i++) { | 873 | for (i = 0; i < progp->pg_nvers; i++) { |
739 | if (progp->pg_vers[i] == NULL) | 874 | if (progp->pg_vers[i] == NULL) |
740 | continue; | 875 | continue; |
741 | 876 | ||
742 | dprintk("svc: svc_register(%s, %s, %d, %d)%s\n", | 877 | dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n", |
743 | progp->pg_name, | 878 | progp->pg_name, |
879 | i, | ||
744 | proto == IPPROTO_UDP? "udp" : "tcp", | 880 | proto == IPPROTO_UDP? "udp" : "tcp", |
745 | port, | 881 | port, |
746 | i, | 882 | serv->sv_family, |
747 | progp->pg_vers[i]->vs_hidden? | 883 | progp->pg_vers[i]->vs_hidden? |
748 | " (but not telling portmap)" : ""); | 884 | " (but not telling portmap)" : ""); |
749 | 885 | ||
750 | if (progp->pg_vers[i]->vs_hidden) | 886 | if (progp->pg_vers[i]->vs_hidden) |
751 | continue; | 887 | continue; |
752 | 888 | ||
753 | error = rpcb_register(progp->pg_prog, i, proto, port, &dummy); | 889 | error = __svc_register(progp->pg_prog, i, |
890 | serv->sv_family, proto, port); | ||
754 | if (error < 0) | 891 | if (error < 0) |
755 | break; | 892 | break; |
756 | if (port && !dummy) { | ||
757 | error = -EACCES; | ||
758 | break; | ||
759 | } | ||
760 | } | 893 | } |
761 | } | 894 | } |
762 | 895 | ||
763 | if (!port) { | 896 | return error; |
764 | spin_lock_irqsave(&current->sighand->siglock, flags); | 897 | } |
765 | recalc_sigpending(); | 898 | |
766 | spin_unlock_irqrestore(&current->sighand->siglock, flags); | 899 | #ifdef CONFIG_SUNRPC_REGISTER_V4 |
900 | |||
901 | static void __svc_unregister(const u32 program, const u32 version, | ||
902 | const char *progname) | ||
903 | { | ||
904 | struct sockaddr_in6 sin6 = { | ||
905 | .sin6_family = AF_INET6, | ||
906 | .sin6_addr = IN6ADDR_ANY_INIT, | ||
907 | .sin6_port = 0, | ||
908 | }; | ||
909 | int error; | ||
910 | |||
911 | error = rpcb_v4_register(program, version, | ||
912 | (struct sockaddr *)&sin6, ""); | ||
913 | dprintk("svc: %s(%sv%u), error %d\n", | ||
914 | __func__, progname, version, error); | ||
915 | } | ||
916 | |||
917 | #else /* CONFIG_SUNRPC_REGISTER_V4 */ | ||
918 | |||
919 | static void __svc_unregister(const u32 program, const u32 version, | ||
920 | const char *progname) | ||
921 | { | ||
922 | int error; | ||
923 | |||
924 | error = rpcb_register(program, version, 0, 0); | ||
925 | dprintk("svc: %s(%sv%u), error %d\n", | ||
926 | __func__, progname, version, error); | ||
927 | } | ||
928 | |||
929 | #endif /* CONFIG_SUNRPC_REGISTER_V4 */ | ||
930 | |||
931 | /* | ||
932 | * All netids, bind addresses and ports registered for [program, version] | ||
933 | * are removed from the local rpcbind database (if the service is not | ||
934 | * hidden) to make way for a new instance of the service. | ||
935 | * | ||
936 | * The result of unregistration is reported via dprintk for those who want | ||
937 | * verification of the result, but is otherwise not important. | ||
938 | */ | ||
939 | static void svc_unregister(const struct svc_serv *serv) | ||
940 | { | ||
941 | struct svc_program *progp; | ||
942 | unsigned long flags; | ||
943 | unsigned int i; | ||
944 | |||
945 | clear_thread_flag(TIF_SIGPENDING); | ||
946 | |||
947 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { | ||
948 | for (i = 0; i < progp->pg_nvers; i++) { | ||
949 | if (progp->pg_vers[i] == NULL) | ||
950 | continue; | ||
951 | if (progp->pg_vers[i]->vs_hidden) | ||
952 | continue; | ||
953 | |||
954 | __svc_unregister(progp->pg_prog, i, progp->pg_name); | ||
955 | } | ||
767 | } | 956 | } |
768 | 957 | ||
769 | return error; | 958 | spin_lock_irqsave(&current->sighand->siglock, flags); |
959 | recalc_sigpending(); | ||
960 | spin_unlock_irqrestore(&current->sighand->siglock, flags); | ||
770 | } | 961 | } |
771 | 962 | ||
772 | /* | 963 | /* |
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e46c825f4954..bf5b5cdafebf 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c | |||
@@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, | |||
159 | } | 159 | } |
160 | EXPORT_SYMBOL_GPL(svc_xprt_init); | 160 | EXPORT_SYMBOL_GPL(svc_xprt_init); |
161 | 161 | ||
162 | int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | 162 | static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, |
163 | int flags) | 163 | struct svc_serv *serv, |
164 | unsigned short port, int flags) | ||
164 | { | 165 | { |
165 | struct svc_xprt_class *xcl; | ||
166 | struct sockaddr_in sin = { | 166 | struct sockaddr_in sin = { |
167 | .sin_family = AF_INET, | 167 | .sin_family = AF_INET, |
168 | .sin_addr.s_addr = htonl(INADDR_ANY), | 168 | .sin_addr.s_addr = htonl(INADDR_ANY), |
169 | .sin_port = htons(port), | 169 | .sin_port = htons(port), |
170 | }; | 170 | }; |
171 | struct sockaddr_in6 sin6 = { | ||
172 | .sin6_family = AF_INET6, | ||
173 | .sin6_addr = IN6ADDR_ANY_INIT, | ||
174 | .sin6_port = htons(port), | ||
175 | }; | ||
176 | struct sockaddr *sap; | ||
177 | size_t len; | ||
178 | |||
179 | switch (serv->sv_family) { | ||
180 | case AF_INET: | ||
181 | sap = (struct sockaddr *)&sin; | ||
182 | len = sizeof(sin); | ||
183 | break; | ||
184 | case AF_INET6: | ||
185 | sap = (struct sockaddr *)&sin6; | ||
186 | len = sizeof(sin6); | ||
187 | break; | ||
188 | default: | ||
189 | return ERR_PTR(-EAFNOSUPPORT); | ||
190 | } | ||
191 | |||
192 | return xcl->xcl_ops->xpo_create(serv, sap, len, flags); | ||
193 | } | ||
194 | |||
195 | int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | ||
196 | int flags) | ||
197 | { | ||
198 | struct svc_xprt_class *xcl; | ||
199 | |||
171 | dprintk("svc: creating transport %s[%d]\n", xprt_name, port); | 200 | dprintk("svc: creating transport %s[%d]\n", xprt_name, port); |
172 | spin_lock(&svc_xprt_class_lock); | 201 | spin_lock(&svc_xprt_class_lock); |
173 | list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { | 202 | list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { |
@@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | |||
180 | goto err; | 209 | goto err; |
181 | 210 | ||
182 | spin_unlock(&svc_xprt_class_lock); | 211 | spin_unlock(&svc_xprt_class_lock); |
183 | newxprt = xcl->xcl_ops-> | 212 | newxprt = __svc_xpo_create(xcl, serv, port, flags); |
184 | xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin), | ||
185 | flags); | ||
186 | if (IS_ERR(newxprt)) { | 213 | if (IS_ERR(newxprt)) { |
187 | module_put(xcl->xcl_owner); | 214 | module_put(xcl->xcl_owner); |
188 | return PTR_ERR(newxprt); | 215 | return PTR_ERR(newxprt); |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3e65719f1ef6..95293f549e9c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1114 | struct svc_sock *svsk; | 1114 | struct svc_sock *svsk; |
1115 | struct sock *inet; | 1115 | struct sock *inet; |
1116 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); | 1116 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); |
1117 | int val; | ||
1117 | 1118 | ||
1118 | dprintk("svc: svc_setup_socket %p\n", sock); | 1119 | dprintk("svc: svc_setup_socket %p\n", sock); |
1119 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { | 1120 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { |
@@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1146 | else | 1147 | else |
1147 | svc_tcp_init(svsk, serv); | 1148 | svc_tcp_init(svsk, serv); |
1148 | 1149 | ||
1150 | /* | ||
1151 | * We start one listener per sv_serv. We want AF_INET | ||
1152 | * requests to be automatically shunted to our AF_INET6 | ||
1153 | * listener using a mapped IPv4 address. Make sure | ||
1154 | * no-one starts an equivalent IPv4 listener, which | ||
1155 | * would steal our incoming connections. | ||
1156 | */ | ||
1157 | val = 0; | ||
1158 | if (serv->sv_family == AF_INET6) | ||
1159 | kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, | ||
1160 | (char *)&val, sizeof(val)); | ||
1161 | |||
1149 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", | 1162 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", |
1150 | svsk, svsk->sk_sk); | 1163 | svsk, svsk->sk_sk); |
1151 | 1164 | ||
@@ -1154,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1154 | 1167 | ||
1155 | int svc_addsock(struct svc_serv *serv, | 1168 | int svc_addsock(struct svc_serv *serv, |
1156 | int fd, | 1169 | int fd, |
1157 | char *name_return, | 1170 | char *name_return) |
1158 | int *proto) | ||
1159 | { | 1171 | { |
1160 | int err = 0; | 1172 | int err = 0; |
1161 | struct socket *so = sockfd_lookup(fd, &err); | 1173 | struct socket *so = sockfd_lookup(fd, &err); |
@@ -1190,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv, | |||
1190 | sockfd_put(so); | 1202 | sockfd_put(so); |
1191 | return err; | 1203 | return err; |
1192 | } | 1204 | } |
1193 | if (proto) *proto = so->sk->sk_protocol; | ||
1194 | return one_sock_name(name_return, svsk); | 1205 | return one_sock_name(name_return, svsk); |
1195 | } | 1206 | } |
1196 | EXPORT_SYMBOL_GPL(svc_addsock); | 1207 | EXPORT_SYMBOL_GPL(svc_addsock); |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 74de31a06616..a4756576d687 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | |||
@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
116 | * | 116 | * |
117 | * Assumptions: | 117 | * Assumptions: |
118 | * - chunk[0]->position points to pages[0] at an offset of 0 | 118 | * - chunk[0]->position points to pages[0] at an offset of 0 |
119 | * - pages[] is not physically or virtually contigous and consists of | 119 | * - pages[] is not physically or virtually contiguous and consists of |
120 | * PAGE_SIZE elements. | 120 | * PAGE_SIZE elements. |
121 | * | 121 | * |
122 | * Output: | 122 | * Output: |
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
125 | * chunk in the read list | 125 | * chunk in the read list |
126 | * | 126 | * |
127 | */ | 127 | */ |
128 | static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, | 128 | static int map_read_chunks(struct svcxprt_rdma *xprt, |
129 | struct svc_rqst *rqstp, | 129 | struct svc_rqst *rqstp, |
130 | struct svc_rdma_op_ctxt *head, | 130 | struct svc_rdma_op_ctxt *head, |
131 | struct rpcrdma_msg *rmsgp, | 131 | struct rpcrdma_msg *rmsgp, |
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, | |||
211 | return sge_no; | 211 | return sge_no; |
212 | } | 212 | } |
213 | 213 | ||
214 | static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, | 214 | /* Map a read-chunk-list to an XDR and fast register the page-list. |
215 | struct svc_rdma_op_ctxt *ctxt, | 215 | * |
216 | struct kvec *vec, | 216 | * Assumptions: |
217 | u64 *sgl_offset, | 217 | * - chunk[0] position points to pages[0] at an offset of 0 |
218 | int count) | 218 | * - pages[] will be made physically contiguous by creating a one-off memory |
219 | * region using the fastreg verb. | ||
220 | * - byte_count is # of bytes in read-chunk-list | ||
221 | * - ch_count is # of chunks in read-chunk-list | ||
222 | * | ||
223 | * Output: | ||
224 | * - sge array pointing into pages[] array. | ||
225 | * - chunk_sge array specifying sge index and count for each | ||
226 | * chunk in the read list | ||
227 | */ | ||
228 | static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, | ||
229 | struct svc_rqst *rqstp, | ||
230 | struct svc_rdma_op_ctxt *head, | ||
231 | struct rpcrdma_msg *rmsgp, | ||
232 | struct svc_rdma_req_map *rpl_map, | ||
233 | struct svc_rdma_req_map *chl_map, | ||
234 | int ch_count, | ||
235 | int byte_count) | ||
236 | { | ||
237 | int page_no; | ||
238 | int ch_no; | ||
239 | u32 offset; | ||
240 | struct rpcrdma_read_chunk *ch; | ||
241 | struct svc_rdma_fastreg_mr *frmr; | ||
242 | int ret = 0; | ||
243 | |||
244 | frmr = svc_rdma_get_frmr(xprt); | ||
245 | if (IS_ERR(frmr)) | ||
246 | return -ENOMEM; | ||
247 | |||
248 | head->frmr = frmr; | ||
249 | head->arg.head[0] = rqstp->rq_arg.head[0]; | ||
250 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | ||
251 | head->arg.pages = &head->pages[head->count]; | ||
252 | head->hdr_count = head->count; /* save count of hdr pages */ | ||
253 | head->arg.page_base = 0; | ||
254 | head->arg.page_len = byte_count; | ||
255 | head->arg.len = rqstp->rq_arg.len + byte_count; | ||
256 | head->arg.buflen = rqstp->rq_arg.buflen + byte_count; | ||
257 | |||
258 | /* Fast register the page list */ | ||
259 | frmr->kva = page_address(rqstp->rq_arg.pages[0]); | ||
260 | frmr->direction = DMA_FROM_DEVICE; | ||
261 | frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); | ||
262 | frmr->map_len = byte_count; | ||
263 | frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; | ||
264 | for (page_no = 0; page_no < frmr->page_list_len; page_no++) { | ||
265 | frmr->page_list->page_list[page_no] = | ||
266 | ib_dma_map_single(xprt->sc_cm_id->device, | ||
267 | page_address(rqstp->rq_arg.pages[page_no]), | ||
268 | PAGE_SIZE, DMA_TO_DEVICE); | ||
269 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
270 | frmr->page_list->page_list[page_no])) | ||
271 | goto fatal_err; | ||
272 | atomic_inc(&xprt->sc_dma_used); | ||
273 | head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; | ||
274 | } | ||
275 | head->count += page_no; | ||
276 | |||
277 | /* rq_respages points one past arg pages */ | ||
278 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; | ||
279 | |||
280 | /* Create the reply and chunk maps */ | ||
281 | offset = 0; | ||
282 | ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | ||
283 | for (ch_no = 0; ch_no < ch_count; ch_no++) { | ||
284 | rpl_map->sge[ch_no].iov_base = frmr->kva + offset; | ||
285 | rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length; | ||
286 | chl_map->ch[ch_no].count = 1; | ||
287 | chl_map->ch[ch_no].start = ch_no; | ||
288 | offset += ch->rc_target.rs_length; | ||
289 | ch++; | ||
290 | } | ||
291 | |||
292 | ret = svc_rdma_fastreg(xprt, frmr); | ||
293 | if (ret) | ||
294 | goto fatal_err; | ||
295 | |||
296 | return ch_no; | ||
297 | |||
298 | fatal_err: | ||
299 | printk("svcrdma: error fast registering xdr for xprt %p", xprt); | ||
300 | svc_rdma_put_frmr(xprt, frmr); | ||
301 | return -EIO; | ||
302 | } | ||
303 | |||
304 | static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, | ||
305 | struct svc_rdma_op_ctxt *ctxt, | ||
306 | struct svc_rdma_fastreg_mr *frmr, | ||
307 | struct kvec *vec, | ||
308 | u64 *sgl_offset, | ||
309 | int count) | ||
219 | { | 310 | { |
220 | int i; | 311 | int i; |
221 | 312 | ||
222 | ctxt->count = count; | 313 | ctxt->count = count; |
223 | ctxt->direction = DMA_FROM_DEVICE; | 314 | ctxt->direction = DMA_FROM_DEVICE; |
224 | for (i = 0; i < count; i++) { | 315 | for (i = 0; i < count; i++) { |
225 | atomic_inc(&xprt->sc_dma_used); | 316 | ctxt->sge[i].length = 0; /* in case map fails */ |
226 | ctxt->sge[i].addr = | 317 | if (!frmr) { |
227 | ib_dma_map_single(xprt->sc_cm_id->device, | 318 | ctxt->sge[i].addr = |
228 | vec[i].iov_base, vec[i].iov_len, | 319 | ib_dma_map_single(xprt->sc_cm_id->device, |
229 | DMA_FROM_DEVICE); | 320 | vec[i].iov_base, |
321 | vec[i].iov_len, | ||
322 | DMA_FROM_DEVICE); | ||
323 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
324 | ctxt->sge[i].addr)) | ||
325 | return -EINVAL; | ||
326 | ctxt->sge[i].lkey = xprt->sc_dma_lkey; | ||
327 | atomic_inc(&xprt->sc_dma_used); | ||
328 | } else { | ||
329 | ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; | ||
330 | ctxt->sge[i].lkey = frmr->mr->lkey; | ||
331 | } | ||
230 | ctxt->sge[i].length = vec[i].iov_len; | 332 | ctxt->sge[i].length = vec[i].iov_len; |
231 | ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey; | ||
232 | *sgl_offset = *sgl_offset + vec[i].iov_len; | 333 | *sgl_offset = *sgl_offset + vec[i].iov_len; |
233 | } | 334 | } |
335 | return 0; | ||
234 | } | 336 | } |
235 | 337 | ||
236 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) | 338 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) |
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, | |||
278 | struct svc_rdma_op_ctxt *hdr_ctxt) | 380 | struct svc_rdma_op_ctxt *hdr_ctxt) |
279 | { | 381 | { |
280 | struct ib_send_wr read_wr; | 382 | struct ib_send_wr read_wr; |
383 | struct ib_send_wr inv_wr; | ||
281 | int err = 0; | 384 | int err = 0; |
282 | int ch_no; | 385 | int ch_no; |
283 | int ch_count; | 386 | int ch_count; |
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, | |||
301 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); | 404 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); |
302 | if (ch_count > RPCSVC_MAXPAGES) | 405 | if (ch_count > RPCSVC_MAXPAGES) |
303 | return -EINVAL; | 406 | return -EINVAL; |
304 | sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, | 407 | |
305 | rpl_map, chl_map, | 408 | if (!xprt->sc_frmr_pg_list_len) |
306 | ch_count, byte_count); | 409 | sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, |
410 | rpl_map, chl_map, ch_count, | ||
411 | byte_count); | ||
412 | else | ||
413 | sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, | ||
414 | rpl_map, chl_map, ch_count, | ||
415 | byte_count); | ||
416 | if (sge_count < 0) { | ||
417 | err = -EIO; | ||
418 | goto out; | ||
419 | } | ||
420 | |||
307 | sgl_offset = 0; | 421 | sgl_offset = 0; |
308 | ch_no = 0; | 422 | ch_no = 0; |
309 | 423 | ||
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, | |||
312 | next_sge: | 426 | next_sge: |
313 | ctxt = svc_rdma_get_context(xprt); | 427 | ctxt = svc_rdma_get_context(xprt); |
314 | ctxt->direction = DMA_FROM_DEVICE; | 428 | ctxt->direction = DMA_FROM_DEVICE; |
429 | ctxt->frmr = hdr_ctxt->frmr; | ||
430 | ctxt->read_hdr = NULL; | ||
315 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | 431 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); |
432 | clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
316 | 433 | ||
317 | /* Prepare READ WR */ | 434 | /* Prepare READ WR */ |
318 | memset(&read_wr, 0, sizeof read_wr); | 435 | memset(&read_wr, 0, sizeof read_wr); |
319 | ctxt->wr_op = IB_WR_RDMA_READ; | ||
320 | read_wr.wr_id = (unsigned long)ctxt; | 436 | read_wr.wr_id = (unsigned long)ctxt; |
321 | read_wr.opcode = IB_WR_RDMA_READ; | 437 | read_wr.opcode = IB_WR_RDMA_READ; |
438 | ctxt->wr_op = read_wr.opcode; | ||
322 | read_wr.send_flags = IB_SEND_SIGNALED; | 439 | read_wr.send_flags = IB_SEND_SIGNALED; |
323 | read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; | 440 | read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; |
324 | read_wr.wr.rdma.remote_addr = | 441 | read_wr.wr.rdma.remote_addr = |
@@ -327,10 +444,15 @@ next_sge: | |||
327 | read_wr.sg_list = ctxt->sge; | 444 | read_wr.sg_list = ctxt->sge; |
328 | read_wr.num_sge = | 445 | read_wr.num_sge = |
329 | rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); | 446 | rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); |
330 | rdma_set_ctxt_sge(xprt, ctxt, | 447 | err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, |
331 | &rpl_map->sge[chl_map->ch[ch_no].start], | 448 | &rpl_map->sge[chl_map->ch[ch_no].start], |
332 | &sgl_offset, | 449 | &sgl_offset, |
333 | read_wr.num_sge); | 450 | read_wr.num_sge); |
451 | if (err) { | ||
452 | svc_rdma_unmap_dma(ctxt); | ||
453 | svc_rdma_put_context(ctxt, 0); | ||
454 | goto out; | ||
455 | } | ||
334 | if (((ch+1)->rc_discrim == 0) && | 456 | if (((ch+1)->rc_discrim == 0) && |
335 | (read_wr.num_sge == chl_map->ch[ch_no].count)) { | 457 | (read_wr.num_sge == chl_map->ch[ch_no].count)) { |
336 | /* | 458 | /* |
@@ -339,6 +461,29 @@ next_sge: | |||
339 | * the client and the RPC needs to be enqueued. | 461 | * the client and the RPC needs to be enqueued. |
340 | */ | 462 | */ |
341 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | 463 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); |
464 | if (hdr_ctxt->frmr) { | ||
465 | set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
466 | /* | ||
467 | * Invalidate the local MR used to map the data | ||
468 | * sink. | ||
469 | */ | ||
470 | if (xprt->sc_dev_caps & | ||
471 | SVCRDMA_DEVCAP_READ_W_INV) { | ||
472 | read_wr.opcode = | ||
473 | IB_WR_RDMA_READ_WITH_INV; | ||
474 | ctxt->wr_op = read_wr.opcode; | ||
475 | read_wr.ex.invalidate_rkey = | ||
476 | ctxt->frmr->mr->lkey; | ||
477 | } else { | ||
478 | /* Prepare INVALIDATE WR */ | ||
479 | memset(&inv_wr, 0, sizeof inv_wr); | ||
480 | inv_wr.opcode = IB_WR_LOCAL_INV; | ||
481 | inv_wr.send_flags = IB_SEND_SIGNALED; | ||
482 | inv_wr.ex.invalidate_rkey = | ||
483 | hdr_ctxt->frmr->mr->lkey; | ||
484 | read_wr.next = &inv_wr; | ||
485 | } | ||
486 | } | ||
342 | ctxt->read_hdr = hdr_ctxt; | 487 | ctxt->read_hdr = hdr_ctxt; |
343 | } | 488 | } |
344 | /* Post the read */ | 489 | /* Post the read */ |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 84d328329d98..9a7a8e7ae038 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c | |||
@@ -69,9 +69,127 @@ | |||
69 | * array is only concerned with the reply we are assured that we have | 69 | * array is only concerned with the reply we are assured that we have |
70 | * on extra page for the RPCRMDA header. | 70 | * on extra page for the RPCRMDA header. |
71 | */ | 71 | */ |
72 | static void xdr_to_sge(struct svcxprt_rdma *xprt, | 72 | int fast_reg_xdr(struct svcxprt_rdma *xprt, |
73 | struct xdr_buf *xdr, | 73 | struct xdr_buf *xdr, |
74 | struct svc_rdma_req_map *vec) | 74 | struct svc_rdma_req_map *vec) |
75 | { | ||
76 | int sge_no; | ||
77 | u32 sge_bytes; | ||
78 | u32 page_bytes; | ||
79 | u32 page_off; | ||
80 | int page_no = 0; | ||
81 | u8 *frva; | ||
82 | struct svc_rdma_fastreg_mr *frmr; | ||
83 | |||
84 | frmr = svc_rdma_get_frmr(xprt); | ||
85 | if (IS_ERR(frmr)) | ||
86 | return -ENOMEM; | ||
87 | vec->frmr = frmr; | ||
88 | |||
89 | /* Skip the RPCRDMA header */ | ||
90 | sge_no = 1; | ||
91 | |||
92 | /* Map the head. */ | ||
93 | frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); | ||
94 | vec->sge[sge_no].iov_base = xdr->head[0].iov_base; | ||
95 | vec->sge[sge_no].iov_len = xdr->head[0].iov_len; | ||
96 | vec->count = 2; | ||
97 | sge_no++; | ||
98 | |||
99 | /* Build the FRMR */ | ||
100 | frmr->kva = frva; | ||
101 | frmr->direction = DMA_TO_DEVICE; | ||
102 | frmr->access_flags = 0; | ||
103 | frmr->map_len = PAGE_SIZE; | ||
104 | frmr->page_list_len = 1; | ||
105 | frmr->page_list->page_list[page_no] = | ||
106 | ib_dma_map_single(xprt->sc_cm_id->device, | ||
107 | (void *)xdr->head[0].iov_base, | ||
108 | PAGE_SIZE, DMA_TO_DEVICE); | ||
109 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
110 | frmr->page_list->page_list[page_no])) | ||
111 | goto fatal_err; | ||
112 | atomic_inc(&xprt->sc_dma_used); | ||
113 | |||
114 | page_off = xdr->page_base; | ||
115 | page_bytes = xdr->page_len + page_off; | ||
116 | if (!page_bytes) | ||
117 | goto encode_tail; | ||
118 | |||
119 | /* Map the pages */ | ||
120 | vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; | ||
121 | vec->sge[sge_no].iov_len = page_bytes; | ||
122 | sge_no++; | ||
123 | while (page_bytes) { | ||
124 | struct page *page; | ||
125 | |||
126 | page = xdr->pages[page_no++]; | ||
127 | sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); | ||
128 | page_bytes -= sge_bytes; | ||
129 | |||
130 | frmr->page_list->page_list[page_no] = | ||
131 | ib_dma_map_page(xprt->sc_cm_id->device, page, 0, | ||
132 | PAGE_SIZE, DMA_TO_DEVICE); | ||
133 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
134 | frmr->page_list->page_list[page_no])) | ||
135 | goto fatal_err; | ||
136 | |||
137 | atomic_inc(&xprt->sc_dma_used); | ||
138 | page_off = 0; /* reset for next time through loop */ | ||
139 | frmr->map_len += PAGE_SIZE; | ||
140 | frmr->page_list_len++; | ||
141 | } | ||
142 | vec->count++; | ||
143 | |||
144 | encode_tail: | ||
145 | /* Map tail */ | ||
146 | if (0 == xdr->tail[0].iov_len) | ||
147 | goto done; | ||
148 | |||
149 | vec->count++; | ||
150 | vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; | ||
151 | |||
152 | if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == | ||
153 | ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { | ||
154 | /* | ||
155 | * If head and tail use the same page, we don't need | ||
156 | * to map it again. | ||
157 | */ | ||
158 | vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; | ||
159 | } else { | ||
160 | void *va; | ||
161 | |||
162 | /* Map another page for the tail */ | ||
163 | page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; | ||
164 | va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); | ||
165 | vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; | ||
166 | |||
167 | frmr->page_list->page_list[page_no] = | ||
168 | ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, | ||
169 | DMA_TO_DEVICE); | ||
170 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, | ||
171 | frmr->page_list->page_list[page_no])) | ||
172 | goto fatal_err; | ||
173 | atomic_inc(&xprt->sc_dma_used); | ||
174 | frmr->map_len += PAGE_SIZE; | ||
175 | frmr->page_list_len++; | ||
176 | } | ||
177 | |||
178 | done: | ||
179 | if (svc_rdma_fastreg(xprt, frmr)) | ||
180 | goto fatal_err; | ||
181 | |||
182 | return 0; | ||
183 | |||
184 | fatal_err: | ||
185 | printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); | ||
186 | svc_rdma_put_frmr(xprt, frmr); | ||
187 | return -EIO; | ||
188 | } | ||
189 | |||
190 | static int map_xdr(struct svcxprt_rdma *xprt, | ||
191 | struct xdr_buf *xdr, | ||
192 | struct svc_rdma_req_map *vec) | ||
75 | { | 193 | { |
76 | int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; | 194 | int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; |
77 | int sge_no; | 195 | int sge_no; |
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt, | |||
83 | BUG_ON(xdr->len != | 201 | BUG_ON(xdr->len != |
84 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); | 202 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); |
85 | 203 | ||
204 | if (xprt->sc_frmr_pg_list_len) | ||
205 | return fast_reg_xdr(xprt, xdr, vec); | ||
206 | |||
86 | /* Skip the first sge, this is for the RPCRDMA header */ | 207 | /* Skip the first sge, this is for the RPCRDMA header */ |
87 | sge_no = 1; | 208 | sge_no = 1; |
88 | 209 | ||
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt, | |||
116 | 237 | ||
117 | BUG_ON(sge_no > sge_max); | 238 | BUG_ON(sge_no > sge_max); |
118 | vec->count = sge_no; | 239 | vec->count = sge_no; |
240 | return 0; | ||
119 | } | 241 | } |
120 | 242 | ||
121 | /* Assumptions: | 243 | /* Assumptions: |
244 | * - We are using FRMR | ||
245 | * - or - | ||
122 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE | 246 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE |
123 | */ | 247 | */ |
124 | static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | 248 | static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, |
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | |||
158 | sge_no = 0; | 282 | sge_no = 0; |
159 | 283 | ||
160 | /* Copy the remaining SGE */ | 284 | /* Copy the remaining SGE */ |
161 | while (bc != 0 && xdr_sge_no < vec->count) { | 285 | while (bc != 0) { |
162 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | 286 | sge_bytes = min_t(size_t, |
163 | sge_bytes = min((size_t)bc, | 287 | bc, vec->sge[xdr_sge_no].iov_len-sge_off); |
164 | (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off)); | ||
165 | sge[sge_no].length = sge_bytes; | 288 | sge[sge_no].length = sge_bytes; |
166 | atomic_inc(&xprt->sc_dma_used); | 289 | if (!vec->frmr) { |
167 | sge[sge_no].addr = | 290 | sge[sge_no].addr = |
168 | ib_dma_map_single(xprt->sc_cm_id->device, | 291 | ib_dma_map_single(xprt->sc_cm_id->device, |
169 | (void *) | 292 | (void *) |
170 | vec->sge[xdr_sge_no].iov_base + sge_off, | 293 | vec->sge[xdr_sge_no].iov_base + sge_off, |
171 | sge_bytes, DMA_TO_DEVICE); | 294 | sge_bytes, DMA_TO_DEVICE); |
172 | if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, | 295 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, |
173 | sge[sge_no].addr)) | 296 | sge[sge_no].addr)) |
174 | goto err; | 297 | goto err; |
298 | atomic_inc(&xprt->sc_dma_used); | ||
299 | sge[sge_no].lkey = xprt->sc_dma_lkey; | ||
300 | } else { | ||
301 | sge[sge_no].addr = (unsigned long) | ||
302 | vec->sge[xdr_sge_no].iov_base + sge_off; | ||
303 | sge[sge_no].lkey = vec->frmr->mr->lkey; | ||
304 | } | ||
305 | ctxt->count++; | ||
306 | ctxt->frmr = vec->frmr; | ||
175 | sge_off = 0; | 307 | sge_off = 0; |
176 | sge_no++; | 308 | sge_no++; |
177 | ctxt->count++; | ||
178 | xdr_sge_no++; | 309 | xdr_sge_no++; |
310 | BUG_ON(xdr_sge_no > vec->count); | ||
179 | bc -= sge_bytes; | 311 | bc -= sge_bytes; |
180 | } | 312 | } |
181 | 313 | ||
182 | BUG_ON(bc != 0); | ||
183 | BUG_ON(xdr_sge_no > vec->count); | ||
184 | |||
185 | /* Prepare WRITE WR */ | 314 | /* Prepare WRITE WR */ |
186 | memset(&write_wr, 0, sizeof write_wr); | 315 | memset(&write_wr, 0, sizeof write_wr); |
187 | ctxt->wr_op = IB_WR_RDMA_WRITE; | 316 | ctxt->wr_op = IB_WR_RDMA_WRITE; |
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, | |||
226 | res_ary = (struct rpcrdma_write_array *) | 355 | res_ary = (struct rpcrdma_write_array *) |
227 | &rdma_resp->rm_body.rm_chunks[1]; | 356 | &rdma_resp->rm_body.rm_chunks[1]; |
228 | 357 | ||
229 | max_write = xprt->sc_max_sge * PAGE_SIZE; | 358 | if (vec->frmr) |
359 | max_write = vec->frmr->map_len; | ||
360 | else | ||
361 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
230 | 362 | ||
231 | /* Write chunks start at the pagelist */ | 363 | /* Write chunks start at the pagelist */ |
232 | for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; | 364 | for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; |
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, | |||
297 | res_ary = (struct rpcrdma_write_array *) | 429 | res_ary = (struct rpcrdma_write_array *) |
298 | &rdma_resp->rm_body.rm_chunks[2]; | 430 | &rdma_resp->rm_body.rm_chunks[2]; |
299 | 431 | ||
300 | max_write = xprt->sc_max_sge * PAGE_SIZE; | 432 | if (vec->frmr) |
433 | max_write = vec->frmr->map_len; | ||
434 | else | ||
435 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
301 | 436 | ||
302 | /* xdr offset starts at RPC message */ | 437 | /* xdr offset starts at RPC message */ |
303 | for (xdr_off = 0, chunk_no = 0; | 438 | for (xdr_off = 0, chunk_no = 0; |
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, | |||
307 | ch = &arg_ary->wc_array[chunk_no].wc_target; | 442 | ch = &arg_ary->wc_array[chunk_no].wc_target; |
308 | write_len = min(xfer_len, ch->rs_length); | 443 | write_len = min(xfer_len, ch->rs_length); |
309 | 444 | ||
310 | |||
311 | /* Prepare the reply chunk given the length actually | 445 | /* Prepare the reply chunk given the length actually |
312 | * written */ | 446 | * written */ |
313 | rs_offset = get_unaligned(&(ch->rs_offset)); | 447 | rs_offset = get_unaligned(&(ch->rs_offset)); |
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
366 | int byte_count) | 500 | int byte_count) |
367 | { | 501 | { |
368 | struct ib_send_wr send_wr; | 502 | struct ib_send_wr send_wr; |
503 | struct ib_send_wr inv_wr; | ||
369 | int sge_no; | 504 | int sge_no; |
370 | int sge_bytes; | 505 | int sge_bytes; |
371 | int page_no; | 506 | int page_no; |
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
385 | /* Prepare the context */ | 520 | /* Prepare the context */ |
386 | ctxt->pages[0] = page; | 521 | ctxt->pages[0] = page; |
387 | ctxt->count = 1; | 522 | ctxt->count = 1; |
523 | ctxt->frmr = vec->frmr; | ||
524 | if (vec->frmr) | ||
525 | set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
526 | else | ||
527 | clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); | ||
388 | 528 | ||
389 | /* Prepare the SGE for the RPCRDMA Header */ | 529 | /* Prepare the SGE for the RPCRDMA Header */ |
390 | atomic_inc(&rdma->sc_dma_used); | ||
391 | ctxt->sge[0].addr = | 530 | ctxt->sge[0].addr = |
392 | ib_dma_map_page(rdma->sc_cm_id->device, | 531 | ib_dma_map_page(rdma->sc_cm_id->device, |
393 | page, 0, PAGE_SIZE, DMA_TO_DEVICE); | 532 | page, 0, PAGE_SIZE, DMA_TO_DEVICE); |
533 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) | ||
534 | goto err; | ||
535 | atomic_inc(&rdma->sc_dma_used); | ||
536 | |||
394 | ctxt->direction = DMA_TO_DEVICE; | 537 | ctxt->direction = DMA_TO_DEVICE; |
538 | |||
395 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); | 539 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); |
396 | ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; | 540 | ctxt->sge[0].lkey = rdma->sc_dma_lkey; |
397 | 541 | ||
398 | /* Determine how many of our SGE are to be transmitted */ | 542 | /* Determine how many of our SGE are to be transmitted */ |
399 | for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { | 543 | for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { |
400 | sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); | 544 | sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); |
401 | byte_count -= sge_bytes; | 545 | byte_count -= sge_bytes; |
402 | atomic_inc(&rdma->sc_dma_used); | 546 | if (!vec->frmr) { |
403 | ctxt->sge[sge_no].addr = | 547 | ctxt->sge[sge_no].addr = |
404 | ib_dma_map_single(rdma->sc_cm_id->device, | 548 | ib_dma_map_single(rdma->sc_cm_id->device, |
405 | vec->sge[sge_no].iov_base, | 549 | vec->sge[sge_no].iov_base, |
406 | sge_bytes, DMA_TO_DEVICE); | 550 | sge_bytes, DMA_TO_DEVICE); |
551 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, | ||
552 | ctxt->sge[sge_no].addr)) | ||
553 | goto err; | ||
554 | atomic_inc(&rdma->sc_dma_used); | ||
555 | ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; | ||
556 | } else { | ||
557 | ctxt->sge[sge_no].addr = (unsigned long) | ||
558 | vec->sge[sge_no].iov_base; | ||
559 | ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; | ||
560 | } | ||
407 | ctxt->sge[sge_no].length = sge_bytes; | 561 | ctxt->sge[sge_no].length = sge_bytes; |
408 | ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey; | ||
409 | } | 562 | } |
410 | BUG_ON(byte_count != 0); | 563 | BUG_ON(byte_count != 0); |
411 | 564 | ||
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
417 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; | 570 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; |
418 | ctxt->count++; | 571 | ctxt->count++; |
419 | rqstp->rq_respages[page_no] = NULL; | 572 | rqstp->rq_respages[page_no] = NULL; |
420 | /* If there are more pages than SGE, terminate SGE list */ | 573 | /* |
574 | * If there are more pages than SGE, terminate SGE | ||
575 | * list so that svc_rdma_unmap_dma doesn't attempt to | ||
576 | * unmap garbage. | ||
577 | */ | ||
421 | if (page_no+1 >= sge_no) | 578 | if (page_no+1 >= sge_no) |
422 | ctxt->sge[page_no+1].length = 0; | 579 | ctxt->sge[page_no+1].length = 0; |
423 | } | 580 | } |
424 | BUG_ON(sge_no > rdma->sc_max_sge); | 581 | BUG_ON(sge_no > rdma->sc_max_sge); |
582 | BUG_ON(sge_no > ctxt->count); | ||
425 | memset(&send_wr, 0, sizeof send_wr); | 583 | memset(&send_wr, 0, sizeof send_wr); |
426 | ctxt->wr_op = IB_WR_SEND; | 584 | ctxt->wr_op = IB_WR_SEND; |
427 | send_wr.wr_id = (unsigned long)ctxt; | 585 | send_wr.wr_id = (unsigned long)ctxt; |
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
429 | send_wr.num_sge = sge_no; | 587 | send_wr.num_sge = sge_no; |
430 | send_wr.opcode = IB_WR_SEND; | 588 | send_wr.opcode = IB_WR_SEND; |
431 | send_wr.send_flags = IB_SEND_SIGNALED; | 589 | send_wr.send_flags = IB_SEND_SIGNALED; |
590 | if (vec->frmr) { | ||
591 | /* Prepare INVALIDATE WR */ | ||
592 | memset(&inv_wr, 0, sizeof inv_wr); | ||
593 | inv_wr.opcode = IB_WR_LOCAL_INV; | ||
594 | inv_wr.send_flags = IB_SEND_SIGNALED; | ||
595 | inv_wr.ex.invalidate_rkey = | ||
596 | vec->frmr->mr->lkey; | ||
597 | send_wr.next = &inv_wr; | ||
598 | } | ||
432 | 599 | ||
433 | ret = svc_rdma_send(rdma, &send_wr); | 600 | ret = svc_rdma_send(rdma, &send_wr); |
434 | if (ret) | 601 | if (ret) |
435 | svc_rdma_put_context(ctxt, 1); | 602 | goto err; |
436 | 603 | ||
437 | return ret; | 604 | return 0; |
605 | |||
606 | err: | ||
607 | svc_rdma_put_frmr(rdma, vec->frmr); | ||
608 | svc_rdma_put_context(ctxt, 1); | ||
609 | return -EIO; | ||
438 | } | 610 | } |
439 | 611 | ||
440 | void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) | 612 | void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) |
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
477 | ctxt = svc_rdma_get_context(rdma); | 649 | ctxt = svc_rdma_get_context(rdma); |
478 | ctxt->direction = DMA_TO_DEVICE; | 650 | ctxt->direction = DMA_TO_DEVICE; |
479 | vec = svc_rdma_get_req_map(); | 651 | vec = svc_rdma_get_req_map(); |
480 | xdr_to_sge(rdma, &rqstp->rq_res, vec); | 652 | ret = map_xdr(rdma, &rqstp->rq_res, vec); |
481 | 653 | if (ret) | |
654 | goto err0; | ||
482 | inline_bytes = rqstp->rq_res.len; | 655 | inline_bytes = rqstp->rq_res.len; |
483 | 656 | ||
484 | /* Create the RDMA response header */ | 657 | /* Create the RDMA response header */ |
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
498 | if (ret < 0) { | 671 | if (ret < 0) { |
499 | printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", | 672 | printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", |
500 | ret); | 673 | ret); |
501 | goto error; | 674 | goto err1; |
502 | } | 675 | } |
503 | inline_bytes -= ret; | 676 | inline_bytes -= ret; |
504 | 677 | ||
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
508 | if (ret < 0) { | 681 | if (ret < 0) { |
509 | printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", | 682 | printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", |
510 | ret); | 683 | ret); |
511 | goto error; | 684 | goto err1; |
512 | } | 685 | } |
513 | inline_bytes -= ret; | 686 | inline_bytes -= ret; |
514 | 687 | ||
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
517 | svc_rdma_put_req_map(vec); | 690 | svc_rdma_put_req_map(vec); |
518 | dprintk("svcrdma: send_reply returns %d\n", ret); | 691 | dprintk("svcrdma: send_reply returns %d\n", ret); |
519 | return ret; | 692 | return ret; |
520 | error: | 693 | |
694 | err1: | ||
695 | put_page(res_page); | ||
696 | err0: | ||
521 | svc_rdma_put_req_map(vec); | 697 | svc_rdma_put_req_map(vec); |
522 | svc_rdma_put_context(ctxt, 0); | 698 | svc_rdma_put_context(ctxt, 0); |
523 | put_page(res_page); | ||
524 | return ret; | 699 | return ret; |
525 | } | 700 | } |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 900cb69728c6..6fb493cbd29f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) | |||
100 | ctxt->xprt = xprt; | 100 | ctxt->xprt = xprt; |
101 | INIT_LIST_HEAD(&ctxt->dto_q); | 101 | INIT_LIST_HEAD(&ctxt->dto_q); |
102 | ctxt->count = 0; | 102 | ctxt->count = 0; |
103 | ctxt->frmr = NULL; | ||
103 | atomic_inc(&xprt->sc_ctxt_used); | 104 | atomic_inc(&xprt->sc_ctxt_used); |
104 | return ctxt; | 105 | return ctxt; |
105 | } | 106 | } |
106 | 107 | ||
107 | static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) | 108 | void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) |
108 | { | 109 | { |
109 | struct svcxprt_rdma *xprt = ctxt->xprt; | 110 | struct svcxprt_rdma *xprt = ctxt->xprt; |
110 | int i; | 111 | int i; |
111 | for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { | 112 | for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { |
112 | atomic_dec(&xprt->sc_dma_used); | 113 | /* |
113 | ib_dma_unmap_single(xprt->sc_cm_id->device, | 114 | * Unmap the DMA addr in the SGE if the lkey matches |
114 | ctxt->sge[i].addr, | 115 | * the sc_dma_lkey, otherwise, ignore it since it is |
115 | ctxt->sge[i].length, | 116 | * an FRMR lkey and will be unmapped later when the |
116 | ctxt->direction); | 117 | * last WR that uses it completes. |
118 | */ | ||
119 | if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { | ||
120 | atomic_dec(&xprt->sc_dma_used); | ||
121 | ib_dma_unmap_single(xprt->sc_cm_id->device, | ||
122 | ctxt->sge[i].addr, | ||
123 | ctxt->sge[i].length, | ||
124 | ctxt->direction); | ||
125 | } | ||
117 | } | 126 | } |
118 | } | 127 | } |
119 | 128 | ||
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) | |||
150 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | 159 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); |
151 | } | 160 | } |
152 | map->count = 0; | 161 | map->count = 0; |
162 | map->frmr = NULL; | ||
153 | return map; | 163 | return map; |
154 | } | 164 | } |
155 | 165 | ||
@@ -316,6 +326,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) | |||
316 | } | 326 | } |
317 | 327 | ||
318 | /* | 328 | /* |
329 | * Process a completion context | ||
330 | */ | ||
331 | static void process_context(struct svcxprt_rdma *xprt, | ||
332 | struct svc_rdma_op_ctxt *ctxt) | ||
333 | { | ||
334 | svc_rdma_unmap_dma(ctxt); | ||
335 | |||
336 | switch (ctxt->wr_op) { | ||
337 | case IB_WR_SEND: | ||
338 | if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) | ||
339 | svc_rdma_put_frmr(xprt, ctxt->frmr); | ||
340 | svc_rdma_put_context(ctxt, 1); | ||
341 | break; | ||
342 | |||
343 | case IB_WR_RDMA_WRITE: | ||
344 | svc_rdma_put_context(ctxt, 0); | ||
345 | break; | ||
346 | |||
347 | case IB_WR_RDMA_READ: | ||
348 | case IB_WR_RDMA_READ_WITH_INV: | ||
349 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | ||
350 | struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; | ||
351 | BUG_ON(!read_hdr); | ||
352 | if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) | ||
353 | svc_rdma_put_frmr(xprt, ctxt->frmr); | ||
354 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
355 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
356 | list_add_tail(&read_hdr->dto_q, | ||
357 | &xprt->sc_read_complete_q); | ||
358 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
359 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
360 | } | ||
361 | svc_rdma_put_context(ctxt, 0); | ||
362 | break; | ||
363 | |||
364 | default: | ||
365 | printk(KERN_ERR "svcrdma: unexpected completion type, " | ||
366 | "opcode=%d\n", | ||
367 | ctxt->wr_op); | ||
368 | break; | ||
369 | } | ||
370 | } | ||
371 | |||
372 | /* | ||
319 | * Send Queue Completion Handler - potentially called on interrupt context. | 373 | * Send Queue Completion Handler - potentially called on interrupt context. |
320 | * | 374 | * |
321 | * Note that caller must hold a transport reference. | 375 | * Note that caller must hold a transport reference. |
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) | |||
327 | struct ib_cq *cq = xprt->sc_sq_cq; | 381 | struct ib_cq *cq = xprt->sc_sq_cq; |
328 | int ret; | 382 | int ret; |
329 | 383 | ||
330 | |||
331 | if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) | 384 | if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) |
332 | return; | 385 | return; |
333 | 386 | ||
334 | ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); | 387 | ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); |
335 | atomic_inc(&rdma_stat_sq_poll); | 388 | atomic_inc(&rdma_stat_sq_poll); |
336 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { | 389 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { |
337 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; | ||
338 | xprt = ctxt->xprt; | ||
339 | |||
340 | svc_rdma_unmap_dma(ctxt); | ||
341 | if (wc.status != IB_WC_SUCCESS) | 390 | if (wc.status != IB_WC_SUCCESS) |
342 | /* Close the transport */ | 391 | /* Close the transport */ |
343 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | 392 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); |
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) | |||
346 | atomic_dec(&xprt->sc_sq_count); | 395 | atomic_dec(&xprt->sc_sq_count); |
347 | wake_up(&xprt->sc_send_wait); | 396 | wake_up(&xprt->sc_send_wait); |
348 | 397 | ||
349 | switch (ctxt->wr_op) { | 398 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; |
350 | case IB_WR_SEND: | 399 | if (ctxt) |
351 | svc_rdma_put_context(ctxt, 1); | 400 | process_context(xprt, ctxt); |
352 | break; | ||
353 | |||
354 | case IB_WR_RDMA_WRITE: | ||
355 | svc_rdma_put_context(ctxt, 0); | ||
356 | break; | ||
357 | |||
358 | case IB_WR_RDMA_READ: | ||
359 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | ||
360 | struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; | ||
361 | BUG_ON(!read_hdr); | ||
362 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
363 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
364 | list_add_tail(&read_hdr->dto_q, | ||
365 | &xprt->sc_read_complete_q); | ||
366 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
367 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
368 | } | ||
369 | svc_rdma_put_context(ctxt, 0); | ||
370 | break; | ||
371 | 401 | ||
372 | default: | ||
373 | printk(KERN_ERR "svcrdma: unexpected completion type, " | ||
374 | "opcode=%d, status=%d\n", | ||
375 | wc.opcode, wc.status); | ||
376 | break; | ||
377 | } | ||
378 | svc_xprt_put(&xprt->sc_xprt); | 402 | svc_xprt_put(&xprt->sc_xprt); |
379 | } | 403 | } |
380 | 404 | ||
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, | |||
425 | INIT_LIST_HEAD(&cma_xprt->sc_dto_q); | 449 | INIT_LIST_HEAD(&cma_xprt->sc_dto_q); |
426 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); | 450 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); |
427 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); | 451 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); |
452 | INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); | ||
428 | init_waitqueue_head(&cma_xprt->sc_send_wait); | 453 | init_waitqueue_head(&cma_xprt->sc_send_wait); |
429 | 454 | ||
430 | spin_lock_init(&cma_xprt->sc_lock); | 455 | spin_lock_init(&cma_xprt->sc_lock); |
431 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); | 456 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); |
457 | spin_lock_init(&cma_xprt->sc_frmr_q_lock); | ||
432 | 458 | ||
433 | cma_xprt->sc_ord = svcrdma_ord; | 459 | cma_xprt->sc_ord = svcrdma_ord; |
434 | 460 | ||
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
462 | struct ib_recv_wr recv_wr, *bad_recv_wr; | 488 | struct ib_recv_wr recv_wr, *bad_recv_wr; |
463 | struct svc_rdma_op_ctxt *ctxt; | 489 | struct svc_rdma_op_ctxt *ctxt; |
464 | struct page *page; | 490 | struct page *page; |
465 | unsigned long pa; | 491 | dma_addr_t pa; |
466 | int sge_no; | 492 | int sge_no; |
467 | int buflen; | 493 | int buflen; |
468 | int ret; | 494 | int ret; |
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
474 | BUG_ON(sge_no >= xprt->sc_max_sge); | 500 | BUG_ON(sge_no >= xprt->sc_max_sge); |
475 | page = svc_rdma_get_page(); | 501 | page = svc_rdma_get_page(); |
476 | ctxt->pages[sge_no] = page; | 502 | ctxt->pages[sge_no] = page; |
477 | atomic_inc(&xprt->sc_dma_used); | ||
478 | pa = ib_dma_map_page(xprt->sc_cm_id->device, | 503 | pa = ib_dma_map_page(xprt->sc_cm_id->device, |
479 | page, 0, PAGE_SIZE, | 504 | page, 0, PAGE_SIZE, |
480 | DMA_FROM_DEVICE); | 505 | DMA_FROM_DEVICE); |
506 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) | ||
507 | goto err_put_ctxt; | ||
508 | atomic_inc(&xprt->sc_dma_used); | ||
481 | ctxt->sge[sge_no].addr = pa; | 509 | ctxt->sge[sge_no].addr = pa; |
482 | ctxt->sge[sge_no].length = PAGE_SIZE; | 510 | ctxt->sge[sge_no].length = PAGE_SIZE; |
483 | ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | 511 | ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; |
484 | buflen += PAGE_SIZE; | 512 | buflen += PAGE_SIZE; |
485 | } | 513 | } |
486 | ctxt->count = sge_no; | 514 | ctxt->count = sge_no; |
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
496 | svc_rdma_put_context(ctxt, 1); | 524 | svc_rdma_put_context(ctxt, 1); |
497 | } | 525 | } |
498 | return ret; | 526 | return ret; |
527 | |||
528 | err_put_ctxt: | ||
529 | svc_rdma_put_context(ctxt, 1); | ||
530 | return -ENOMEM; | ||
499 | } | 531 | } |
500 | 532 | ||
501 | /* | 533 | /* |
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, | |||
566 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " | 598 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " |
567 | "event=%d\n", cma_id, cma_id->context, event->event); | 599 | "event=%d\n", cma_id, cma_id->context, event->event); |
568 | handle_connect_req(cma_id, | 600 | handle_connect_req(cma_id, |
569 | event->param.conn.responder_resources); | 601 | event->param.conn.initiator_depth); |
570 | break; | 602 | break; |
571 | 603 | ||
572 | case RDMA_CM_EVENT_ESTABLISHED: | 604 | case RDMA_CM_EVENT_ESTABLISHED: |
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | |||
686 | return ERR_PTR(ret); | 718 | return ERR_PTR(ret); |
687 | } | 719 | } |
688 | 720 | ||
721 | static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) | ||
722 | { | ||
723 | struct ib_mr *mr; | ||
724 | struct ib_fast_reg_page_list *pl; | ||
725 | struct svc_rdma_fastreg_mr *frmr; | ||
726 | |||
727 | frmr = kmalloc(sizeof(*frmr), GFP_KERNEL); | ||
728 | if (!frmr) | ||
729 | goto err; | ||
730 | |||
731 | mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); | ||
732 | if (!mr) | ||
733 | goto err_free_frmr; | ||
734 | |||
735 | pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, | ||
736 | RPCSVC_MAXPAGES); | ||
737 | if (!pl) | ||
738 | goto err_free_mr; | ||
739 | |||
740 | frmr->mr = mr; | ||
741 | frmr->page_list = pl; | ||
742 | INIT_LIST_HEAD(&frmr->frmr_list); | ||
743 | return frmr; | ||
744 | |||
745 | err_free_mr: | ||
746 | ib_dereg_mr(mr); | ||
747 | err_free_frmr: | ||
748 | kfree(frmr); | ||
749 | err: | ||
750 | return ERR_PTR(-ENOMEM); | ||
751 | } | ||
752 | |||
753 | static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) | ||
754 | { | ||
755 | struct svc_rdma_fastreg_mr *frmr; | ||
756 | |||
757 | while (!list_empty(&xprt->sc_frmr_q)) { | ||
758 | frmr = list_entry(xprt->sc_frmr_q.next, | ||
759 | struct svc_rdma_fastreg_mr, frmr_list); | ||
760 | list_del_init(&frmr->frmr_list); | ||
761 | ib_dereg_mr(frmr->mr); | ||
762 | ib_free_fast_reg_page_list(frmr->page_list); | ||
763 | kfree(frmr); | ||
764 | } | ||
765 | } | ||
766 | |||
767 | struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) | ||
768 | { | ||
769 | struct svc_rdma_fastreg_mr *frmr = NULL; | ||
770 | |||
771 | spin_lock_bh(&rdma->sc_frmr_q_lock); | ||
772 | if (!list_empty(&rdma->sc_frmr_q)) { | ||
773 | frmr = list_entry(rdma->sc_frmr_q.next, | ||
774 | struct svc_rdma_fastreg_mr, frmr_list); | ||
775 | list_del_init(&frmr->frmr_list); | ||
776 | frmr->map_len = 0; | ||
777 | frmr->page_list_len = 0; | ||
778 | } | ||
779 | spin_unlock_bh(&rdma->sc_frmr_q_lock); | ||
780 | if (frmr) | ||
781 | return frmr; | ||
782 | |||
783 | return rdma_alloc_frmr(rdma); | ||
784 | } | ||
785 | |||
786 | static void frmr_unmap_dma(struct svcxprt_rdma *xprt, | ||
787 | struct svc_rdma_fastreg_mr *frmr) | ||
788 | { | ||
789 | int page_no; | ||
790 | for (page_no = 0; page_no < frmr->page_list_len; page_no++) { | ||
791 | dma_addr_t addr = frmr->page_list->page_list[page_no]; | ||
792 | if (ib_dma_mapping_error(frmr->mr->device, addr)) | ||
793 | continue; | ||
794 | atomic_dec(&xprt->sc_dma_used); | ||
795 | ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, | ||
796 | frmr->direction); | ||
797 | } | ||
798 | } | ||
799 | |||
800 | void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, | ||
801 | struct svc_rdma_fastreg_mr *frmr) | ||
802 | { | ||
803 | if (frmr) { | ||
804 | frmr_unmap_dma(rdma, frmr); | ||
805 | spin_lock_bh(&rdma->sc_frmr_q_lock); | ||
806 | BUG_ON(!list_empty(&frmr->frmr_list)); | ||
807 | list_add(&frmr->frmr_list, &rdma->sc_frmr_q); | ||
808 | spin_unlock_bh(&rdma->sc_frmr_q_lock); | ||
809 | } | ||
810 | } | ||
811 | |||
689 | /* | 812 | /* |
690 | * This is the xpo_recvfrom function for listening endpoints. Its | 813 | * This is the xpo_recvfrom function for listening endpoints. Its |
691 | * purpose is to accept incoming connections. The CMA callback handler | 814 | * purpose is to accept incoming connections. The CMA callback handler |
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
704 | struct rdma_conn_param conn_param; | 827 | struct rdma_conn_param conn_param; |
705 | struct ib_qp_init_attr qp_attr; | 828 | struct ib_qp_init_attr qp_attr; |
706 | struct ib_device_attr devattr; | 829 | struct ib_device_attr devattr; |
830 | int dma_mr_acc; | ||
831 | int need_dma_mr; | ||
707 | int ret; | 832 | int ret; |
708 | int i; | 833 | int i; |
709 | 834 | ||
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
819 | } | 944 | } |
820 | newxprt->sc_qp = newxprt->sc_cm_id->qp; | 945 | newxprt->sc_qp = newxprt->sc_cm_id->qp; |
821 | 946 | ||
822 | /* Register all of physical memory */ | 947 | /* |
823 | newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, | 948 | * Use the most secure set of MR resources based on the |
824 | IB_ACCESS_LOCAL_WRITE | | 949 | * transport type and available memory management features in |
825 | IB_ACCESS_REMOTE_WRITE); | 950 | * the device. Here's the table implemented below: |
826 | if (IS_ERR(newxprt->sc_phys_mr)) { | 951 | * |
827 | dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); | 952 | * Fast Global DMA Remote WR |
953 | * Reg LKEY MR Access | ||
954 | * Sup'd Sup'd Needed Needed | ||
955 | * | ||
956 | * IWARP N N Y Y | ||
957 | * N Y Y Y | ||
958 | * Y N Y N | ||
959 | * Y Y N - | ||
960 | * | ||
961 | * IB N N Y N | ||
962 | * N Y N - | ||
963 | * Y N Y N | ||
964 | * Y Y N - | ||
965 | * | ||
966 | * NB: iWARP requires remote write access for the data sink | ||
967 | * of an RDMA_READ. IB does not. | ||
968 | */ | ||
969 | if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { | ||
970 | newxprt->sc_frmr_pg_list_len = | ||
971 | devattr.max_fast_reg_page_list_len; | ||
972 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; | ||
973 | } | ||
974 | |||
975 | /* | ||
976 | * Determine if a DMA MR is required and if so, what privs are required | ||
977 | */ | ||
978 | switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) { | ||
979 | case RDMA_TRANSPORT_IWARP: | ||
980 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; | ||
981 | if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { | ||
982 | need_dma_mr = 1; | ||
983 | dma_mr_acc = | ||
984 | (IB_ACCESS_LOCAL_WRITE | | ||
985 | IB_ACCESS_REMOTE_WRITE); | ||
986 | } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
987 | need_dma_mr = 1; | ||
988 | dma_mr_acc = IB_ACCESS_LOCAL_WRITE; | ||
989 | } else | ||
990 | need_dma_mr = 0; | ||
991 | break; | ||
992 | case RDMA_TRANSPORT_IB: | ||
993 | if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
994 | need_dma_mr = 1; | ||
995 | dma_mr_acc = IB_ACCESS_LOCAL_WRITE; | ||
996 | } else | ||
997 | need_dma_mr = 0; | ||
998 | break; | ||
999 | default: | ||
828 | goto errout; | 1000 | goto errout; |
829 | } | 1001 | } |
830 | 1002 | ||
1003 | /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ | ||
1004 | if (need_dma_mr) { | ||
1005 | /* Register all of physical memory */ | ||
1006 | newxprt->sc_phys_mr = | ||
1007 | ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc); | ||
1008 | if (IS_ERR(newxprt->sc_phys_mr)) { | ||
1009 | dprintk("svcrdma: Failed to create DMA MR ret=%d\n", | ||
1010 | ret); | ||
1011 | goto errout; | ||
1012 | } | ||
1013 | newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey; | ||
1014 | } else | ||
1015 | newxprt->sc_dma_lkey = | ||
1016 | newxprt->sc_cm_id->device->local_dma_lkey; | ||
1017 | |||
831 | /* Post receive buffers */ | 1018 | /* Post receive buffers */ |
832 | for (i = 0; i < newxprt->sc_max_requests; i++) { | 1019 | for (i = 0; i < newxprt->sc_max_requests; i++) { |
833 | ret = svc_rdma_post_recv(newxprt); | 1020 | ret = svc_rdma_post_recv(newxprt); |
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work) | |||
961 | WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); | 1148 | WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); |
962 | WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); | 1149 | WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); |
963 | 1150 | ||
1151 | /* De-allocate fastreg mr */ | ||
1152 | rdma_dealloc_frmr_q(rdma); | ||
1153 | |||
964 | /* Destroy the QP if present (not a listener) */ | 1154 | /* Destroy the QP if present (not a listener) */ |
965 | if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) | 1155 | if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) |
966 | ib_destroy_qp(rdma->sc_qp); | 1156 | ib_destroy_qp(rdma->sc_qp); |
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) | |||
1014 | return 1; | 1204 | return 1; |
1015 | } | 1205 | } |
1016 | 1206 | ||
1207 | /* | ||
1208 | * Attempt to register the kvec representing the RPC memory with the | ||
1209 | * device. | ||
1210 | * | ||
1211 | * Returns: | ||
1212 | * 0 : svc_rdma_send() reported success for the FASTREG work | ||
1213 | * request built from the frmr. | ||
1214 | * <0 : svc_rdma_send() failed, e.g. -ENOTCONN when the transport | ||
1215 | * is marked XPT_CLOSE. | ||
1216 | */ | ||
1217 | int svc_rdma_fastreg(struct svcxprt_rdma *xprt, | ||
1218 | struct svc_rdma_fastreg_mr *frmr) | ||
1219 | { | ||
1220 | struct ib_send_wr fastreg_wr; | ||
1221 | u8 key; | ||
1222 | |||
1223 | /* Bump the key */ | ||
1224 | key = (u8)(frmr->mr->lkey & 0x000000FF); | ||
1225 | ib_update_fast_reg_key(frmr->mr, ++key); | ||
1226 | |||
1227 | /* Prepare FASTREG WR */ | ||
1228 | memset(&fastreg_wr, 0, sizeof fastreg_wr); | ||
1229 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
1230 | fastreg_wr.send_flags = IB_SEND_SIGNALED; | ||
1231 | fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; | ||
1232 | fastreg_wr.wr.fast_reg.page_list = frmr->page_list; | ||
1233 | fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; | ||
1234 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1235 | fastreg_wr.wr.fast_reg.length = frmr->map_len; | ||
1236 | fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; | ||
1237 | fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; | ||
1238 | return svc_rdma_send(xprt, &fastreg_wr); | ||
1239 | } | ||
1240 | |||
1017 | int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) | 1241 | int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) |
1018 | { | 1242 | { |
1019 | struct ib_send_wr *bad_wr; | 1243 | struct ib_send_wr *bad_wr, *n_wr; |
1244 | int wr_count; | ||
1245 | int i; | ||
1020 | int ret; | 1246 | int ret; |
1021 | 1247 | ||
1022 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) | 1248 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) |
1023 | return -ENOTCONN; | 1249 | return -ENOTCONN; |
1024 | 1250 | ||
1025 | BUG_ON(wr->send_flags != IB_SEND_SIGNALED); | 1251 | BUG_ON(wr->send_flags != IB_SEND_SIGNALED); |
1026 | BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != | 1252 | wr_count = 1; |
1027 | wr->opcode); | 1253 | for (n_wr = wr->next; n_wr; n_wr = n_wr->next) |
1254 | wr_count++; | ||
1255 | |||
1028 | /* If the SQ is full, wait until an SQ entry is available */ | 1256 | /* If the SQ is full, wait until an SQ entry is available */ |
1029 | while (1) { | 1257 | while (1) { |
1030 | spin_lock_bh(&xprt->sc_lock); | 1258 | spin_lock_bh(&xprt->sc_lock); |
1031 | if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { | 1259 | if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) { |
1032 | spin_unlock_bh(&xprt->sc_lock); | 1260 | spin_unlock_bh(&xprt->sc_lock); |
1033 | atomic_inc(&rdma_stat_sq_starve); | 1261 | atomic_inc(&rdma_stat_sq_starve); |
1034 | 1262 | ||
@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) | |||
1043 | return 0; | 1271 | return 0; |
1044 | continue; | 1272 | continue; |
1045 | } | 1273 | } |
1046 | /* Bumped used SQ WR count and post */ | 1274 | /* Take a transport ref for each WR posted */ |
1047 | svc_xprt_get(&xprt->sc_xprt); | 1275 | for (i = 0; i < wr_count; i++) |
1276 | svc_xprt_get(&xprt->sc_xprt); | ||
1277 | |||
1278 | /* Bump used SQ WR count and post */ | ||
1279 | atomic_add(wr_count, &xprt->sc_sq_count); | ||
1048 | ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); | 1280 | ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); |
1049 | if (!ret) | 1281 | if (ret) { |
1050 | atomic_inc(&xprt->sc_sq_count); | 1282 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); |
1051 | else { | 1283 | atomic_sub(wr_count, &xprt->sc_sq_count); |
1052 | svc_xprt_put(&xprt->sc_xprt); | 1284 | for (i = 0; i < wr_count; i ++) |
1285 | svc_xprt_put(&xprt->sc_xprt); | ||
1053 | dprintk("svcrdma: failed to post SQ WR rc=%d, " | 1286 | dprintk("svcrdma: failed to post SQ WR rc=%d, " |
1054 | "sc_sq_count=%d, sc_sq_depth=%d\n", | 1287 | "sc_sq_count=%d, sc_sq_depth=%d\n", |
1055 | ret, atomic_read(&xprt->sc_sq_count), | 1288 | ret, atomic_read(&xprt->sc_sq_count), |
1056 | xprt->sc_sq_depth); | 1289 | xprt->sc_sq_depth); |
1057 | } | 1290 | } |
1058 | spin_unlock_bh(&xprt->sc_lock); | 1291 | spin_unlock_bh(&xprt->sc_lock); |
1292 | if (ret) | ||
1293 | wake_up(&xprt->sc_send_wait); | ||
1059 | break; | 1294 | break; |
1060 | } | 1295 | } |
1061 | return ret; | 1296 | return ret; |
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | |||
1079 | length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); | 1314 | length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); |
1080 | 1315 | ||
1081 | /* Prepare SGE for local address */ | 1316 | /* Prepare SGE for local address */ |
1082 | atomic_inc(&xprt->sc_dma_used); | ||
1083 | sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, | 1317 | sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, |
1084 | p, 0, PAGE_SIZE, DMA_FROM_DEVICE); | 1318 | p, 0, PAGE_SIZE, DMA_FROM_DEVICE); |
1085 | sge.lkey = xprt->sc_phys_mr->lkey; | 1319 | if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { |
1320 | put_page(p); | ||
1321 | return; | ||
1322 | } | ||
1323 | atomic_inc(&xprt->sc_dma_used); | ||
1324 | sge.lkey = xprt->sc_dma_lkey; | ||
1086 | sge.length = length; | 1325 | sge.length = length; |
1087 | 1326 | ||
1088 | ctxt = svc_rdma_get_context(xprt); | 1327 | ctxt = svc_rdma_get_context(xprt); |
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | |||
1103 | if (ret) { | 1342 | if (ret) { |
1104 | dprintk("svcrdma: Error %d posting send for protocol error\n", | 1343 | dprintk("svcrdma: Error %d posting send for protocol error\n", |
1105 | ret); | 1344 | ret); |
1345 | ib_dma_unmap_page(xprt->sc_cm_id->device, | ||
1346 | sge.addr, PAGE_SIZE, | ||
1347 | DMA_FROM_DEVICE); | ||
1106 | svc_rdma_put_context(ctxt, 1); | 1348 | svc_rdma_put_context(ctxt, 1); |
1107 | } | 1349 | } |
1108 | } | 1350 | } |