diff options
Diffstat (limited to 'net')
| -rw-r--r-- | net/sunrpc/Makefile | 2 | ||||
| -rw-r--r-- | net/sunrpc/auth.c | 116 | ||||
| -rw-r--r-- | net/sunrpc/auth_generic.c | 293 | ||||
| -rw-r--r-- | net/sunrpc/auth_gss/auth_gss.c | 47 | ||||
| -rw-r--r-- | net/sunrpc/auth_gss/gss_mech_switch.c | 2 | ||||
| -rw-r--r-- | net/sunrpc/auth_null.c | 4 | ||||
| -rw-r--r-- | net/sunrpc/auth_unix.c | 110 | ||||
| -rw-r--r-- | net/sunrpc/backchannel_rqst.c | 2 | ||||
| -rw-r--r-- | net/sunrpc/clnt.c | 29 | ||||
| -rw-r--r-- | net/sunrpc/rpc_pipe.c | 19 | ||||
| -rw-r--r-- | net/sunrpc/rpcb_clnt.c | 12 | ||||
| -rw-r--r-- | net/sunrpc/sched.c | 5 | ||||
| -rw-r--r-- | net/sunrpc/xprtmultipath.c | 4 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/Makefile | 3 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/backchannel.c | 39 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/fmr_ops.c | 337 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/frwr_ops.c | 209 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 78 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 8 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/transport.c | 91 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 255 | ||||
| -rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 80 | ||||
| -rw-r--r-- | net/sunrpc/xprtsock.c | 10 |
23 files changed, 487 insertions, 1268 deletions
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 090658c3da12..9488600451e8 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile | |||
| @@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ | |||
| 9 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ | 9 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ |
| 10 | 10 | ||
| 11 | sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ | 11 | sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ |
| 12 | auth.o auth_null.o auth_unix.o auth_generic.o \ | 12 | auth.o auth_null.o auth_unix.o \ |
| 13 | svc.o svcsock.o svcauth.o svcauth_unix.o \ | 13 | svc.o svcsock.o svcauth.o svcauth_unix.o \ |
| 14 | addr.o rpcb_clnt.o timer.o xdr.o \ | 14 | addr.o rpcb_clnt.o timer.o xdr.o \ |
| 15 | sunrpc_syms.o cache.o rpc_pipe.o \ | 15 | sunrpc_syms.o cache.o rpc_pipe.o \ |
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index ad8ead738981..1ff9768f5456 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c | |||
| @@ -39,6 +39,20 @@ static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = { | |||
| 39 | static LIST_HEAD(cred_unused); | 39 | static LIST_HEAD(cred_unused); |
| 40 | static unsigned long number_cred_unused; | 40 | static unsigned long number_cred_unused; |
| 41 | 41 | ||
| 42 | static struct cred machine_cred = { | ||
| 43 | .usage = ATOMIC_INIT(1), | ||
| 44 | }; | ||
| 45 | |||
| 46 | /* | ||
| 47 | * Return the machine_cred pointer to be used whenever | ||
| 48 | * the a generic machine credential is needed. | ||
| 49 | */ | ||
| 50 | const struct cred *rpc_machine_cred(void) | ||
| 51 | { | ||
| 52 | return &machine_cred; | ||
| 53 | } | ||
| 54 | EXPORT_SYMBOL_GPL(rpc_machine_cred); | ||
| 55 | |||
| 42 | #define MAX_HASHTABLE_BITS (14) | 56 | #define MAX_HASHTABLE_BITS (14) |
| 43 | static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp) | 57 | static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp) |
| 44 | { | 58 | { |
| @@ -346,29 +360,6 @@ out_nocache: | |||
| 346 | } | 360 | } |
| 347 | EXPORT_SYMBOL_GPL(rpcauth_init_credcache); | 361 | EXPORT_SYMBOL_GPL(rpcauth_init_credcache); |
| 348 | 362 | ||
| 349 | /* | ||
| 350 | * Setup a credential key lifetime timeout notification | ||
| 351 | */ | ||
| 352 | int | ||
| 353 | rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred) | ||
| 354 | { | ||
| 355 | if (!cred->cr_auth->au_ops->key_timeout) | ||
| 356 | return 0; | ||
| 357 | return cred->cr_auth->au_ops->key_timeout(auth, cred); | ||
| 358 | } | ||
| 359 | EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify); | ||
| 360 | |||
| 361 | bool | ||
| 362 | rpcauth_cred_key_to_expire(struct rpc_auth *auth, struct rpc_cred *cred) | ||
| 363 | { | ||
| 364 | if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT) | ||
| 365 | return false; | ||
| 366 | if (!cred->cr_ops->crkey_to_expire) | ||
| 367 | return false; | ||
| 368 | return cred->cr_ops->crkey_to_expire(cred); | ||
| 369 | } | ||
| 370 | EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire); | ||
| 371 | |||
| 372 | char * | 363 | char * |
| 373 | rpcauth_stringify_acceptor(struct rpc_cred *cred) | 364 | rpcauth_stringify_acceptor(struct rpc_cred *cred) |
| 374 | { | 365 | { |
| @@ -587,13 +578,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, | |||
| 587 | hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { | 578 | hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { |
| 588 | if (!entry->cr_ops->crmatch(acred, entry, flags)) | 579 | if (!entry->cr_ops->crmatch(acred, entry, flags)) |
| 589 | continue; | 580 | continue; |
| 590 | if (flags & RPCAUTH_LOOKUP_RCU) { | ||
| 591 | if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) || | ||
| 592 | refcount_read(&entry->cr_count) == 0) | ||
| 593 | continue; | ||
| 594 | cred = entry; | ||
| 595 | break; | ||
| 596 | } | ||
| 597 | cred = get_rpccred(entry); | 581 | cred = get_rpccred(entry); |
| 598 | if (cred) | 582 | if (cred) |
| 599 | break; | 583 | break; |
| @@ -603,9 +587,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, | |||
| 603 | if (cred != NULL) | 587 | if (cred != NULL) |
| 604 | goto found; | 588 | goto found; |
| 605 | 589 | ||
| 606 | if (flags & RPCAUTH_LOOKUP_RCU) | ||
| 607 | return ERR_PTR(-ECHILD); | ||
| 608 | |||
| 609 | new = auth->au_ops->crcreate(auth, acred, flags, gfp); | 590 | new = auth->au_ops->crcreate(auth, acred, flags, gfp); |
| 610 | if (IS_ERR(new)) { | 591 | if (IS_ERR(new)) { |
| 611 | cred = new; | 592 | cred = new; |
| @@ -656,9 +637,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) | |||
| 656 | auth->au_ops->au_name); | 637 | auth->au_ops->au_name); |
| 657 | 638 | ||
| 658 | memset(&acred, 0, sizeof(acred)); | 639 | memset(&acred, 0, sizeof(acred)); |
| 659 | acred.uid = cred->fsuid; | 640 | acred.cred = cred; |
| 660 | acred.gid = cred->fsgid; | ||
| 661 | acred.group_info = cred->group_info; | ||
| 662 | ret = auth->au_ops->lookup_cred(auth, &acred, flags); | 641 | ret = auth->au_ops->lookup_cred(auth, &acred, flags); |
| 663 | return ret; | 642 | return ret; |
| 664 | } | 643 | } |
| @@ -672,31 +651,41 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, | |||
| 672 | INIT_LIST_HEAD(&cred->cr_lru); | 651 | INIT_LIST_HEAD(&cred->cr_lru); |
| 673 | refcount_set(&cred->cr_count, 1); | 652 | refcount_set(&cred->cr_count, 1); |
| 674 | cred->cr_auth = auth; | 653 | cred->cr_auth = auth; |
| 654 | cred->cr_flags = 0; | ||
| 675 | cred->cr_ops = ops; | 655 | cred->cr_ops = ops; |
| 676 | cred->cr_expire = jiffies; | 656 | cred->cr_expire = jiffies; |
| 677 | cred->cr_uid = acred->uid; | 657 | cred->cr_cred = get_cred(acred->cred); |
| 678 | } | 658 | } |
| 679 | EXPORT_SYMBOL_GPL(rpcauth_init_cred); | 659 | EXPORT_SYMBOL_GPL(rpcauth_init_cred); |
| 680 | 660 | ||
| 681 | struct rpc_cred * | 661 | static struct rpc_cred * |
| 682 | rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) | 662 | rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) |
| 683 | { | 663 | { |
| 684 | dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, | 664 | struct rpc_auth *auth = task->tk_client->cl_auth; |
| 685 | cred->cr_auth->au_ops->au_name, cred); | 665 | struct auth_cred acred = { |
| 686 | return get_rpccred(cred); | 666 | .cred = get_task_cred(&init_task), |
| 667 | }; | ||
| 668 | struct rpc_cred *ret; | ||
| 669 | |||
| 670 | dprintk("RPC: %5u looking up %s cred\n", | ||
| 671 | task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); | ||
| 672 | ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); | ||
| 673 | put_cred(acred.cred); | ||
| 674 | return ret; | ||
| 687 | } | 675 | } |
| 688 | EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); | ||
| 689 | 676 | ||
| 690 | static struct rpc_cred * | 677 | static struct rpc_cred * |
| 691 | rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) | 678 | rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags) |
| 692 | { | 679 | { |
| 693 | struct rpc_auth *auth = task->tk_client->cl_auth; | 680 | struct rpc_auth *auth = task->tk_client->cl_auth; |
| 694 | struct auth_cred acred = { | 681 | struct auth_cred acred = { |
| 695 | .uid = GLOBAL_ROOT_UID, | 682 | .principal = task->tk_client->cl_principal, |
| 696 | .gid = GLOBAL_ROOT_GID, | 683 | .cred = init_task.cred, |
| 697 | }; | 684 | }; |
| 698 | 685 | ||
| 699 | dprintk("RPC: %5u looking up %s cred\n", | 686 | if (!acred.principal) |
| 687 | return NULL; | ||
| 688 | dprintk("RPC: %5u looking up %s machine cred\n", | ||
| 700 | task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); | 689 | task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); |
| 701 | return auth->au_ops->lookup_cred(auth, &acred, lookupflags); | 690 | return auth->au_ops->lookup_cred(auth, &acred, lookupflags); |
| 702 | } | 691 | } |
| @@ -712,18 +701,33 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) | |||
| 712 | } | 701 | } |
| 713 | 702 | ||
| 714 | static int | 703 | static int |
| 715 | rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) | 704 | rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags) |
| 716 | { | 705 | { |
| 717 | struct rpc_rqst *req = task->tk_rqstp; | 706 | struct rpc_rqst *req = task->tk_rqstp; |
| 718 | struct rpc_cred *new; | 707 | struct rpc_cred *new = NULL; |
| 719 | int lookupflags = 0; | 708 | int lookupflags = 0; |
| 709 | struct rpc_auth *auth = task->tk_client->cl_auth; | ||
| 710 | struct auth_cred acred = { | ||
| 711 | .cred = cred, | ||
| 712 | }; | ||
| 720 | 713 | ||
| 721 | if (flags & RPC_TASK_ASYNC) | 714 | if (flags & RPC_TASK_ASYNC) |
| 722 | lookupflags |= RPCAUTH_LOOKUP_NEW; | 715 | lookupflags |= RPCAUTH_LOOKUP_NEW; |
| 723 | if (cred != NULL) | 716 | if (task->tk_op_cred) |
| 724 | new = cred->cr_ops->crbind(task, cred, lookupflags); | 717 | /* Task must use exactly this rpc_cred */ |
| 725 | else if (flags & RPC_TASK_ROOTCREDS) | 718 | new = get_rpccred(task->tk_op_cred); |
| 719 | else if (cred != NULL && cred != &machine_cred) | ||
| 720 | new = auth->au_ops->lookup_cred(auth, &acred, lookupflags); | ||
| 721 | else if (cred == &machine_cred) | ||
| 722 | new = rpcauth_bind_machine_cred(task, lookupflags); | ||
| 723 | |||
| 724 | /* If machine cred couldn't be bound, try a root cred */ | ||
| 725 | if (new) | ||
| 726 | ; | ||
| 727 | else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS)) | ||
| 726 | new = rpcauth_bind_root_cred(task, lookupflags); | 728 | new = rpcauth_bind_root_cred(task, lookupflags); |
| 729 | else if (flags & RPC_TASK_NULLCREDS) | ||
| 730 | new = authnull_ops.lookup_cred(NULL, NULL, 0); | ||
| 727 | else | 731 | else |
| 728 | new = rpcauth_bind_new_cred(task, lookupflags); | 732 | new = rpcauth_bind_new_cred(task, lookupflags); |
| 729 | if (IS_ERR(new)) | 733 | if (IS_ERR(new)) |
| @@ -901,15 +905,10 @@ int __init rpcauth_init_module(void) | |||
| 901 | err = rpc_init_authunix(); | 905 | err = rpc_init_authunix(); |
| 902 | if (err < 0) | 906 | if (err < 0) |
| 903 | goto out1; | 907 | goto out1; |
| 904 | err = rpc_init_generic_auth(); | ||
| 905 | if (err < 0) | ||
| 906 | goto out2; | ||
| 907 | err = register_shrinker(&rpc_cred_shrinker); | 908 | err = register_shrinker(&rpc_cred_shrinker); |
| 908 | if (err < 0) | 909 | if (err < 0) |
| 909 | goto out3; | 910 | goto out2; |
| 910 | return 0; | 911 | return 0; |
| 911 | out3: | ||
| 912 | rpc_destroy_generic_auth(); | ||
| 913 | out2: | 912 | out2: |
| 914 | rpc_destroy_authunix(); | 913 | rpc_destroy_authunix(); |
| 915 | out1: | 914 | out1: |
| @@ -919,6 +918,5 @@ out1: | |||
| 919 | void rpcauth_remove_module(void) | 918 | void rpcauth_remove_module(void) |
| 920 | { | 919 | { |
| 921 | rpc_destroy_authunix(); | 920 | rpc_destroy_authunix(); |
| 922 | rpc_destroy_generic_auth(); | ||
| 923 | unregister_shrinker(&rpc_cred_shrinker); | 921 | unregister_shrinker(&rpc_cred_shrinker); |
| 924 | } | 922 | } |
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c deleted file mode 100644 index ab4a3be1542a..000000000000 --- a/net/sunrpc/auth_generic.c +++ /dev/null | |||
| @@ -1,293 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Generic RPC credential | ||
| 3 | * | ||
| 4 | * Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com> | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <linux/err.h> | ||
| 8 | #include <linux/slab.h> | ||
| 9 | #include <linux/types.h> | ||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/sched.h> | ||
| 12 | #include <linux/sunrpc/auth.h> | ||
| 13 | #include <linux/sunrpc/clnt.h> | ||
| 14 | #include <linux/sunrpc/debug.h> | ||
| 15 | #include <linux/sunrpc/sched.h> | ||
| 16 | |||
| 17 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
| 18 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
| 19 | #endif | ||
| 20 | |||
| 21 | #define RPC_MACHINE_CRED_USERID GLOBAL_ROOT_UID | ||
| 22 | #define RPC_MACHINE_CRED_GROUPID GLOBAL_ROOT_GID | ||
| 23 | |||
| 24 | struct generic_cred { | ||
| 25 | struct rpc_cred gc_base; | ||
| 26 | struct auth_cred acred; | ||
| 27 | }; | ||
| 28 | |||
| 29 | static struct rpc_auth generic_auth; | ||
| 30 | static const struct rpc_credops generic_credops; | ||
| 31 | |||
| 32 | /* | ||
| 33 | * Public call interface | ||
| 34 | */ | ||
| 35 | struct rpc_cred *rpc_lookup_cred(void) | ||
| 36 | { | ||
| 37 | return rpcauth_lookupcred(&generic_auth, 0); | ||
| 38 | } | ||
| 39 | EXPORT_SYMBOL_GPL(rpc_lookup_cred); | ||
| 40 | |||
| 41 | struct rpc_cred * | ||
| 42 | rpc_lookup_generic_cred(struct auth_cred *acred, int flags, gfp_t gfp) | ||
| 43 | { | ||
| 44 | return rpcauth_lookup_credcache(&generic_auth, acred, flags, gfp); | ||
| 45 | } | ||
| 46 | EXPORT_SYMBOL_GPL(rpc_lookup_generic_cred); | ||
| 47 | |||
| 48 | struct rpc_cred *rpc_lookup_cred_nonblock(void) | ||
| 49 | { | ||
| 50 | return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU); | ||
| 51 | } | ||
| 52 | EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock); | ||
| 53 | |||
| 54 | /* | ||
| 55 | * Public call interface for looking up machine creds. | ||
| 56 | */ | ||
| 57 | struct rpc_cred *rpc_lookup_machine_cred(const char *service_name) | ||
| 58 | { | ||
| 59 | struct auth_cred acred = { | ||
| 60 | .uid = RPC_MACHINE_CRED_USERID, | ||
| 61 | .gid = RPC_MACHINE_CRED_GROUPID, | ||
| 62 | .principal = service_name, | ||
| 63 | .machine_cred = 1, | ||
| 64 | }; | ||
| 65 | |||
| 66 | dprintk("RPC: looking up machine cred for service %s\n", | ||
| 67 | service_name); | ||
| 68 | return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0); | ||
| 69 | } | ||
| 70 | EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); | ||
| 71 | |||
| 72 | static struct rpc_cred *generic_bind_cred(struct rpc_task *task, | ||
| 73 | struct rpc_cred *cred, int lookupflags) | ||
| 74 | { | ||
| 75 | struct rpc_auth *auth = task->tk_client->cl_auth; | ||
| 76 | struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; | ||
| 77 | |||
| 78 | return auth->au_ops->lookup_cred(auth, acred, lookupflags); | ||
| 79 | } | ||
| 80 | |||
| 81 | static int | ||
| 82 | generic_hash_cred(struct auth_cred *acred, unsigned int hashbits) | ||
| 83 | { | ||
| 84 | return hash_64(from_kgid(&init_user_ns, acred->gid) | | ||
| 85 | ((u64)from_kuid(&init_user_ns, acred->uid) << | ||
| 86 | (sizeof(gid_t) * 8)), hashbits); | ||
| 87 | } | ||
| 88 | |||
| 89 | /* | ||
| 90 | * Lookup generic creds for current process | ||
| 91 | */ | ||
| 92 | static struct rpc_cred * | ||
| 93 | generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | ||
| 94 | { | ||
| 95 | return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL); | ||
| 96 | } | ||
| 97 | |||
| 98 | static struct rpc_cred * | ||
| 99 | generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) | ||
| 100 | { | ||
| 101 | struct generic_cred *gcred; | ||
| 102 | |||
| 103 | gcred = kmalloc(sizeof(*gcred), gfp); | ||
| 104 | if (gcred == NULL) | ||
| 105 | return ERR_PTR(-ENOMEM); | ||
| 106 | |||
| 107 | rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops); | ||
| 108 | gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; | ||
| 109 | |||
| 110 | gcred->acred.uid = acred->uid; | ||
| 111 | gcred->acred.gid = acred->gid; | ||
| 112 | gcred->acred.group_info = acred->group_info; | ||
| 113 | gcred->acred.ac_flags = 0; | ||
| 114 | if (gcred->acred.group_info != NULL) | ||
| 115 | get_group_info(gcred->acred.group_info); | ||
| 116 | gcred->acred.machine_cred = acred->machine_cred; | ||
| 117 | gcred->acred.principal = acred->principal; | ||
| 118 | |||
| 119 | dprintk("RPC: allocated %s cred %p for uid %d gid %d\n", | ||
| 120 | gcred->acred.machine_cred ? "machine" : "generic", | ||
| 121 | gcred, | ||
| 122 | from_kuid(&init_user_ns, acred->uid), | ||
| 123 | from_kgid(&init_user_ns, acred->gid)); | ||
| 124 | return &gcred->gc_base; | ||
| 125 | } | ||
| 126 | |||
| 127 | static void | ||
| 128 | generic_free_cred(struct rpc_cred *cred) | ||
| 129 | { | ||
| 130 | struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); | ||
| 131 | |||
| 132 | dprintk("RPC: generic_free_cred %p\n", gcred); | ||
| 133 | if (gcred->acred.group_info != NULL) | ||
| 134 | put_group_info(gcred->acred.group_info); | ||
| 135 | kfree(gcred); | ||
| 136 | } | ||
| 137 | |||
| 138 | static void | ||
| 139 | generic_free_cred_callback(struct rcu_head *head) | ||
| 140 | { | ||
| 141 | struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu); | ||
| 142 | generic_free_cred(cred); | ||
| 143 | } | ||
| 144 | |||
| 145 | static void | ||
| 146 | generic_destroy_cred(struct rpc_cred *cred) | ||
| 147 | { | ||
| 148 | call_rcu(&cred->cr_rcu, generic_free_cred_callback); | ||
| 149 | } | ||
| 150 | |||
| 151 | static int | ||
| 152 | machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags) | ||
| 153 | { | ||
| 154 | if (!gcred->acred.machine_cred || | ||
| 155 | gcred->acred.principal != acred->principal || | ||
| 156 | !uid_eq(gcred->acred.uid, acred->uid) || | ||
| 157 | !gid_eq(gcred->acred.gid, acred->gid)) | ||
| 158 | return 0; | ||
| 159 | return 1; | ||
| 160 | } | ||
| 161 | |||
| 162 | /* | ||
| 163 | * Match credentials against current process creds. | ||
| 164 | */ | ||
| 165 | static int | ||
| 166 | generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) | ||
| 167 | { | ||
| 168 | struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); | ||
| 169 | int i; | ||
| 170 | |||
| 171 | if (acred->machine_cred) | ||
| 172 | return machine_cred_match(acred, gcred, flags); | ||
| 173 | |||
| 174 | if (!uid_eq(gcred->acred.uid, acred->uid) || | ||
| 175 | !gid_eq(gcred->acred.gid, acred->gid) || | ||
| 176 | gcred->acred.machine_cred != 0) | ||
| 177 | goto out_nomatch; | ||
| 178 | |||
| 179 | /* Optimisation in the case where pointers are identical... */ | ||
| 180 | if (gcred->acred.group_info == acred->group_info) | ||
| 181 | goto out_match; | ||
| 182 | |||
| 183 | /* Slow path... */ | ||
| 184 | if (gcred->acred.group_info->ngroups != acred->group_info->ngroups) | ||
| 185 | goto out_nomatch; | ||
| 186 | for (i = 0; i < gcred->acred.group_info->ngroups; i++) { | ||
| 187 | if (!gid_eq(gcred->acred.group_info->gid[i], | ||
| 188 | acred->group_info->gid[i])) | ||
| 189 | goto out_nomatch; | ||
| 190 | } | ||
| 191 | out_match: | ||
| 192 | return 1; | ||
| 193 | out_nomatch: | ||
| 194 | return 0; | ||
| 195 | } | ||
| 196 | |||
| 197 | int __init rpc_init_generic_auth(void) | ||
| 198 | { | ||
| 199 | return rpcauth_init_credcache(&generic_auth); | ||
| 200 | } | ||
| 201 | |||
| 202 | void rpc_destroy_generic_auth(void) | ||
| 203 | { | ||
| 204 | rpcauth_destroy_credcache(&generic_auth); | ||
| 205 | } | ||
| 206 | |||
| 207 | /* | ||
| 208 | * Test the the current time (now) against the underlying credential key expiry | ||
| 209 | * minus a timeout and setup notification. | ||
| 210 | * | ||
| 211 | * The normal case: | ||
| 212 | * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set | ||
| 213 | * the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential | ||
| 214 | * rpc_credops crmatch routine to notify this generic cred when it's key | ||
| 215 | * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0. | ||
| 216 | * | ||
| 217 | * The error case: | ||
| 218 | * If the underlying cred lookup fails, return -EACCES. | ||
| 219 | * | ||
| 220 | * The 'almost' error case: | ||
| 221 | * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within | ||
| 222 | * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_EXPIRE_SOON bit | ||
| 223 | * on the acred ac_flags and return 0. | ||
| 224 | */ | ||
| 225 | static int | ||
| 226 | generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) | ||
| 227 | { | ||
| 228 | struct auth_cred *acred = &container_of(cred, struct generic_cred, | ||
| 229 | gc_base)->acred; | ||
| 230 | struct rpc_cred *tcred; | ||
| 231 | int ret = 0; | ||
| 232 | |||
| 233 | |||
| 234 | /* Fast track for non crkey_timeout (no key) underlying credentials */ | ||
| 235 | if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT) | ||
| 236 | return 0; | ||
| 237 | |||
| 238 | /* Fast track for the normal case */ | ||
| 239 | if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags)) | ||
| 240 | return 0; | ||
| 241 | |||
| 242 | /* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */ | ||
| 243 | tcred = auth->au_ops->lookup_cred(auth, acred, 0); | ||
| 244 | if (IS_ERR(tcred)) | ||
| 245 | return -EACCES; | ||
| 246 | |||
| 247 | /* Test for the almost error case */ | ||
| 248 | ret = tcred->cr_ops->crkey_timeout(tcred); | ||
| 249 | if (ret != 0) { | ||
| 250 | set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); | ||
| 251 | ret = 0; | ||
| 252 | } else { | ||
| 253 | /* In case underlying cred key has been reset */ | ||
| 254 | if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON, | ||
| 255 | &acred->ac_flags)) | ||
| 256 | dprintk("RPC: UID %d Credential key reset\n", | ||
| 257 | from_kuid(&init_user_ns, tcred->cr_uid)); | ||
| 258 | /* set up fasttrack for the normal case */ | ||
| 259 | set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); | ||
| 260 | } | ||
| 261 | |||
| 262 | put_rpccred(tcred); | ||
| 263 | return ret; | ||
| 264 | } | ||
| 265 | |||
| 266 | static const struct rpc_authops generic_auth_ops = { | ||
| 267 | .owner = THIS_MODULE, | ||
| 268 | .au_name = "Generic", | ||
| 269 | .hash_cred = generic_hash_cred, | ||
| 270 | .lookup_cred = generic_lookup_cred, | ||
| 271 | .crcreate = generic_create_cred, | ||
| 272 | .key_timeout = generic_key_timeout, | ||
| 273 | }; | ||
| 274 | |||
| 275 | static struct rpc_auth generic_auth = { | ||
| 276 | .au_ops = &generic_auth_ops, | ||
| 277 | .au_count = REFCOUNT_INIT(1), | ||
| 278 | }; | ||
| 279 | |||
| 280 | static bool generic_key_to_expire(struct rpc_cred *cred) | ||
| 281 | { | ||
| 282 | struct auth_cred *acred = &container_of(cred, struct generic_cred, | ||
| 283 | gc_base)->acred; | ||
| 284 | return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); | ||
| 285 | } | ||
| 286 | |||
| 287 | static const struct rpc_credops generic_credops = { | ||
| 288 | .cr_name = "Generic cred", | ||
| 289 | .crdestroy = generic_destroy_cred, | ||
| 290 | .crbind = generic_bind_cred, | ||
| 291 | .crmatch = generic_match, | ||
| 292 | .crkey_to_expire = generic_key_to_expire, | ||
| 293 | }; | ||
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ba765473d1f0..dc86713b32b6 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c | |||
| @@ -565,7 +565,7 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) | |||
| 565 | struct gss_cred *gss_cred = container_of(cred, | 565 | struct gss_cred *gss_cred = container_of(cred, |
| 566 | struct gss_cred, gc_base); | 566 | struct gss_cred, gc_base); |
| 567 | struct gss_upcall_msg *gss_new, *gss_msg; | 567 | struct gss_upcall_msg *gss_new, *gss_msg; |
| 568 | kuid_t uid = cred->cr_uid; | 568 | kuid_t uid = cred->cr_cred->fsuid; |
| 569 | 569 | ||
| 570 | gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal); | 570 | gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal); |
| 571 | if (IS_ERR(gss_new)) | 571 | if (IS_ERR(gss_new)) |
| @@ -604,7 +604,7 @@ gss_refresh_upcall(struct rpc_task *task) | |||
| 604 | int err = 0; | 604 | int err = 0; |
| 605 | 605 | ||
| 606 | dprintk("RPC: %5u %s for uid %u\n", | 606 | dprintk("RPC: %5u %s for uid %u\n", |
| 607 | task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid)); | 607 | task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid)); |
| 608 | gss_msg = gss_setup_upcall(gss_auth, cred); | 608 | gss_msg = gss_setup_upcall(gss_auth, cred); |
| 609 | if (PTR_ERR(gss_msg) == -EAGAIN) { | 609 | if (PTR_ERR(gss_msg) == -EAGAIN) { |
| 610 | /* XXX: warning on the first, under the assumption we | 610 | /* XXX: warning on the first, under the assumption we |
| @@ -637,7 +637,7 @@ gss_refresh_upcall(struct rpc_task *task) | |||
| 637 | out: | 637 | out: |
| 638 | dprintk("RPC: %5u %s for uid %u result %d\n", | 638 | dprintk("RPC: %5u %s for uid %u result %d\n", |
| 639 | task->tk_pid, __func__, | 639 | task->tk_pid, __func__, |
| 640 | from_kuid(&init_user_ns, cred->cr_uid), err); | 640 | from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); |
| 641 | return err; | 641 | return err; |
| 642 | } | 642 | } |
| 643 | 643 | ||
| @@ -653,7 +653,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) | |||
| 653 | int err; | 653 | int err; |
| 654 | 654 | ||
| 655 | dprintk("RPC: %s for uid %u\n", | 655 | dprintk("RPC: %s for uid %u\n", |
| 656 | __func__, from_kuid(&init_user_ns, cred->cr_uid)); | 656 | __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid)); |
| 657 | retry: | 657 | retry: |
| 658 | err = 0; | 658 | err = 0; |
| 659 | /* if gssd is down, just skip upcalling altogether */ | 659 | /* if gssd is down, just skip upcalling altogether */ |
| @@ -701,7 +701,7 @@ out_intr: | |||
| 701 | gss_release_msg(gss_msg); | 701 | gss_release_msg(gss_msg); |
| 702 | out: | 702 | out: |
| 703 | dprintk("RPC: %s for uid %u result %d\n", | 703 | dprintk("RPC: %s for uid %u result %d\n", |
| 704 | __func__, from_kuid(&init_user_ns, cred->cr_uid), err); | 704 | __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); |
| 705 | return err; | 705 | return err; |
| 706 | } | 706 | } |
| 707 | 707 | ||
| @@ -1248,7 +1248,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) | |||
| 1248 | new = kzalloc(sizeof(*gss_cred), GFP_NOIO); | 1248 | new = kzalloc(sizeof(*gss_cred), GFP_NOIO); |
| 1249 | if (new) { | 1249 | if (new) { |
| 1250 | struct auth_cred acred = { | 1250 | struct auth_cred acred = { |
| 1251 | .uid = gss_cred->gc_base.cr_uid, | 1251 | .cred = gss_cred->gc_base.cr_cred, |
| 1252 | }; | 1252 | }; |
| 1253 | struct gss_cl_ctx *ctx = | 1253 | struct gss_cl_ctx *ctx = |
| 1254 | rcu_dereference_protected(gss_cred->gc_ctx, 1); | 1254 | rcu_dereference_protected(gss_cred->gc_ctx, 1); |
| @@ -1343,6 +1343,7 @@ gss_destroy_nullcred(struct rpc_cred *cred) | |||
| 1343 | struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); | 1343 | struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); |
| 1344 | 1344 | ||
| 1345 | RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); | 1345 | RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); |
| 1346 | put_cred(cred->cr_cred); | ||
| 1346 | call_rcu(&cred->cr_rcu, gss_free_cred_callback); | 1347 | call_rcu(&cred->cr_rcu, gss_free_cred_callback); |
| 1347 | if (ctx) | 1348 | if (ctx) |
| 1348 | gss_put_ctx(ctx); | 1349 | gss_put_ctx(ctx); |
| @@ -1361,7 +1362,7 @@ gss_destroy_cred(struct rpc_cred *cred) | |||
| 1361 | static int | 1362 | static int |
| 1362 | gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) | 1363 | gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) |
| 1363 | { | 1364 | { |
| 1364 | return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits); | 1365 | return hash_64(from_kuid(&init_user_ns, acred->cred->fsuid), hashbits); |
| 1365 | } | 1366 | } |
| 1366 | 1367 | ||
| 1367 | /* | 1368 | /* |
| @@ -1381,7 +1382,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t | |||
| 1381 | int err = -ENOMEM; | 1382 | int err = -ENOMEM; |
| 1382 | 1383 | ||
| 1383 | dprintk("RPC: %s for uid %d, flavor %d\n", | 1384 | dprintk("RPC: %s for uid %d, flavor %d\n", |
| 1384 | __func__, from_kuid(&init_user_ns, acred->uid), | 1385 | __func__, from_kuid(&init_user_ns, acred->cred->fsuid), |
| 1385 | auth->au_flavor); | 1386 | auth->au_flavor); |
| 1386 | 1387 | ||
| 1387 | if (!(cred = kzalloc(sizeof(*cred), gfp))) | 1388 | if (!(cred = kzalloc(sizeof(*cred), gfp))) |
| @@ -1394,9 +1395,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t | |||
| 1394 | */ | 1395 | */ |
| 1395 | cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; | 1396 | cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; |
| 1396 | cred->gc_service = gss_auth->service; | 1397 | cred->gc_service = gss_auth->service; |
| 1397 | cred->gc_principal = NULL; | 1398 | cred->gc_principal = acred->principal; |
| 1398 | if (acred->machine_cred) | ||
| 1399 | cred->gc_principal = acred->principal; | ||
| 1400 | kref_get(&gss_auth->kref); | 1399 | kref_get(&gss_auth->kref); |
| 1401 | return &cred->gc_base; | 1400 | return &cred->gc_base; |
| 1402 | 1401 | ||
| @@ -1518,23 +1517,10 @@ out: | |||
| 1518 | if (gss_cred->gc_principal == NULL) | 1517 | if (gss_cred->gc_principal == NULL) |
| 1519 | return 0; | 1518 | return 0; |
| 1520 | ret = strcmp(acred->principal, gss_cred->gc_principal) == 0; | 1519 | ret = strcmp(acred->principal, gss_cred->gc_principal) == 0; |
| 1521 | goto check_expire; | 1520 | } else { |
| 1522 | } | 1521 | if (gss_cred->gc_principal != NULL) |
| 1523 | if (gss_cred->gc_principal != NULL) | 1522 | return 0; |
| 1524 | return 0; | 1523 | ret = uid_eq(rc->cr_cred->fsuid, acred->cred->fsuid); |
| 1525 | ret = uid_eq(rc->cr_uid, acred->uid); | ||
| 1526 | |||
| 1527 | check_expire: | ||
| 1528 | if (ret == 0) | ||
| 1529 | return ret; | ||
| 1530 | |||
| 1531 | /* Notify acred users of GSS context expiration timeout */ | ||
| 1532 | if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) && | ||
| 1533 | (gss_key_timeout(rc) != 0)) { | ||
| 1534 | /* test will now be done from generic cred */ | ||
| 1535 | test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); | ||
| 1536 | /* tell NFS layer that key will expire soon */ | ||
| 1537 | set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); | ||
| 1538 | } | 1524 | } |
| 1539 | return ret; | 1525 | return ret; |
| 1540 | } | 1526 | } |
| @@ -1607,9 +1593,8 @@ static int gss_renew_cred(struct rpc_task *task) | |||
| 1607 | gc_base); | 1593 | gc_base); |
| 1608 | struct rpc_auth *auth = oldcred->cr_auth; | 1594 | struct rpc_auth *auth = oldcred->cr_auth; |
| 1609 | struct auth_cred acred = { | 1595 | struct auth_cred acred = { |
| 1610 | .uid = oldcred->cr_uid, | 1596 | .cred = oldcred->cr_cred, |
| 1611 | .principal = gss_cred->gc_principal, | 1597 | .principal = gss_cred->gc_principal, |
| 1612 | .machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0), | ||
| 1613 | }; | 1598 | }; |
| 1614 | struct rpc_cred *new; | 1599 | struct rpc_cred *new; |
| 1615 | 1600 | ||
| @@ -2110,7 +2095,6 @@ static const struct rpc_credops gss_credops = { | |||
| 2110 | .cr_name = "AUTH_GSS", | 2095 | .cr_name = "AUTH_GSS", |
| 2111 | .crdestroy = gss_destroy_cred, | 2096 | .crdestroy = gss_destroy_cred, |
| 2112 | .cr_init = gss_cred_init, | 2097 | .cr_init = gss_cred_init, |
| 2113 | .crbind = rpcauth_generic_bind_cred, | ||
| 2114 | .crmatch = gss_match, | 2098 | .crmatch = gss_match, |
| 2115 | .crmarshal = gss_marshal, | 2099 | .crmarshal = gss_marshal, |
| 2116 | .crrefresh = gss_refresh, | 2100 | .crrefresh = gss_refresh, |
| @@ -2125,7 +2109,6 @@ static const struct rpc_credops gss_credops = { | |||
| 2125 | static const struct rpc_credops gss_nullops = { | 2109 | static const struct rpc_credops gss_nullops = { |
| 2126 | .cr_name = "AUTH_GSS", | 2110 | .cr_name = "AUTH_GSS", |
| 2127 | .crdestroy = gss_destroy_nullcred, | 2111 | .crdestroy = gss_destroy_nullcred, |
| 2128 | .crbind = rpcauth_generic_bind_cred, | ||
| 2129 | .crmatch = gss_match, | 2112 | .crmatch = gss_match, |
| 2130 | .crmarshal = gss_marshal, | 2113 | .crmarshal = gss_marshal, |
| 2131 | .crrefresh = gss_refresh_null, | 2114 | .crrefresh = gss_refresh_null, |
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 16ac0f4cb7d8..379318dff534 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c | |||
| @@ -244,7 +244,7 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) | |||
| 244 | 244 | ||
| 245 | /** | 245 | /** |
| 246 | * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors | 246 | * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors |
| 247 | * @array: array to fill in | 247 | * @array_ptr: array to fill in |
| 248 | * @size: size of "array" | 248 | * @size: size of "array" |
| 249 | * | 249 | * |
| 250 | * Returns the number of array items filled in, or a negative errno. | 250 | * Returns the number of array items filled in, or a negative errno. |
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 2694a1bc026b..d0ceac57c06e 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c | |||
| @@ -36,8 +36,6 @@ nul_destroy(struct rpc_auth *auth) | |||
| 36 | static struct rpc_cred * | 36 | static struct rpc_cred * |
| 37 | nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | 37 | nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) |
| 38 | { | 38 | { |
| 39 | if (flags & RPCAUTH_LOOKUP_RCU) | ||
| 40 | return &null_cred; | ||
| 41 | return get_rpccred(&null_cred); | 39 | return get_rpccred(&null_cred); |
| 42 | } | 40 | } |
| 43 | 41 | ||
| @@ -116,7 +114,6 @@ static | |||
| 116 | struct rpc_auth null_auth = { | 114 | struct rpc_auth null_auth = { |
| 117 | .au_cslack = NUL_CALLSLACK, | 115 | .au_cslack = NUL_CALLSLACK, |
| 118 | .au_rslack = NUL_REPLYSLACK, | 116 | .au_rslack = NUL_REPLYSLACK, |
| 119 | .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT, | ||
| 120 | .au_ops = &authnull_ops, | 117 | .au_ops = &authnull_ops, |
| 121 | .au_flavor = RPC_AUTH_NULL, | 118 | .au_flavor = RPC_AUTH_NULL, |
| 122 | .au_count = REFCOUNT_INIT(1), | 119 | .au_count = REFCOUNT_INIT(1), |
| @@ -126,7 +123,6 @@ static | |||
| 126 | const struct rpc_credops null_credops = { | 123 | const struct rpc_credops null_credops = { |
| 127 | .cr_name = "AUTH_NULL", | 124 | .cr_name = "AUTH_NULL", |
| 128 | .crdestroy = nul_destroy_cred, | 125 | .crdestroy = nul_destroy_cred, |
| 129 | .crbind = rpcauth_generic_bind_cred, | ||
| 130 | .crmatch = nul_match, | 126 | .crmatch = nul_match, |
| 131 | .crmarshal = nul_marshal, | 127 | .crmarshal = nul_marshal, |
| 132 | .crrefresh = nul_refresh, | 128 | .crrefresh = nul_refresh, |
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4c1c7e56288f..387f6b3ffbea 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c | |||
| @@ -11,16 +11,11 @@ | |||
| 11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
| 12 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
| 13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/mempool.h> | ||
| 14 | #include <linux/sunrpc/clnt.h> | 15 | #include <linux/sunrpc/clnt.h> |
| 15 | #include <linux/sunrpc/auth.h> | 16 | #include <linux/sunrpc/auth.h> |
| 16 | #include <linux/user_namespace.h> | 17 | #include <linux/user_namespace.h> |
| 17 | 18 | ||
| 18 | struct unx_cred { | ||
| 19 | struct rpc_cred uc_base; | ||
| 20 | kgid_t uc_gid; | ||
| 21 | kgid_t uc_gids[UNX_NGROUPS]; | ||
| 22 | }; | ||
| 23 | #define uc_uid uc_base.cr_uid | ||
| 24 | 19 | ||
| 25 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 20 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
| 26 | # define RPCDBG_FACILITY RPCDBG_AUTH | 21 | # define RPCDBG_FACILITY RPCDBG_AUTH |
| @@ -28,6 +23,7 @@ struct unx_cred { | |||
| 28 | 23 | ||
| 29 | static struct rpc_auth unix_auth; | 24 | static struct rpc_auth unix_auth; |
| 30 | static const struct rpc_credops unix_credops; | 25 | static const struct rpc_credops unix_credops; |
| 26 | static mempool_t *unix_pool; | ||
| 31 | 27 | ||
| 32 | static struct rpc_auth * | 28 | static struct rpc_auth * |
| 33 | unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) | 29 | unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) |
| @@ -42,15 +38,6 @@ static void | |||
| 42 | unx_destroy(struct rpc_auth *auth) | 38 | unx_destroy(struct rpc_auth *auth) |
| 43 | { | 39 | { |
| 44 | dprintk("RPC: destroying UNIX authenticator %p\n", auth); | 40 | dprintk("RPC: destroying UNIX authenticator %p\n", auth); |
| 45 | rpcauth_clear_credcache(auth->au_credcache); | ||
| 46 | } | ||
| 47 | |||
| 48 | static int | ||
| 49 | unx_hash_cred(struct auth_cred *acred, unsigned int hashbits) | ||
| 50 | { | ||
| 51 | return hash_64(from_kgid(&init_user_ns, acred->gid) | | ||
| 52 | ((u64)from_kuid(&init_user_ns, acred->uid) << | ||
| 53 | (sizeof(gid_t) * 8)), hashbits); | ||
| 54 | } | 41 | } |
| 55 | 42 | ||
| 56 | /* | 43 | /* |
| @@ -59,52 +46,24 @@ unx_hash_cred(struct auth_cred *acred, unsigned int hashbits) | |||
| 59 | static struct rpc_cred * | 46 | static struct rpc_cred * |
| 60 | unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | 47 | unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) |
| 61 | { | 48 | { |
| 62 | return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS); | 49 | struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS); |
| 63 | } | ||
| 64 | |||
| 65 | static struct rpc_cred * | ||
| 66 | unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) | ||
| 67 | { | ||
| 68 | struct unx_cred *cred; | ||
| 69 | unsigned int groups = 0; | ||
| 70 | unsigned int i; | ||
| 71 | 50 | ||
| 72 | dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", | 51 | dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", |
| 73 | from_kuid(&init_user_ns, acred->uid), | 52 | from_kuid(&init_user_ns, acred->cred->fsuid), |
| 74 | from_kgid(&init_user_ns, acred->gid)); | 53 | from_kgid(&init_user_ns, acred->cred->fsgid)); |
| 75 | |||
| 76 | if (!(cred = kmalloc(sizeof(*cred), gfp))) | ||
| 77 | return ERR_PTR(-ENOMEM); | ||
| 78 | 54 | ||
| 79 | rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); | 55 | rpcauth_init_cred(ret, acred, auth, &unix_credops); |
| 80 | cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; | 56 | ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; |
| 81 | 57 | return ret; | |
| 82 | if (acred->group_info != NULL) | ||
| 83 | groups = acred->group_info->ngroups; | ||
| 84 | if (groups > UNX_NGROUPS) | ||
| 85 | groups = UNX_NGROUPS; | ||
| 86 | |||
| 87 | cred->uc_gid = acred->gid; | ||
| 88 | for (i = 0; i < groups; i++) | ||
| 89 | cred->uc_gids[i] = acred->group_info->gid[i]; | ||
| 90 | if (i < UNX_NGROUPS) | ||
| 91 | cred->uc_gids[i] = INVALID_GID; | ||
| 92 | |||
| 93 | return &cred->uc_base; | ||
| 94 | } | ||
| 95 | |||
| 96 | static void | ||
| 97 | unx_free_cred(struct unx_cred *unx_cred) | ||
| 98 | { | ||
| 99 | dprintk("RPC: unx_free_cred %p\n", unx_cred); | ||
| 100 | kfree(unx_cred); | ||
| 101 | } | 58 | } |
| 102 | 59 | ||
| 103 | static void | 60 | static void |
| 104 | unx_free_cred_callback(struct rcu_head *head) | 61 | unx_free_cred_callback(struct rcu_head *head) |
| 105 | { | 62 | { |
| 106 | struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu); | 63 | struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu); |
| 107 | unx_free_cred(unx_cred); | 64 | dprintk("RPC: unx_free_cred %p\n", rpc_cred); |
| 65 | put_cred(rpc_cred->cr_cred); | ||
| 66 | mempool_free(rpc_cred, unix_pool); | ||
| 108 | } | 67 | } |
| 109 | 68 | ||
| 110 | static void | 69 | static void |
| @@ -114,30 +73,32 @@ unx_destroy_cred(struct rpc_cred *cred) | |||
| 114 | } | 73 | } |
| 115 | 74 | ||
| 116 | /* | 75 | /* |
| 117 | * Match credentials against current process creds. | 76 | * Match credentials against current the auth_cred. |
| 118 | * The root_override argument takes care of cases where the caller may | ||
| 119 | * request root creds (e.g. for NFS swapping). | ||
| 120 | */ | 77 | */ |
| 121 | static int | 78 | static int |
| 122 | unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) | 79 | unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) |
| 123 | { | 80 | { |
| 124 | struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); | ||
| 125 | unsigned int groups = 0; | 81 | unsigned int groups = 0; |
| 126 | unsigned int i; | 82 | unsigned int i; |
| 127 | 83 | ||
| 84 | if (cred->cr_cred == acred->cred) | ||
| 85 | return 1; | ||
| 128 | 86 | ||
| 129 | if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid)) | 87 | if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid)) |
| 130 | return 0; | 88 | return 0; |
| 131 | 89 | ||
| 132 | if (acred->group_info != NULL) | 90 | if (acred->cred && acred->cred->group_info != NULL) |
| 133 | groups = acred->group_info->ngroups; | 91 | groups = acred->cred->group_info->ngroups; |
| 134 | if (groups > UNX_NGROUPS) | 92 | if (groups > UNX_NGROUPS) |
| 135 | groups = UNX_NGROUPS; | 93 | groups = UNX_NGROUPS; |
| 94 | if (cred->cr_cred->group_info == NULL) | ||
| 95 | return groups == 0; | ||
| 96 | if (groups != cred->cr_cred->group_info->ngroups) | ||
| 97 | return 0; | ||
| 98 | |||
| 136 | for (i = 0; i < groups ; i++) | 99 | for (i = 0; i < groups ; i++) |
| 137 | if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i])) | 100 | if (!gid_eq(cred->cr_cred->group_info->gid[i], acred->cred->group_info->gid[i])) |
| 138 | return 0; | 101 | return 0; |
| 139 | if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups])) | ||
| 140 | return 0; | ||
| 141 | return 1; | 102 | return 1; |
| 142 | } | 103 | } |
| 143 | 104 | ||
| @@ -149,9 +110,10 @@ static __be32 * | |||
| 149 | unx_marshal(struct rpc_task *task, __be32 *p) | 110 | unx_marshal(struct rpc_task *task, __be32 *p) |
| 150 | { | 111 | { |
| 151 | struct rpc_clnt *clnt = task->tk_client; | 112 | struct rpc_clnt *clnt = task->tk_client; |
| 152 | struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base); | 113 | struct rpc_cred *cred = task->tk_rqstp->rq_cred; |
| 153 | __be32 *base, *hold; | 114 | __be32 *base, *hold; |
| 154 | int i; | 115 | int i; |
| 116 | struct group_info *gi = cred->cr_cred->group_info; | ||
| 155 | 117 | ||
| 156 | *p++ = htonl(RPC_AUTH_UNIX); | 118 | *p++ = htonl(RPC_AUTH_UNIX); |
| 157 | base = p++; | 119 | base = p++; |
| @@ -162,11 +124,12 @@ unx_marshal(struct rpc_task *task, __be32 *p) | |||
| 162 | */ | 124 | */ |
| 163 | p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); | 125 | p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); |
| 164 | 126 | ||
| 165 | *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid)); | 127 | *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid)); |
| 166 | *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid)); | 128 | *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid)); |
| 167 | hold = p++; | 129 | hold = p++; |
| 168 | for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++) | 130 | if (gi) |
| 169 | *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i])); | 131 | for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++) |
| 132 | *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i])); | ||
| 170 | *hold = htonl(p - hold - 1); /* gid array length */ | 133 | *hold = htonl(p - hold - 1); /* gid array length */ |
| 171 | *base = htonl((p - base - 1) << 2); /* cred length */ | 134 | *base = htonl((p - base - 1) << 2); /* cred length */ |
| 172 | 135 | ||
| @@ -213,12 +176,13 @@ unx_validate(struct rpc_task *task, __be32 *p) | |||
| 213 | 176 | ||
| 214 | int __init rpc_init_authunix(void) | 177 | int __init rpc_init_authunix(void) |
| 215 | { | 178 | { |
| 216 | return rpcauth_init_credcache(&unix_auth); | 179 | unix_pool = mempool_create_kmalloc_pool(16, sizeof(struct rpc_cred)); |
| 180 | return unix_pool ? 0 : -ENOMEM; | ||
| 217 | } | 181 | } |
| 218 | 182 | ||
| 219 | void rpc_destroy_authunix(void) | 183 | void rpc_destroy_authunix(void) |
| 220 | { | 184 | { |
| 221 | rpcauth_destroy_credcache(&unix_auth); | 185 | mempool_destroy(unix_pool); |
| 222 | } | 186 | } |
| 223 | 187 | ||
| 224 | const struct rpc_authops authunix_ops = { | 188 | const struct rpc_authops authunix_ops = { |
| @@ -227,16 +191,13 @@ const struct rpc_authops authunix_ops = { | |||
| 227 | .au_name = "UNIX", | 191 | .au_name = "UNIX", |
| 228 | .create = unx_create, | 192 | .create = unx_create, |
| 229 | .destroy = unx_destroy, | 193 | .destroy = unx_destroy, |
| 230 | .hash_cred = unx_hash_cred, | ||
| 231 | .lookup_cred = unx_lookup_cred, | 194 | .lookup_cred = unx_lookup_cred, |
| 232 | .crcreate = unx_create_cred, | ||
| 233 | }; | 195 | }; |
| 234 | 196 | ||
| 235 | static | 197 | static |
| 236 | struct rpc_auth unix_auth = { | 198 | struct rpc_auth unix_auth = { |
| 237 | .au_cslack = UNX_CALLSLACK, | 199 | .au_cslack = UNX_CALLSLACK, |
| 238 | .au_rslack = NUL_REPLYSLACK, | 200 | .au_rslack = NUL_REPLYSLACK, |
| 239 | .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT, | ||
| 240 | .au_ops = &authunix_ops, | 201 | .au_ops = &authunix_ops, |
| 241 | .au_flavor = RPC_AUTH_UNIX, | 202 | .au_flavor = RPC_AUTH_UNIX, |
| 242 | .au_count = REFCOUNT_INIT(1), | 203 | .au_count = REFCOUNT_INIT(1), |
| @@ -246,7 +207,6 @@ static | |||
| 246 | const struct rpc_credops unix_credops = { | 207 | const struct rpc_credops unix_credops = { |
| 247 | .cr_name = "AUTH_UNIX", | 208 | .cr_name = "AUTH_UNIX", |
| 248 | .crdestroy = unx_destroy_cred, | 209 | .crdestroy = unx_destroy_cred, |
| 249 | .crbind = rpcauth_generic_bind_cred, | ||
| 250 | .crmatch = unx_match, | 210 | .crmatch = unx_match, |
| 251 | .crmarshal = unx_marshal, | 211 | .crmarshal = unx_marshal, |
| 252 | .crrefresh = unx_refresh, | 212 | .crrefresh = unx_refresh, |
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index fa5ba6ed3197..ec451b8114b0 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c | |||
| @@ -197,7 +197,7 @@ out_free: | |||
| 197 | /** | 197 | /** |
| 198 | * xprt_destroy_backchannel - Destroys the backchannel preallocated structures. | 198 | * xprt_destroy_backchannel - Destroys the backchannel preallocated structures. |
| 199 | * @xprt: the transport holding the preallocated strucures | 199 | * @xprt: the transport holding the preallocated strucures |
| 200 | * @max_reqs the maximum number of preallocated structures to destroy | 200 | * @max_reqs: the maximum number of preallocated structures to destroy |
| 201 | * | 201 | * |
| 202 | * Since these structures may have been allocated by multiple calls | 202 | * Since these structures may have been allocated by multiple calls |
| 203 | * to xprt_setup_backchannel, we only destroy up to the maximum number | 203 | * to xprt_setup_backchannel, we only destroy up to the maximum number |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 24cbddc44c88..71d9599b5816 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
| @@ -627,6 +627,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, | |||
| 627 | new->cl_noretranstimeo = clnt->cl_noretranstimeo; | 627 | new->cl_noretranstimeo = clnt->cl_noretranstimeo; |
| 628 | new->cl_discrtry = clnt->cl_discrtry; | 628 | new->cl_discrtry = clnt->cl_discrtry; |
| 629 | new->cl_chatty = clnt->cl_chatty; | 629 | new->cl_chatty = clnt->cl_chatty; |
| 630 | new->cl_principal = clnt->cl_principal; | ||
| 630 | return new; | 631 | return new; |
| 631 | 632 | ||
| 632 | out_err: | 633 | out_err: |
| @@ -1029,7 +1030,7 @@ rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) | |||
| 1029 | task->tk_msg.rpc_argp = msg->rpc_argp; | 1030 | task->tk_msg.rpc_argp = msg->rpc_argp; |
| 1030 | task->tk_msg.rpc_resp = msg->rpc_resp; | 1031 | task->tk_msg.rpc_resp = msg->rpc_resp; |
| 1031 | if (msg->rpc_cred != NULL) | 1032 | if (msg->rpc_cred != NULL) |
| 1032 | task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred); | 1033 | task->tk_msg.rpc_cred = get_cred(msg->rpc_cred); |
| 1033 | } | 1034 | } |
| 1034 | } | 1035 | } |
| 1035 | 1036 | ||
| @@ -2521,9 +2522,8 @@ static int rpc_ping(struct rpc_clnt *clnt) | |||
| 2521 | .rpc_proc = &rpcproc_null, | 2522 | .rpc_proc = &rpcproc_null, |
| 2522 | }; | 2523 | }; |
| 2523 | int err; | 2524 | int err; |
| 2524 | msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); | 2525 | err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN | |
| 2525 | err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN); | 2526 | RPC_TASK_NULLCREDS); |
| 2526 | put_rpccred(msg.rpc_cred); | ||
| 2527 | return err; | 2527 | return err; |
| 2528 | } | 2528 | } |
| 2529 | 2529 | ||
| @@ -2534,15 +2534,15 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, | |||
| 2534 | { | 2534 | { |
| 2535 | struct rpc_message msg = { | 2535 | struct rpc_message msg = { |
| 2536 | .rpc_proc = &rpcproc_null, | 2536 | .rpc_proc = &rpcproc_null, |
| 2537 | .rpc_cred = cred, | ||
| 2538 | }; | 2537 | }; |
| 2539 | struct rpc_task_setup task_setup_data = { | 2538 | struct rpc_task_setup task_setup_data = { |
| 2540 | .rpc_client = clnt, | 2539 | .rpc_client = clnt, |
| 2541 | .rpc_xprt = xprt, | 2540 | .rpc_xprt = xprt, |
| 2542 | .rpc_message = &msg, | 2541 | .rpc_message = &msg, |
| 2542 | .rpc_op_cred = cred, | ||
| 2543 | .callback_ops = (ops != NULL) ? ops : &rpc_default_ops, | 2543 | .callback_ops = (ops != NULL) ? ops : &rpc_default_ops, |
| 2544 | .callback_data = data, | 2544 | .callback_data = data, |
| 2545 | .flags = flags, | 2545 | .flags = flags | RPC_TASK_NULLCREDS, |
| 2546 | }; | 2546 | }; |
| 2547 | 2547 | ||
| 2548 | return rpc_run_task(&task_setup_data); | 2548 | return rpc_run_task(&task_setup_data); |
| @@ -2593,7 +2593,6 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, | |||
| 2593 | void *dummy) | 2593 | void *dummy) |
| 2594 | { | 2594 | { |
| 2595 | struct rpc_cb_add_xprt_calldata *data; | 2595 | struct rpc_cb_add_xprt_calldata *data; |
| 2596 | struct rpc_cred *cred; | ||
| 2597 | struct rpc_task *task; | 2596 | struct rpc_task *task; |
| 2598 | 2597 | ||
| 2599 | data = kmalloc(sizeof(*data), GFP_NOFS); | 2598 | data = kmalloc(sizeof(*data), GFP_NOFS); |
| @@ -2602,11 +2601,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, | |||
| 2602 | data->xps = xprt_switch_get(xps); | 2601 | data->xps = xprt_switch_get(xps); |
| 2603 | data->xprt = xprt_get(xprt); | 2602 | data->xprt = xprt_get(xprt); |
| 2604 | 2603 | ||
| 2605 | cred = authnull_ops.lookup_cred(NULL, NULL, 0); | 2604 | task = rpc_call_null_helper(clnt, xprt, NULL, |
| 2606 | task = rpc_call_null_helper(clnt, xprt, cred, | 2605 | RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS, |
| 2607 | RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC, | ||
| 2608 | &rpc_cb_add_xprt_call_ops, data); | 2606 | &rpc_cb_add_xprt_call_ops, data); |
| 2609 | put_rpccred(cred); | ||
| 2610 | if (IS_ERR(task)) | 2607 | if (IS_ERR(task)) |
| 2611 | return PTR_ERR(task); | 2608 | return PTR_ERR(task); |
| 2612 | rpc_put_task(task); | 2609 | rpc_put_task(task); |
| @@ -2637,7 +2634,6 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, | |||
| 2637 | struct rpc_xprt *xprt, | 2634 | struct rpc_xprt *xprt, |
| 2638 | void *data) | 2635 | void *data) |
| 2639 | { | 2636 | { |
| 2640 | struct rpc_cred *cred; | ||
| 2641 | struct rpc_task *task; | 2637 | struct rpc_task *task; |
| 2642 | struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data; | 2638 | struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data; |
| 2643 | int status = -EADDRINUSE; | 2639 | int status = -EADDRINUSE; |
| @@ -2649,11 +2645,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, | |||
| 2649 | goto out_err; | 2645 | goto out_err; |
| 2650 | 2646 | ||
| 2651 | /* Test the connection */ | 2647 | /* Test the connection */ |
| 2652 | cred = authnull_ops.lookup_cred(NULL, NULL, 0); | 2648 | task = rpc_call_null_helper(clnt, xprt, NULL, |
| 2653 | task = rpc_call_null_helper(clnt, xprt, cred, | 2649 | RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS, |
| 2654 | RPC_TASK_SOFT | RPC_TASK_SOFTCONN, | ||
| 2655 | NULL, NULL); | 2650 | NULL, NULL); |
| 2656 | put_rpccred(cred); | ||
| 2657 | if (IS_ERR(task)) { | 2651 | if (IS_ERR(task)) { |
| 2658 | status = PTR_ERR(task); | 2652 | status = PTR_ERR(task); |
| 2659 | goto out_err; | 2653 | goto out_err; |
| @@ -2667,6 +2661,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, | |||
| 2667 | /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */ | 2661 | /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */ |
| 2668 | xtest->add_xprt_test(clnt, xprt, xtest->data); | 2662 | xtest->add_xprt_test(clnt, xprt, xtest->data); |
| 2669 | 2663 | ||
| 2664 | xprt_put(xprt); | ||
| 2665 | xprt_switch_put(xps); | ||
| 2666 | |||
| 2670 | /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */ | 2667 | /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */ |
| 2671 | return 1; | 2668 | return 1; |
| 2672 | out_err: | 2669 | out_err: |
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 4fda18d47e2c..69663681bf9d 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
| @@ -1266,7 +1266,7 @@ static const struct rpc_pipe_ops gssd_dummy_pipe_ops = { | |||
| 1266 | * that this file will be there and have a certain format. | 1266 | * that this file will be there and have a certain format. |
| 1267 | */ | 1267 | */ |
| 1268 | static int | 1268 | static int |
| 1269 | rpc_show_dummy_info(struct seq_file *m, void *v) | 1269 | rpc_dummy_info_show(struct seq_file *m, void *v) |
| 1270 | { | 1270 | { |
| 1271 | seq_printf(m, "RPC server: %s\n", utsname()->nodename); | 1271 | seq_printf(m, "RPC server: %s\n", utsname()->nodename); |
| 1272 | seq_printf(m, "service: foo (1) version 0\n"); | 1272 | seq_printf(m, "service: foo (1) version 0\n"); |
| @@ -1275,25 +1275,12 @@ rpc_show_dummy_info(struct seq_file *m, void *v) | |||
| 1275 | seq_printf(m, "port: 0\n"); | 1275 | seq_printf(m, "port: 0\n"); |
| 1276 | return 0; | 1276 | return 0; |
| 1277 | } | 1277 | } |
| 1278 | 1278 | DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info); | |
| 1279 | static int | ||
| 1280 | rpc_dummy_info_open(struct inode *inode, struct file *file) | ||
| 1281 | { | ||
| 1282 | return single_open(file, rpc_show_dummy_info, NULL); | ||
| 1283 | } | ||
| 1284 | |||
| 1285 | static const struct file_operations rpc_dummy_info_operations = { | ||
| 1286 | .owner = THIS_MODULE, | ||
| 1287 | .open = rpc_dummy_info_open, | ||
| 1288 | .read = seq_read, | ||
| 1289 | .llseek = seq_lseek, | ||
| 1290 | .release = single_release, | ||
| 1291 | }; | ||
| 1292 | 1279 | ||
| 1293 | static const struct rpc_filelist gssd_dummy_info_file[] = { | 1280 | static const struct rpc_filelist gssd_dummy_info_file[] = { |
| 1294 | [0] = { | 1281 | [0] = { |
| 1295 | .name = "info", | 1282 | .name = "info", |
| 1296 | .i_fop = &rpc_dummy_info_operations, | 1283 | .i_fop = &rpc_dummy_info_fops, |
| 1297 | .mode = S_IFREG | 0400, | 1284 | .mode = S_IFREG | 0400, |
| 1298 | }, | 1285 | }, |
| 1299 | }; | 1286 | }; |
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c7872bc13860..41a971ac1c63 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
| @@ -752,7 +752,7 @@ void rpcb_getport_async(struct rpc_task *task) | |||
| 752 | goto bailout_nofree; | 752 | goto bailout_nofree; |
| 753 | } | 753 | } |
| 754 | 754 | ||
| 755 | map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); | 755 | map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS); |
| 756 | if (!map) { | 756 | if (!map) { |
| 757 | status = -ENOMEM; | 757 | status = -ENOMEM; |
| 758 | dprintk("RPC: %5u %s: no memory available\n", | 758 | dprintk("RPC: %5u %s: no memory available\n", |
| @@ -770,7 +770,13 @@ void rpcb_getport_async(struct rpc_task *task) | |||
| 770 | case RPCBVERS_4: | 770 | case RPCBVERS_4: |
| 771 | case RPCBVERS_3: | 771 | case RPCBVERS_3: |
| 772 | map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; | 772 | map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; |
| 773 | map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); | 773 | map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS); |
| 774 | if (!map->r_addr) { | ||
| 775 | status = -ENOMEM; | ||
| 776 | dprintk("RPC: %5u %s: no memory available\n", | ||
| 777 | task->tk_pid, __func__); | ||
| 778 | goto bailout_free_args; | ||
| 779 | } | ||
| 774 | map->r_owner = ""; | 780 | map->r_owner = ""; |
| 775 | break; | 781 | break; |
| 776 | case RPCBVERS_2: | 782 | case RPCBVERS_2: |
| @@ -793,6 +799,8 @@ void rpcb_getport_async(struct rpc_task *task) | |||
| 793 | rpc_put_task(child); | 799 | rpc_put_task(child); |
| 794 | return; | 800 | return; |
| 795 | 801 | ||
| 802 | bailout_free_args: | ||
| 803 | kfree(map); | ||
| 796 | bailout_release_client: | 804 | bailout_release_client: |
| 797 | rpc_release_client(rpcb_clnt); | 805 | rpc_release_client(rpcb_clnt); |
| 798 | bailout_nofree: | 806 | bailout_nofree: |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 57ca5bead1cb..adc3c40cc733 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
| @@ -997,6 +997,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta | |||
| 997 | 997 | ||
| 998 | task->tk_xprt = xprt_get(task_setup_data->rpc_xprt); | 998 | task->tk_xprt = xprt_get(task_setup_data->rpc_xprt); |
| 999 | 999 | ||
| 1000 | task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred); | ||
| 1001 | |||
| 1000 | if (task->tk_ops->rpc_call_prepare != NULL) | 1002 | if (task->tk_ops->rpc_call_prepare != NULL) |
| 1001 | task->tk_action = rpc_prepare_task; | 1003 | task->tk_action = rpc_prepare_task; |
| 1002 | 1004 | ||
| @@ -1054,6 +1056,7 @@ static void rpc_free_task(struct rpc_task *task) | |||
| 1054 | { | 1056 | { |
| 1055 | unsigned short tk_flags = task->tk_flags; | 1057 | unsigned short tk_flags = task->tk_flags; |
| 1056 | 1058 | ||
| 1059 | put_rpccred(task->tk_op_cred); | ||
| 1057 | rpc_release_calldata(task->tk_ops, task->tk_calldata); | 1060 | rpc_release_calldata(task->tk_ops, task->tk_calldata); |
| 1058 | 1061 | ||
| 1059 | if (tk_flags & RPC_TASK_DYNAMIC) { | 1062 | if (tk_flags & RPC_TASK_DYNAMIC) { |
| @@ -1071,7 +1074,7 @@ static void rpc_release_resources_task(struct rpc_task *task) | |||
| 1071 | { | 1074 | { |
| 1072 | xprt_release(task); | 1075 | xprt_release(task); |
| 1073 | if (task->tk_msg.rpc_cred) { | 1076 | if (task->tk_msg.rpc_cred) { |
| 1074 | put_rpccred(task->tk_msg.rpc_cred); | 1077 | put_cred(task->tk_msg.rpc_cred); |
| 1075 | task->tk_msg.rpc_cred = NULL; | 1078 | task->tk_msg.rpc_cred = NULL; |
| 1076 | } | 1079 | } |
| 1077 | rpc_task_release_client(task); | 1080 | rpc_task_release_client(task); |
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index e2d64c7138c3..8394124126f8 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c | |||
| @@ -383,7 +383,7 @@ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, | |||
| 383 | /** | 383 | /** |
| 384 | * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch | 384 | * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch |
| 385 | * @xpi: pointer to rpc_xprt_iter | 385 | * @xpi: pointer to rpc_xprt_iter |
| 386 | * @xps: pointer to a new rpc_xprt_switch or NULL | 386 | * @newswitch: pointer to a new rpc_xprt_switch or NULL |
| 387 | * | 387 | * |
| 388 | * Swaps out the existing xpi->xpi_xpswitch with a new value. | 388 | * Swaps out the existing xpi->xpi_xpswitch with a new value. |
| 389 | */ | 389 | */ |
| @@ -401,7 +401,7 @@ struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi, | |||
| 401 | 401 | ||
| 402 | /** | 402 | /** |
| 403 | * xprt_iter_destroy - Destroys the xprt iterator | 403 | * xprt_iter_destroy - Destroys the xprt iterator |
| 404 | * @xpi pointer to rpc_xprt_iter | 404 | * @xpi: pointer to rpc_xprt_iter |
| 405 | */ | 405 | */ |
| 406 | void xprt_iter_destroy(struct rpc_xprt_iter *xpi) | 406 | void xprt_iter_destroy(struct rpc_xprt_iter *xpi) |
| 407 | { | 407 | { |
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 8bf19e142b6b..8ed0377d7a18 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
| @@ -1,8 +1,7 @@ | |||
| 1 | # SPDX-License-Identifier: GPL-2.0 | 1 | # SPDX-License-Identifier: GPL-2.0 |
| 2 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o | 2 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o |
| 3 | 3 | ||
| 4 | rpcrdma-y := transport.o rpc_rdma.o verbs.o \ | 4 | rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \ |
| 5 | fmr_ops.o frwr_ops.o \ | ||
| 6 | svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ | 5 | svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ |
| 7 | svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \ | 6 | svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \ |
| 8 | module.o | 7 | module.o |
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index edba0d35776b..0de9b3e63770 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | * Support for backward direction RPCs on RPC/RDMA. | 5 | * Support for backward direction RPCs on RPC/RDMA. |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #include <linux/module.h> | ||
| 9 | #include <linux/sunrpc/xprt.h> | 8 | #include <linux/sunrpc/xprt.h> |
| 10 | #include <linux/sunrpc/svc.h> | 9 | #include <linux/sunrpc/svc.h> |
| 11 | #include <linux/sunrpc/svc_xprt.h> | 10 | #include <linux/sunrpc/svc_xprt.h> |
| @@ -20,29 +19,16 @@ | |||
| 20 | 19 | ||
| 21 | #undef RPCRDMA_BACKCHANNEL_DEBUG | 20 | #undef RPCRDMA_BACKCHANNEL_DEBUG |
| 22 | 21 | ||
| 23 | static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, | ||
| 24 | struct rpc_rqst *rqst) | ||
| 25 | { | ||
| 26 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
| 27 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | ||
| 28 | |||
| 29 | spin_lock(&buf->rb_reqslock); | ||
| 30 | list_del(&req->rl_all); | ||
| 31 | spin_unlock(&buf->rb_reqslock); | ||
| 32 | |||
| 33 | rpcrdma_destroy_req(req); | ||
| 34 | } | ||
| 35 | |||
| 36 | static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, | 22 | static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, |
| 37 | unsigned int count) | 23 | unsigned int count) |
| 38 | { | 24 | { |
| 39 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; | 25 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
| 26 | struct rpcrdma_req *req; | ||
| 40 | struct rpc_rqst *rqst; | 27 | struct rpc_rqst *rqst; |
| 41 | unsigned int i; | 28 | unsigned int i; |
| 42 | 29 | ||
| 43 | for (i = 0; i < (count << 1); i++) { | 30 | for (i = 0; i < (count << 1); i++) { |
| 44 | struct rpcrdma_regbuf *rb; | 31 | struct rpcrdma_regbuf *rb; |
| 45 | struct rpcrdma_req *req; | ||
| 46 | size_t size; | 32 | size_t size; |
| 47 | 33 | ||
| 48 | req = rpcrdma_create_req(r_xprt); | 34 | req = rpcrdma_create_req(r_xprt); |
| @@ -68,7 +54,7 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, | |||
| 68 | return 0; | 54 | return 0; |
| 69 | 55 | ||
| 70 | out_fail: | 56 | out_fail: |
| 71 | rpcrdma_bc_free_rqst(r_xprt, rqst); | 57 | rpcrdma_req_destroy(req); |
| 72 | return -ENOMEM; | 58 | return -ENOMEM; |
| 73 | } | 59 | } |
| 74 | 60 | ||
| @@ -101,7 +87,6 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) | |||
| 101 | goto out_free; | 87 | goto out_free; |
| 102 | 88 | ||
| 103 | r_xprt->rx_buf.rb_bc_srv_max_requests = reqs; | 89 | r_xprt->rx_buf.rb_bc_srv_max_requests = reqs; |
| 104 | request_module("svcrdma"); | ||
| 105 | trace_xprtrdma_cb_setup(r_xprt, reqs); | 90 | trace_xprtrdma_cb_setup(r_xprt, reqs); |
| 106 | return 0; | 91 | return 0; |
| 107 | 92 | ||
| @@ -173,21 +158,21 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) | |||
| 173 | */ | 158 | */ |
| 174 | int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) | 159 | int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) |
| 175 | { | 160 | { |
| 176 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | 161 | struct rpc_xprt *xprt = rqst->rq_xprt; |
| 162 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
| 177 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 163 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 178 | int rc; | 164 | int rc; |
| 179 | 165 | ||
| 180 | if (!xprt_connected(rqst->rq_xprt)) | 166 | if (!xprt_connected(xprt)) |
| 181 | goto drop_connection; | 167 | return -ENOTCONN; |
| 182 | 168 | ||
| 183 | if (!xprt_request_get_cong(rqst->rq_xprt, rqst)) | 169 | if (!xprt_request_get_cong(xprt, rqst)) |
| 184 | return -EBADSLT; | 170 | return -EBADSLT; |
| 185 | 171 | ||
| 186 | rc = rpcrdma_bc_marshal_reply(rqst); | 172 | rc = rpcrdma_bc_marshal_reply(rqst); |
| 187 | if (rc < 0) | 173 | if (rc < 0) |
| 188 | goto failed_marshal; | 174 | goto failed_marshal; |
| 189 | 175 | ||
| 190 | rpcrdma_post_recvs(r_xprt, true); | ||
| 191 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) | 176 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) |
| 192 | goto drop_connection; | 177 | goto drop_connection; |
| 193 | return 0; | 178 | return 0; |
| @@ -196,7 +181,7 @@ failed_marshal: | |||
| 196 | if (rc != -ENOTCONN) | 181 | if (rc != -ENOTCONN) |
| 197 | return rc; | 182 | return rc; |
| 198 | drop_connection: | 183 | drop_connection: |
| 199 | xprt_disconnect_done(rqst->rq_xprt); | 184 | xprt_rdma_close(xprt); |
| 200 | return -ENOTCONN; | 185 | return -ENOTCONN; |
| 201 | } | 186 | } |
| 202 | 187 | ||
| @@ -207,7 +192,6 @@ drop_connection: | |||
| 207 | */ | 192 | */ |
| 208 | void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) | 193 | void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) |
| 209 | { | 194 | { |
| 210 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
| 211 | struct rpc_rqst *rqst, *tmp; | 195 | struct rpc_rqst *rqst, *tmp; |
| 212 | 196 | ||
| 213 | spin_lock(&xprt->bc_pa_lock); | 197 | spin_lock(&xprt->bc_pa_lock); |
| @@ -215,7 +199,7 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) | |||
| 215 | list_del(&rqst->rq_bc_pa_list); | 199 | list_del(&rqst->rq_bc_pa_list); |
| 216 | spin_unlock(&xprt->bc_pa_lock); | 200 | spin_unlock(&xprt->bc_pa_lock); |
| 217 | 201 | ||
| 218 | rpcrdma_bc_free_rqst(r_xprt, rqst); | 202 | rpcrdma_req_destroy(rpcr_to_rdmar(rqst)); |
| 219 | 203 | ||
| 220 | spin_lock(&xprt->bc_pa_lock); | 204 | spin_lock(&xprt->bc_pa_lock); |
| 221 | } | 205 | } |
| @@ -231,9 +215,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) | |||
| 231 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 215 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 232 | struct rpc_xprt *xprt = rqst->rq_xprt; | 216 | struct rpc_xprt *xprt = rqst->rq_xprt; |
| 233 | 217 | ||
| 234 | dprintk("RPC: %s: freeing rqst %p (req %p)\n", | ||
| 235 | __func__, rqst, req); | ||
| 236 | |||
| 237 | rpcrdma_recv_buffer_put(req->rl_reply); | 218 | rpcrdma_recv_buffer_put(req->rl_reply); |
| 238 | req->rl_reply = NULL; | 219 | req->rl_reply = NULL; |
| 239 | 220 | ||
| @@ -319,7 +300,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, | |||
| 319 | 300 | ||
| 320 | out_overflow: | 301 | out_overflow: |
| 321 | pr_warn("RPC/RDMA backchannel overflow\n"); | 302 | pr_warn("RPC/RDMA backchannel overflow\n"); |
| 322 | xprt_disconnect_done(xprt); | 303 | xprt_force_disconnect(xprt); |
| 323 | /* This receive buffer gets reposted automatically | 304 | /* This receive buffer gets reposted automatically |
| 324 | * when the connection is re-established. | 305 | * when the connection is re-established. |
| 325 | */ | 306 | */ |
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c deleted file mode 100644 index fd8fea59fe92..000000000000 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ /dev/null | |||
| @@ -1,337 +0,0 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* | ||
| 3 | * Copyright (c) 2015, 2017 Oracle. All rights reserved. | ||
| 4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | ||
| 5 | */ | ||
| 6 | |||
| 7 | /* Lightweight memory registration using Fast Memory Regions (FMR). | ||
| 8 | * Referred to sometimes as MTHCAFMR mode. | ||
| 9 | * | ||
| 10 | * FMR uses synchronous memory registration and deregistration. | ||
| 11 | * FMR registration is known to be fast, but FMR deregistration | ||
| 12 | * can take tens of usecs to complete. | ||
| 13 | */ | ||
| 14 | |||
| 15 | /* Normal operation | ||
| 16 | * | ||
| 17 | * A Memory Region is prepared for RDMA READ or WRITE using the | ||
| 18 | * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is | ||
| 19 | * finished, the Memory Region is unmapped using the ib_unmap_fmr | ||
| 20 | * verb (fmr_op_unmap). | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <linux/sunrpc/svc_rdma.h> | ||
| 24 | |||
| 25 | #include "xprt_rdma.h" | ||
| 26 | #include <trace/events/rpcrdma.h> | ||
| 27 | |||
| 28 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
| 29 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
| 30 | #endif | ||
| 31 | |||
| 32 | /* Maximum scatter/gather per FMR */ | ||
| 33 | #define RPCRDMA_MAX_FMR_SGES (64) | ||
| 34 | |||
| 35 | /* Access mode of externally registered pages */ | ||
| 36 | enum { | ||
| 37 | RPCRDMA_FMR_ACCESS_FLAGS = IB_ACCESS_REMOTE_WRITE | | ||
| 38 | IB_ACCESS_REMOTE_READ, | ||
| 39 | }; | ||
| 40 | |||
| 41 | bool | ||
| 42 | fmr_is_supported(struct rpcrdma_ia *ia) | ||
| 43 | { | ||
| 44 | if (!ia->ri_device->ops.alloc_fmr) { | ||
| 45 | pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n", | ||
| 46 | ia->ri_device->name); | ||
| 47 | return false; | ||
| 48 | } | ||
| 49 | return true; | ||
| 50 | } | ||
| 51 | |||
| 52 | static void | ||
| 53 | __fmr_unmap(struct rpcrdma_mr *mr) | ||
| 54 | { | ||
| 55 | LIST_HEAD(l); | ||
| 56 | int rc; | ||
| 57 | |||
| 58 | list_add(&mr->fmr.fm_mr->list, &l); | ||
| 59 | rc = ib_unmap_fmr(&l); | ||
| 60 | list_del(&mr->fmr.fm_mr->list); | ||
| 61 | if (rc) | ||
| 62 | pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", | ||
| 63 | mr, rc); | ||
| 64 | } | ||
| 65 | |||
| 66 | /* Release an MR. | ||
| 67 | */ | ||
| 68 | static void | ||
| 69 | fmr_op_release_mr(struct rpcrdma_mr *mr) | ||
| 70 | { | ||
| 71 | int rc; | ||
| 72 | |||
| 73 | kfree(mr->fmr.fm_physaddrs); | ||
| 74 | kfree(mr->mr_sg); | ||
| 75 | |||
| 76 | /* In case this one was left mapped, try to unmap it | ||
| 77 | * to prevent dealloc_fmr from failing with EBUSY | ||
| 78 | */ | ||
| 79 | __fmr_unmap(mr); | ||
| 80 | |||
| 81 | rc = ib_dealloc_fmr(mr->fmr.fm_mr); | ||
| 82 | if (rc) | ||
| 83 | pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", | ||
| 84 | mr, rc); | ||
| 85 | |||
| 86 | kfree(mr); | ||
| 87 | } | ||
| 88 | |||
| 89 | /* MRs are dynamically allocated, so simply clean up and release the MR. | ||
| 90 | * A replacement MR will subsequently be allocated on demand. | ||
| 91 | */ | ||
| 92 | static void | ||
| 93 | fmr_mr_recycle_worker(struct work_struct *work) | ||
| 94 | { | ||
| 95 | struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle); | ||
| 96 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; | ||
| 97 | |||
| 98 | trace_xprtrdma_mr_recycle(mr); | ||
| 99 | |||
| 100 | trace_xprtrdma_mr_unmap(mr); | ||
| 101 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | ||
| 102 | mr->mr_sg, mr->mr_nents, mr->mr_dir); | ||
| 103 | |||
| 104 | spin_lock(&r_xprt->rx_buf.rb_mrlock); | ||
| 105 | list_del(&mr->mr_all); | ||
| 106 | r_xprt->rx_stats.mrs_recycled++; | ||
| 107 | spin_unlock(&r_xprt->rx_buf.rb_mrlock); | ||
| 108 | fmr_op_release_mr(mr); | ||
| 109 | } | ||
| 110 | |||
| 111 | static int | ||
| 112 | fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) | ||
| 113 | { | ||
| 114 | static struct ib_fmr_attr fmr_attr = { | ||
| 115 | .max_pages = RPCRDMA_MAX_FMR_SGES, | ||
| 116 | .max_maps = 1, | ||
| 117 | .page_shift = PAGE_SHIFT | ||
| 118 | }; | ||
| 119 | |||
| 120 | mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, | ||
| 121 | sizeof(u64), GFP_KERNEL); | ||
| 122 | if (!mr->fmr.fm_physaddrs) | ||
| 123 | goto out_free; | ||
| 124 | |||
| 125 | mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, | ||
| 126 | sizeof(*mr->mr_sg), GFP_KERNEL); | ||
| 127 | if (!mr->mr_sg) | ||
| 128 | goto out_free; | ||
| 129 | |||
| 130 | sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES); | ||
| 131 | |||
| 132 | mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, | ||
| 133 | &fmr_attr); | ||
| 134 | if (IS_ERR(mr->fmr.fm_mr)) | ||
| 135 | goto out_fmr_err; | ||
| 136 | |||
| 137 | INIT_LIST_HEAD(&mr->mr_list); | ||
| 138 | INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker); | ||
| 139 | return 0; | ||
| 140 | |||
| 141 | out_fmr_err: | ||
| 142 | dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, | ||
| 143 | PTR_ERR(mr->fmr.fm_mr)); | ||
| 144 | |||
| 145 | out_free: | ||
| 146 | kfree(mr->mr_sg); | ||
| 147 | kfree(mr->fmr.fm_physaddrs); | ||
| 148 | return -ENOMEM; | ||
| 149 | } | ||
| 150 | |||
| 151 | /* On success, sets: | ||
| 152 | * ep->rep_attr.cap.max_send_wr | ||
| 153 | * ep->rep_attr.cap.max_recv_wr | ||
| 154 | * cdata->max_requests | ||
| 155 | * ia->ri_max_segs | ||
| 156 | */ | ||
| 157 | static int | ||
| 158 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
| 159 | struct rpcrdma_create_data_internal *cdata) | ||
| 160 | { | ||
| 161 | int max_qp_wr; | ||
| 162 | |||
| 163 | max_qp_wr = ia->ri_device->attrs.max_qp_wr; | ||
| 164 | max_qp_wr -= RPCRDMA_BACKWARD_WRS; | ||
| 165 | max_qp_wr -= 1; | ||
| 166 | if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) | ||
| 167 | return -ENOMEM; | ||
| 168 | if (cdata->max_requests > max_qp_wr) | ||
| 169 | cdata->max_requests = max_qp_wr; | ||
| 170 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | ||
| 171 | ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; | ||
| 172 | ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ | ||
| 173 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; | ||
| 174 | ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; | ||
| 175 | ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ | ||
| 176 | |||
| 177 | ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / | ||
| 178 | RPCRDMA_MAX_FMR_SGES); | ||
| 179 | ia->ri_max_segs += 2; /* segments for head and tail buffers */ | ||
| 180 | return 0; | ||
| 181 | } | ||
| 182 | |||
| 183 | /* FMR mode conveys up to 64 pages of payload per chunk segment. | ||
| 184 | */ | ||
| 185 | static size_t | ||
| 186 | fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
| 187 | { | ||
| 188 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | ||
| 189 | RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES); | ||
| 190 | } | ||
| 191 | |||
| 192 | /* Use the ib_map_phys_fmr() verb to register a memory region | ||
| 193 | * for remote access via RDMA READ or RDMA WRITE. | ||
| 194 | */ | ||
| 195 | static struct rpcrdma_mr_seg * | ||
| 196 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | ||
| 197 | int nsegs, bool writing, struct rpcrdma_mr **out) | ||
| 198 | { | ||
| 199 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 200 | int len, pageoff, i, rc; | ||
| 201 | struct rpcrdma_mr *mr; | ||
| 202 | u64 *dma_pages; | ||
| 203 | |||
| 204 | mr = rpcrdma_mr_get(r_xprt); | ||
| 205 | if (!mr) | ||
| 206 | return ERR_PTR(-EAGAIN); | ||
| 207 | |||
| 208 | pageoff = offset_in_page(seg1->mr_offset); | ||
| 209 | seg1->mr_offset -= pageoff; /* start of page */ | ||
| 210 | seg1->mr_len += pageoff; | ||
| 211 | len = -pageoff; | ||
| 212 | if (nsegs > RPCRDMA_MAX_FMR_SGES) | ||
| 213 | nsegs = RPCRDMA_MAX_FMR_SGES; | ||
| 214 | for (i = 0; i < nsegs;) { | ||
| 215 | if (seg->mr_page) | ||
| 216 | sg_set_page(&mr->mr_sg[i], | ||
| 217 | seg->mr_page, | ||
| 218 | seg->mr_len, | ||
| 219 | offset_in_page(seg->mr_offset)); | ||
| 220 | else | ||
| 221 | sg_set_buf(&mr->mr_sg[i], seg->mr_offset, | ||
| 222 | seg->mr_len); | ||
| 223 | len += seg->mr_len; | ||
| 224 | ++seg; | ||
| 225 | ++i; | ||
| 226 | /* Check for holes */ | ||
| 227 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | ||
| 228 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | mr->mr_dir = rpcrdma_data_dir(writing); | ||
| 232 | |||
| 233 | mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, | ||
| 234 | mr->mr_sg, i, mr->mr_dir); | ||
| 235 | if (!mr->mr_nents) | ||
| 236 | goto out_dmamap_err; | ||
| 237 | trace_xprtrdma_mr_map(mr); | ||
| 238 | |||
| 239 | for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) | ||
| 240 | dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); | ||
| 241 | rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents, | ||
| 242 | dma_pages[0]); | ||
| 243 | if (rc) | ||
| 244 | goto out_maperr; | ||
| 245 | |||
| 246 | mr->mr_handle = mr->fmr.fm_mr->rkey; | ||
| 247 | mr->mr_length = len; | ||
| 248 | mr->mr_offset = dma_pages[0] + pageoff; | ||
| 249 | |||
| 250 | *out = mr; | ||
| 251 | return seg; | ||
| 252 | |||
| 253 | out_dmamap_err: | ||
| 254 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", | ||
| 255 | mr->mr_sg, i); | ||
| 256 | rpcrdma_mr_put(mr); | ||
| 257 | return ERR_PTR(-EIO); | ||
| 258 | |||
| 259 | out_maperr: | ||
| 260 | pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", | ||
| 261 | len, (unsigned long long)dma_pages[0], | ||
| 262 | pageoff, mr->mr_nents, rc); | ||
| 263 | rpcrdma_mr_unmap_and_put(mr); | ||
| 264 | return ERR_PTR(-EIO); | ||
| 265 | } | ||
| 266 | |||
| 267 | /* Post Send WR containing the RPC Call message. | ||
| 268 | */ | ||
| 269 | static int | ||
| 270 | fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) | ||
| 271 | { | ||
| 272 | return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL); | ||
| 273 | } | ||
| 274 | |||
| 275 | /* Invalidate all memory regions that were registered for "req". | ||
| 276 | * | ||
| 277 | * Sleeps until it is safe for the host CPU to access the | ||
| 278 | * previously mapped memory regions. | ||
| 279 | * | ||
| 280 | * Caller ensures that @mrs is not empty before the call. This | ||
| 281 | * function empties the list. | ||
| 282 | */ | ||
| 283 | static void | ||
| 284 | fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) | ||
| 285 | { | ||
| 286 | struct rpcrdma_mr *mr; | ||
| 287 | LIST_HEAD(unmap_list); | ||
| 288 | int rc; | ||
| 289 | |||
| 290 | /* ORDER: Invalidate all of the req's MRs first | ||
| 291 | * | ||
| 292 | * ib_unmap_fmr() is slow, so use a single call instead | ||
| 293 | * of one call per mapped FMR. | ||
| 294 | */ | ||
| 295 | list_for_each_entry(mr, mrs, mr_list) { | ||
| 296 | dprintk("RPC: %s: unmapping fmr %p\n", | ||
| 297 | __func__, &mr->fmr); | ||
| 298 | trace_xprtrdma_mr_localinv(mr); | ||
| 299 | list_add_tail(&mr->fmr.fm_mr->list, &unmap_list); | ||
| 300 | } | ||
| 301 | r_xprt->rx_stats.local_inv_needed++; | ||
| 302 | rc = ib_unmap_fmr(&unmap_list); | ||
| 303 | if (rc) | ||
| 304 | goto out_release; | ||
| 305 | |||
| 306 | /* ORDER: Now DMA unmap all of the req's MRs, and return | ||
| 307 | * them to the free MW list. | ||
| 308 | */ | ||
| 309 | while (!list_empty(mrs)) { | ||
| 310 | mr = rpcrdma_mr_pop(mrs); | ||
| 311 | list_del(&mr->fmr.fm_mr->list); | ||
| 312 | rpcrdma_mr_unmap_and_put(mr); | ||
| 313 | } | ||
| 314 | |||
| 315 | return; | ||
| 316 | |||
| 317 | out_release: | ||
| 318 | pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); | ||
| 319 | |||
| 320 | while (!list_empty(mrs)) { | ||
| 321 | mr = rpcrdma_mr_pop(mrs); | ||
| 322 | list_del(&mr->fmr.fm_mr->list); | ||
| 323 | rpcrdma_mr_recycle(mr); | ||
| 324 | } | ||
| 325 | } | ||
| 326 | |||
| 327 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { | ||
| 328 | .ro_map = fmr_op_map, | ||
| 329 | .ro_send = fmr_op_send, | ||
| 330 | .ro_unmap_sync = fmr_op_unmap_sync, | ||
| 331 | .ro_open = fmr_op_open, | ||
| 332 | .ro_maxpages = fmr_op_maxpages, | ||
| 333 | .ro_init_mr = fmr_op_init_mr, | ||
| 334 | .ro_release_mr = fmr_op_release_mr, | ||
| 335 | .ro_displayname = "fmr", | ||
| 336 | .ro_send_w_inv_ok = 0, | ||
| 337 | }; | ||
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index fc6378cc0c1c..6a561056b538 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
| @@ -15,21 +15,21 @@ | |||
| 15 | /* Normal operation | 15 | /* Normal operation |
| 16 | * | 16 | * |
| 17 | * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG | 17 | * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG |
| 18 | * Work Request (frwr_op_map). When the RDMA operation is finished, this | 18 | * Work Request (frwr_map). When the RDMA operation is finished, this |
| 19 | * Memory Region is invalidated using a LOCAL_INV Work Request | 19 | * Memory Region is invalidated using a LOCAL_INV Work Request |
| 20 | * (frwr_op_unmap_sync). | 20 | * (frwr_unmap_sync). |
| 21 | * | 21 | * |
| 22 | * Typically these Work Requests are not signaled, and neither are RDMA | 22 | * Typically these Work Requests are not signaled, and neither are RDMA |
| 23 | * SEND Work Requests (with the exception of signaling occasionally to | 23 | * SEND Work Requests (with the exception of signaling occasionally to |
| 24 | * prevent provider work queue overflows). This greatly reduces HCA | 24 | * prevent provider work queue overflows). This greatly reduces HCA |
| 25 | * interrupt workload. | 25 | * interrupt workload. |
| 26 | * | 26 | * |
| 27 | * As an optimization, frwr_op_unmap marks MRs INVALID before the | 27 | * As an optimization, frwr_unmap marks MRs INVALID before the |
| 28 | * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on | 28 | * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on |
| 29 | * rb_mrs immediately so that no work (like managing a linked list | 29 | * rb_mrs immediately so that no work (like managing a linked list |
| 30 | * under a spinlock) is needed in the completion upcall. | 30 | * under a spinlock) is needed in the completion upcall. |
| 31 | * | 31 | * |
| 32 | * But this means that frwr_op_map() can occasionally encounter an MR | 32 | * But this means that frwr_map() can occasionally encounter an MR |
| 33 | * that is INVALID but the LOCAL_INV WR has not completed. Work Queue | 33 | * that is INVALID but the LOCAL_INV WR has not completed. Work Queue |
| 34 | * ordering prevents a subsequent FAST_REG WR from executing against | 34 | * ordering prevents a subsequent FAST_REG WR from executing against |
| 35 | * that MR while it is still being invalidated. | 35 | * that MR while it is still being invalidated. |
| @@ -57,14 +57,14 @@ | |||
| 57 | * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR | 57 | * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR |
| 58 | * state, and the pending WR was flushed. | 58 | * state, and the pending WR was flushed. |
| 59 | * | 59 | * |
| 60 | * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered | 60 | * When frwr_map encounters FLUSHED and VALID MRs, they are recovered |
| 61 | * with ib_dereg_mr and then are re-initialized. Because MR recovery | 61 | * with ib_dereg_mr and then are re-initialized. Because MR recovery |
| 62 | * allocates fresh resources, it is deferred to a workqueue, and the | 62 | * allocates fresh resources, it is deferred to a workqueue, and the |
| 63 | * recovered MRs are placed back on the rb_mrs list when recovery is | 63 | * recovered MRs are placed back on the rb_mrs list when recovery is |
| 64 | * complete. frwr_op_map allocates another MR for the current RPC while | 64 | * complete. frwr_map allocates another MR for the current RPC while |
| 65 | * the broken MR is reset. | 65 | * the broken MR is reset. |
| 66 | * | 66 | * |
| 67 | * To ensure that frwr_op_map doesn't encounter an MR that is marked | 67 | * To ensure that frwr_map doesn't encounter an MR that is marked |
| 68 | * INVALID but that is about to be flushed due to a previous transport | 68 | * INVALID but that is about to be flushed due to a previous transport |
| 69 | * disconnect, the transport connect worker attempts to drain all | 69 | * disconnect, the transport connect worker attempts to drain all |
| 70 | * pending send queue WRs before the transport is reconnected. | 70 | * pending send queue WRs before the transport is reconnected. |
| @@ -80,8 +80,13 @@ | |||
| 80 | # define RPCDBG_FACILITY RPCDBG_TRANS | 80 | # define RPCDBG_FACILITY RPCDBG_TRANS |
| 81 | #endif | 81 | #endif |
| 82 | 82 | ||
| 83 | bool | 83 | /** |
| 84 | frwr_is_supported(struct rpcrdma_ia *ia) | 84 | * frwr_is_supported - Check if device supports FRWR |
| 85 | * @ia: interface adapter to check | ||
| 86 | * | ||
| 87 | * Returns true if device supports FRWR, otherwise false | ||
| 88 | */ | ||
| 89 | bool frwr_is_supported(struct rpcrdma_ia *ia) | ||
| 85 | { | 90 | { |
| 86 | struct ib_device_attr *attrs = &ia->ri_device->attrs; | 91 | struct ib_device_attr *attrs = &ia->ri_device->attrs; |
| 87 | 92 | ||
| @@ -97,15 +102,18 @@ out_not_supported: | |||
| 97 | return false; | 102 | return false; |
| 98 | } | 103 | } |
| 99 | 104 | ||
| 100 | static void | 105 | /** |
| 101 | frwr_op_release_mr(struct rpcrdma_mr *mr) | 106 | * frwr_release_mr - Destroy one MR |
| 107 | * @mr: MR allocated by frwr_init_mr | ||
| 108 | * | ||
| 109 | */ | ||
| 110 | void frwr_release_mr(struct rpcrdma_mr *mr) | ||
| 102 | { | 111 | { |
| 103 | int rc; | 112 | int rc; |
| 104 | 113 | ||
| 105 | rc = ib_dereg_mr(mr->frwr.fr_mr); | 114 | rc = ib_dereg_mr(mr->frwr.fr_mr); |
| 106 | if (rc) | 115 | if (rc) |
| 107 | pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", | 116 | trace_xprtrdma_frwr_dereg(mr, rc); |
| 108 | mr, rc); | ||
| 109 | kfree(mr->mr_sg); | 117 | kfree(mr->mr_sg); |
| 110 | kfree(mr); | 118 | kfree(mr); |
| 111 | } | 119 | } |
| @@ -117,60 +125,78 @@ static void | |||
| 117 | frwr_mr_recycle_worker(struct work_struct *work) | 125 | frwr_mr_recycle_worker(struct work_struct *work) |
| 118 | { | 126 | { |
| 119 | struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle); | 127 | struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle); |
| 120 | enum rpcrdma_frwr_state state = mr->frwr.fr_state; | ||
| 121 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; | 128 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
| 122 | 129 | ||
| 123 | trace_xprtrdma_mr_recycle(mr); | 130 | trace_xprtrdma_mr_recycle(mr); |
| 124 | 131 | ||
| 125 | if (state != FRWR_FLUSHED_LI) { | 132 | if (mr->mr_dir != DMA_NONE) { |
| 126 | trace_xprtrdma_mr_unmap(mr); | 133 | trace_xprtrdma_mr_unmap(mr); |
| 127 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | 134 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
| 128 | mr->mr_sg, mr->mr_nents, mr->mr_dir); | 135 | mr->mr_sg, mr->mr_nents, mr->mr_dir); |
| 136 | mr->mr_dir = DMA_NONE; | ||
| 129 | } | 137 | } |
| 130 | 138 | ||
| 131 | spin_lock(&r_xprt->rx_buf.rb_mrlock); | 139 | spin_lock(&r_xprt->rx_buf.rb_mrlock); |
| 132 | list_del(&mr->mr_all); | 140 | list_del(&mr->mr_all); |
| 133 | r_xprt->rx_stats.mrs_recycled++; | 141 | r_xprt->rx_stats.mrs_recycled++; |
| 134 | spin_unlock(&r_xprt->rx_buf.rb_mrlock); | 142 | spin_unlock(&r_xprt->rx_buf.rb_mrlock); |
| 135 | frwr_op_release_mr(mr); | 143 | |
| 144 | frwr_release_mr(mr); | ||
| 136 | } | 145 | } |
| 137 | 146 | ||
| 138 | static int | 147 | /** |
| 139 | frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) | 148 | * frwr_init_mr - Initialize one MR |
| 149 | * @ia: interface adapter | ||
| 150 | * @mr: generic MR to prepare for FRWR | ||
| 151 | * | ||
| 152 | * Returns zero if successful. Otherwise a negative errno | ||
| 153 | * is returned. | ||
| 154 | */ | ||
| 155 | int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) | ||
| 140 | { | 156 | { |
| 141 | unsigned int depth = ia->ri_max_frwr_depth; | 157 | unsigned int depth = ia->ri_max_frwr_depth; |
| 142 | struct rpcrdma_frwr *frwr = &mr->frwr; | 158 | struct scatterlist *sg; |
| 159 | struct ib_mr *frmr; | ||
| 143 | int rc; | 160 | int rc; |
| 144 | 161 | ||
| 145 | frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); | 162 | frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); |
| 146 | if (IS_ERR(frwr->fr_mr)) | 163 | if (IS_ERR(frmr)) |
| 147 | goto out_mr_err; | 164 | goto out_mr_err; |
| 148 | 165 | ||
| 149 | mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL); | 166 | sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL); |
| 150 | if (!mr->mr_sg) | 167 | if (!sg) |
| 151 | goto out_list_err; | 168 | goto out_list_err; |
| 152 | 169 | ||
| 170 | mr->frwr.fr_mr = frmr; | ||
| 171 | mr->frwr.fr_state = FRWR_IS_INVALID; | ||
| 172 | mr->mr_dir = DMA_NONE; | ||
| 153 | INIT_LIST_HEAD(&mr->mr_list); | 173 | INIT_LIST_HEAD(&mr->mr_list); |
| 154 | INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker); | 174 | INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker); |
| 155 | sg_init_table(mr->mr_sg, depth); | 175 | init_completion(&mr->frwr.fr_linv_done); |
| 156 | init_completion(&frwr->fr_linv_done); | 176 | |
| 177 | sg_init_table(sg, depth); | ||
| 178 | mr->mr_sg = sg; | ||
| 157 | return 0; | 179 | return 0; |
| 158 | 180 | ||
| 159 | out_mr_err: | 181 | out_mr_err: |
| 160 | rc = PTR_ERR(frwr->fr_mr); | 182 | rc = PTR_ERR(frmr); |
| 161 | dprintk("RPC: %s: ib_alloc_mr status %i\n", | 183 | trace_xprtrdma_frwr_alloc(mr, rc); |
| 162 | __func__, rc); | ||
| 163 | return rc; | 184 | return rc; |
| 164 | 185 | ||
| 165 | out_list_err: | 186 | out_list_err: |
| 166 | rc = -ENOMEM; | ||
| 167 | dprintk("RPC: %s: sg allocation failure\n", | 187 | dprintk("RPC: %s: sg allocation failure\n", |
| 168 | __func__); | 188 | __func__); |
| 169 | ib_dereg_mr(frwr->fr_mr); | 189 | ib_dereg_mr(frmr); |
| 170 | return rc; | 190 | return -ENOMEM; |
| 171 | } | 191 | } |
| 172 | 192 | ||
| 173 | /* On success, sets: | 193 | /** |
| 194 | * frwr_open - Prepare an endpoint for use with FRWR | ||
| 195 | * @ia: interface adapter this endpoint will use | ||
| 196 | * @ep: endpoint to prepare | ||
| 197 | * @cdata: transport parameters | ||
| 198 | * | ||
| 199 | * On success, sets: | ||
| 174 | * ep->rep_attr.cap.max_send_wr | 200 | * ep->rep_attr.cap.max_send_wr |
| 175 | * ep->rep_attr.cap.max_recv_wr | 201 | * ep->rep_attr.cap.max_recv_wr |
| 176 | * cdata->max_requests | 202 | * cdata->max_requests |
| @@ -179,10 +205,11 @@ out_list_err: | |||
| 179 | * And these FRWR-related fields: | 205 | * And these FRWR-related fields: |
| 180 | * ia->ri_max_frwr_depth | 206 | * ia->ri_max_frwr_depth |
| 181 | * ia->ri_mrtype | 207 | * ia->ri_mrtype |
| 208 | * | ||
| 209 | * On failure, a negative errno is returned. | ||
| 182 | */ | 210 | */ |
| 183 | static int | 211 | int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, |
| 184 | frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | 212 | struct rpcrdma_create_data_internal *cdata) |
| 185 | struct rpcrdma_create_data_internal *cdata) | ||
| 186 | { | 213 | { |
| 187 | struct ib_device_attr *attrs = &ia->ri_device->attrs; | 214 | struct ib_device_attr *attrs = &ia->ri_device->attrs; |
| 188 | int max_qp_wr, depth, delta; | 215 | int max_qp_wr, depth, delta; |
| @@ -191,10 +218,17 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
| 191 | if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) | 218 | if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) |
| 192 | ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; | 219 | ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; |
| 193 | 220 | ||
| 194 | ia->ri_max_frwr_depth = | 221 | /* Quirk: Some devices advertise a large max_fast_reg_page_list_len |
| 195 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 222 | * capability, but perform optimally when the MRs are not larger |
| 196 | attrs->max_fast_reg_page_list_len); | 223 | * than a page. |
| 197 | dprintk("RPC: %s: device's max FR page list len = %u\n", | 224 | */ |
| 225 | if (attrs->max_sge_rd > 1) | ||
| 226 | ia->ri_max_frwr_depth = attrs->max_sge_rd; | ||
| 227 | else | ||
| 228 | ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len; | ||
| 229 | if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS) | ||
| 230 | ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS; | ||
| 231 | dprintk("RPC: %s: max FR page list depth = %u\n", | ||
| 198 | __func__, ia->ri_max_frwr_depth); | 232 | __func__, ia->ri_max_frwr_depth); |
| 199 | 233 | ||
| 200 | /* Add room for frwr register and invalidate WRs. | 234 | /* Add room for frwr register and invalidate WRs. |
| @@ -242,20 +276,28 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
| 242 | 276 | ||
| 243 | ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / | 277 | ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / |
| 244 | ia->ri_max_frwr_depth); | 278 | ia->ri_max_frwr_depth); |
| 245 | ia->ri_max_segs += 2; /* segments for head and tail buffers */ | 279 | /* Reply chunks require segments for head and tail buffers */ |
| 280 | ia->ri_max_segs += 2; | ||
| 281 | if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS) | ||
| 282 | ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS; | ||
| 246 | return 0; | 283 | return 0; |
| 247 | } | 284 | } |
| 248 | 285 | ||
| 249 | /* FRWR mode conveys a list of pages per chunk segment. The | 286 | /** |
| 287 | * frwr_maxpages - Compute size of largest payload | ||
| 288 | * @r_xprt: transport | ||
| 289 | * | ||
| 290 | * Returns maximum size of an RPC message, in pages. | ||
| 291 | * | ||
| 292 | * FRWR mode conveys a list of pages per chunk segment. The | ||
| 250 | * maximum length of that list is the FRWR page list depth. | 293 | * maximum length of that list is the FRWR page list depth. |
| 251 | */ | 294 | */ |
| 252 | static size_t | 295 | size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt) |
| 253 | frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) | ||
| 254 | { | 296 | { |
| 255 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 297 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
| 256 | 298 | ||
| 257 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 299 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, |
| 258 | RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth); | 300 | (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth); |
| 259 | } | 301 | } |
| 260 | 302 | ||
| 261 | static void | 303 | static void |
| @@ -332,12 +374,25 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) | |||
| 332 | trace_xprtrdma_wc_li_wake(wc, frwr); | 374 | trace_xprtrdma_wc_li_wake(wc, frwr); |
| 333 | } | 375 | } |
| 334 | 376 | ||
| 335 | /* Post a REG_MR Work Request to register a memory region | 377 | /** |
| 378 | * frwr_map - Register a memory region | ||
| 379 | * @r_xprt: controlling transport | ||
| 380 | * @seg: memory region co-ordinates | ||
| 381 | * @nsegs: number of segments remaining | ||
| 382 | * @writing: true when RDMA Write will be used | ||
| 383 | * @xid: XID of RPC using the registered memory | ||
| 384 | * @out: initialized MR | ||
| 385 | * | ||
| 386 | * Prepare a REG_MR Work Request to register a memory region | ||
| 336 | * for remote access via RDMA READ or RDMA WRITE. | 387 | * for remote access via RDMA READ or RDMA WRITE. |
| 388 | * | ||
| 389 | * Returns the next segment or a negative errno pointer. | ||
| 390 | * On success, the prepared MR is planted in @out. | ||
| 337 | */ | 391 | */ |
| 338 | static struct rpcrdma_mr_seg * | 392 | struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, |
| 339 | frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | 393 | struct rpcrdma_mr_seg *seg, |
| 340 | int nsegs, bool writing, struct rpcrdma_mr **out) | 394 | int nsegs, bool writing, u32 xid, |
| 395 | struct rpcrdma_mr **out) | ||
| 341 | { | 396 | { |
| 342 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 397 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
| 343 | bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; | 398 | bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; |
| @@ -384,13 +439,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
| 384 | mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); | 439 | mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); |
| 385 | if (!mr->mr_nents) | 440 | if (!mr->mr_nents) |
| 386 | goto out_dmamap_err; | 441 | goto out_dmamap_err; |
| 387 | trace_xprtrdma_mr_map(mr); | ||
| 388 | 442 | ||
| 389 | ibmr = frwr->fr_mr; | 443 | ibmr = frwr->fr_mr; |
| 390 | n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); | 444 | n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); |
| 391 | if (unlikely(n != mr->mr_nents)) | 445 | if (unlikely(n != mr->mr_nents)) |
| 392 | goto out_mapmr_err; | 446 | goto out_mapmr_err; |
| 393 | 447 | ||
| 448 | ibmr->iova &= 0x00000000ffffffff; | ||
| 449 | ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32; | ||
| 394 | key = (u8)(ibmr->rkey & 0x000000FF); | 450 | key = (u8)(ibmr->rkey & 0x000000FF); |
| 395 | ib_update_fast_reg_key(ibmr, ++key); | 451 | ib_update_fast_reg_key(ibmr, ++key); |
| 396 | 452 | ||
| @@ -404,32 +460,35 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
| 404 | mr->mr_handle = ibmr->rkey; | 460 | mr->mr_handle = ibmr->rkey; |
| 405 | mr->mr_length = ibmr->length; | 461 | mr->mr_length = ibmr->length; |
| 406 | mr->mr_offset = ibmr->iova; | 462 | mr->mr_offset = ibmr->iova; |
| 463 | trace_xprtrdma_mr_map(mr); | ||
| 407 | 464 | ||
| 408 | *out = mr; | 465 | *out = mr; |
| 409 | return seg; | 466 | return seg; |
| 410 | 467 | ||
| 411 | out_dmamap_err: | 468 | out_dmamap_err: |
| 412 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", | ||
| 413 | mr->mr_sg, i); | ||
| 414 | frwr->fr_state = FRWR_IS_INVALID; | 469 | frwr->fr_state = FRWR_IS_INVALID; |
| 470 | trace_xprtrdma_frwr_sgerr(mr, i); | ||
| 415 | rpcrdma_mr_put(mr); | 471 | rpcrdma_mr_put(mr); |
| 416 | return ERR_PTR(-EIO); | 472 | return ERR_PTR(-EIO); |
| 417 | 473 | ||
| 418 | out_mapmr_err: | 474 | out_mapmr_err: |
| 419 | pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", | 475 | trace_xprtrdma_frwr_maperr(mr, n); |
| 420 | frwr->fr_mr, n, mr->mr_nents); | ||
| 421 | rpcrdma_mr_recycle(mr); | 476 | rpcrdma_mr_recycle(mr); |
| 422 | return ERR_PTR(-EIO); | 477 | return ERR_PTR(-EIO); |
| 423 | } | 478 | } |
| 424 | 479 | ||
| 425 | /* Post Send WR containing the RPC Call message. | 480 | /** |
| 481 | * frwr_send - post Send WR containing the RPC Call message | ||
| 482 | * @ia: interface adapter | ||
| 483 | * @req: Prepared RPC Call | ||
| 426 | * | 484 | * |
| 427 | * For FRMR, chain any FastReg WRs to the Send WR. Only a | 485 | * For FRWR, chain any FastReg WRs to the Send WR. Only a |
| 428 | * single ib_post_send call is needed to register memory | 486 | * single ib_post_send call is needed to register memory |
| 429 | * and then post the Send WR. | 487 | * and then post the Send WR. |
| 488 | * | ||
| 489 | * Returns the result of ib_post_send. | ||
| 430 | */ | 490 | */ |
| 431 | static int | 491 | int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) |
| 432 | frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) | ||
| 433 | { | 492 | { |
| 434 | struct ib_send_wr *post_wr; | 493 | struct ib_send_wr *post_wr; |
| 435 | struct rpcrdma_mr *mr; | 494 | struct rpcrdma_mr *mr; |
| @@ -451,15 +510,18 @@ frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) | |||
| 451 | } | 510 | } |
| 452 | 511 | ||
| 453 | /* If ib_post_send fails, the next ->send_request for | 512 | /* If ib_post_send fails, the next ->send_request for |
| 454 | * @req will queue these MWs for recovery. | 513 | * @req will queue these MRs for recovery. |
| 455 | */ | 514 | */ |
| 456 | return ib_post_send(ia->ri_id->qp, post_wr, NULL); | 515 | return ib_post_send(ia->ri_id->qp, post_wr, NULL); |
| 457 | } | 516 | } |
| 458 | 517 | ||
| 459 | /* Handle a remotely invalidated mr on the @mrs list | 518 | /** |
| 519 | * frwr_reminv - handle a remotely invalidated mr on the @mrs list | ||
| 520 | * @rep: Received reply | ||
| 521 | * @mrs: list of MRs to check | ||
| 522 | * | ||
| 460 | */ | 523 | */ |
| 461 | static void | 524 | void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) |
| 462 | frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) | ||
| 463 | { | 525 | { |
| 464 | struct rpcrdma_mr *mr; | 526 | struct rpcrdma_mr *mr; |
| 465 | 527 | ||
| @@ -473,7 +535,10 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) | |||
| 473 | } | 535 | } |
| 474 | } | 536 | } |
| 475 | 537 | ||
| 476 | /* Invalidate all memory regions that were registered for "req". | 538 | /** |
| 539 | * frwr_unmap_sync - invalidate memory regions that were registered for @req | ||
| 540 | * @r_xprt: controlling transport | ||
| 541 | * @mrs: list of MRs to process | ||
| 477 | * | 542 | * |
| 478 | * Sleeps until it is safe for the host CPU to access the | 543 | * Sleeps until it is safe for the host CPU to access the |
| 479 | * previously mapped memory regions. | 544 | * previously mapped memory regions. |
| @@ -481,8 +546,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) | |||
| 481 | * Caller ensures that @mrs is not empty before the call. This | 546 | * Caller ensures that @mrs is not empty before the call. This |
| 482 | * function empties the list. | 547 | * function empties the list. |
| 483 | */ | 548 | */ |
| 484 | static void | 549 | void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) |
| 485 | frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) | ||
| 486 | { | 550 | { |
| 487 | struct ib_send_wr *first, **prev, *last; | 551 | struct ib_send_wr *first, **prev, *last; |
| 488 | const struct ib_send_wr *bad_wr; | 552 | const struct ib_send_wr *bad_wr; |
| @@ -561,20 +625,7 @@ out_release: | |||
| 561 | mr = container_of(frwr, struct rpcrdma_mr, frwr); | 625 | mr = container_of(frwr, struct rpcrdma_mr, frwr); |
| 562 | bad_wr = bad_wr->next; | 626 | bad_wr = bad_wr->next; |
| 563 | 627 | ||
| 564 | list_del(&mr->mr_list); | 628 | list_del_init(&mr->mr_list); |
| 565 | frwr_op_release_mr(mr); | 629 | rpcrdma_mr_recycle(mr); |
| 566 | } | 630 | } |
| 567 | } | 631 | } |
| 568 | |||
| 569 | const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | ||
| 570 | .ro_map = frwr_op_map, | ||
| 571 | .ro_send = frwr_op_send, | ||
| 572 | .ro_reminv = frwr_op_reminv, | ||
| 573 | .ro_unmap_sync = frwr_op_unmap_sync, | ||
| 574 | .ro_open = frwr_op_open, | ||
| 575 | .ro_maxpages = frwr_op_maxpages, | ||
| 576 | .ro_init_mr = frwr_op_init_mr, | ||
| 577 | .ro_release_mr = frwr_op_release_mr, | ||
| 578 | .ro_displayname = "frwr", | ||
| 579 | .ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK, | ||
| 580 | }; | ||
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 9f53e0240035..d18614e02b4e 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
| @@ -218,11 +218,12 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, | |||
| 218 | ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); | 218 | ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); |
| 219 | page_base = offset_in_page(xdrbuf->page_base); | 219 | page_base = offset_in_page(xdrbuf->page_base); |
| 220 | while (len) { | 220 | while (len) { |
| 221 | if (unlikely(!*ppages)) { | 221 | /* ACL likes to be lazy in allocating pages - ACLs |
| 222 | /* XXX: Certain upper layer operations do | 222 | * are small by default but can get huge. |
| 223 | * not provide receive buffer pages. | 223 | */ |
| 224 | */ | 224 | if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) { |
| 225 | *ppages = alloc_page(GFP_ATOMIC); | 225 | if (!*ppages) |
| 226 | *ppages = alloc_page(GFP_ATOMIC); | ||
| 226 | if (!*ppages) | 227 | if (!*ppages) |
| 227 | return -ENOBUFS; | 228 | return -ENOBUFS; |
| 228 | } | 229 | } |
| @@ -356,8 +357,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
| 356 | return nsegs; | 357 | return nsegs; |
| 357 | 358 | ||
| 358 | do { | 359 | do { |
| 359 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, | 360 | seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr); |
| 360 | false, &mr); | ||
| 361 | if (IS_ERR(seg)) | 361 | if (IS_ERR(seg)) |
| 362 | return PTR_ERR(seg); | 362 | return PTR_ERR(seg); |
| 363 | rpcrdma_mr_push(mr, &req->rl_registered); | 363 | rpcrdma_mr_push(mr, &req->rl_registered); |
| @@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
| 365 | if (encode_read_segment(xdr, mr, pos) < 0) | 365 | if (encode_read_segment(xdr, mr, pos) < 0) |
| 366 | return -EMSGSIZE; | 366 | return -EMSGSIZE; |
| 367 | 367 | ||
| 368 | trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs); | 368 | trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs); |
| 369 | r_xprt->rx_stats.read_chunk_count++; | 369 | r_xprt->rx_stats.read_chunk_count++; |
| 370 | nsegs -= mr->mr_nents; | 370 | nsegs -= mr->mr_nents; |
| 371 | } while (nsegs); | 371 | } while (nsegs); |
| @@ -414,8 +414,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
| 414 | 414 | ||
| 415 | nchunks = 0; | 415 | nchunks = 0; |
| 416 | do { | 416 | do { |
| 417 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, | 417 | seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr); |
| 418 | true, &mr); | ||
| 419 | if (IS_ERR(seg)) | 418 | if (IS_ERR(seg)) |
| 420 | return PTR_ERR(seg); | 419 | return PTR_ERR(seg); |
| 421 | rpcrdma_mr_push(mr, &req->rl_registered); | 420 | rpcrdma_mr_push(mr, &req->rl_registered); |
| @@ -423,7 +422,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
| 423 | if (encode_rdma_segment(xdr, mr) < 0) | 422 | if (encode_rdma_segment(xdr, mr) < 0) |
| 424 | return -EMSGSIZE; | 423 | return -EMSGSIZE; |
| 425 | 424 | ||
| 426 | trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs); | 425 | trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs); |
| 427 | r_xprt->rx_stats.write_chunk_count++; | 426 | r_xprt->rx_stats.write_chunk_count++; |
| 428 | r_xprt->rx_stats.total_rdma_request += mr->mr_length; | 427 | r_xprt->rx_stats.total_rdma_request += mr->mr_length; |
| 429 | nchunks++; | 428 | nchunks++; |
| @@ -472,8 +471,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
| 472 | 471 | ||
| 473 | nchunks = 0; | 472 | nchunks = 0; |
| 474 | do { | 473 | do { |
| 475 | seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, | 474 | seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr); |
| 476 | true, &mr); | ||
| 477 | if (IS_ERR(seg)) | 475 | if (IS_ERR(seg)) |
| 478 | return PTR_ERR(seg); | 476 | return PTR_ERR(seg); |
| 479 | rpcrdma_mr_push(mr, &req->rl_registered); | 477 | rpcrdma_mr_push(mr, &req->rl_registered); |
| @@ -481,7 +479,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |||
| 481 | if (encode_rdma_segment(xdr, mr) < 0) | 479 | if (encode_rdma_segment(xdr, mr) < 0) |
| 482 | return -EMSGSIZE; | 480 | return -EMSGSIZE; |
| 483 | 481 | ||
| 484 | trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs); | 482 | trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs); |
| 485 | r_xprt->rx_stats.reply_chunk_count++; | 483 | r_xprt->rx_stats.reply_chunk_count++; |
| 486 | r_xprt->rx_stats.total_rdma_request += mr->mr_length; | 484 | r_xprt->rx_stats.total_rdma_request += mr->mr_length; |
| 487 | nchunks++; | 485 | nchunks++; |
| @@ -667,7 +665,7 @@ out_mapping_overflow: | |||
| 667 | 665 | ||
| 668 | out_mapping_err: | 666 | out_mapping_err: |
| 669 | rpcrdma_unmap_sendctx(sc); | 667 | rpcrdma_unmap_sendctx(sc); |
| 670 | pr_err("rpcrdma: Send mapping error\n"); | 668 | trace_xprtrdma_dma_maperr(sge[sge_no].addr); |
| 671 | return false; | 669 | return false; |
| 672 | } | 670 | } |
| 673 | 671 | ||
| @@ -1188,17 +1186,20 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, | |||
| 1188 | p = xdr_inline_decode(xdr, 2 * sizeof(*p)); | 1186 | p = xdr_inline_decode(xdr, 2 * sizeof(*p)); |
| 1189 | if (!p) | 1187 | if (!p) |
| 1190 | break; | 1188 | break; |
| 1191 | dprintk("RPC: %5u: %s: server reports version error (%u-%u)\n", | 1189 | dprintk("RPC: %s: server reports " |
| 1192 | rqst->rq_task->tk_pid, __func__, | 1190 | "version error (%u-%u), xid %08x\n", __func__, |
| 1193 | be32_to_cpup(p), be32_to_cpu(*(p + 1))); | 1191 | be32_to_cpup(p), be32_to_cpu(*(p + 1)), |
| 1192 | be32_to_cpu(rep->rr_xid)); | ||
| 1194 | break; | 1193 | break; |
| 1195 | case err_chunk: | 1194 | case err_chunk: |
| 1196 | dprintk("RPC: %5u: %s: server reports header decoding error\n", | 1195 | dprintk("RPC: %s: server reports " |
| 1197 | rqst->rq_task->tk_pid, __func__); | 1196 | "header decoding error, xid %08x\n", __func__, |
| 1197 | be32_to_cpu(rep->rr_xid)); | ||
| 1198 | break; | 1198 | break; |
| 1199 | default: | 1199 | default: |
| 1200 | dprintk("RPC: %5u: %s: server reports unrecognized error %d\n", | 1200 | dprintk("RPC: %s: server reports " |
| 1201 | rqst->rq_task->tk_pid, __func__, be32_to_cpup(p)); | 1201 | "unrecognized error %d, xid %08x\n", __func__, |
| 1202 | be32_to_cpup(p), be32_to_cpu(rep->rr_xid)); | ||
| 1202 | } | 1203 | } |
| 1203 | 1204 | ||
| 1204 | r_xprt->rx_stats.bad_reply_count++; | 1205 | r_xprt->rx_stats.bad_reply_count++; |
| @@ -1248,7 +1249,6 @@ out: | |||
| 1248 | out_badheader: | 1249 | out_badheader: |
| 1249 | trace_xprtrdma_reply_hdr(rep); | 1250 | trace_xprtrdma_reply_hdr(rep); |
| 1250 | r_xprt->rx_stats.bad_reply_count++; | 1251 | r_xprt->rx_stats.bad_reply_count++; |
| 1251 | status = -EIO; | ||
| 1252 | goto out; | 1252 | goto out; |
| 1253 | } | 1253 | } |
| 1254 | 1254 | ||
| @@ -1262,8 +1262,7 @@ void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) | |||
| 1262 | * RPC has relinquished all its Send Queue entries. | 1262 | * RPC has relinquished all its Send Queue entries. |
| 1263 | */ | 1263 | */ |
| 1264 | if (!list_empty(&req->rl_registered)) | 1264 | if (!list_empty(&req->rl_registered)) |
| 1265 | r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, | 1265 | frwr_unmap_sync(r_xprt, &req->rl_registered); |
| 1266 | &req->rl_registered); | ||
| 1267 | 1266 | ||
| 1268 | /* Ensure that any DMA mapped pages associated with | 1267 | /* Ensure that any DMA mapped pages associated with |
| 1269 | * the Send of the RPC Call have been unmapped before | 1268 | * the Send of the RPC Call have been unmapped before |
| @@ -1292,7 +1291,7 @@ void rpcrdma_deferred_completion(struct work_struct *work) | |||
| 1292 | 1291 | ||
| 1293 | trace_xprtrdma_defer_cmp(rep); | 1292 | trace_xprtrdma_defer_cmp(rep); |
| 1294 | if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) | 1293 | if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) |
| 1295 | r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered); | 1294 | frwr_reminv(rep, &req->rl_registered); |
| 1296 | rpcrdma_release_rqst(r_xprt, req); | 1295 | rpcrdma_release_rqst(r_xprt, req); |
| 1297 | rpcrdma_complete_rqst(rep); | 1296 | rpcrdma_complete_rqst(rep); |
| 1298 | } | 1297 | } |
| @@ -1312,11 +1311,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
| 1312 | u32 credits; | 1311 | u32 credits; |
| 1313 | __be32 *p; | 1312 | __be32 *p; |
| 1314 | 1313 | ||
| 1315 | --buf->rb_posted_receives; | ||
| 1316 | |||
| 1317 | if (rep->rr_hdrbuf.head[0].iov_len == 0) | ||
| 1318 | goto out_badstatus; | ||
| 1319 | |||
| 1320 | /* Fixed transport header fields */ | 1314 | /* Fixed transport header fields */ |
| 1321 | xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, | 1315 | xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, |
| 1322 | rep->rr_hdrbuf.head[0].iov_base); | 1316 | rep->rr_hdrbuf.head[0].iov_base); |
| @@ -1356,36 +1350,30 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
| 1356 | } | 1350 | } |
| 1357 | 1351 | ||
| 1358 | req = rpcr_to_rdmar(rqst); | 1352 | req = rpcr_to_rdmar(rqst); |
| 1353 | if (req->rl_reply) { | ||
| 1354 | trace_xprtrdma_leaked_rep(rqst, req->rl_reply); | ||
| 1355 | rpcrdma_recv_buffer_put(req->rl_reply); | ||
| 1356 | } | ||
| 1359 | req->rl_reply = rep; | 1357 | req->rl_reply = rep; |
| 1360 | rep->rr_rqst = rqst; | 1358 | rep->rr_rqst = rqst; |
| 1361 | clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); | 1359 | clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); |
| 1362 | 1360 | ||
| 1363 | trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); | 1361 | trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); |
| 1364 | 1362 | queue_work(buf->rb_completion_wq, &rep->rr_work); | |
| 1365 | rpcrdma_post_recvs(r_xprt, false); | ||
| 1366 | queue_work(rpcrdma_receive_wq, &rep->rr_work); | ||
| 1367 | return; | 1363 | return; |
| 1368 | 1364 | ||
| 1369 | out_badversion: | 1365 | out_badversion: |
| 1370 | trace_xprtrdma_reply_vers(rep); | 1366 | trace_xprtrdma_reply_vers(rep); |
| 1371 | goto repost; | 1367 | goto out; |
| 1372 | 1368 | ||
| 1373 | /* The RPC transaction has already been terminated, or the header | ||
| 1374 | * is corrupt. | ||
| 1375 | */ | ||
| 1376 | out_norqst: | 1369 | out_norqst: |
| 1377 | spin_unlock(&xprt->queue_lock); | 1370 | spin_unlock(&xprt->queue_lock); |
| 1378 | trace_xprtrdma_reply_rqst(rep); | 1371 | trace_xprtrdma_reply_rqst(rep); |
| 1379 | goto repost; | 1372 | goto out; |
| 1380 | 1373 | ||
| 1381 | out_shortreply: | 1374 | out_shortreply: |
| 1382 | trace_xprtrdma_reply_short(rep); | 1375 | trace_xprtrdma_reply_short(rep); |
| 1383 | 1376 | ||
| 1384 | /* If no pending RPC transaction was matched, post a replacement | 1377 | out: |
| 1385 | * receive buffer before returning. | ||
| 1386 | */ | ||
| 1387 | repost: | ||
| 1388 | rpcrdma_post_recvs(r_xprt, false); | ||
| 1389 | out_badstatus: | ||
| 1390 | rpcrdma_recv_buffer_put(rep); | 1378 | rpcrdma_recv_buffer_put(rep); |
| 1391 | } | 1379 | } |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index f3c147d70286..b908f2ca08fd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c | |||
| @@ -200,11 +200,10 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) | |||
| 200 | svc_rdma_send_ctxt_put(rdma, ctxt); | 200 | svc_rdma_send_ctxt_put(rdma, ctxt); |
| 201 | goto drop_connection; | 201 | goto drop_connection; |
| 202 | } | 202 | } |
| 203 | return rc; | 203 | return 0; |
| 204 | 204 | ||
| 205 | drop_connection: | 205 | drop_connection: |
| 206 | dprintk("svcrdma: failed to send bc call\n"); | 206 | dprintk("svcrdma: failed to send bc call\n"); |
| 207 | xprt_disconnect_done(xprt); | ||
| 208 | return -ENOTCONN; | 207 | return -ENOTCONN; |
| 209 | } | 208 | } |
| 210 | 209 | ||
| @@ -225,8 +224,11 @@ xprt_rdma_bc_send_request(struct rpc_rqst *rqst) | |||
| 225 | 224 | ||
| 226 | ret = -ENOTCONN; | 225 | ret = -ENOTCONN; |
| 227 | rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); | 226 | rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); |
| 228 | if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) | 227 | if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) { |
| 229 | ret = rpcrdma_bc_send_request(rdma, rqst); | 228 | ret = rpcrdma_bc_send_request(rdma, rqst); |
| 229 | if (ret == -ENOTCONN) | ||
| 230 | svc_close_xprt(sxprt); | ||
| 231 | } | ||
| 230 | 232 | ||
| 231 | mutex_unlock(&sxprt->xpt_mutex); | 233 | mutex_unlock(&sxprt->xpt_mutex); |
| 232 | 234 | ||
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 9141068693fa..fbc171ebfe91 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
| @@ -268,7 +268,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) | |||
| 268 | { | 268 | { |
| 269 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 269 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 270 | 270 | ||
| 271 | trace_xprtrdma_inject_dsc(r_xprt); | 271 | trace_xprtrdma_op_inject_dsc(r_xprt); |
| 272 | rdma_disconnect(r_xprt->rx_ia.ri_id); | 272 | rdma_disconnect(r_xprt->rx_ia.ri_id); |
| 273 | } | 273 | } |
| 274 | 274 | ||
| @@ -284,7 +284,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) | |||
| 284 | { | 284 | { |
| 285 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 285 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 286 | 286 | ||
| 287 | trace_xprtrdma_destroy(r_xprt); | 287 | trace_xprtrdma_op_destroy(r_xprt); |
| 288 | 288 | ||
| 289 | cancel_delayed_work_sync(&r_xprt->rx_connect_worker); | 289 | cancel_delayed_work_sync(&r_xprt->rx_connect_worker); |
| 290 | 290 | ||
| @@ -318,17 +318,12 @@ xprt_setup_rdma(struct xprt_create *args) | |||
| 318 | struct sockaddr *sap; | 318 | struct sockaddr *sap; |
| 319 | int rc; | 319 | int rc; |
| 320 | 320 | ||
| 321 | if (args->addrlen > sizeof(xprt->addr)) { | 321 | if (args->addrlen > sizeof(xprt->addr)) |
| 322 | dprintk("RPC: %s: address too large\n", __func__); | ||
| 323 | return ERR_PTR(-EBADF); | 322 | return ERR_PTR(-EBADF); |
| 324 | } | ||
| 325 | 323 | ||
| 326 | xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); | 324 | xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); |
| 327 | if (xprt == NULL) { | 325 | if (!xprt) |
| 328 | dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", | ||
| 329 | __func__); | ||
| 330 | return ERR_PTR(-ENOMEM); | 326 | return ERR_PTR(-ENOMEM); |
| 331 | } | ||
| 332 | 327 | ||
| 333 | /* 60 second timeout, no retries */ | 328 | /* 60 second timeout, no retries */ |
| 334 | xprt->timeout = &xprt_rdma_default_timeout; | 329 | xprt->timeout = &xprt_rdma_default_timeout; |
| @@ -399,7 +394,7 @@ xprt_setup_rdma(struct xprt_create *args) | |||
| 399 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, | 394 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, |
| 400 | xprt_rdma_connect_worker); | 395 | xprt_rdma_connect_worker); |
| 401 | 396 | ||
| 402 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); | 397 | xprt->max_payload = frwr_maxpages(new_xprt); |
| 403 | if (xprt->max_payload == 0) | 398 | if (xprt->max_payload == 0) |
| 404 | goto out4; | 399 | goto out4; |
| 405 | xprt->max_payload <<= PAGE_SHIFT; | 400 | xprt->max_payload <<= PAGE_SHIFT; |
| @@ -423,7 +418,7 @@ out3: | |||
| 423 | out2: | 418 | out2: |
| 424 | rpcrdma_ia_close(&new_xprt->rx_ia); | 419 | rpcrdma_ia_close(&new_xprt->rx_ia); |
| 425 | out1: | 420 | out1: |
| 426 | trace_xprtrdma_destroy(new_xprt); | 421 | trace_xprtrdma_op_destroy(new_xprt); |
| 427 | xprt_rdma_free_addresses(xprt); | 422 | xprt_rdma_free_addresses(xprt); |
| 428 | xprt_free(xprt); | 423 | xprt_free(xprt); |
| 429 | return ERR_PTR(rc); | 424 | return ERR_PTR(rc); |
| @@ -433,29 +428,33 @@ out1: | |||
| 433 | * xprt_rdma_close - close a transport connection | 428 | * xprt_rdma_close - close a transport connection |
| 434 | * @xprt: transport context | 429 | * @xprt: transport context |
| 435 | * | 430 | * |
| 436 | * Called during transport shutdown, reconnect, or device removal. | 431 | * Called during autoclose or device removal. |
| 432 | * | ||
| 437 | * Caller holds @xprt's send lock to prevent activity on this | 433 | * Caller holds @xprt's send lock to prevent activity on this |
| 438 | * transport while the connection is torn down. | 434 | * transport while the connection is torn down. |
| 439 | */ | 435 | */ |
| 440 | static void | 436 | void xprt_rdma_close(struct rpc_xprt *xprt) |
| 441 | xprt_rdma_close(struct rpc_xprt *xprt) | ||
| 442 | { | 437 | { |
| 443 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 438 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 444 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; | 439 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; |
| 445 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 440 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
| 446 | 441 | ||
| 447 | dprintk("RPC: %s: closing xprt %p\n", __func__, xprt); | 442 | might_sleep(); |
| 443 | |||
| 444 | trace_xprtrdma_op_close(r_xprt); | ||
| 445 | |||
| 446 | /* Prevent marshaling and sending of new requests */ | ||
| 447 | xprt_clear_connected(xprt); | ||
| 448 | 448 | ||
| 449 | if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { | 449 | if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { |
| 450 | xprt_clear_connected(xprt); | ||
| 451 | rpcrdma_ia_remove(ia); | 450 | rpcrdma_ia_remove(ia); |
| 452 | return; | 451 | goto out; |
| 453 | } | 452 | } |
| 453 | |||
| 454 | if (ep->rep_connected == -ENODEV) | 454 | if (ep->rep_connected == -ENODEV) |
| 455 | return; | 455 | return; |
| 456 | if (ep->rep_connected > 0) | 456 | if (ep->rep_connected > 0) |
| 457 | xprt->reestablish_timeout = 0; | 457 | xprt->reestablish_timeout = 0; |
| 458 | xprt_disconnect_done(xprt); | ||
| 459 | rpcrdma_ep_disconnect(ep, ia); | 458 | rpcrdma_ep_disconnect(ep, ia); |
| 460 | 459 | ||
| 461 | /* Prepare @xprt for the next connection by reinitializing | 460 | /* Prepare @xprt for the next connection by reinitializing |
| @@ -463,6 +462,10 @@ xprt_rdma_close(struct rpc_xprt *xprt) | |||
| 463 | */ | 462 | */ |
| 464 | r_xprt->rx_buf.rb_credits = 1; | 463 | r_xprt->rx_buf.rb_credits = 1; |
| 465 | xprt->cwnd = RPC_CWNDSHIFT; | 464 | xprt->cwnd = RPC_CWNDSHIFT; |
| 465 | |||
| 466 | out: | ||
| 467 | ++xprt->connect_cookie; | ||
| 468 | xprt_disconnect_done(xprt); | ||
| 466 | } | 469 | } |
| 467 | 470 | ||
| 468 | /** | 471 | /** |
| @@ -525,6 +528,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) | |||
| 525 | { | 528 | { |
| 526 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 529 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 527 | 530 | ||
| 531 | trace_xprtrdma_op_connect(r_xprt); | ||
| 528 | if (r_xprt->rx_ep.rep_connected != 0) { | 532 | if (r_xprt->rx_ep.rep_connected != 0) { |
| 529 | /* Reconnect */ | 533 | /* Reconnect */ |
| 530 | schedule_delayed_work(&r_xprt->rx_connect_worker, | 534 | schedule_delayed_work(&r_xprt->rx_connect_worker, |
| @@ -659,11 +663,11 @@ xprt_rdma_allocate(struct rpc_task *task) | |||
| 659 | 663 | ||
| 660 | rqst->rq_buffer = req->rl_sendbuf->rg_base; | 664 | rqst->rq_buffer = req->rl_sendbuf->rg_base; |
| 661 | rqst->rq_rbuffer = req->rl_recvbuf->rg_base; | 665 | rqst->rq_rbuffer = req->rl_recvbuf->rg_base; |
| 662 | trace_xprtrdma_allocate(task, req); | 666 | trace_xprtrdma_op_allocate(task, req); |
| 663 | return 0; | 667 | return 0; |
| 664 | 668 | ||
| 665 | out_fail: | 669 | out_fail: |
| 666 | trace_xprtrdma_allocate(task, NULL); | 670 | trace_xprtrdma_op_allocate(task, NULL); |
| 667 | return -ENOMEM; | 671 | return -ENOMEM; |
| 668 | } | 672 | } |
| 669 | 673 | ||
| @@ -682,7 +686,7 @@ xprt_rdma_free(struct rpc_task *task) | |||
| 682 | 686 | ||
| 683 | if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) | 687 | if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) |
| 684 | rpcrdma_release_rqst(r_xprt, req); | 688 | rpcrdma_release_rqst(r_xprt, req); |
| 685 | trace_xprtrdma_rpc_done(task, req); | 689 | trace_xprtrdma_op_free(task, req); |
| 686 | } | 690 | } |
| 687 | 691 | ||
| 688 | /** | 692 | /** |
| @@ -696,8 +700,10 @@ xprt_rdma_free(struct rpc_task *task) | |||
| 696 | * %-ENOTCONN if the caller should reconnect and call again | 700 | * %-ENOTCONN if the caller should reconnect and call again |
| 697 | * %-EAGAIN if the caller should call again | 701 | * %-EAGAIN if the caller should call again |
| 698 | * %-ENOBUFS if the caller should call again after a delay | 702 | * %-ENOBUFS if the caller should call again after a delay |
| 699 | * %-EIO if a permanent error occurred and the request was not | 703 | * %-EMSGSIZE if encoding ran out of buffer space. The request |
| 700 | * sent. Do not try to send this message again. | 704 | * was not sent. Do not try to send this message again. |
| 705 | * %-EIO if an I/O error occurred. The request was not sent. | ||
| 706 | * Do not try to send this message again. | ||
| 701 | */ | 707 | */ |
| 702 | static int | 708 | static int |
| 703 | xprt_rdma_send_request(struct rpc_rqst *rqst) | 709 | xprt_rdma_send_request(struct rpc_rqst *rqst) |
| @@ -713,7 +719,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst) | |||
| 713 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | 719 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
| 714 | 720 | ||
| 715 | if (!xprt_connected(xprt)) | 721 | if (!xprt_connected(xprt)) |
| 716 | goto drop_connection; | 722 | return -ENOTCONN; |
| 717 | 723 | ||
| 718 | if (!xprt_request_get_cong(xprt, rqst)) | 724 | if (!xprt_request_get_cong(xprt, rqst)) |
| 719 | return -EBADSLT; | 725 | return -EBADSLT; |
| @@ -745,8 +751,8 @@ failed_marshal: | |||
| 745 | if (rc != -ENOTCONN) | 751 | if (rc != -ENOTCONN) |
| 746 | return rc; | 752 | return rc; |
| 747 | drop_connection: | 753 | drop_connection: |
| 748 | xprt_disconnect_done(xprt); | 754 | xprt_rdma_close(xprt); |
| 749 | return -ENOTCONN; /* implies disconnect */ | 755 | return -ENOTCONN; |
| 750 | } | 756 | } |
| 751 | 757 | ||
| 752 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | 758 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
| @@ -843,58 +849,31 @@ static struct xprt_class xprt_rdma = { | |||
| 843 | 849 | ||
| 844 | void xprt_rdma_cleanup(void) | 850 | void xprt_rdma_cleanup(void) |
| 845 | { | 851 | { |
| 846 | int rc; | ||
| 847 | |||
| 848 | dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); | ||
| 849 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 852 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
| 850 | if (sunrpc_table_header) { | 853 | if (sunrpc_table_header) { |
| 851 | unregister_sysctl_table(sunrpc_table_header); | 854 | unregister_sysctl_table(sunrpc_table_header); |
| 852 | sunrpc_table_header = NULL; | 855 | sunrpc_table_header = NULL; |
| 853 | } | 856 | } |
| 854 | #endif | 857 | #endif |
| 855 | rc = xprt_unregister_transport(&xprt_rdma); | ||
| 856 | if (rc) | ||
| 857 | dprintk("RPC: %s: xprt_unregister returned %i\n", | ||
| 858 | __func__, rc); | ||
| 859 | |||
| 860 | rpcrdma_destroy_wq(); | ||
| 861 | 858 | ||
| 862 | rc = xprt_unregister_transport(&xprt_rdma_bc); | 859 | xprt_unregister_transport(&xprt_rdma); |
| 863 | if (rc) | 860 | xprt_unregister_transport(&xprt_rdma_bc); |
| 864 | dprintk("RPC: %s: xprt_unregister(bc) returned %i\n", | ||
| 865 | __func__, rc); | ||
| 866 | } | 861 | } |
| 867 | 862 | ||
| 868 | int xprt_rdma_init(void) | 863 | int xprt_rdma_init(void) |
| 869 | { | 864 | { |
| 870 | int rc; | 865 | int rc; |
| 871 | 866 | ||
| 872 | rc = rpcrdma_alloc_wq(); | ||
| 873 | if (rc) | ||
| 874 | return rc; | ||
| 875 | |||
| 876 | rc = xprt_register_transport(&xprt_rdma); | 867 | rc = xprt_register_transport(&xprt_rdma); |
| 877 | if (rc) { | 868 | if (rc) |
| 878 | rpcrdma_destroy_wq(); | ||
| 879 | return rc; | 869 | return rc; |
| 880 | } | ||
| 881 | 870 | ||
| 882 | rc = xprt_register_transport(&xprt_rdma_bc); | 871 | rc = xprt_register_transport(&xprt_rdma_bc); |
| 883 | if (rc) { | 872 | if (rc) { |
| 884 | xprt_unregister_transport(&xprt_rdma); | 873 | xprt_unregister_transport(&xprt_rdma); |
| 885 | rpcrdma_destroy_wq(); | ||
| 886 | return rc; | 874 | return rc; |
| 887 | } | 875 | } |
| 888 | 876 | ||
| 889 | dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); | ||
| 890 | |||
| 891 | dprintk("Defaults:\n"); | ||
| 892 | dprintk("\tSlots %d\n" | ||
| 893 | "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", | ||
| 894 | xprt_rdma_slot_table_entries, | ||
| 895 | xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); | ||
| 896 | dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy); | ||
| 897 | |||
| 898 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 877 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
| 899 | if (!sunrpc_table_header) | 878 | if (!sunrpc_table_header) |
| 900 | sunrpc_table_header = register_sysctl_table(sunrpc_table); | 879 | sunrpc_table_header = register_sysctl_table(sunrpc_table); |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3ddba94c939f..7749a2bf6887 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
| @@ -78,53 +78,25 @@ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); | |||
| 78 | static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); | 78 | static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); |
| 79 | static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); | 79 | static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); |
| 80 | static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); | 80 | static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); |
| 81 | static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); | ||
| 81 | 82 | ||
| 82 | struct workqueue_struct *rpcrdma_receive_wq __read_mostly; | 83 | /* Wait for outstanding transport work to finish. |
| 83 | |||
| 84 | int | ||
| 85 | rpcrdma_alloc_wq(void) | ||
| 86 | { | ||
| 87 | struct workqueue_struct *recv_wq; | ||
| 88 | |||
| 89 | recv_wq = alloc_workqueue("xprtrdma_receive", | ||
| 90 | WQ_MEM_RECLAIM | WQ_HIGHPRI, | ||
| 91 | 0); | ||
| 92 | if (!recv_wq) | ||
| 93 | return -ENOMEM; | ||
| 94 | |||
| 95 | rpcrdma_receive_wq = recv_wq; | ||
| 96 | return 0; | ||
| 97 | } | ||
| 98 | |||
| 99 | void | ||
| 100 | rpcrdma_destroy_wq(void) | ||
| 101 | { | ||
| 102 | struct workqueue_struct *wq; | ||
| 103 | |||
| 104 | if (rpcrdma_receive_wq) { | ||
| 105 | wq = rpcrdma_receive_wq; | ||
| 106 | rpcrdma_receive_wq = NULL; | ||
| 107 | destroy_workqueue(wq); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | /** | ||
| 112 | * rpcrdma_disconnect_worker - Force a disconnect | ||
| 113 | * @work: endpoint to be disconnected | ||
| 114 | * | ||
| 115 | * Provider callbacks can possibly run in an IRQ context. This function | ||
| 116 | * is invoked in a worker thread to guarantee that disconnect wake-up | ||
| 117 | * calls are always done in process context. | ||
| 118 | */ | 84 | */ |
| 119 | static void | 85 | static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) |
| 120 | rpcrdma_disconnect_worker(struct work_struct *work) | ||
| 121 | { | 86 | { |
| 122 | struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep, | 87 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
| 123 | rep_disconnect_worker.work); | 88 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
| 124 | struct rpcrdma_xprt *r_xprt = | ||
| 125 | container_of(ep, struct rpcrdma_xprt, rx_ep); | ||
| 126 | 89 | ||
| 127 | xprt_force_disconnect(&r_xprt->rx_xprt); | 90 | /* Flush Receives, then wait for deferred Reply work |
| 91 | * to complete. | ||
| 92 | */ | ||
| 93 | ib_drain_qp(ia->ri_id->qp); | ||
| 94 | drain_workqueue(buf->rb_completion_wq); | ||
| 95 | |||
| 96 | /* Deferred Reply processing might have scheduled | ||
| 97 | * local invalidations. | ||
| 98 | */ | ||
| 99 | ib_drain_sq(ia->ri_id->qp); | ||
| 128 | } | 100 | } |
| 129 | 101 | ||
| 130 | /** | 102 | /** |
| @@ -143,15 +115,6 @@ rpcrdma_qp_event_handler(struct ib_event *event, void *context) | |||
| 143 | rx_ep); | 115 | rx_ep); |
| 144 | 116 | ||
| 145 | trace_xprtrdma_qp_event(r_xprt, event); | 117 | trace_xprtrdma_qp_event(r_xprt, event); |
| 146 | pr_err("rpcrdma: %s on device %s connected to %s:%s\n", | ||
| 147 | ib_event_msg(event->event), event->device->name, | ||
| 148 | rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt)); | ||
| 149 | |||
| 150 | if (ep->rep_connected == 1) { | ||
| 151 | ep->rep_connected = -EIO; | ||
| 152 | schedule_delayed_work(&ep->rep_disconnect_worker, 0); | ||
| 153 | wake_up_all(&ep->rep_connect_wait); | ||
| 154 | } | ||
| 155 | } | 118 | } |
| 156 | 119 | ||
| 157 | /** | 120 | /** |
| @@ -189,11 +152,13 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) | |||
| 189 | struct ib_cqe *cqe = wc->wr_cqe; | 152 | struct ib_cqe *cqe = wc->wr_cqe; |
| 190 | struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, | 153 | struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, |
| 191 | rr_cqe); | 154 | rr_cqe); |
| 155 | struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; | ||
| 192 | 156 | ||
| 193 | /* WARNING: Only wr_id and status are reliable at this point */ | 157 | /* WARNING: Only wr_cqe and status are reliable at this point */ |
| 194 | trace_xprtrdma_wc_receive(wc); | 158 | trace_xprtrdma_wc_receive(wc); |
| 159 | --r_xprt->rx_ep.rep_receive_count; | ||
| 195 | if (wc->status != IB_WC_SUCCESS) | 160 | if (wc->status != IB_WC_SUCCESS) |
| 196 | goto out_fail; | 161 | goto out_flushed; |
| 197 | 162 | ||
| 198 | /* status == SUCCESS means all fields in wc are trustworthy */ | 163 | /* status == SUCCESS means all fields in wc are trustworthy */ |
| 199 | rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); | 164 | rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); |
| @@ -204,17 +169,16 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) | |||
| 204 | rdmab_addr(rep->rr_rdmabuf), | 169 | rdmab_addr(rep->rr_rdmabuf), |
| 205 | wc->byte_len, DMA_FROM_DEVICE); | 170 | wc->byte_len, DMA_FROM_DEVICE); |
| 206 | 171 | ||
| 207 | out_schedule: | 172 | rpcrdma_post_recvs(r_xprt, false); |
| 208 | rpcrdma_reply_handler(rep); | 173 | rpcrdma_reply_handler(rep); |
| 209 | return; | 174 | return; |
| 210 | 175 | ||
| 211 | out_fail: | 176 | out_flushed: |
| 212 | if (wc->status != IB_WC_WR_FLUSH_ERR) | 177 | if (wc->status != IB_WC_WR_FLUSH_ERR) |
| 213 | pr_err("rpcrdma: Recv: %s (%u/0x%x)\n", | 178 | pr_err("rpcrdma: Recv: %s (%u/0x%x)\n", |
| 214 | ib_wc_status_msg(wc->status), | 179 | ib_wc_status_msg(wc->status), |
| 215 | wc->status, wc->vendor_err); | 180 | wc->status, wc->vendor_err); |
| 216 | rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0); | 181 | rpcrdma_recv_buffer_put(rep); |
| 217 | goto out_schedule; | ||
| 218 | } | 182 | } |
| 219 | 183 | ||
| 220 | static void | 184 | static void |
| @@ -316,7 +280,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
| 316 | ep->rep_connected = -EAGAIN; | 280 | ep->rep_connected = -EAGAIN; |
| 317 | goto disconnected; | 281 | goto disconnected; |
| 318 | case RDMA_CM_EVENT_DISCONNECTED: | 282 | case RDMA_CM_EVENT_DISCONNECTED: |
| 319 | ++xprt->connect_cookie; | ||
| 320 | ep->rep_connected = -ECONNABORTED; | 283 | ep->rep_connected = -ECONNABORTED; |
| 321 | disconnected: | 284 | disconnected: |
| 322 | xprt_force_disconnect(xprt); | 285 | xprt_force_disconnect(xprt); |
| @@ -326,10 +289,9 @@ disconnected: | |||
| 326 | break; | 289 | break; |
| 327 | } | 290 | } |
| 328 | 291 | ||
| 329 | dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__, | 292 | dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__, |
| 330 | rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), | 293 | rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), |
| 331 | ia->ri_device->name, ia->ri_ops->ro_displayname, | 294 | ia->ri_device->name, rdma_event_msg(event->event)); |
| 332 | rdma_event_msg(event->event)); | ||
| 333 | return 0; | 295 | return 0; |
| 334 | } | 296 | } |
| 335 | 297 | ||
| @@ -347,22 +309,15 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) | |||
| 347 | 309 | ||
| 348 | id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler, | 310 | id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler, |
| 349 | xprt, RDMA_PS_TCP, IB_QPT_RC); | 311 | xprt, RDMA_PS_TCP, IB_QPT_RC); |
| 350 | if (IS_ERR(id)) { | 312 | if (IS_ERR(id)) |
| 351 | rc = PTR_ERR(id); | ||
| 352 | dprintk("RPC: %s: rdma_create_id() failed %i\n", | ||
| 353 | __func__, rc); | ||
| 354 | return id; | 313 | return id; |
| 355 | } | ||
| 356 | 314 | ||
| 357 | ia->ri_async_rc = -ETIMEDOUT; | 315 | ia->ri_async_rc = -ETIMEDOUT; |
| 358 | rc = rdma_resolve_addr(id, NULL, | 316 | rc = rdma_resolve_addr(id, NULL, |
| 359 | (struct sockaddr *)&xprt->rx_xprt.addr, | 317 | (struct sockaddr *)&xprt->rx_xprt.addr, |
| 360 | RDMA_RESOLVE_TIMEOUT); | 318 | RDMA_RESOLVE_TIMEOUT); |
| 361 | if (rc) { | 319 | if (rc) |
| 362 | dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", | ||
| 363 | __func__, rc); | ||
| 364 | goto out; | 320 | goto out; |
| 365 | } | ||
| 366 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); | 321 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
| 367 | if (rc < 0) { | 322 | if (rc < 0) { |
| 368 | trace_xprtrdma_conn_tout(xprt); | 323 | trace_xprtrdma_conn_tout(xprt); |
| @@ -375,11 +330,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) | |||
| 375 | 330 | ||
| 376 | ia->ri_async_rc = -ETIMEDOUT; | 331 | ia->ri_async_rc = -ETIMEDOUT; |
| 377 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); | 332 | rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); |
| 378 | if (rc) { | 333 | if (rc) |
| 379 | dprintk("RPC: %s: rdma_resolve_route() failed %i\n", | ||
| 380 | __func__, rc); | ||
| 381 | goto out; | 334 | goto out; |
| 382 | } | ||
| 383 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); | 335 | rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); |
| 384 | if (rc < 0) { | 336 | if (rc < 0) { |
| 385 | trace_xprtrdma_conn_tout(xprt); | 337 | trace_xprtrdma_conn_tout(xprt); |
| @@ -429,16 +381,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) | |||
| 429 | 381 | ||
| 430 | switch (xprt_rdma_memreg_strategy) { | 382 | switch (xprt_rdma_memreg_strategy) { |
| 431 | case RPCRDMA_FRWR: | 383 | case RPCRDMA_FRWR: |
| 432 | if (frwr_is_supported(ia)) { | 384 | if (frwr_is_supported(ia)) |
| 433 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; | ||
| 434 | break; | ||
| 435 | } | ||
| 436 | /*FALLTHROUGH*/ | ||
| 437 | case RPCRDMA_MTHCAFMR: | ||
| 438 | if (fmr_is_supported(ia)) { | ||
| 439 | ia->ri_ops = &rpcrdma_fmr_memreg_ops; | ||
| 440 | break; | 385 | break; |
| 441 | } | ||
| 442 | /*FALLTHROUGH*/ | 386 | /*FALLTHROUGH*/ |
| 443 | default: | 387 | default: |
| 444 | pr_err("rpcrdma: Device %s does not support memreg mode %d\n", | 388 | pr_err("rpcrdma: Device %s does not support memreg mode %d\n", |
| @@ -481,7 +425,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) | |||
| 481 | * connection is already gone. | 425 | * connection is already gone. |
| 482 | */ | 426 | */ |
| 483 | if (ia->ri_id->qp) { | 427 | if (ia->ri_id->qp) { |
| 484 | ib_drain_qp(ia->ri_id->qp); | 428 | rpcrdma_xprt_drain(r_xprt); |
| 485 | rdma_destroy_qp(ia->ri_id); | 429 | rdma_destroy_qp(ia->ri_id); |
| 486 | ia->ri_id->qp = NULL; | 430 | ia->ri_id->qp = NULL; |
| 487 | } | 431 | } |
| @@ -552,7 +496,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 552 | } | 496 | } |
| 553 | ia->ri_max_send_sges = max_sge; | 497 | ia->ri_max_send_sges = max_sge; |
| 554 | 498 | ||
| 555 | rc = ia->ri_ops->ro_open(ia, ep, cdata); | 499 | rc = frwr_open(ia, ep, cdata); |
| 556 | if (rc) | 500 | if (rc) |
| 557 | return rc; | 501 | return rc; |
| 558 | 502 | ||
| @@ -579,16 +523,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 579 | cdata->max_requests >> 2); | 523 | cdata->max_requests >> 2); |
| 580 | ep->rep_send_count = ep->rep_send_batch; | 524 | ep->rep_send_count = ep->rep_send_batch; |
| 581 | init_waitqueue_head(&ep->rep_connect_wait); | 525 | init_waitqueue_head(&ep->rep_connect_wait); |
| 582 | INIT_DELAYED_WORK(&ep->rep_disconnect_worker, | 526 | ep->rep_receive_count = 0; |
| 583 | rpcrdma_disconnect_worker); | ||
| 584 | 527 | ||
| 585 | sendcq = ib_alloc_cq(ia->ri_device, NULL, | 528 | sendcq = ib_alloc_cq(ia->ri_device, NULL, |
| 586 | ep->rep_attr.cap.max_send_wr + 1, | 529 | ep->rep_attr.cap.max_send_wr + 1, |
| 587 | 1, IB_POLL_WORKQUEUE); | 530 | 1, IB_POLL_WORKQUEUE); |
| 588 | if (IS_ERR(sendcq)) { | 531 | if (IS_ERR(sendcq)) { |
| 589 | rc = PTR_ERR(sendcq); | 532 | rc = PTR_ERR(sendcq); |
| 590 | dprintk("RPC: %s: failed to create send CQ: %i\n", | ||
| 591 | __func__, rc); | ||
| 592 | goto out1; | 533 | goto out1; |
| 593 | } | 534 | } |
| 594 | 535 | ||
| @@ -597,8 +538,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 597 | 0, IB_POLL_WORKQUEUE); | 538 | 0, IB_POLL_WORKQUEUE); |
| 598 | if (IS_ERR(recvcq)) { | 539 | if (IS_ERR(recvcq)) { |
| 599 | rc = PTR_ERR(recvcq); | 540 | rc = PTR_ERR(recvcq); |
| 600 | dprintk("RPC: %s: failed to create recv CQ: %i\n", | ||
| 601 | __func__, rc); | ||
| 602 | goto out2; | 541 | goto out2; |
| 603 | } | 542 | } |
| 604 | 543 | ||
| @@ -611,7 +550,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 611 | /* Prepare RDMA-CM private message */ | 550 | /* Prepare RDMA-CM private message */ |
| 612 | pmsg->cp_magic = rpcrdma_cmp_magic; | 551 | pmsg->cp_magic = rpcrdma_cmp_magic; |
| 613 | pmsg->cp_version = RPCRDMA_CMP_VERSION; | 552 | pmsg->cp_version = RPCRDMA_CMP_VERSION; |
| 614 | pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok; | 553 | pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; |
| 615 | pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); | 554 | pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); |
| 616 | pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); | 555 | pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); |
| 617 | ep->rep_remote_cma.private_data = pmsg; | 556 | ep->rep_remote_cma.private_data = pmsg; |
| @@ -653,8 +592,6 @@ out1: | |||
| 653 | void | 592 | void |
| 654 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 593 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
| 655 | { | 594 | { |
| 656 | cancel_delayed_work_sync(&ep->rep_disconnect_worker); | ||
| 657 | |||
| 658 | if (ia->ri_id && ia->ri_id->qp) { | 595 | if (ia->ri_id && ia->ri_id->qp) { |
| 659 | rpcrdma_ep_disconnect(ep, ia); | 596 | rpcrdma_ep_disconnect(ep, ia); |
| 660 | rdma_destroy_qp(ia->ri_id); | 597 | rdma_destroy_qp(ia->ri_id); |
| @@ -740,11 +677,8 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, | |||
| 740 | } | 677 | } |
| 741 | 678 | ||
| 742 | err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); | 679 | err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); |
| 743 | if (err) { | 680 | if (err) |
| 744 | dprintk("RPC: %s: rdma_create_qp returned %d\n", | ||
| 745 | __func__, err); | ||
| 746 | goto out_destroy; | 681 | goto out_destroy; |
| 747 | } | ||
| 748 | 682 | ||
| 749 | /* Atomically replace the transport's ID and QP. */ | 683 | /* Atomically replace the transport's ID and QP. */ |
| 750 | rc = 0; | 684 | rc = 0; |
| @@ -775,8 +709,6 @@ retry: | |||
| 775 | dprintk("RPC: %s: connecting...\n", __func__); | 709 | dprintk("RPC: %s: connecting...\n", __func__); |
| 776 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); | 710 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); |
| 777 | if (rc) { | 711 | if (rc) { |
| 778 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
| 779 | __func__, rc); | ||
| 780 | rc = -ENETUNREACH; | 712 | rc = -ENETUNREACH; |
| 781 | goto out_noupdate; | 713 | goto out_noupdate; |
| 782 | } | 714 | } |
| @@ -798,11 +730,8 @@ retry: | |||
| 798 | rpcrdma_post_recvs(r_xprt, true); | 730 | rpcrdma_post_recvs(r_xprt, true); |
| 799 | 731 | ||
| 800 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); | 732 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
| 801 | if (rc) { | 733 | if (rc) |
| 802 | dprintk("RPC: %s: rdma_connect() failed with %i\n", | ||
| 803 | __func__, rc); | ||
| 804 | goto out; | 734 | goto out; |
| 805 | } | ||
| 806 | 735 | ||
| 807 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); | 736 | wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); |
| 808 | if (ep->rep_connected <= 0) { | 737 | if (ep->rep_connected <= 0) { |
| @@ -822,8 +751,10 @@ out_noupdate: | |||
| 822 | return rc; | 751 | return rc; |
| 823 | } | 752 | } |
| 824 | 753 | ||
| 825 | /* | 754 | /** |
| 826 | * rpcrdma_ep_disconnect | 755 | * rpcrdma_ep_disconnect - Disconnect underlying transport |
| 756 | * @ep: endpoint to disconnect | ||
| 757 | * @ia: associated interface adapter | ||
| 827 | * | 758 | * |
| 828 | * This is separate from destroy to facilitate the ability | 759 | * This is separate from destroy to facilitate the ability |
| 829 | * to reconnect without recreating the endpoint. | 760 | * to reconnect without recreating the endpoint. |
| @@ -834,19 +765,20 @@ out_noupdate: | |||
| 834 | void | 765 | void |
| 835 | rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 766 | rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
| 836 | { | 767 | { |
| 768 | struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, | ||
| 769 | rx_ep); | ||
| 837 | int rc; | 770 | int rc; |
| 838 | 771 | ||
| 772 | /* returns without wait if ID is not connected */ | ||
| 839 | rc = rdma_disconnect(ia->ri_id); | 773 | rc = rdma_disconnect(ia->ri_id); |
| 840 | if (!rc) | 774 | if (!rc) |
| 841 | /* returns without wait if not connected */ | ||
| 842 | wait_event_interruptible(ep->rep_connect_wait, | 775 | wait_event_interruptible(ep->rep_connect_wait, |
| 843 | ep->rep_connected != 1); | 776 | ep->rep_connected != 1); |
| 844 | else | 777 | else |
| 845 | ep->rep_connected = rc; | 778 | ep->rep_connected = rc; |
| 846 | trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt, | 779 | trace_xprtrdma_disconnect(r_xprt, rc); |
| 847 | rx_ep), rc); | ||
| 848 | 780 | ||
| 849 | ib_drain_qp(ia->ri_id->qp); | 781 | rpcrdma_xprt_drain(r_xprt); |
| 850 | } | 782 | } |
| 851 | 783 | ||
| 852 | /* Fixed-size circular FIFO queue. This implementation is wait-free and | 784 | /* Fixed-size circular FIFO queue. This implementation is wait-free and |
| @@ -1034,7 +966,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) | |||
| 1034 | if (!mr) | 966 | if (!mr) |
| 1035 | break; | 967 | break; |
| 1036 | 968 | ||
| 1037 | rc = ia->ri_ops->ro_init_mr(ia, mr); | 969 | rc = frwr_init_mr(ia, mr); |
| 1038 | if (rc) { | 970 | if (rc) { |
| 1039 | kfree(mr); | 971 | kfree(mr); |
| 1040 | break; | 972 | break; |
| @@ -1089,9 +1021,9 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) | |||
| 1089 | req->rl_buffer = buffer; | 1021 | req->rl_buffer = buffer; |
| 1090 | INIT_LIST_HEAD(&req->rl_registered); | 1022 | INIT_LIST_HEAD(&req->rl_registered); |
| 1091 | 1023 | ||
| 1092 | spin_lock(&buffer->rb_reqslock); | 1024 | spin_lock(&buffer->rb_lock); |
| 1093 | list_add(&req->rl_all, &buffer->rb_allreqs); | 1025 | list_add(&req->rl_all, &buffer->rb_allreqs); |
| 1094 | spin_unlock(&buffer->rb_reqslock); | 1026 | spin_unlock(&buffer->rb_lock); |
| 1095 | return req; | 1027 | return req; |
| 1096 | } | 1028 | } |
| 1097 | 1029 | ||
| @@ -1134,8 +1066,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) | |||
| 1134 | out_free: | 1066 | out_free: |
| 1135 | kfree(rep); | 1067 | kfree(rep); |
| 1136 | out: | 1068 | out: |
| 1137 | dprintk("RPC: %s: reply buffer %d alloc failed\n", | ||
| 1138 | __func__, rc); | ||
| 1139 | return rc; | 1069 | return rc; |
| 1140 | } | 1070 | } |
| 1141 | 1071 | ||
| @@ -1159,7 +1089,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
| 1159 | 1089 | ||
| 1160 | INIT_LIST_HEAD(&buf->rb_send_bufs); | 1090 | INIT_LIST_HEAD(&buf->rb_send_bufs); |
| 1161 | INIT_LIST_HEAD(&buf->rb_allreqs); | 1091 | INIT_LIST_HEAD(&buf->rb_allreqs); |
| 1162 | spin_lock_init(&buf->rb_reqslock); | ||
| 1163 | for (i = 0; i < buf->rb_max_requests; i++) { | 1092 | for (i = 0; i < buf->rb_max_requests; i++) { |
| 1164 | struct rpcrdma_req *req; | 1093 | struct rpcrdma_req *req; |
| 1165 | 1094 | ||
| @@ -1174,13 +1103,19 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
| 1174 | } | 1103 | } |
| 1175 | 1104 | ||
| 1176 | buf->rb_credits = 1; | 1105 | buf->rb_credits = 1; |
| 1177 | buf->rb_posted_receives = 0; | ||
| 1178 | INIT_LIST_HEAD(&buf->rb_recv_bufs); | 1106 | INIT_LIST_HEAD(&buf->rb_recv_bufs); |
| 1179 | 1107 | ||
| 1180 | rc = rpcrdma_sendctxs_create(r_xprt); | 1108 | rc = rpcrdma_sendctxs_create(r_xprt); |
| 1181 | if (rc) | 1109 | if (rc) |
| 1182 | goto out; | 1110 | goto out; |
| 1183 | 1111 | ||
| 1112 | buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s", | ||
| 1113 | WQ_MEM_RECLAIM | WQ_HIGHPRI, | ||
| 1114 | 0, | ||
| 1115 | r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]); | ||
| 1116 | if (!buf->rb_completion_wq) | ||
| 1117 | goto out; | ||
| 1118 | |||
| 1184 | return 0; | 1119 | return 0; |
| 1185 | out: | 1120 | out: |
| 1186 | rpcrdma_buffer_destroy(buf); | 1121 | rpcrdma_buffer_destroy(buf); |
| @@ -1194,9 +1129,18 @@ rpcrdma_destroy_rep(struct rpcrdma_rep *rep) | |||
| 1194 | kfree(rep); | 1129 | kfree(rep); |
| 1195 | } | 1130 | } |
| 1196 | 1131 | ||
| 1132 | /** | ||
| 1133 | * rpcrdma_req_destroy - Destroy an rpcrdma_req object | ||
| 1134 | * @req: unused object to be destroyed | ||
| 1135 | * | ||
| 1136 | * This function assumes that the caller prevents concurrent device | ||
| 1137 | * unload and transport tear-down. | ||
| 1138 | */ | ||
| 1197 | void | 1139 | void |
| 1198 | rpcrdma_destroy_req(struct rpcrdma_req *req) | 1140 | rpcrdma_req_destroy(struct rpcrdma_req *req) |
| 1199 | { | 1141 | { |
| 1142 | list_del(&req->rl_all); | ||
| 1143 | |||
| 1200 | rpcrdma_free_regbuf(req->rl_recvbuf); | 1144 | rpcrdma_free_regbuf(req->rl_recvbuf); |
| 1201 | rpcrdma_free_regbuf(req->rl_sendbuf); | 1145 | rpcrdma_free_regbuf(req->rl_sendbuf); |
| 1202 | rpcrdma_free_regbuf(req->rl_rdmabuf); | 1146 | rpcrdma_free_regbuf(req->rl_rdmabuf); |
| @@ -1208,7 +1152,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) | |||
| 1208 | { | 1152 | { |
| 1209 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, | 1153 | struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, |
| 1210 | rx_buf); | 1154 | rx_buf); |
| 1211 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | ||
| 1212 | struct rpcrdma_mr *mr; | 1155 | struct rpcrdma_mr *mr; |
| 1213 | unsigned int count; | 1156 | unsigned int count; |
| 1214 | 1157 | ||
| @@ -1224,7 +1167,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) | |||
| 1224 | if (!list_empty(&mr->mr_list)) | 1167 | if (!list_empty(&mr->mr_list)) |
| 1225 | list_del(&mr->mr_list); | 1168 | list_del(&mr->mr_list); |
| 1226 | 1169 | ||
| 1227 | ia->ri_ops->ro_release_mr(mr); | 1170 | frwr_release_mr(mr); |
| 1228 | count++; | 1171 | count++; |
| 1229 | spin_lock(&buf->rb_mrlock); | 1172 | spin_lock(&buf->rb_mrlock); |
| 1230 | } | 1173 | } |
| @@ -1234,11 +1177,24 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) | |||
| 1234 | dprintk("RPC: %s: released %u MRs\n", __func__, count); | 1177 | dprintk("RPC: %s: released %u MRs\n", __func__, count); |
| 1235 | } | 1178 | } |
| 1236 | 1179 | ||
| 1180 | /** | ||
| 1181 | * rpcrdma_buffer_destroy - Release all hw resources | ||
| 1182 | * @buf: root control block for resources | ||
| 1183 | * | ||
| 1184 | * ORDERING: relies on a prior ib_drain_qp : | ||
| 1185 | * - No more Send or Receive completions can occur | ||
| 1186 | * - All MRs, reps, and reqs are returned to their free lists | ||
| 1187 | */ | ||
| 1237 | void | 1188 | void |
| 1238 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | 1189 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
| 1239 | { | 1190 | { |
| 1240 | cancel_delayed_work_sync(&buf->rb_refresh_worker); | 1191 | cancel_delayed_work_sync(&buf->rb_refresh_worker); |
| 1241 | 1192 | ||
| 1193 | if (buf->rb_completion_wq) { | ||
| 1194 | destroy_workqueue(buf->rb_completion_wq); | ||
| 1195 | buf->rb_completion_wq = NULL; | ||
| 1196 | } | ||
| 1197 | |||
| 1242 | rpcrdma_sendctxs_destroy(buf); | 1198 | rpcrdma_sendctxs_destroy(buf); |
| 1243 | 1199 | ||
| 1244 | while (!list_empty(&buf->rb_recv_bufs)) { | 1200 | while (!list_empty(&buf->rb_recv_bufs)) { |
| @@ -1250,19 +1206,14 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1250 | rpcrdma_destroy_rep(rep); | 1206 | rpcrdma_destroy_rep(rep); |
| 1251 | } | 1207 | } |
| 1252 | 1208 | ||
| 1253 | spin_lock(&buf->rb_reqslock); | 1209 | while (!list_empty(&buf->rb_send_bufs)) { |
| 1254 | while (!list_empty(&buf->rb_allreqs)) { | ||
| 1255 | struct rpcrdma_req *req; | 1210 | struct rpcrdma_req *req; |
| 1256 | 1211 | ||
| 1257 | req = list_first_entry(&buf->rb_allreqs, | 1212 | req = list_first_entry(&buf->rb_send_bufs, |
| 1258 | struct rpcrdma_req, rl_all); | 1213 | struct rpcrdma_req, rl_list); |
| 1259 | list_del(&req->rl_all); | 1214 | list_del(&req->rl_list); |
| 1260 | 1215 | rpcrdma_req_destroy(req); | |
| 1261 | spin_unlock(&buf->rb_reqslock); | ||
| 1262 | rpcrdma_destroy_req(req); | ||
| 1263 | spin_lock(&buf->rb_reqslock); | ||
| 1264 | } | 1216 | } |
| 1265 | spin_unlock(&buf->rb_reqslock); | ||
| 1266 | 1217 | ||
| 1267 | rpcrdma_mrs_destroy(buf); | 1218 | rpcrdma_mrs_destroy(buf); |
| 1268 | } | 1219 | } |
| @@ -1329,9 +1280,12 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) | |||
| 1329 | { | 1280 | { |
| 1330 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; | 1281 | struct rpcrdma_xprt *r_xprt = mr->mr_xprt; |
| 1331 | 1282 | ||
| 1332 | trace_xprtrdma_mr_unmap(mr); | 1283 | if (mr->mr_dir != DMA_NONE) { |
| 1333 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | 1284 | trace_xprtrdma_mr_unmap(mr); |
| 1334 | mr->mr_sg, mr->mr_nents, mr->mr_dir); | 1285 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
| 1286 | mr->mr_sg, mr->mr_nents, mr->mr_dir); | ||
| 1287 | mr->mr_dir = DMA_NONE; | ||
| 1288 | } | ||
| 1335 | __rpcrdma_mr_put(&r_xprt->rx_buf, mr); | 1289 | __rpcrdma_mr_put(&r_xprt->rx_buf, mr); |
| 1336 | } | 1290 | } |
| 1337 | 1291 | ||
| @@ -1410,7 +1364,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | |||
| 1410 | * | 1364 | * |
| 1411 | * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for | 1365 | * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for |
| 1412 | * receiving the payload of RDMA RECV operations. During Long Calls | 1366 | * receiving the payload of RDMA RECV operations. During Long Calls |
| 1413 | * or Replies they may be registered externally via ro_map. | 1367 | * or Replies they may be registered externally via frwr_map. |
| 1414 | */ | 1368 | */ |
| 1415 | struct rpcrdma_regbuf * | 1369 | struct rpcrdma_regbuf * |
| 1416 | rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, | 1370 | rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, |
| @@ -1446,8 +1400,10 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |||
| 1446 | (void *)rb->rg_base, | 1400 | (void *)rb->rg_base, |
| 1447 | rdmab_length(rb), | 1401 | rdmab_length(rb), |
| 1448 | rb->rg_direction); | 1402 | rb->rg_direction); |
| 1449 | if (ib_dma_mapping_error(device, rdmab_addr(rb))) | 1403 | if (ib_dma_mapping_error(device, rdmab_addr(rb))) { |
| 1404 | trace_xprtrdma_dma_maperr(rdmab_addr(rb)); | ||
| 1450 | return false; | 1405 | return false; |
| 1406 | } | ||
| 1451 | 1407 | ||
| 1452 | rb->rg_device = device; | 1408 | rb->rg_device = device; |
| 1453 | rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; | 1409 | rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; |
| @@ -1479,10 +1435,14 @@ rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) | |||
| 1479 | kfree(rb); | 1435 | kfree(rb); |
| 1480 | } | 1436 | } |
| 1481 | 1437 | ||
| 1482 | /* | 1438 | /** |
| 1483 | * Prepost any receive buffer, then post send. | 1439 | * rpcrdma_ep_post - Post WRs to a transport's Send Queue |
| 1440 | * @ia: transport's device information | ||
| 1441 | * @ep: transport's RDMA endpoint information | ||
| 1442 | * @req: rpcrdma_req containing the Send WR to post | ||
| 1484 | * | 1443 | * |
| 1485 | * Receive buffer is donated to hardware, reclaimed upon recv completion. | 1444 | * Returns 0 if the post was successful, otherwise -ENOTCONN |
| 1445 | * is returned. | ||
| 1486 | */ | 1446 | */ |
| 1487 | int | 1447 | int |
| 1488 | rpcrdma_ep_post(struct rpcrdma_ia *ia, | 1448 | rpcrdma_ep_post(struct rpcrdma_ia *ia, |
| @@ -1501,32 +1461,27 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
| 1501 | --ep->rep_send_count; | 1461 | --ep->rep_send_count; |
| 1502 | } | 1462 | } |
| 1503 | 1463 | ||
| 1504 | rc = ia->ri_ops->ro_send(ia, req); | 1464 | rc = frwr_send(ia, req); |
| 1505 | trace_xprtrdma_post_send(req, rc); | 1465 | trace_xprtrdma_post_send(req, rc); |
| 1506 | if (rc) | 1466 | if (rc) |
| 1507 | return -ENOTCONN; | 1467 | return -ENOTCONN; |
| 1508 | return 0; | 1468 | return 0; |
| 1509 | } | 1469 | } |
| 1510 | 1470 | ||
| 1511 | /** | 1471 | static void |
| 1512 | * rpcrdma_post_recvs - Maybe post some Receive buffers | ||
| 1513 | * @r_xprt: controlling transport | ||
| 1514 | * @temp: when true, allocate temp rpcrdma_rep objects | ||
| 1515 | * | ||
| 1516 | */ | ||
| 1517 | void | ||
| 1518 | rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) | 1472 | rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) |
| 1519 | { | 1473 | { |
| 1520 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 1474 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
| 1475 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; | ||
| 1521 | struct ib_recv_wr *wr, *bad_wr; | 1476 | struct ib_recv_wr *wr, *bad_wr; |
| 1522 | int needed, count, rc; | 1477 | int needed, count, rc; |
| 1523 | 1478 | ||
| 1524 | rc = 0; | 1479 | rc = 0; |
| 1525 | count = 0; | 1480 | count = 0; |
| 1526 | needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); | 1481 | needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); |
| 1527 | if (buf->rb_posted_receives > needed) | 1482 | if (ep->rep_receive_count > needed) |
| 1528 | goto out; | 1483 | goto out; |
| 1529 | needed -= buf->rb_posted_receives; | 1484 | needed -= ep->rep_receive_count; |
| 1530 | 1485 | ||
| 1531 | count = 0; | 1486 | count = 0; |
| 1532 | wr = NULL; | 1487 | wr = NULL; |
| @@ -1574,7 +1529,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) | |||
| 1574 | --count; | 1529 | --count; |
| 1575 | } | 1530 | } |
| 1576 | } | 1531 | } |
| 1577 | buf->rb_posted_receives += count; | 1532 | ep->rep_receive_count += count; |
| 1578 | out: | 1533 | out: |
| 1579 | trace_xprtrdma_post_recvs(r_xprt, count, rc); | 1534 | trace_xprtrdma_post_recvs(r_xprt, count, rc); |
| 1580 | } | 1535 | } |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 9218dbebedce..5a18472f2c9c 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
| @@ -66,7 +66,6 @@ | |||
| 66 | * Interface Adapter -- one per transport instance | 66 | * Interface Adapter -- one per transport instance |
| 67 | */ | 67 | */ |
| 68 | struct rpcrdma_ia { | 68 | struct rpcrdma_ia { |
| 69 | const struct rpcrdma_memreg_ops *ri_ops; | ||
| 70 | struct ib_device *ri_device; | 69 | struct ib_device *ri_device; |
| 71 | struct rdma_cm_id *ri_id; | 70 | struct rdma_cm_id *ri_id; |
| 72 | struct ib_pd *ri_pd; | 71 | struct ib_pd *ri_pd; |
| @@ -81,8 +80,6 @@ struct rpcrdma_ia { | |||
| 81 | bool ri_implicit_roundup; | 80 | bool ri_implicit_roundup; |
| 82 | enum ib_mr_type ri_mrtype; | 81 | enum ib_mr_type ri_mrtype; |
| 83 | unsigned long ri_flags; | 82 | unsigned long ri_flags; |
| 84 | struct ib_qp_attr ri_qp_attr; | ||
| 85 | struct ib_qp_init_attr ri_qp_init_attr; | ||
| 86 | }; | 83 | }; |
| 87 | 84 | ||
| 88 | enum { | 85 | enum { |
| @@ -101,7 +98,7 @@ struct rpcrdma_ep { | |||
| 101 | wait_queue_head_t rep_connect_wait; | 98 | wait_queue_head_t rep_connect_wait; |
| 102 | struct rpcrdma_connect_private rep_cm_private; | 99 | struct rpcrdma_connect_private rep_cm_private; |
| 103 | struct rdma_conn_param rep_remote_cma; | 100 | struct rdma_conn_param rep_remote_cma; |
| 104 | struct delayed_work rep_disconnect_worker; | 101 | int rep_receive_count; |
| 105 | }; | 102 | }; |
| 106 | 103 | ||
| 107 | /* Pre-allocate extra Work Requests for handling backward receives | 104 | /* Pre-allocate extra Work Requests for handling backward receives |
| @@ -262,20 +259,12 @@ struct rpcrdma_frwr { | |||
| 262 | }; | 259 | }; |
| 263 | }; | 260 | }; |
| 264 | 261 | ||
| 265 | struct rpcrdma_fmr { | ||
| 266 | struct ib_fmr *fm_mr; | ||
| 267 | u64 *fm_physaddrs; | ||
| 268 | }; | ||
| 269 | |||
| 270 | struct rpcrdma_mr { | 262 | struct rpcrdma_mr { |
| 271 | struct list_head mr_list; | 263 | struct list_head mr_list; |
| 272 | struct scatterlist *mr_sg; | 264 | struct scatterlist *mr_sg; |
| 273 | int mr_nents; | 265 | int mr_nents; |
| 274 | enum dma_data_direction mr_dir; | 266 | enum dma_data_direction mr_dir; |
| 275 | union { | 267 | struct rpcrdma_frwr frwr; |
| 276 | struct rpcrdma_fmr fmr; | ||
| 277 | struct rpcrdma_frwr frwr; | ||
| 278 | }; | ||
| 279 | struct rpcrdma_xprt *mr_xprt; | 268 | struct rpcrdma_xprt *mr_xprt; |
| 280 | u32 mr_handle; | 269 | u32 mr_handle; |
| 281 | u32 mr_length; | 270 | u32 mr_length; |
| @@ -401,20 +390,18 @@ struct rpcrdma_buffer { | |||
| 401 | spinlock_t rb_lock; /* protect buf lists */ | 390 | spinlock_t rb_lock; /* protect buf lists */ |
| 402 | struct list_head rb_send_bufs; | 391 | struct list_head rb_send_bufs; |
| 403 | struct list_head rb_recv_bufs; | 392 | struct list_head rb_recv_bufs; |
| 393 | struct list_head rb_allreqs; | ||
| 394 | |||
| 404 | unsigned long rb_flags; | 395 | unsigned long rb_flags; |
| 405 | u32 rb_max_requests; | 396 | u32 rb_max_requests; |
| 406 | u32 rb_credits; /* most recent credit grant */ | 397 | u32 rb_credits; /* most recent credit grant */ |
| 407 | int rb_posted_receives; | ||
| 408 | 398 | ||
| 409 | u32 rb_bc_srv_max_requests; | 399 | u32 rb_bc_srv_max_requests; |
| 410 | spinlock_t rb_reqslock; /* protect rb_allreqs */ | ||
| 411 | struct list_head rb_allreqs; | ||
| 412 | |||
| 413 | u32 rb_bc_max_requests; | 400 | u32 rb_bc_max_requests; |
| 414 | 401 | ||
| 402 | struct workqueue_struct *rb_completion_wq; | ||
| 415 | struct delayed_work rb_refresh_worker; | 403 | struct delayed_work rb_refresh_worker; |
| 416 | }; | 404 | }; |
| 417 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) | ||
| 418 | 405 | ||
| 419 | /* rb_flags */ | 406 | /* rb_flags */ |
| 420 | enum { | 407 | enum { |
| @@ -465,35 +452,6 @@ struct rpcrdma_stats { | |||
| 465 | }; | 452 | }; |
| 466 | 453 | ||
| 467 | /* | 454 | /* |
| 468 | * Per-registration mode operations | ||
| 469 | */ | ||
| 470 | struct rpcrdma_xprt; | ||
| 471 | struct rpcrdma_memreg_ops { | ||
| 472 | struct rpcrdma_mr_seg * | ||
| 473 | (*ro_map)(struct rpcrdma_xprt *, | ||
| 474 | struct rpcrdma_mr_seg *, int, bool, | ||
| 475 | struct rpcrdma_mr **); | ||
| 476 | int (*ro_send)(struct rpcrdma_ia *ia, | ||
| 477 | struct rpcrdma_req *req); | ||
| 478 | void (*ro_reminv)(struct rpcrdma_rep *rep, | ||
| 479 | struct list_head *mrs); | ||
| 480 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, | ||
| 481 | struct list_head *); | ||
| 482 | int (*ro_open)(struct rpcrdma_ia *, | ||
| 483 | struct rpcrdma_ep *, | ||
| 484 | struct rpcrdma_create_data_internal *); | ||
| 485 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | ||
| 486 | int (*ro_init_mr)(struct rpcrdma_ia *, | ||
| 487 | struct rpcrdma_mr *); | ||
| 488 | void (*ro_release_mr)(struct rpcrdma_mr *mr); | ||
| 489 | const char *ro_displayname; | ||
| 490 | const int ro_send_w_inv_ok; | ||
| 491 | }; | ||
| 492 | |||
| 493 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | ||
| 494 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | ||
| 495 | |||
| 496 | /* | ||
| 497 | * RPCRDMA transport -- encapsulates the structures above for | 455 | * RPCRDMA transport -- encapsulates the structures above for |
| 498 | * integration with RPC. | 456 | * integration with RPC. |
| 499 | * | 457 | * |
| @@ -544,10 +502,6 @@ extern unsigned int xprt_rdma_memreg_strategy; | |||
| 544 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); | 502 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); |
| 545 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); | 503 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); |
| 546 | void rpcrdma_ia_close(struct rpcrdma_ia *); | 504 | void rpcrdma_ia_close(struct rpcrdma_ia *); |
| 547 | bool frwr_is_supported(struct rpcrdma_ia *); | ||
| 548 | bool fmr_is_supported(struct rpcrdma_ia *); | ||
| 549 | |||
| 550 | extern struct workqueue_struct *rpcrdma_receive_wq; | ||
| 551 | 505 | ||
| 552 | /* | 506 | /* |
| 553 | * Endpoint calls - xprtrdma/verbs.c | 507 | * Endpoint calls - xprtrdma/verbs.c |
| @@ -560,13 +514,12 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); | |||
| 560 | 514 | ||
| 561 | int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, | 515 | int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, |
| 562 | struct rpcrdma_req *); | 516 | struct rpcrdma_req *); |
| 563 | void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); | ||
| 564 | 517 | ||
| 565 | /* | 518 | /* |
| 566 | * Buffer calls - xprtrdma/verbs.c | 519 | * Buffer calls - xprtrdma/verbs.c |
| 567 | */ | 520 | */ |
| 568 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); | 521 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); |
| 569 | void rpcrdma_destroy_req(struct rpcrdma_req *); | 522 | void rpcrdma_req_destroy(struct rpcrdma_req *req); |
| 570 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); | 523 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); |
| 571 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); | 524 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); |
| 572 | struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); | 525 | struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); |
| @@ -604,9 +557,6 @@ rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |||
| 604 | return __rpcrdma_dma_map_regbuf(ia, rb); | 557 | return __rpcrdma_dma_map_regbuf(ia, rb); |
| 605 | } | 558 | } |
| 606 | 559 | ||
| 607 | int rpcrdma_alloc_wq(void); | ||
| 608 | void rpcrdma_destroy_wq(void); | ||
| 609 | |||
| 610 | /* | 560 | /* |
| 611 | * Wrappers for chunk registration, shared by read/write chunk code. | 561 | * Wrappers for chunk registration, shared by read/write chunk code. |
| 612 | */ | 562 | */ |
| @@ -617,6 +567,23 @@ rpcrdma_data_dir(bool writing) | |||
| 617 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | 567 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; |
| 618 | } | 568 | } |
| 619 | 569 | ||
| 570 | /* Memory registration calls xprtrdma/frwr_ops.c | ||
| 571 | */ | ||
| 572 | bool frwr_is_supported(struct rpcrdma_ia *); | ||
| 573 | int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||
| 574 | struct rpcrdma_create_data_internal *cdata); | ||
| 575 | int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); | ||
| 576 | void frwr_release_mr(struct rpcrdma_mr *mr); | ||
| 577 | size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt); | ||
| 578 | struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, | ||
| 579 | struct rpcrdma_mr_seg *seg, | ||
| 580 | int nsegs, bool writing, u32 xid, | ||
| 581 | struct rpcrdma_mr **mr); | ||
| 582 | int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req); | ||
| 583 | void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); | ||
| 584 | void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, | ||
| 585 | struct list_head *mrs); | ||
| 586 | |||
| 620 | /* | 587 | /* |
| 621 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | 588 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c |
| 622 | */ | 589 | */ |
| @@ -653,6 +620,7 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) | |||
| 653 | extern unsigned int xprt_rdma_max_inline_read; | 620 | extern unsigned int xprt_rdma_max_inline_read; |
| 654 | void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); | 621 | void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); |
| 655 | void xprt_rdma_free_addresses(struct rpc_xprt *xprt); | 622 | void xprt_rdma_free_addresses(struct rpc_xprt *xprt); |
| 623 | void xprt_rdma_close(struct rpc_xprt *xprt); | ||
| 656 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); | 624 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); |
| 657 | int xprt_rdma_init(void); | 625 | int xprt_rdma_init(void); |
| 658 | void xprt_rdma_cleanup(void); | 626 | void xprt_rdma_cleanup(void); |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 44467caf3cd8..13559e6a460b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
| @@ -68,8 +68,6 @@ static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; | |||
| 68 | static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; | 68 | static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; |
| 69 | static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; | 69 | static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; |
| 70 | 70 | ||
| 71 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
| 72 | |||
| 73 | #define XS_TCP_LINGER_TO (15U * HZ) | 71 | #define XS_TCP_LINGER_TO (15U * HZ) |
| 74 | static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; | 72 | static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; |
| 75 | 73 | ||
| @@ -159,8 +157,6 @@ static struct ctl_table sunrpc_table[] = { | |||
| 159 | { }, | 157 | { }, |
| 160 | }; | 158 | }; |
| 161 | 159 | ||
| 162 | #endif | ||
| 163 | |||
| 164 | /* | 160 | /* |
| 165 | * Wait duration for a reply from the RPC portmapper. | 161 | * Wait duration for a reply from the RPC portmapper. |
| 166 | */ | 162 | */ |
| @@ -1589,6 +1585,7 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t | |||
| 1589 | 1585 | ||
| 1590 | /** | 1586 | /** |
| 1591 | * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport | 1587 | * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport |
| 1588 | * @xprt: controlling transport | ||
| 1592 | * @task: task that timed out | 1589 | * @task: task that timed out |
| 1593 | * | 1590 | * |
| 1594 | * Adjust the congestion window after a retransmit timeout has occurred. | 1591 | * Adjust the congestion window after a retransmit timeout has occurred. |
| @@ -2246,6 +2243,7 @@ out: | |||
| 2246 | 2243 | ||
| 2247 | /** | 2244 | /** |
| 2248 | * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint | 2245 | * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint |
| 2246 | * @work: queued work item | ||
| 2249 | * | 2247 | * |
| 2250 | * Invoked by a work queue tasklet. | 2248 | * Invoked by a work queue tasklet. |
| 2251 | */ | 2249 | */ |
| @@ -3095,10 +3093,8 @@ static struct xprt_class xs_bc_tcp_transport = { | |||
| 3095 | */ | 3093 | */ |
| 3096 | int init_socket_xprt(void) | 3094 | int init_socket_xprt(void) |
| 3097 | { | 3095 | { |
| 3098 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
| 3099 | if (!sunrpc_table_header) | 3096 | if (!sunrpc_table_header) |
| 3100 | sunrpc_table_header = register_sysctl_table(sunrpc_table); | 3097 | sunrpc_table_header = register_sysctl_table(sunrpc_table); |
| 3101 | #endif | ||
| 3102 | 3098 | ||
| 3103 | xprt_register_transport(&xs_local_transport); | 3099 | xprt_register_transport(&xs_local_transport); |
| 3104 | xprt_register_transport(&xs_udp_transport); | 3100 | xprt_register_transport(&xs_udp_transport); |
| @@ -3114,12 +3110,10 @@ int init_socket_xprt(void) | |||
| 3114 | */ | 3110 | */ |
| 3115 | void cleanup_socket_xprt(void) | 3111 | void cleanup_socket_xprt(void) |
| 3116 | { | 3112 | { |
| 3117 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
| 3118 | if (sunrpc_table_header) { | 3113 | if (sunrpc_table_header) { |
| 3119 | unregister_sysctl_table(sunrpc_table_header); | 3114 | unregister_sysctl_table(sunrpc_table_header); |
| 3120 | sunrpc_table_header = NULL; | 3115 | sunrpc_table_header = NULL; |
| 3121 | } | 3116 | } |
| 3122 | #endif | ||
| 3123 | 3117 | ||
| 3124 | xprt_unregister_transport(&xs_local_transport); | 3118 | xprt_unregister_transport(&xs_local_transport); |
| 3125 | xprt_unregister_transport(&xs_udp_transport); | 3119 | xprt_unregister_transport(&xs_udp_transport); |
