From 8e2e5b7c492639109b1137c286dbad529c2b35e1 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 28 Nov 2018 15:05:58 +0000 Subject: SUNRPC: allow /proc entries without CONFIG_SUNRPC_DEBUG If we want /proc/sys/sunrpc the current kernel also drags in other debug features which we don't really want. Instead, we should always show the following entries: /proc/sys/sunrpc/udp_slot_table_entries /proc/sys/sunrpc/tcp_slot_table_entries /proc/sys/sunrpc/tcp_max_slot_table_entries /proc/sys/sunrpc/min_resvport /proc/sys/sunrpc/max_resvport /proc/sys/sunrpc/tcp_fin_timeout Signed-off-by: Ben Dooks Signed-off-by: Thomas Preston Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f0b3700cec95..a6870d3cb121 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -68,8 +68,6 @@ static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - #define XS_TCP_LINGER_TO (15U * HZ) static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; @@ -159,8 +157,6 @@ static struct ctl_table sunrpc_table[] = { { }, }; -#endif - /* * Wait duration for a reply from the RPC portmapper. 
*/ @@ -3107,10 +3103,8 @@ static struct xprt_class xs_bc_tcp_transport = { */ int init_socket_xprt(void) { -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) sunrpc_table_header = register_sysctl_table(sunrpc_table); -#endif xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); @@ -3126,12 +3120,10 @@ int init_socket_xprt(void) */ void cleanup_socket_xprt(void) { -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); sunrpc_table_header = NULL; } -#endif xprt_unregister_transport(&xs_local_transport); xprt_unregister_transport(&xs_udp_transport); -- cgit v1.2.2 From 97f68c6b02e0225b38d327103c59cfe2ab5ecda7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:30 +1100 Subject: SUNRPC: add 'struct cred *' to auth_cred and rpc_cred The SUNRPC credential framework was put together before Linux had 'struct cred'. Now that we have it, it makes sense to use it. This first step just includes a suitable 'struct cred *' pointer in every 'struct auth_cred' and almost every 'struct rpc_cred'. The rpc_cred used for auth_null has a NULL 'struct cred *' as nothing else really makes sense. For rpc_cred, the pointer is reference counted. For auth_cred it isn't. struct auth_cred are either allocated on the stack, in which case the thread owns a reference to the auth, or are part of 'struct generic_cred' in which case gc_base owns the reference, and "acred" shares it. 
Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 8 +++++++- net/sunrpc/auth_generic.c | 8 +++++++- net/sunrpc/auth_gss/auth_gss.c | 2 ++ net/sunrpc/auth_unix.c | 1 + 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index ad8ead738981..a7e08e44f92b 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -659,6 +659,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) acred.uid = cred->fsuid; acred.gid = cred->fsgid; acred.group_info = cred->group_info; + acred.cred = cred; ret = auth->au_ops->lookup_cred(auth, &acred, flags); return ret; } @@ -674,6 +675,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, cred->cr_auth = auth; cred->cr_ops = ops; cred->cr_expire = jiffies; + cred->cr_cred = get_cred(acred->cred); cred->cr_uid = acred->uid; } EXPORT_SYMBOL_GPL(rpcauth_init_cred); @@ -694,11 +696,15 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) struct auth_cred acred = { .uid = GLOBAL_ROOT_UID, .gid = GLOBAL_ROOT_GID, + .cred = get_task_cred(&init_task), }; + struct rpc_cred *ret; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); - return auth->au_ops->lookup_cred(auth, &acred, lookupflags); + ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); + put_cred(acred.cred); + return ret; } static struct rpc_cred * diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index ab4a3be1542a..16a0a4b89bb4 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -61,11 +61,15 @@ struct rpc_cred *rpc_lookup_machine_cred(const char *service_name) .gid = RPC_MACHINE_CRED_GROUPID, .principal = service_name, .machine_cred = 1, + .cred = get_task_cred(&init_task), }; + struct rpc_cred *ret; dprintk("RPC: looking up machine cred for service %s\n", service_name); - return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0); + ret = 
generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0); + put_cred(acred.cred); + return ret; } EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); @@ -110,6 +114,7 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, g gcred->acred.uid = acred->uid; gcred->acred.gid = acred->gid; gcred->acred.group_info = acred->group_info; + gcred->acred.cred = gcred->gc_base.cr_cred; gcred->acred.ac_flags = 0; if (gcred->acred.group_info != NULL) get_group_info(gcred->acred.group_info); @@ -132,6 +137,7 @@ generic_free_cred(struct rpc_cred *cred) dprintk("RPC: generic_free_cred %p\n", gcred); if (gcred->acred.group_info != NULL) put_group_info(gcred->acred.group_info); + put_cred(cred->cr_cred); kfree(gcred); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ba765473d1f0..56604b259f2c 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1343,6 +1343,7 @@ gss_destroy_nullcred(struct rpc_cred *cred) struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); + put_cred(cred->cr_cred); call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) gss_put_ctx(ctx); @@ -1608,6 +1609,7 @@ static int gss_renew_cred(struct rpc_task *task) struct rpc_auth *auth = oldcred->cr_auth; struct auth_cred acred = { .uid = oldcred->cr_uid, + .cred = oldcred->cr_cred, .principal = gss_cred->gc_principal, .machine_cred = (gss_cred->gc_principal != NULL ? 
1 : 0), }; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4c1c7e56288f..36e01384f082 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -97,6 +97,7 @@ static void unx_free_cred(struct unx_cred *unx_cred) { dprintk("RPC: unx_free_cred %p\n", unx_cred); + put_cred(unx_cred->uc_base.cr_cred); kfree(unx_cred); } -- cgit v1.2.2 From fc0664fd9bccafb00bd2dfe0d5218147994f81ee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:30 +1100 Subject: SUNRPC: remove groupinfo from struct auth_cred. We can use cred->groupinfo (from the 'struct cred') instead. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 1 - net/sunrpc/auth_generic.c | 17 +++++++---------- net/sunrpc/auth_unix.c | 12 ++++++------ 3 files changed, 13 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a7e08e44f92b..e1053b96e0e5 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -658,7 +658,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) memset(&acred, 0, sizeof(acred)); acred.uid = cred->fsuid; acred.gid = cred->fsgid; - acred.group_info = cred->group_info; acred.cred = cred; ret = auth->au_ops->lookup_cred(auth, &acred, flags); return ret; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 16a0a4b89bb4..a4ae7bd7ca7b 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -113,11 +113,8 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, g gcred->acred.uid = acred->uid; gcred->acred.gid = acred->gid; - gcred->acred.group_info = acred->group_info; gcred->acred.cred = gcred->gc_base.cr_cred; gcred->acred.ac_flags = 0; - if (gcred->acred.group_info != NULL) - get_group_info(gcred->acred.group_info); gcred->acred.machine_cred = acred->machine_cred; gcred->acred.principal = acred->principal; @@ -135,8 +132,6 @@ generic_free_cred(struct rpc_cred *cred) struct generic_cred *gcred = 
container_of(cred, struct generic_cred, gc_base); dprintk("RPC: generic_free_cred %p\n", gcred); - if (gcred->acred.group_info != NULL) - put_group_info(gcred->acred.group_info); put_cred(cred->cr_cred); kfree(gcred); } @@ -173,6 +168,7 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) { struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); int i; + struct group_info *a, *g; if (acred->machine_cred) return machine_cred_match(acred, gcred, flags); @@ -182,16 +178,17 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) gcred->acred.machine_cred != 0) goto out_nomatch; + a = acred->cred->group_info; + g = gcred->acred.cred->group_info; /* Optimisation in the case where pointers are identical... */ - if (gcred->acred.group_info == acred->group_info) + if (a == g) goto out_match; /* Slow path... */ - if (gcred->acred.group_info->ngroups != acred->group_info->ngroups) + if (g->ngroups != a->ngroups) goto out_nomatch; - for (i = 0; i < gcred->acred.group_info->ngroups; i++) { - if (!gid_eq(gcred->acred.group_info->gid[i], - acred->group_info->gid[i])) + for (i = 0; i < g->ngroups; i++) { + if (!gid_eq(g->gid[i], a->gid[i])) goto out_nomatch; } out_match: diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 36e01384f082..0a6397a099d6 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -79,14 +79,14 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; - if (acred->group_info != NULL) - groups = acred->group_info->ngroups; + if (acred->cred && acred->cred->group_info != NULL) + groups = acred->cred->group_info->ngroups; if (groups > UNX_NGROUPS) groups = UNX_NGROUPS; cred->uc_gid = acred->gid; for (i = 0; i < groups; i++) - cred->uc_gids[i] = acred->group_info->gid[i]; + cred->uc_gids[i] = acred->cred->group_info->gid[i]; if 
(i < UNX_NGROUPS) cred->uc_gids[i] = INVALID_GID; @@ -130,12 +130,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid)) return 0; - if (acred->group_info != NULL) - groups = acred->group_info->ngroups; + if (acred->cred && acred->cred->group_info != NULL) + groups = acred->cred->group_info->ngroups; if (groups > UNX_NGROUPS) groups = UNX_NGROUPS; for (i = 0; i < groups ; i++) - if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i])) + if (!gid_eq(cred->uc_gids[i], acred->cred->group_info->gid[i])) return 0; if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups])) return 0; -- cgit v1.2.2 From 8276c902bbe95d628f48a7fdc13c71e265992085 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:30 +1100 Subject: SUNRPC: remove uid and gid from struct auth_cred Use cred->fsuid and cred->fsgid instead. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 6 +----- net/sunrpc/auth_generic.c | 23 ++++++++--------------- net/sunrpc/auth_gss/auth_gss.c | 9 ++++----- net/sunrpc/auth_unix.c | 12 ++++++------ 4 files changed, 19 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index e1053b96e0e5..63e2d35c10d5 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -656,8 +656,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) auth->au_ops->au_name); memset(&acred, 0, sizeof(acred)); - acred.uid = cred->fsuid; - acred.gid = cred->fsgid; acred.cred = cred; ret = auth->au_ops->lookup_cred(auth, &acred, flags); return ret; @@ -675,7 +673,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, cred->cr_ops = ops; cred->cr_expire = jiffies; cred->cr_cred = get_cred(acred->cred); - cred->cr_uid = acred->uid; + cred->cr_uid = acred->cred->fsuid; } EXPORT_SYMBOL_GPL(rpcauth_init_cred); @@ -693,8 +691,6 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) { 
struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { - .uid = GLOBAL_ROOT_UID, - .gid = GLOBAL_ROOT_GID, .cred = get_task_cred(&init_task), }; struct rpc_cred *ret; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index a4ae7bd7ca7b..6c7c65da6063 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -18,9 +18,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define RPC_MACHINE_CRED_USERID GLOBAL_ROOT_UID -#define RPC_MACHINE_CRED_GROUPID GLOBAL_ROOT_GID - struct generic_cred { struct rpc_cred gc_base; struct auth_cred acred; @@ -57,8 +54,6 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock); struct rpc_cred *rpc_lookup_machine_cred(const char *service_name) { struct auth_cred acred = { - .uid = RPC_MACHINE_CRED_USERID, - .gid = RPC_MACHINE_CRED_GROUPID, .principal = service_name, .machine_cred = 1, .cred = get_task_cred(&init_task), @@ -85,8 +80,8 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task, static int generic_hash_cred(struct auth_cred *acred, unsigned int hashbits) { - return hash_64(from_kgid(&init_user_ns, acred->gid) | - ((u64)from_kuid(&init_user_ns, acred->uid) << + return hash_64(from_kgid(&init_user_ns, acred->cred->fsgid) | + ((u64)from_kuid(&init_user_ns, acred->cred->fsuid) << (sizeof(gid_t) * 8)), hashbits); } @@ -111,8 +106,6 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, g rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops); gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; - gcred->acred.uid = acred->uid; - gcred->acred.gid = acred->gid; gcred->acred.cred = gcred->gc_base.cr_cred; gcred->acred.ac_flags = 0; gcred->acred.machine_cred = acred->machine_cred; @@ -121,8 +114,8 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, g dprintk("RPC: allocated %s cred %p for uid %d gid %d\n", gcred->acred.machine_cred ? 
"machine" : "generic", gcred, - from_kuid(&init_user_ns, acred->uid), - from_kgid(&init_user_ns, acred->gid)); + from_kuid(&init_user_ns, acred->cred->fsuid), + from_kgid(&init_user_ns, acred->cred->fsgid)); return &gcred->gc_base; } @@ -154,8 +147,8 @@ machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flag { if (!gcred->acred.machine_cred || gcred->acred.principal != acred->principal || - !uid_eq(gcred->acred.uid, acred->uid) || - !gid_eq(gcred->acred.gid, acred->gid)) + !uid_eq(gcred->acred.cred->fsuid, acred->cred->fsuid) || + !gid_eq(gcred->acred.cred->fsgid, acred->cred->fsgid)) return 0; return 1; } @@ -173,8 +166,8 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) if (acred->machine_cred) return machine_cred_match(acred, gcred, flags); - if (!uid_eq(gcred->acred.uid, acred->uid) || - !gid_eq(gcred->acred.gid, acred->gid) || + if (!uid_eq(gcred->acred.cred->fsuid, acred->cred->fsuid) || + !gid_eq(gcred->acred.cred->fsgid, acred->cred->fsgid) || gcred->acred.machine_cred != 0) goto out_nomatch; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 56604b259f2c..762b071cba71 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1248,7 +1248,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) new = kzalloc(sizeof(*gss_cred), GFP_NOIO); if (new) { struct auth_cred acred = { - .uid = gss_cred->gc_base.cr_uid, + .cred = gss_cred->gc_base.cr_cred, }; struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); @@ -1362,7 +1362,7 @@ gss_destroy_cred(struct rpc_cred *cred) static int gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) { - return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits); + return hash_64(from_kuid(&init_user_ns, acred->cred->fsuid), hashbits); } /* @@ -1382,7 +1382,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t int err = -ENOMEM; dprintk("RPC: %s 
for uid %d, flavor %d\n", - __func__, from_kuid(&init_user_ns, acred->uid), + __func__, from_kuid(&init_user_ns, acred->cred->fsuid), auth->au_flavor); if (!(cred = kzalloc(sizeof(*cred), gfp))) @@ -1523,7 +1523,7 @@ out: } if (gss_cred->gc_principal != NULL) return 0; - ret = uid_eq(rc->cr_uid, acred->uid); + ret = uid_eq(rc->cr_uid, acred->cred->fsuid); check_expire: if (ret == 0) @@ -1608,7 +1608,6 @@ static int gss_renew_cred(struct rpc_task *task) gc_base); struct rpc_auth *auth = oldcred->cr_auth; struct auth_cred acred = { - .uid = oldcred->cr_uid, .cred = oldcred->cr_cred, .principal = gss_cred->gc_principal, .machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0), diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 0a6397a099d6..7d4099fc18e7 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -48,8 +48,8 @@ unx_destroy(struct rpc_auth *auth) static int unx_hash_cred(struct auth_cred *acred, unsigned int hashbits) { - return hash_64(from_kgid(&init_user_ns, acred->gid) | - ((u64)from_kuid(&init_user_ns, acred->uid) << + return hash_64(from_kgid(&init_user_ns, acred->cred->fsgid) | + ((u64)from_kuid(&init_user_ns, acred->cred->fsuid) << (sizeof(gid_t) * 8)), hashbits); } @@ -70,8 +70,8 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t unsigned int i; dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", - from_kuid(&init_user_ns, acred->uid), - from_kgid(&init_user_ns, acred->gid)); + from_kuid(&init_user_ns, acred->cred->fsuid), + from_kgid(&init_user_ns, acred->cred->fsgid)); if (!(cred = kmalloc(sizeof(*cred), gfp))) return ERR_PTR(-ENOMEM); @@ -84,7 +84,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t if (groups > UNX_NGROUPS) groups = UNX_NGROUPS; - cred->uc_gid = acred->gid; + cred->uc_gid = acred->cred->fsgid; for (i = 0; i < groups; i++) cred->uc_gids[i] = acred->cred->group_info->gid[i]; if (i < UNX_NGROUPS) @@ -127,7 +127,7 @@ unx_match(struct 
auth_cred *acred, struct rpc_cred *rcred, int flags) unsigned int i; - if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid)) + if (!uid_eq(cred->uc_uid, acred->cred->fsuid) || !gid_eq(cred->uc_gid, acred->cred->fsgid)) return 0; if (acred->cred && acred->cred->group_info != NULL) -- cgit v1.2.2 From 1a80810fbf238e6dbaaaa5262a76d328ace21376 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:30 +1100 Subject: SUNRPC: remove machine_cred field from struct auth_cred The cred is a machine_cred iff ->principal is set, so there is no need for the extra flag. There is one case which deserves some explanation. nfs4_root_machine_cred() calls rpc_lookup_machine_cred() with a NULL principal name which results in not getting a machine credential, but getting a root credential instead. This appears to be what is expected of the caller, and is clearly the result provided by both auth_unix and auth_gss which already ignore the flag. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth_generic.c | 12 ++++++------ net/sunrpc/auth_gss/auth_gss.c | 5 +---- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 6c7c65da6063..7d1a8f45726c 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -50,12 +50,13 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock); /* * Public call interface for looking up machine creds. + * Note that if service_name is NULL, we actually look up + * "root" credential. 
*/ struct rpc_cred *rpc_lookup_machine_cred(const char *service_name) { struct auth_cred acred = { .principal = service_name, - .machine_cred = 1, .cred = get_task_cred(&init_task), }; struct rpc_cred *ret; @@ -108,11 +109,10 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, g gcred->acred.cred = gcred->gc_base.cr_cred; gcred->acred.ac_flags = 0; - gcred->acred.machine_cred = acred->machine_cred; gcred->acred.principal = acred->principal; dprintk("RPC: allocated %s cred %p for uid %d gid %d\n", - gcred->acred.machine_cred ? "machine" : "generic", + gcred->acred.principal ? "machine" : "generic", gcred, from_kuid(&init_user_ns, acred->cred->fsuid), from_kgid(&init_user_ns, acred->cred->fsgid)); @@ -145,7 +145,7 @@ generic_destroy_cred(struct rpc_cred *cred) static int machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags) { - if (!gcred->acred.machine_cred || + if (!gcred->acred.principal || gcred->acred.principal != acred->principal || !uid_eq(gcred->acred.cred->fsuid, acred->cred->fsuid) || !gid_eq(gcred->acred.cred->fsgid, acred->cred->fsgid)) @@ -163,12 +163,12 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) int i; struct group_info *a, *g; - if (acred->machine_cred) + if (acred->principal) return machine_cred_match(acred, gcred, flags); if (!uid_eq(gcred->acred.cred->fsuid, acred->cred->fsuid) || !gid_eq(gcred->acred.cred->fsgid, acred->cred->fsgid) || - gcred->acred.machine_cred != 0) + gcred->acred.principal != NULL) goto out_nomatch; a = acred->cred->group_info; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 762b071cba71..b218e15b61cb 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1395,9 +1395,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t */ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; - cred->gc_principal = NULL; - 
if (acred->machine_cred) - cred->gc_principal = acred->principal; + cred->gc_principal = acred->principal; kref_get(&gss_auth->kref); return &cred->gc_base; @@ -1610,7 +1608,6 @@ static int gss_renew_cred(struct rpc_task *task) struct auth_cred acred = { .cred = oldcred->cr_cred, .principal = gss_cred->gc_principal, - .machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0), }; struct rpc_cred *new; -- cgit v1.2.2 From 5e16923b432bfe79fdfb7cd95ed8e63f6438b663 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:30 +1100 Subject: NFS/SUNRPC: don't lookup machine credential until rpcauth_bindcred(). When NFS creates a machine credential, it is a "generic" credential, not tied to any auth protocol, and is really just a container for the principal name. This doesn't get linked to a genuine credential until rpcauth_bindcred() is called. The lookup always succeeds, so various places that test if the machine credential is NULL, are pointless. As a step towards getting rid of generic credentials, this patch gets rid of generic machine credentials. The nfs_client and rpc_client just hold a pointer to a constant principal name. When a machine credential is wanted, a special static 'struct rpc_cred' pointer is used. rpcauth_bindcred() recognizes this, finds the principal from the client, and binds the correct credential. 
Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 42 +++++++++++++++++++++++++++++++++++++++--- net/sunrpc/auth_generic.c | 21 --------------------- net/sunrpc/clnt.c | 1 + 3 files changed, 40 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 63e2d35c10d5..9e709dcc8c39 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -39,6 +39,20 @@ static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = { static LIST_HEAD(cred_unused); static unsigned long number_cred_unused; +static struct rpc_cred machine_cred = { + .cr_count = REFCOUNT_INIT(1), +}; + +/* + * Return the machine_cred pointer to be used whenever + * the a generic machine credential is needed. + */ +struct rpc_cred *rpc_machine_cred(void) +{ + return &machine_cred; +} +EXPORT_SYMBOL_GPL(rpc_machine_cred); + #define MAX_HASHTABLE_BITS (14) static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp) { @@ -702,6 +716,22 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) return ret; } +static struct rpc_cred * +rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags) +{ + struct rpc_auth *auth = task->tk_client->cl_auth; + struct auth_cred acred = { + .principal = task->tk_client->cl_principal, + .cred = init_task.cred, + }; + + if (!acred.principal) + return NULL; + dprintk("RPC: %5u looking up %s machine cred\n", + task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); + return auth->au_ops->lookup_cred(auth, &acred, lookupflags); +} + static struct rpc_cred * rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) { @@ -716,14 +746,20 @@ static int rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_cred *new; + struct rpc_cred *new = NULL; int lookupflags = 0; if (flags & RPC_TASK_ASYNC) lookupflags |= RPCAUTH_LOOKUP_NEW; - if (cred != NULL) + if (cred != NULL && cred != 
&machine_cred) new = cred->cr_ops->crbind(task, cred, lookupflags); - else if (flags & RPC_TASK_ROOTCREDS) + else if (cred == &machine_cred) + new = rpcauth_bind_machine_cred(task, lookupflags); + + /* If machine cred couldn't be bound, try a root cred */ + if (new) + ; + else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS)) new = rpcauth_bind_root_cred(task, lookupflags); else new = rpcauth_bind_new_cred(task, lookupflags); diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 7d1a8f45726c..5f7aa6324b78 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -48,27 +48,6 @@ struct rpc_cred *rpc_lookup_cred_nonblock(void) } EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock); -/* - * Public call interface for looking up machine creds. - * Note that if service_name is NULL, we actually look up - * "root" credential. - */ -struct rpc_cred *rpc_lookup_machine_cred(const char *service_name) -{ - struct auth_cred acred = { - .principal = service_name, - .cred = get_task_cred(&init_task), - }; - struct rpc_cred *ret; - - dprintk("RPC: looking up machine cred for service %s\n", - service_name); - ret = generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0); - put_cred(acred.cred); - return ret; -} -EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); - static struct rpc_cred *generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 24cbddc44c88..c5bf56abf266 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -627,6 +627,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, new->cl_noretranstimeo = clnt->cl_noretranstimeo; new->cl_discrtry = clnt->cl_discrtry; new->cl_chatty = clnt->cl_chatty; + new->cl_principal = clnt->cl_principal; return new; out_err: -- cgit v1.2.2 From a68a72e135ef55bce136a0b604413fd6b0f6d3fc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:30 +1100 Subject: SUNRPC: introduce 
RPC_TASK_NULLCREDS to request auth_none In almost all cases the credential stored in rpc_message.rpc_cred is a "generic" credential. One of the two exceptions is when an AUTH_NULL credential is used such as for RPC ping requests. To improve consistency, don't pass an explicit credential in these cases, but instead pass NULL and set a task flag, similar to RPC_TASK_ROOTCREDS, which requests that NULL credentials be used by default. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 2 ++ net/sunrpc/clnt.c | 19 ++++++------------- 2 files changed, 8 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 9e709dcc8c39..dcfcc590b34e 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -761,6 +761,8 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) ; else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS)) new = rpcauth_bind_root_cred(task, lookupflags); + else if (flags & RPC_TASK_NULLCREDS) + new = authnull_ops.lookup_cred(NULL, NULL, 0); else new = rpcauth_bind_new_cred(task, lookupflags); if (IS_ERR(new)) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c5bf56abf266..26bea2301045 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2522,9 +2522,8 @@ static int rpc_ping(struct rpc_clnt *clnt) .rpc_proc = &rpcproc_null, }; int err; - msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); - err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN); - put_rpccred(msg.rpc_cred); + err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN | + RPC_TASK_NULLCREDS); return err; } @@ -2594,7 +2593,6 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, void *dummy) { struct rpc_cb_add_xprt_calldata *data; - struct rpc_cred *cred; struct rpc_task *task; data = kmalloc(sizeof(*data), GFP_NOFS); @@ -2603,11 +2601,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, data->xps = xprt_switch_get(xps); data->xprt = 
xprt_get(xprt); - cred = authnull_ops.lookup_cred(NULL, NULL, 0); - task = rpc_call_null_helper(clnt, xprt, cred, - RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC, + task = rpc_call_null_helper(clnt, xprt, NULL, + RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS, &rpc_cb_add_xprt_call_ops, data); - put_rpccred(cred); if (IS_ERR(task)) return PTR_ERR(task); rpc_put_task(task); @@ -2638,7 +2634,6 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *data) { - struct rpc_cred *cred; struct rpc_task *task; struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data; int status = -EADDRINUSE; @@ -2650,11 +2645,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, goto out_err; /* Test the connection */ - cred = authnull_ops.lookup_cred(NULL, NULL, 0); - task = rpc_call_null_helper(clnt, xprt, cred, - RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + task = rpc_call_null_helper(clnt, xprt, NULL, + RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS, NULL, NULL); - put_rpccred(cred); if (IS_ERR(task)) { status = PTR_ERR(task); goto out_err; -- cgit v1.2.2 From 1de7eea92946d7b581a8cd26084410913c80e594 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:30 +1100 Subject: SUNRPC: add side channel to use non-generic cred for rpc call. The credential passed in rpc_message.rpc_cred is always a generic credential except in one instance. When gss_destroying_context() calls rpc_call_null(), it passes a specific credential that it needs to destroy. In this case the RPC acts *on* the credential rather than being authorized by it. This special case deserves explicit support and providing that will mean that rpc_message.rpc_cred is *always* generic, allowing some optimizations. So add "tk_op_cred" to rpc_task and "rpc_op_cred" to the setup data. Use this to pass the cred down from rpc_call_null(), and have rpcauth_bindcred() notice it and bind it in place. 
Credit to kernel test robot for finding a bug in earlier version of this patch. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 6 +++++- net/sunrpc/clnt.c | 2 +- net/sunrpc/sched.c | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index dcfcc590b34e..27d90578e7a0 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -751,7 +751,11 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) if (flags & RPC_TASK_ASYNC) lookupflags |= RPCAUTH_LOOKUP_NEW; - if (cred != NULL && cred != &machine_cred) + if (task->tk_op_cred) + /* Task must use exactly this rpc_cred */ + new = task->tk_op_cred->cr_ops->crbind(task, task->tk_op_cred, + lookupflags); + else if (cred != NULL && cred != &machine_cred) new = cred->cr_ops->crbind(task, cred, lookupflags); else if (cred == &machine_cred) new = rpcauth_bind_machine_cred(task, lookupflags); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 26bea2301045..4cb697cfb377 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2534,12 +2534,12 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, { struct rpc_message msg = { .rpc_proc = &rpcproc_null, - .rpc_cred = cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = clnt, .rpc_xprt = xprt, .rpc_message = &msg, + .rpc_op_cred = cred, .callback_ops = (ops != NULL) ? 
ops : &rpc_default_ops, .callback_data = data, .flags = flags, diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 57ca5bead1cb..c9f65037a6ad 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -997,6 +997,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_xprt = xprt_get(task_setup_data->rpc_xprt); + task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred); + if (task->tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; @@ -1054,6 +1056,7 @@ static void rpc_free_task(struct rpc_task *task) { unsigned short tk_flags = task->tk_flags; + put_rpccred(task->tk_op_cred); rpc_release_calldata(task->tk_ops, task->tk_calldata); if (tk_flags & RPC_TASK_DYNAMIC) { -- cgit v1.2.2 From ddf529eeed59184c49dcad1633c11831f822bf6b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:30 +1100 Subject: NFS: move credential expiry tracking out of SUNRPC into NFS. NFS needs to know when a credential is about to expire so that it can modify write-back behaviour to finish the write inside the expiry time. It currently uses functions in SUNRPC code which make use of a fairly complex callback scheme and flags in the generic credentials. As I am working to discard the generic credentials, this has to change. This patch moves the logic into NFS, in part by finding and caching the low-level credential in the open_context. We then make direct cred-api calls on that. This makes the code much simpler and removes a dependency on generic rpc credentials. 
Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 23 -------------- net/sunrpc/auth_generic.c | 69 ------------------------------------------ net/sunrpc/auth_gss/auth_gss.c | 21 +++---------- 3 files changed, 4 insertions(+), 109 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 27d90578e7a0..cf23eed01b1c 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -360,29 +360,6 @@ out_nocache: } EXPORT_SYMBOL_GPL(rpcauth_init_credcache); -/* - * Setup a credential key lifetime timeout notification - */ -int -rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred) -{ - if (!cred->cr_auth->au_ops->key_timeout) - return 0; - return cred->cr_auth->au_ops->key_timeout(auth, cred); -} -EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify); - -bool -rpcauth_cred_key_to_expire(struct rpc_auth *auth, struct rpc_cred *cred) -{ - if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT) - return false; - if (!cred->cr_ops->crkey_to_expire) - return false; - return cred->cr_ops->crkey_to_expire(cred); -} -EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire); - char * rpcauth_stringify_acceptor(struct rpc_cred *cred) { diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 5f7aa6324b78..c57e83184d3c 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -87,7 +87,6 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, g gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; gcred->acred.cred = gcred->gc_base.cr_cred; - gcred->acred.ac_flags = 0; gcred->acred.principal = acred->principal; dprintk("RPC: allocated %s cred %p for uid %d gid %d\n", @@ -179,72 +178,12 @@ void rpc_destroy_generic_auth(void) rpcauth_destroy_credcache(&generic_auth); } -/* - * Test the the current time (now) against the underlying credential key expiry - * minus a timeout and setup notification. 
- * - * The normal case: - * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set - * the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential - * rpc_credops crmatch routine to notify this generic cred when it's key - * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0. - * - * The error case: - * If the underlying cred lookup fails, return -EACCES. - * - * The 'almost' error case: - * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within - * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_EXPIRE_SOON bit - * on the acred ac_flags and return 0. - */ -static int -generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) -{ - struct auth_cred *acred = &container_of(cred, struct generic_cred, - gc_base)->acred; - struct rpc_cred *tcred; - int ret = 0; - - - /* Fast track for non crkey_timeout (no key) underlying credentials */ - if (auth->au_flags & RPCAUTH_AUTH_NO_CRKEY_TIMEOUT) - return 0; - - /* Fast track for the normal case */ - if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags)) - return 0; - - /* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */ - tcred = auth->au_ops->lookup_cred(auth, acred, 0); - if (IS_ERR(tcred)) - return -EACCES; - - /* Test for the almost error case */ - ret = tcred->cr_ops->crkey_timeout(tcred); - if (ret != 0) { - set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); - ret = 0; - } else { - /* In case underlying cred key has been reset */ - if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON, - &acred->ac_flags)) - dprintk("RPC: UID %d Credential key reset\n", - from_kuid(&init_user_ns, tcred->cr_uid)); - /* set up fasttrack for the normal case */ - set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); - } - - put_rpccred(tcred); - return ret; -} - static const struct rpc_authops generic_auth_ops = { .owner = THIS_MODULE, .au_name = "Generic", .hash_cred = generic_hash_cred, .lookup_cred = generic_lookup_cred, .crcreate = generic_create_cred, - 
.key_timeout = generic_key_timeout, }; static struct rpc_auth generic_auth = { @@ -252,17 +191,9 @@ static struct rpc_auth generic_auth = { .au_count = REFCOUNT_INIT(1), }; -static bool generic_key_to_expire(struct rpc_cred *cred) -{ - struct auth_cred *acred = &container_of(cred, struct generic_cred, - gc_base)->acred; - return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); -} - static const struct rpc_credops generic_credops = { .cr_name = "Generic cred", .crdestroy = generic_destroy_cred, .crbind = generic_bind_cred, .crmatch = generic_match, - .crkey_to_expire = generic_key_to_expire, }; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index b218e15b61cb..03a1cd5bfb43 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1517,23 +1517,10 @@ out: if (gss_cred->gc_principal == NULL) return 0; ret = strcmp(acred->principal, gss_cred->gc_principal) == 0; - goto check_expire; - } - if (gss_cred->gc_principal != NULL) - return 0; - ret = uid_eq(rc->cr_uid, acred->cred->fsuid); - -check_expire: - if (ret == 0) - return ret; - - /* Notify acred users of GSS context expiration timeout */ - if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) && - (gss_key_timeout(rc) != 0)) { - /* test will now be done from generic cred */ - test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); - /* tell NFS layer that key will expire soon */ - set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); + } else { + if (gss_cred->gc_principal != NULL) + return 0; + ret = uid_eq(rc->cr_uid, acred->cred->fsuid); } return ret; } -- cgit v1.2.2 From 354698b7d47165ed2f52d6c2bf682096a4cd71d1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:30 +1100 Subject: SUNRPC: remove RPCAUTH_AUTH_NO_CRKEY_TIMEOUT This is no longer used. 
Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth_null.c | 1 - net/sunrpc/auth_unix.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 2694a1bc026b..135c75d6c470 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -116,7 +116,6 @@ static struct rpc_auth null_auth = { .au_cslack = NUL_CALLSLACK, .au_rslack = NUL_REPLYSLACK, - .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT, .au_ops = &authnull_ops, .au_flavor = RPC_AUTH_NULL, .au_count = REFCOUNT_INIT(1), diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 7d4099fc18e7..6ee43bfbfb4b 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -237,7 +237,6 @@ static struct rpc_auth unix_auth = { .au_cslack = UNX_CALLSLACK, .au_rslack = NUL_REPLYSLACK, - .au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT, .au_ops = &authunix_ops, .au_flavor = RPC_AUTH_UNIX, .au_count = REFCOUNT_INIT(1), -- cgit v1.2.2 From a52458b48af142bcc2b72fe810c0db20cfae7fdd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:31 +1100 Subject: NFS/NFSD/SUNRPC: replace generic creds with 'struct cred'. SUNRPC has two sorts of credentials, both of which appear as "struct rpc_cred". There are "generic credentials" which are supplied by clients such as NFS and passed in 'struct rpc_message' to indicate which user should be used to authorize the request, and there are low-level credentials such as AUTH_NULL, AUTH_UNIX, AUTH_GSS which describe the credential to be sent over the wires. This patch replaces all the generic credentials by 'struct cred' pointers - the credential structure used throughout Linux. For machine credentials, there is a special 'struct cred *' pointer which is statically allocated and recognized where needed as having a special meaning. A look-up of a low-level cred will map this to a machine credential. Signed-off-by: NeilBrown Acked-by: J. 
Bruce Fields Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 14 +++++++++----- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/sched.c | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index cf23eed01b1c..ac8f824ec34f 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -39,15 +39,15 @@ static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = { static LIST_HEAD(cred_unused); static unsigned long number_cred_unused; -static struct rpc_cred machine_cred = { - .cr_count = REFCOUNT_INIT(1), +static struct cred machine_cred = { + .usage = ATOMIC_INIT(1), }; /* * Return the machine_cred pointer to be used whenever * the a generic machine credential is needed. */ -struct rpc_cred *rpc_machine_cred(void) +const struct cred *rpc_machine_cred(void) { return &machine_cred; } @@ -720,11 +720,15 @@ rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) } static int -rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) +rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_cred *new = NULL; int lookupflags = 0; + struct rpc_auth *auth = task->tk_client->cl_auth; + struct auth_cred acred = { + .cred = cred, + }; if (flags & RPC_TASK_ASYNC) lookupflags |= RPCAUTH_LOOKUP_NEW; @@ -733,7 +737,7 @@ rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) new = task->tk_op_cred->cr_ops->crbind(task, task->tk_op_cred, lookupflags); else if (cred != NULL && cred != &machine_cred) - new = cred->cr_ops->crbind(task, cred, lookupflags); + new = auth->au_ops->lookup_cred(auth, &acred, lookupflags); else if (cred == &machine_cred) new = rpcauth_bind_machine_cred(task, lookupflags); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4cb697cfb377..cad26f816d20 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1030,7 +1030,7 @@ rpc_task_set_rpc_message(struct rpc_task 
*task, const struct rpc_message *msg) task->tk_msg.rpc_argp = msg->rpc_argp; task->tk_msg.rpc_resp = msg->rpc_resp; if (msg->rpc_cred != NULL) - task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred); + task->tk_msg.rpc_cred = get_cred(msg->rpc_cred); } } @@ -2542,7 +2542,7 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, .rpc_op_cred = cred, .callback_ops = (ops != NULL) ? ops : &rpc_default_ops, .callback_data = data, - .flags = flags, + .flags = flags | RPC_TASK_NULLCREDS, }; return rpc_run_task(&task_setup_data); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c9f65037a6ad..adc3c40cc733 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1074,7 +1074,7 @@ static void rpc_release_resources_task(struct rpc_task *task) { xprt_release(task); if (task->tk_msg.rpc_cred) { - put_rpccred(task->tk_msg.rpc_cred); + put_cred(task->tk_msg.rpc_cred); task->tk_msg.rpc_cred = NULL; } rpc_task_release_client(task); -- cgit v1.2.2 From 89a4f758d9f55f197c2a461f61ffa4a75127b30d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:31 +1100 Subject: SUNRPC: remove generic cred code. This is no longer used. 
Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/Makefile | 2 +- net/sunrpc/auth.c | 18 +---- net/sunrpc/auth_generic.c | 199 ---------------------------------------------- net/sunrpc/auth_null.c | 2 - 4 files changed, 2 insertions(+), 219 deletions(-) delete mode 100644 net/sunrpc/auth_generic.c (limited to 'net') diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 090658c3da12..9488600451e8 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ - auth.o auth_null.o auth_unix.o auth_generic.o \ + auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index ac8f824ec34f..2debbaba7809 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -578,13 +578,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { if (!entry->cr_ops->crmatch(acred, entry, flags)) continue; - if (flags & RPCAUTH_LOOKUP_RCU) { - if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) || - refcount_read(&entry->cr_count) == 0) - continue; - cred = entry; - break; - } cred = get_rpccred(entry); if (cred) break; @@ -594,9 +587,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, if (cred != NULL) goto found; - if (flags & RPCAUTH_LOOKUP_RCU) - return ERR_PTR(-ECHILD); - new = auth->au_ops->crcreate(auth, acred, flags, gfp); if (IS_ERR(new)) { cred = new; @@ -925,15 +915,10 @@ int __init rpcauth_init_module(void) err = rpc_init_authunix(); if (err < 0) goto out1; - err = rpc_init_generic_auth(); - if (err < 0) - goto out2; err = register_shrinker(&rpc_cred_shrinker); if (err < 0) - goto out3; + goto out2; return 0; -out3: - rpc_destroy_generic_auth(); out2: 
rpc_destroy_authunix(); out1: @@ -943,6 +928,5 @@ out1: void rpcauth_remove_module(void) { rpc_destroy_authunix(); - rpc_destroy_generic_auth(); unregister_shrinker(&rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c deleted file mode 100644 index c57e83184d3c..000000000000 --- a/net/sunrpc/auth_generic.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Generic RPC credential - * - * Copyright (C) 2008, Trond Myklebust - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -struct generic_cred { - struct rpc_cred gc_base; - struct auth_cred acred; -}; - -static struct rpc_auth generic_auth; -static const struct rpc_credops generic_credops; - -/* - * Public call interface - */ -struct rpc_cred *rpc_lookup_cred(void) -{ - return rpcauth_lookupcred(&generic_auth, 0); -} -EXPORT_SYMBOL_GPL(rpc_lookup_cred); - -struct rpc_cred * -rpc_lookup_generic_cred(struct auth_cred *acred, int flags, gfp_t gfp) -{ - return rpcauth_lookup_credcache(&generic_auth, acred, flags, gfp); -} -EXPORT_SYMBOL_GPL(rpc_lookup_generic_cred); - -struct rpc_cred *rpc_lookup_cred_nonblock(void) -{ - return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU); -} -EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock); - -static struct rpc_cred *generic_bind_cred(struct rpc_task *task, - struct rpc_cred *cred, int lookupflags) -{ - struct rpc_auth *auth = task->tk_client->cl_auth; - struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; - - return auth->au_ops->lookup_cred(auth, acred, lookupflags); -} - -static int -generic_hash_cred(struct auth_cred *acred, unsigned int hashbits) -{ - return hash_64(from_kgid(&init_user_ns, acred->cred->fsgid) | - ((u64)from_kuid(&init_user_ns, acred->cred->fsuid) << - (sizeof(gid_t) * 8)), hashbits); -} - -/* - * Lookup generic creds for current process - */ -static struct rpc_cred * 
-generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) -{ - return rpcauth_lookup_credcache(&generic_auth, acred, flags, GFP_KERNEL); -} - -static struct rpc_cred * -generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) -{ - struct generic_cred *gcred; - - gcred = kmalloc(sizeof(*gcred), gfp); - if (gcred == NULL) - return ERR_PTR(-ENOMEM); - - rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops); - gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; - - gcred->acred.cred = gcred->gc_base.cr_cred; - gcred->acred.principal = acred->principal; - - dprintk("RPC: allocated %s cred %p for uid %d gid %d\n", - gcred->acred.principal ? "machine" : "generic", - gcred, - from_kuid(&init_user_ns, acred->cred->fsuid), - from_kgid(&init_user_ns, acred->cred->fsgid)); - return &gcred->gc_base; -} - -static void -generic_free_cred(struct rpc_cred *cred) -{ - struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); - - dprintk("RPC: generic_free_cred %p\n", gcred); - put_cred(cred->cr_cred); - kfree(gcred); -} - -static void -generic_free_cred_callback(struct rcu_head *head) -{ - struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu); - generic_free_cred(cred); -} - -static void -generic_destroy_cred(struct rpc_cred *cred) -{ - call_rcu(&cred->cr_rcu, generic_free_cred_callback); -} - -static int -machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags) -{ - if (!gcred->acred.principal || - gcred->acred.principal != acred->principal || - !uid_eq(gcred->acred.cred->fsuid, acred->cred->fsuid) || - !gid_eq(gcred->acred.cred->fsgid, acred->cred->fsgid)) - return 0; - return 1; -} - -/* - * Match credentials against current process creds. 
- */ -static int -generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) -{ - struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); - int i; - struct group_info *a, *g; - - if (acred->principal) - return machine_cred_match(acred, gcred, flags); - - if (!uid_eq(gcred->acred.cred->fsuid, acred->cred->fsuid) || - !gid_eq(gcred->acred.cred->fsgid, acred->cred->fsgid) || - gcred->acred.principal != NULL) - goto out_nomatch; - - a = acred->cred->group_info; - g = gcred->acred.cred->group_info; - /* Optimisation in the case where pointers are identical... */ - if (a == g) - goto out_match; - - /* Slow path... */ - if (g->ngroups != a->ngroups) - goto out_nomatch; - for (i = 0; i < g->ngroups; i++) { - if (!gid_eq(g->gid[i], a->gid[i])) - goto out_nomatch; - } -out_match: - return 1; -out_nomatch: - return 0; -} - -int __init rpc_init_generic_auth(void) -{ - return rpcauth_init_credcache(&generic_auth); -} - -void rpc_destroy_generic_auth(void) -{ - rpcauth_destroy_credcache(&generic_auth); -} - -static const struct rpc_authops generic_auth_ops = { - .owner = THIS_MODULE, - .au_name = "Generic", - .hash_cred = generic_hash_cred, - .lookup_cred = generic_lookup_cred, - .crcreate = generic_create_cred, -}; - -static struct rpc_auth generic_auth = { - .au_ops = &generic_auth_ops, - .au_count = REFCOUNT_INIT(1), -}; - -static const struct rpc_credops generic_credops = { - .cr_name = "Generic cred", - .crdestroy = generic_destroy_cred, - .crbind = generic_bind_cred, - .crmatch = generic_match, -}; diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 135c75d6c470..830686e80bed 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -36,8 +36,6 @@ nul_destroy(struct rpc_auth *auth) static struct rpc_cred * nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { - if (flags & RPCAUTH_LOOKUP_RCU) - return &null_cred; return get_rpccred(&null_cred); } -- cgit v1.2.2 From 
d6efccd97e6de25e002d658593675ce8e07ceb8c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:31 +1100 Subject: SUNRPC: remove crbind rpc_cred operation This now always just does get_rpccred(), so we don't need an operation pointer to know to do that. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 12 +----------- net/sunrpc/auth_gss/auth_gss.c | 2 -- net/sunrpc/auth_null.c | 1 - net/sunrpc/auth_unix.c | 1 - 4 files changed, 1 insertion(+), 15 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 2debbaba7809..867ea9834bde 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -658,15 +658,6 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, } EXPORT_SYMBOL_GPL(rpcauth_init_cred); -struct rpc_cred * -rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) -{ - dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, - cred->cr_auth->au_ops->au_name, cred); - return get_rpccred(cred); -} -EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); - static struct rpc_cred * rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) { @@ -724,8 +715,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags) lookupflags |= RPCAUTH_LOOKUP_NEW; if (task->tk_op_cred) /* Task must use exactly this rpc_cred */ - new = task->tk_op_cred->cr_ops->crbind(task, task->tk_op_cred, - lookupflags); + new = get_rpccred(task->tk_op_cred); else if (cred != NULL && cred != &machine_cred) new = auth->au_ops->lookup_cred(auth, &acred, lookupflags); else if (cred == &machine_cred) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 03a1cd5bfb43..4e1a2ebef814 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -2095,7 +2095,6 @@ static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, - .crbind = 
rpcauth_generic_bind_cred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh, @@ -2110,7 +2109,6 @@ static const struct rpc_credops gss_credops = { static const struct rpc_credops gss_nullops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_nullcred, - .crbind = rpcauth_generic_bind_cred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh_null, diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 830686e80bed..d0ceac57c06e 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -123,7 +123,6 @@ static const struct rpc_credops null_credops = { .cr_name = "AUTH_NULL", .crdestroy = nul_destroy_cred, - .crbind = rpcauth_generic_bind_cred, .crmatch = nul_match, .crmarshal = nul_marshal, .crrefresh = nul_refresh, diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 6ee43bfbfb4b..bff113a411e0 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -246,7 +246,6 @@ static const struct rpc_credops unix_credops = { .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, - .crbind = rpcauth_generic_bind_cred, .crmatch = unx_match, .crmarshal = unx_marshal, .crrefresh = unx_refresh, -- cgit v1.2.2 From 2edd8d746e51229705367528e95e5b49bccfa76e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:31 +1100 Subject: SUNRPC: simplify auth_unix. 1/ discard 'struct unx_cred'. We don't need any data that is not already in 'struct rpc_cred'. 2/ Don't keep these creds in a hash table. When a credential is needed, simply allocate it. When not needed, discard it. This can easily be faster than performing a lookup on a shared hash table. As the lookup can happen during write-out, use a mempool to ensure forward progress. This means that we cannot compare two credentials for equality by comparing the pointers, but we never do that anyway. 
Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 1 + net/sunrpc/auth_unix.c | 101 +++++++++++++++---------------------------------- 2 files changed, 32 insertions(+), 70 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 867ea9834bde..a07a7c59d3a4 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -651,6 +651,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, INIT_LIST_HEAD(&cred->cr_lru); refcount_set(&cred->cr_count, 1); cred->cr_auth = auth; + cred->cr_flags = 0; cred->cr_ops = ops; cred->cr_expire = jiffies; cred->cr_cred = get_cred(acred->cred); diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index bff113a411e0..387f6b3ffbea 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -11,16 +11,11 @@ #include #include #include +#include #include #include #include -struct unx_cred { - struct rpc_cred uc_base; - kgid_t uc_gid; - kgid_t uc_gids[UNX_NGROUPS]; -}; -#define uc_uid uc_base.cr_uid #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH @@ -28,6 +23,7 @@ struct unx_cred { static struct rpc_auth unix_auth; static const struct rpc_credops unix_credops; +static mempool_t *unix_pool; static struct rpc_auth * unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) @@ -42,15 +38,6 @@ static void unx_destroy(struct rpc_auth *auth) { dprintk("RPC: destroying UNIX authenticator %p\n", auth); - rpcauth_clear_credcache(auth->au_credcache); -} - -static int -unx_hash_cred(struct auth_cred *acred, unsigned int hashbits) -{ - return hash_64(from_kgid(&init_user_ns, acred->cred->fsgid) | - ((u64)from_kuid(&init_user_ns, acred->cred->fsuid) << - (sizeof(gid_t) * 8)), hashbits); } /* @@ -59,53 +46,24 @@ unx_hash_cred(struct auth_cred *acred, unsigned int hashbits) static struct rpc_cred * unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { - return rpcauth_lookup_credcache(auth, acred, 
flags, GFP_NOFS); -} - -static struct rpc_cred * -unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) -{ - struct unx_cred *cred; - unsigned int groups = 0; - unsigned int i; + struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS); dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", from_kuid(&init_user_ns, acred->cred->fsuid), from_kgid(&init_user_ns, acred->cred->fsgid)); - if (!(cred = kmalloc(sizeof(*cred), gfp))) - return ERR_PTR(-ENOMEM); - - rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); - cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; - - if (acred->cred && acred->cred->group_info != NULL) - groups = acred->cred->group_info->ngroups; - if (groups > UNX_NGROUPS) - groups = UNX_NGROUPS; - - cred->uc_gid = acred->cred->fsgid; - for (i = 0; i < groups; i++) - cred->uc_gids[i] = acred->cred->group_info->gid[i]; - if (i < UNX_NGROUPS) - cred->uc_gids[i] = INVALID_GID; - - return &cred->uc_base; -} - -static void -unx_free_cred(struct unx_cred *unx_cred) -{ - dprintk("RPC: unx_free_cred %p\n", unx_cred); - put_cred(unx_cred->uc_base.cr_cred); - kfree(unx_cred); + rpcauth_init_cred(ret, acred, auth, &unix_credops); + ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; + return ret; } static void unx_free_cred_callback(struct rcu_head *head) { - struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu); - unx_free_cred(unx_cred); + struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu); + dprintk("RPC: unx_free_cred %p\n", rpc_cred); + put_cred(rpc_cred->cr_cred); + mempool_free(rpc_cred, unix_pool); } static void @@ -115,30 +73,32 @@ unx_destroy_cred(struct rpc_cred *cred) } /* - * Match credentials against current process creds. - * The root_override argument takes care of cases where the caller may - * request root creds (e.g. for NFS swapping). + * Match credentials against current the auth_cred. 
*/ static int -unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) +unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) { - struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); unsigned int groups = 0; unsigned int i; + if (cred->cr_cred == acred->cred) + return 1; - if (!uid_eq(cred->uc_uid, acred->cred->fsuid) || !gid_eq(cred->uc_gid, acred->cred->fsgid)) + if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid)) return 0; if (acred->cred && acred->cred->group_info != NULL) groups = acred->cred->group_info->ngroups; if (groups > UNX_NGROUPS) groups = UNX_NGROUPS; + if (cred->cr_cred->group_info == NULL) + return groups == 0; + if (groups != cred->cr_cred->group_info->ngroups) + return 0; + for (i = 0; i < groups ; i++) - if (!gid_eq(cred->uc_gids[i], acred->cred->group_info->gid[i])) + if (!gid_eq(cred->cr_cred->group_info->gid[i], acred->cred->group_info->gid[i])) return 0; - if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups])) - return 0; return 1; } @@ -150,9 +110,10 @@ static __be32 * unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; - struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base); + struct rpc_cred *cred = task->tk_rqstp->rq_cred; __be32 *base, *hold; int i; + struct group_info *gi = cred->cr_cred->group_info; *p++ = htonl(RPC_AUTH_UNIX); base = p++; @@ -163,11 +124,12 @@ unx_marshal(struct rpc_task *task, __be32 *p) */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); - *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid)); - *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid)); + *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid)); + *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid)); hold = p++; - for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++) - *p++ = htonl((u32) from_kgid(&init_user_ns, 
cred->uc_gids[i])); + if (gi) + for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++) + *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i])); *hold = htonl(p - hold - 1); /* gid array length */ *base = htonl((p - base - 1) << 2); /* cred length */ @@ -214,12 +176,13 @@ unx_validate(struct rpc_task *task, __be32 *p) int __init rpc_init_authunix(void) { - return rpcauth_init_credcache(&unix_auth); + unix_pool = mempool_create_kmalloc_pool(16, sizeof(struct rpc_cred)); + return unix_pool ? 0 : -ENOMEM; } void rpc_destroy_authunix(void) { - rpcauth_destroy_credcache(&unix_auth); + mempool_destroy(unix_pool); } const struct rpc_authops authunix_ops = { @@ -228,9 +191,7 @@ const struct rpc_authops authunix_ops = { .au_name = "UNIX", .create = unx_create, .destroy = unx_destroy, - .hash_cred = unx_hash_cred, .lookup_cred = unx_lookup_cred, - .crcreate = unx_create_cred, }; static -- cgit v1.2.2 From 04d1532bd0b93cc4d0056f27da1591f086d341a6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Dec 2018 11:30:31 +1100 Subject: SUNRPC discard cr_uid from struct rpc_cred. Just use ->cr_cred->fsuid directly. 
Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 1 - net/sunrpc/auth_gss/auth_gss.c | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a07a7c59d3a4..1ff9768f5456 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -655,7 +655,6 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, cred->cr_ops = ops; cred->cr_expire = jiffies; cred->cr_cred = get_cred(acred->cred); - cred->cr_uid = acred->cred->fsuid; } EXPORT_SYMBOL_GPL(rpcauth_init_cred); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4e1a2ebef814..dc86713b32b6 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -565,7 +565,7 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_new, *gss_msg; - kuid_t uid = cred->cr_uid; + kuid_t uid = cred->cr_cred->fsuid; gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal); if (IS_ERR(gss_new)) @@ -604,7 +604,7 @@ gss_refresh_upcall(struct rpc_task *task) int err = 0; dprintk("RPC: %5u %s for uid %u\n", - task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid)); + task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid)); gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we @@ -637,7 +637,7 @@ gss_refresh_upcall(struct rpc_task *task) out: dprintk("RPC: %5u %s for uid %u result %d\n", task->tk_pid, __func__, - from_kuid(&init_user_ns, cred->cr_uid), err); + from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); return err; } @@ -653,7 +653,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) int err; dprintk("RPC: %s for uid %u\n", - __func__, from_kuid(&init_user_ns, cred->cr_uid)); + __func__, 
from_kuid(&init_user_ns, cred->cr_cred->fsuid)); retry: err = 0; /* if gssd is down, just skip upcalling altogether */ @@ -701,7 +701,7 @@ out_intr: gss_release_msg(gss_msg); out: dprintk("RPC: %s for uid %u result %d\n", - __func__, from_kuid(&init_user_ns, cred->cr_uid), err); + __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); return err; } @@ -1520,7 +1520,7 @@ out: } else { if (gss_cred->gc_principal != NULL) return 0; - ret = uid_eq(rc->cr_uid, acred->cred->fsuid); + ret = uid_eq(rc->cr_cred->fsuid, acred->cred->fsuid); } return ret; } -- cgit v1.2.2 From e2f34e26710bfaa545a9d9cd0c70137406401467 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:58:13 -0500 Subject: xprtrdma: Yet another double DMA-unmap While chasing yet another set of DMAR fault reports, I noticed that the frwr recycler conflates whether or not an MR has been DMA unmapped with frwr->fr_state. Actually the two have only an indirect relationship. It's in fact impossible to guess reliably whether the MR has been DMA unmapped based on its fr_state field, especially as the surrounding code and its assumptions have changed over time. A better approach is to track the DMA mapping status explicitly so that the recycler is less brittle to unexpected situations, and attempts to DMA-unmap a second time are prevented. 
Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org # v4.20 Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 6 ++++-- net/sunrpc/xprtrdma/verbs.c | 9 ++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index fc6378cc0c1c..20ced24cc61b 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -117,15 +117,15 @@ static void frwr_mr_recycle_worker(struct work_struct *work) { struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle); - enum rpcrdma_frwr_state state = mr->frwr.fr_state; struct rpcrdma_xprt *r_xprt = mr->mr_xprt; trace_xprtrdma_mr_recycle(mr); - if (state != FRWR_FLUSHED_LI) { + if (mr->mr_dir != DMA_NONE) { trace_xprtrdma_mr_unmap(mr); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, mr->mr_sg, mr->mr_nents, mr->mr_dir); + mr->mr_dir = DMA_NONE; } spin_lock(&r_xprt->rx_buf.rb_mrlock); @@ -150,6 +150,8 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) if (!mr->mr_sg) goto out_list_err; + frwr->fr_state = FRWR_IS_INVALID; + mr->mr_dir = DMA_NONE; INIT_LIST_HEAD(&mr->mr_list); INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker); sg_init_table(mr->mr_sg, depth); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3ddba94c939f..b9bc7f9f6bb9 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1329,9 +1329,12 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) { struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mr->mr_sg, mr->mr_nents, mr->mr_dir); + if (mr->mr_dir != DMA_NONE) { + trace_xprtrdma_mr_unmap(mr); + ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, + mr->mr_sg, mr->mr_nents, mr->mr_dir); + mr->mr_dir = DMA_NONE; + } __rpcrdma_mr_put(&r_xprt->rx_buf, mr); } -- cgit v1.2.2 From b674c4b4a13dc3d37f1b4a449f3b9515f9a30615 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 
19 Dec 2018 10:58:19 -0500 Subject: xprtrdma: Ensure MRs are DMA-unmapped when posting LOCAL_INV fails The recovery case in frwr_op_unmap_sync needs to DMA unmap each MR. frwr_release_mr does not DMA-unmap, but the recycle worker does. Fixes: 61da886bf74e ("xprtrdma: Explicitly resetting MRs is ... ") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 20ced24cc61b..27222c034ad2 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -563,8 +563,8 @@ out_release: mr = container_of(frwr, struct rpcrdma_mr, frwr); bad_wr = bad_wr->next; - list_del(&mr->mr_list); - frwr_op_release_mr(mr); + list_del_init(&mr->mr_list); + rpcrdma_mr_recycle(mr); } } -- cgit v1.2.2 From 6ceea36890a01aa626ce08487eecc5fb43e749b1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:58:24 -0500 Subject: xprtrdma: Refactor Receive accounting Clean up: Divide the work cleanly: - rpcrdma_wc_receive is responsible only for RDMA Receives - rpcrdma_reply_handler is responsible only for RPC Replies - the posted send and receive counts both belong in rpcrdma_ep Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 1 - net/sunrpc/xprtrdma/rpc_rdma.c | 21 +++------------------ net/sunrpc/xprtrdma/verbs.c | 31 ++++++++++++++----------------- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +-- 4 files changed, 18 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index e5b367a3e517..2cb07a313b3d 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -207,7 +207,6 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) if (rc < 0) goto failed_marshal; - rpcrdma_post_recvs(r_xprt, true); if (rpcrdma_ep_post(&r_xprt->rx_ia, 
&r_xprt->rx_ep, req)) goto drop_connection; return 0; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 9f53e0240035..dc2397731c5c 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1312,11 +1312,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) u32 credits; __be32 *p; - --buf->rb_posted_receives; - - if (rep->rr_hdrbuf.head[0].iov_len == 0) - goto out_badstatus; - /* Fixed transport header fields */ xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, rep->rr_hdrbuf.head[0].iov_base); @@ -1361,31 +1356,21 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); - - rpcrdma_post_recvs(r_xprt, false); queue_work(rpcrdma_receive_wq, &rep->rr_work); return; out_badversion: trace_xprtrdma_reply_vers(rep); - goto repost; + goto out; -/* The RPC transaction has already been terminated, or the header - * is corrupt. - */ out_norqst: spin_unlock(&xprt->queue_lock); trace_xprtrdma_reply_rqst(rep); - goto repost; + goto out; out_shortreply: trace_xprtrdma_reply_short(rep); -/* If no pending RPC transaction was matched, post a replacement - * receive buffer before returning. 
- */ -repost: - rpcrdma_post_recvs(r_xprt, false); -out_badstatus: +out: rpcrdma_recv_buffer_put(rep); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index b9bc7f9f6bb9..e4461e7c1b0c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -78,6 +78,7 @@ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); +static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); struct workqueue_struct *rpcrdma_receive_wq __read_mostly; @@ -189,11 +190,13 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, rr_cqe); + struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; - /* WARNING: Only wr_id and status are reliable at this point */ + /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_receive(wc); + --r_xprt->rx_ep.rep_receive_count; if (wc->status != IB_WC_SUCCESS) - goto out_fail; + goto out_flushed; /* status == SUCCESS means all fields in wc are trustworthy */ rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); @@ -204,17 +207,16 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) rdmab_addr(rep->rr_rdmabuf), wc->byte_len, DMA_FROM_DEVICE); -out_schedule: + rpcrdma_post_recvs(r_xprt, false); rpcrdma_reply_handler(rep); return; -out_fail: +out_flushed: if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("rpcrdma: Recv: %s (%u/0x%x)\n", ib_wc_status_msg(wc->status), wc->status, wc->vendor_err); - rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0); - goto out_schedule; + rpcrdma_recv_buffer_put(rep); } static void @@ -581,6 +583,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_disconnect_worker, 
rpcrdma_disconnect_worker); + ep->rep_receive_count = 0; sendcq = ib_alloc_cq(ia->ri_device, NULL, ep->rep_attr.cap.max_send_wr + 1, @@ -1174,7 +1177,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) } buf->rb_credits = 1; - buf->rb_posted_receives = 0; INIT_LIST_HEAD(&buf->rb_recv_bufs); rc = rpcrdma_sendctxs_create(r_xprt); @@ -1511,25 +1513,20 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, return 0; } -/** - * rpcrdma_post_recvs - Maybe post some Receive buffers - * @r_xprt: controlling transport - * @temp: when true, allocate temp rpcrdma_rep objects - * - */ -void +static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct ib_recv_wr *wr, *bad_wr; int needed, count, rc; rc = 0; count = 0; needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); - if (buf->rb_posted_receives > needed) + if (ep->rep_receive_count > needed) goto out; - needed -= buf->rb_posted_receives; + needed -= ep->rep_receive_count; count = 0; wr = NULL; @@ -1577,7 +1574,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) --count; } } - buf->rb_posted_receives += count; + ep->rep_receive_count += count; out: trace_xprtrdma_post_recvs(r_xprt, count, rc); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index a13ccb643ce0..788124cd9258 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -102,6 +102,7 @@ struct rpcrdma_ep { struct rpcrdma_connect_private rep_cm_private; struct rdma_conn_param rep_remote_cma; struct delayed_work rep_disconnect_worker; + int rep_receive_count; }; /* Pre-allocate extra Work Requests for handling backward receives @@ -404,7 +405,6 @@ struct rpcrdma_buffer { unsigned long rb_flags; u32 rb_max_requests; u32 rb_credits; /* most recent credit grant */ - int rb_posted_receives; u32 rb_bc_srv_max_requests; spinlock_t rb_reqslock; /* protect rb_allreqs */ @@ -560,7 +560,6 @@ void 
rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, struct rpcrdma_req *); -void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); /* * Buffer calls - xprtrdma/verbs.c -- cgit v1.2.2 From 6d2d0ee27c7a12371a0ca51a5db414204901228c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:58:29 -0500 Subject: xprtrdma: Replace rpcrdma_receive_wq with a per-xprt workqueue To address a connection-close ordering problem, we need the ability to drain the RPC completions running on rpcrdma_receive_wq for just one transport. Give each transport its own RPC completion workqueue, and drain that workqueue when disconnecting the transport. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- net/sunrpc/xprtrdma/transport.c | 17 ++++------- net/sunrpc/xprtrdma/verbs.c | 67 ++++++++++++++++++++++------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 6 +--- 4 files changed, 44 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index dc2397731c5c..5738c9f02144 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1356,7 +1356,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); - queue_work(rpcrdma_receive_wq, &rep->rr_work); + queue_work(buf->rb_completion_wq, &rep->rr_work); return; out_badversion: diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ae2a83828953..91c476a8f51c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -444,10 +444,14 @@ xprt_rdma_close(struct rpc_xprt *xprt) struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_ia *ia = &r_xprt->rx_ia; + might_sleep(); + dprintk("RPC: %s: closing xprt %p\n", __func__, xprt); + /* Prevent marshaling and 
sending of new requests */ + xprt_clear_connected(xprt); + if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { - xprt_clear_connected(xprt); rpcrdma_ia_remove(ia); return; } @@ -858,8 +862,6 @@ void xprt_rdma_cleanup(void) dprintk("RPC: %s: xprt_unregister returned %i\n", __func__, rc); - rpcrdma_destroy_wq(); - rc = xprt_unregister_transport(&xprt_rdma_bc); if (rc) dprintk("RPC: %s: xprt_unregister(bc) returned %i\n", @@ -870,20 +872,13 @@ int xprt_rdma_init(void) { int rc; - rc = rpcrdma_alloc_wq(); - if (rc) - return rc; - rc = xprt_register_transport(&xprt_rdma); - if (rc) { - rpcrdma_destroy_wq(); + if (rc) return rc; - } rc = xprt_register_transport(&xprt_rdma_bc); if (rc) { xprt_unregister_transport(&xprt_rdma); - rpcrdma_destroy_wq(); return rc; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index e4461e7c1b0c..cff3a5df0b90 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -80,33 +80,23 @@ static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); -struct workqueue_struct *rpcrdma_receive_wq __read_mostly; - -int -rpcrdma_alloc_wq(void) +/* Wait for outstanding transport work to finish. + */ +static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { - struct workqueue_struct *recv_wq; - - recv_wq = alloc_workqueue("xprtrdma_receive", - WQ_MEM_RECLAIM | WQ_HIGHPRI, - 0); - if (!recv_wq) - return -ENOMEM; - - rpcrdma_receive_wq = recv_wq; - return 0; -} + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; -void -rpcrdma_destroy_wq(void) -{ - struct workqueue_struct *wq; + /* Flush Receives, then wait for deferred Reply work + * to complete. 
+ */ + ib_drain_qp(ia->ri_id->qp); + drain_workqueue(buf->rb_completion_wq); - if (rpcrdma_receive_wq) { - wq = rpcrdma_receive_wq; - rpcrdma_receive_wq = NULL; - destroy_workqueue(wq); - } + /* Deferred Reply processing might have scheduled + * local invalidations. + */ + ib_drain_sq(ia->ri_id->qp); } /** @@ -483,7 +473,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) * connection is already gone. */ if (ia->ri_id->qp) { - ib_drain_qp(ia->ri_id->qp); + rpcrdma_xprt_drain(r_xprt); rdma_destroy_qp(ia->ri_id); ia->ri_id->qp = NULL; } @@ -825,8 +815,10 @@ out_noupdate: return rc; } -/* - * rpcrdma_ep_disconnect +/** + * rpcrdma_ep_disconnect - Disconnect underlying transport + * @ep: endpoint to disconnect + * @ia: associated interface adapter * * This is separate from destroy to facilitate the ability * to reconnect without recreating the endpoint. @@ -837,19 +829,20 @@ out_noupdate: void rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { + struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, + rx_ep); int rc; + /* returns without wait if ID is not connected */ rc = rdma_disconnect(ia->ri_id); if (!rc) - /* returns without wait if not connected */ wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 1); else ep->rep_connected = rc; - trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt, - rx_ep), rc); + trace_xprtrdma_disconnect(r_xprt, rc); - ib_drain_qp(ia->ri_id->qp); + rpcrdma_xprt_drain(r_xprt); } /* Fixed-size circular FIFO queue. 
This implementation is wait-free and @@ -1183,6 +1176,13 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) if (rc) goto out; + buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s", + WQ_MEM_RECLAIM | WQ_HIGHPRI, + 0, + r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]); + if (!buf->rb_completion_wq) + goto out; + return 0; out: rpcrdma_buffer_destroy(buf); @@ -1241,6 +1241,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { cancel_delayed_work_sync(&buf->rb_refresh_worker); + if (buf->rb_completion_wq) { + destroy_workqueue(buf->rb_completion_wq); + buf->rb_completion_wq = NULL; + } + rpcrdma_sendctxs_destroy(buf); while (!list_empty(&buf->rb_recv_bufs)) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 788124cd9258..3f198cde41e3 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -412,6 +412,7 @@ struct rpcrdma_buffer { u32 rb_bc_max_requests; + struct workqueue_struct *rb_completion_wq; struct delayed_work rb_refresh_worker; }; #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) @@ -547,8 +548,6 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); bool frwr_is_supported(struct rpcrdma_ia *); bool fmr_is_supported(struct rpcrdma_ia *); -extern struct workqueue_struct *rpcrdma_receive_wq; - /* * Endpoint calls - xprtrdma/verbs.c */ @@ -603,9 +602,6 @@ rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) return __rpcrdma_dma_map_regbuf(ia, rb); } -int rpcrdma_alloc_wq(void); -void rpcrdma_destroy_wq(void); - /* * Wrappers for chunk registration, shared by read/write chunk code. */ -- cgit v1.2.2 From 3d433ad812baad45fa697f1af45a651147360712 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:58:35 -0500 Subject: xprtrdma: No qp_event disconnect After thinking about this more, and auditing other kernel ULP implementations, I believe that a DISCONNECT cm_event will occur after a fatal QP event.
If that's the case, there's no need for an explicit disconnect in the QP event handler. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 32 -------------------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 2 files changed, 33 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index cff3a5df0b90..9a0a765a8732 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -99,25 +99,6 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) ib_drain_sq(ia->ri_id->qp); } -/** - * rpcrdma_disconnect_worker - Force a disconnect - * @work: endpoint to be disconnected - * - * Provider callbacks can possibly run in an IRQ context. This function - * is invoked in a worker thread to guarantee that disconnect wake-up - * calls are always done in process context. - */ -static void -rpcrdma_disconnect_worker(struct work_struct *work) -{ - struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep, - rep_disconnect_worker.work); - struct rpcrdma_xprt *r_xprt = - container_of(ep, struct rpcrdma_xprt, rx_ep); - - xprt_force_disconnect(&r_xprt->rx_xprt); -} - /** * rpcrdma_qp_event_handler - Handle one QP event (error notification) * @event: details of the event @@ -134,15 +115,6 @@ rpcrdma_qp_event_handler(struct ib_event *event, void *context) rx_ep); trace_xprtrdma_qp_event(r_xprt, event); - pr_err("rpcrdma: %s on device %s connected to %s:%s\n", - ib_event_msg(event->event), event->device->name, - rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt)); - - if (ep->rep_connected == 1) { - ep->rep_connected = -EIO; - schedule_delayed_work(&ep->rep_disconnect_worker, 0); - wake_up_all(&ep->rep_connect_wait); - } } /** @@ -571,8 +543,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, cdata->max_requests >> 2); ep->rep_send_count = ep->rep_send_batch; init_waitqueue_head(&ep->rep_connect_wait); - INIT_DELAYED_WORK(&ep->rep_disconnect_worker, - 
rpcrdma_disconnect_worker); ep->rep_receive_count = 0; sendcq = ib_alloc_cq(ia->ri_device, NULL, @@ -646,8 +616,6 @@ out1: void rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { - cancel_delayed_work_sync(&ep->rep_disconnect_worker); - if (ia->ri_id && ia->ri_id->qp) { rpcrdma_ep_disconnect(ep, ia); rdma_destroy_qp(ia->ri_id); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 3f198cde41e3..7c1b5191a5fe 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -101,7 +101,6 @@ struct rpcrdma_ep { wait_queue_head_t rep_connect_wait; struct rpcrdma_connect_private rep_cm_private; struct rdma_conn_param rep_remote_cma; - struct delayed_work rep_disconnect_worker; int rep_receive_count; }; -- cgit v1.2.2 From 0c0829bcf51aef713806e49b8ea2bac7962f54e2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:58:40 -0500 Subject: xprtrdma: Don't wake pending tasks until disconnect is done Transport disconnect processing does a "wake pending tasks" at various points. Suppose an RPC Reply is being processed. The RPC task that Reply goes with is waiting on the pending queue. If a disconnect wake-up happens before reply processing is done, that reply, even if it is good, is thrown away, and the RPC has to be sent again. This window apparently does not exist for socket transports because there is a lock held while a reply is being received which prevents the wake-up call until after reply processing is done. To resolve this, all RPC replies being processed on an RPC-over-RDMA transport have to complete before pending tasks are awoken due to a transport disconnect. Callers that already hold the transport write lock may invoke ->ops->close directly. Others use a generic helper that schedules a close when the write lock can be taken safely. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 13 +++++++------ net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 8 +++++--- net/sunrpc/xprtrdma/transport.c | 17 ++++++++++------- net/sunrpc/xprtrdma/verbs.c | 1 - net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 5 files changed, 23 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 2cb07a313b3d..79a55fc540a6 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -193,14 +193,15 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) */ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) { - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); + struct rpc_xprt *xprt = rqst->rq_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); int rc; - if (!xprt_connected(rqst->rq_xprt)) - goto drop_connection; + if (!xprt_connected(xprt)) + return -ENOTCONN; - if (!xprt_request_get_cong(rqst->rq_xprt, rqst)) + if (!xprt_request_get_cong(xprt, rqst)) return -EBADSLT; rc = rpcrdma_bc_marshal_reply(rqst); @@ -215,7 +216,7 @@ failed_marshal: if (rc != -ENOTCONN) return rc; drop_connection: - xprt_disconnect_done(rqst->rq_xprt); + xprt_rdma_close(xprt); return -ENOTCONN; } @@ -338,7 +339,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, out_overflow: pr_warn("RPC/RDMA backchannel overflow\n"); - xprt_disconnect_done(xprt); + xprt_force_disconnect(xprt); /* This receive buffer gets reposted automatically * when the connection is re-established. 
*/ diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index f3c147d70286..b908f2ca08fd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -200,11 +200,10 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) svc_rdma_send_ctxt_put(rdma, ctxt); goto drop_connection; } - return rc; + return 0; drop_connection: dprintk("svcrdma: failed to send bc call\n"); - xprt_disconnect_done(xprt); return -ENOTCONN; } @@ -225,8 +224,11 @@ xprt_rdma_bc_send_request(struct rpc_rqst *rqst) ret = -ENOTCONN; rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); - if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) + if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) { ret = rpcrdma_bc_send_request(rdma, rqst); + if (ret == -ENOTCONN) + svc_close_xprt(sxprt); + } mutex_unlock(&sxprt->xpt_mutex); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 91c476a8f51c..134aae2ee779 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -437,8 +437,7 @@ out1: * Caller holds @xprt's send lock to prevent activity on this * transport while the connection is torn down. 
*/ -static void -xprt_rdma_close(struct rpc_xprt *xprt) +void xprt_rdma_close(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_ep *ep = &r_xprt->rx_ep; @@ -453,13 +452,13 @@ xprt_rdma_close(struct rpc_xprt *xprt) if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { rpcrdma_ia_remove(ia); - return; + goto out; } + if (ep->rep_connected == -ENODEV) return; if (ep->rep_connected > 0) xprt->reestablish_timeout = 0; - xprt_disconnect_done(xprt); rpcrdma_ep_disconnect(ep, ia); /* Prepare @xprt for the next connection by reinitializing @@ -467,6 +466,10 @@ xprt_rdma_close(struct rpc_xprt *xprt) */ r_xprt->rx_buf.rb_credits = 1; xprt->cwnd = RPC_CWNDSHIFT; + +out: + ++xprt->connect_cookie; + xprt_disconnect_done(xprt); } /** @@ -717,7 +720,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst) #endif /* CONFIG_SUNRPC_BACKCHANNEL */ if (!xprt_connected(xprt)) - goto drop_connection; + return -ENOTCONN; if (!xprt_request_get_cong(xprt, rqst)) return -EBADSLT; @@ -749,8 +752,8 @@ failed_marshal: if (rc != -ENOTCONN) return rc; drop_connection: - xprt_disconnect_done(xprt); - return -ENOTCONN; /* implies disconnect */ + xprt_rdma_close(xprt); + return -ENOTCONN; } void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 9a0a765a8732..29798b65a40c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -280,7 +280,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->rep_connected = -EAGAIN; goto disconnected; case RDMA_CM_EVENT_DISCONNECTED: - ++xprt->connect_cookie; ep->rep_connected = -ECONNABORTED; disconnected: xprt_force_disconnect(xprt); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 7c1b5191a5fe..99b7f8ea66b0 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -647,6 +647,7 @@ static inline void 
rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) extern unsigned int xprt_rdma_max_inline_read; void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); void xprt_rdma_free_addresses(struct rpc_xprt *xprt); +void xprt_rdma_close(struct rpc_xprt *xprt); void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); int xprt_rdma_init(void); void xprt_rdma_cleanup(void); -- cgit v1.2.2 From 6946f82380a83acb7023fab9d7033e1f016cb818 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:58:45 -0500 Subject: xprtrdma: Fix ri_max_segs and the result of ro_maxpages With certain combinations of krb5i/p, MR size, and r/wsize, I/O can fail with EMSGSIZE. This is because the calculated value of ri_max_segs (the max number of MRs per RPC) exceeded RPCRDMA_MAX_HDR_SEGS, which caused Read or Write list encoding to walk off the end of the transport header. Once that was addressed, the ro_maxpages result has to be corrected to account for the number of MRs needed for Reply chunks, which is 2 MRs smaller than a normal Read or Write chunk. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 7 +++++-- net/sunrpc/xprtrdma/frwr_ops.c | 7 +++++-- net/sunrpc/xprtrdma/transport.c | 6 ++++-- 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 7f5632cd5a48..dd1e91bd8ef8 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -176,7 +176,10 @@ fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES); - ia->ri_max_segs += 2; /* segments for head and tail buffers */ + /* Reply chunks require segments for head and tail buffers */ + ia->ri_max_segs += 2; + if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS) + ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS; return 0; } @@ -186,7 +189,7 @@ static size_t fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) { return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES); + (r_xprt->rx_ia.ri_max_segs - 2) * RPCRDMA_MAX_FMR_SGES); } /* Use the ib_map_phys_fmr() verb to register a memory region diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 27222c034ad2..f587e445d811 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -244,7 +244,10 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / ia->ri_max_frwr_depth); - ia->ri_max_segs += 2; /* segments for head and tail buffers */ + /* Reply chunks require segments for head and tail buffers */ + ia->ri_max_segs += 2; + if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS) + ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS; return 0; } @@ -257,7 +260,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ia *ia = &r_xprt->rx_ia; return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth); + 
(ia->ri_max_segs - 2) * ia->ri_max_frwr_depth); } static void diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 134aae2ee779..d3f26c570067 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -703,8 +703,10 @@ xprt_rdma_free(struct rpc_task *task) * %-ENOTCONN if the caller should reconnect and call again * %-EAGAIN if the caller should call again * %-ENOBUFS if the caller should call again after a delay - * %-EIO if a permanent error occurred and the request was not - * sent. Do not try to send this message again. + * %-EMSGSIZE if encoding ran out of buffer space. The request + * was not sent. Do not try to send this message again. + * %-EIO if an I/O error occurred. The request was not sent. + * Do not try to send this message again. */ static int xprt_rdma_send_request(struct rpc_rqst *rqst) -- cgit v1.2.2 From a78868497c2e4858e2c73818eed7b4877ab2316d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:58:51 -0500 Subject: xprtrdma: Reduce max_frwr_depth Some devices advertise a large max_fast_reg_page_list_len capability, but perform optimally when MRs are significantly smaller than that depth -- probably when the MR itself is no larger than a page. By default, the RDMA R/W core API uses max_sge_rd as the maximum page depth for MRs. For some devices, the value of max_sge_rd is 1, which is also not optimal. Thus, when max_sge_rd is larger than 1, use that value. Otherwise use the value of the max_fast_reg_page_list_len attribute. I've tested this with CX-3 Pro, FastLinq, and CX-5 devices. It reproducibly improves the throughput of large I/Os by several percent. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index f587e445d811..16976b031865 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -193,10 +193,17 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; - ia->ri_max_frwr_depth = - min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - attrs->max_fast_reg_page_list_len); - dprintk("RPC: %s: device's max FR page list len = %u\n", + /* Quirk: Some devices advertise a large max_fast_reg_page_list_len + * capability, but perform optimally when the MRs are not larger + * than a page. + */ + if (attrs->max_sge_rd > 1) + ia->ri_max_frwr_depth = attrs->max_sge_rd; + else + ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len; + if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS) + ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS; + dprintk("RPC: %s: max FR page list depth = %u\n", __func__, ia->ri_max_frwr_depth); /* Add room for frwr register and invalidate WRs. -- cgit v1.2.2 From ba69cd122ece618eba47589764c7f9c1f57aed95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:58:56 -0500 Subject: xprtrdma: Remove support for FMR memory registration FMR is not supported on most recent RDMA devices. It is also less secure than FRWR because an FMR memory registration can expose adjacent bytes to remote reading or writing. As discussed during the RDMA BoF at LPC 2018, it is time to remove support for FMR in the NFS/RDMA client stack. Note that NFS/RDMA server-side uses either local memory registration or FRWR. FMR is not used. 
There are a few Infiniband/RoCE devices in the kernel tree that do not appear to support MEM_MGT_EXTENSIONS (FRWR), and therefore will not support client-side NFS/RDMA after this patch. These are: - mthca - qib - hns (RoCE) Users of these devices can use NFS/TCP on IPoIB instead. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/Makefile | 3 +- net/sunrpc/xprtrdma/fmr_ops.c | 340 ---------------------------------------- net/sunrpc/xprtrdma/verbs.c | 6 - net/sunrpc/xprtrdma/xprt_rdma.h | 12 +- 4 files changed, 2 insertions(+), 359 deletions(-) delete mode 100644 net/sunrpc/xprtrdma/fmr_ops.c (limited to 'net') diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 8bf19e142b6b..8ed0377d7a18 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -1,8 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o -rpcrdma-y := transport.o rpc_rdma.o verbs.o \ - fmr_ops.o frwr_ops.o \ +rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \ svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \ module.o diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c deleted file mode 100644 index dd1e91bd8ef8..000000000000 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ /dev/null @@ -1,340 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2015, 2017 Oracle. All rights reserved. - * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. - */ - -/* Lightweight memory registration using Fast Memory Regions (FMR). - * Referred to sometimes as MTHCAFMR mode. - * - * FMR uses synchronous memory registration and deregistration. - * FMR registration is known to be fast, but FMR deregistration - * can take tens of usecs to complete. - */ - -/* Normal operation - * - * A Memory Region is prepared for RDMA READ or WRITE using the - * ib_map_phys_fmr verb (fmr_op_map). 
When the RDMA operation is - * finished, the Memory Region is unmapped using the ib_unmap_fmr - * verb (fmr_op_unmap). - */ - -#include - -#include "xprt_rdma.h" -#include - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - -/* Maximum scatter/gather per FMR */ -#define RPCRDMA_MAX_FMR_SGES (64) - -/* Access mode of externally registered pages */ -enum { - RPCRDMA_FMR_ACCESS_FLAGS = IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ, -}; - -bool -fmr_is_supported(struct rpcrdma_ia *ia) -{ - if (!ia->ri_device->alloc_fmr) { - pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n", - ia->ri_device->name); - return false; - } - return true; -} - -static void -__fmr_unmap(struct rpcrdma_mr *mr) -{ - LIST_HEAD(l); - int rc; - - list_add(&mr->fmr.fm_mr->list, &l); - rc = ib_unmap_fmr(&l); - list_del(&mr->fmr.fm_mr->list); - if (rc) - pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", - mr, rc); -} - -/* Release an MR. - */ -static void -fmr_op_release_mr(struct rpcrdma_mr *mr) -{ - int rc; - - kfree(mr->fmr.fm_physaddrs); - kfree(mr->mr_sg); - - /* In case this one was left mapped, try to unmap it - * to prevent dealloc_fmr from failing with EBUSY - */ - __fmr_unmap(mr); - - rc = ib_dealloc_fmr(mr->fmr.fm_mr); - if (rc) - pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", - mr, rc); - - kfree(mr); -} - -/* MRs are dynamically allocated, so simply clean up and release the MR. - * A replacement MR will subsequently be allocated on demand. 
- */ -static void -fmr_mr_recycle_worker(struct work_struct *work) -{ - struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle); - struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - - trace_xprtrdma_mr_recycle(mr); - - trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mr->mr_sg, mr->mr_nents, mr->mr_dir); - - spin_lock(&r_xprt->rx_buf.rb_mrlock); - list_del(&mr->mr_all); - r_xprt->rx_stats.mrs_recycled++; - spin_unlock(&r_xprt->rx_buf.rb_mrlock); - fmr_op_release_mr(mr); -} - -static int -fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) -{ - static struct ib_fmr_attr fmr_attr = { - .max_pages = RPCRDMA_MAX_FMR_SGES, - .max_maps = 1, - .page_shift = PAGE_SHIFT - }; - - mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, - sizeof(u64), GFP_KERNEL); - if (!mr->fmr.fm_physaddrs) - goto out_free; - - mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, - sizeof(*mr->mr_sg), GFP_KERNEL); - if (!mr->mr_sg) - goto out_free; - - sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES); - - mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, - &fmr_attr); - if (IS_ERR(mr->fmr.fm_mr)) - goto out_fmr_err; - - INIT_LIST_HEAD(&mr->mr_list); - INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker); - return 0; - -out_fmr_err: - dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, - PTR_ERR(mr->fmr.fm_mr)); - -out_free: - kfree(mr->mr_sg); - kfree(mr->fmr.fm_physaddrs); - return -ENOMEM; -} - -/* On success, sets: - * ep->rep_attr.cap.max_send_wr - * ep->rep_attr.cap.max_recv_wr - * cdata->max_requests - * ia->ri_max_segs - */ -static int -fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, - struct rpcrdma_create_data_internal *cdata) -{ - int max_qp_wr; - - max_qp_wr = ia->ri_device->attrs.max_qp_wr; - max_qp_wr -= RPCRDMA_BACKWARD_WRS; - max_qp_wr -= 1; - if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) - return -ENOMEM; - if (cdata->max_requests > max_qp_wr) - cdata->max_requests = max_qp_wr; - ep->rep_attr.cap.max_send_wr = 
cdata->max_requests; - ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; - ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ - ep->rep_attr.cap.max_recv_wr = cdata->max_requests; - ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; - ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ - - ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / - RPCRDMA_MAX_FMR_SGES); - /* Reply chunks require segments for head and tail buffers */ - ia->ri_max_segs += 2; - if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS) - ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS; - return 0; -} - -/* FMR mode conveys up to 64 pages of payload per chunk segment. - */ -static size_t -fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) -{ - return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - (r_xprt->rx_ia.ri_max_segs - 2) * RPCRDMA_MAX_FMR_SGES); -} - -/* Use the ib_map_phys_fmr() verb to register a memory region - * for remote access via RDMA READ or RDMA WRITE. - */ -static struct rpcrdma_mr_seg * -fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, struct rpcrdma_mr **out) -{ - struct rpcrdma_mr_seg *seg1 = seg; - int len, pageoff, i, rc; - struct rpcrdma_mr *mr; - u64 *dma_pages; - - mr = rpcrdma_mr_get(r_xprt); - if (!mr) - return ERR_PTR(-EAGAIN); - - pageoff = offset_in_page(seg1->mr_offset); - seg1->mr_offset -= pageoff; /* start of page */ - seg1->mr_len += pageoff; - len = -pageoff; - if (nsegs > RPCRDMA_MAX_FMR_SGES) - nsegs = RPCRDMA_MAX_FMR_SGES; - for (i = 0; i < nsegs;) { - if (seg->mr_page) - sg_set_page(&mr->mr_sg[i], - seg->mr_page, - seg->mr_len, - offset_in_page(seg->mr_offset)); - else - sg_set_buf(&mr->mr_sg[i], seg->mr_offset, - seg->mr_len); - len += seg->mr_len; - ++seg; - ++i; - /* Check for holes */ - if ((i < nsegs && offset_in_page(seg->mr_offset)) || - offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) - break; - } - mr->mr_dir = rpcrdma_data_dir(writing); - - mr->mr_nents = 
ib_dma_map_sg(r_xprt->rx_ia.ri_device, - mr->mr_sg, i, mr->mr_dir); - if (!mr->mr_nents) - goto out_dmamap_err; - trace_xprtrdma_mr_map(mr); - - for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++) - dma_pages[i] = sg_dma_address(&mr->mr_sg[i]); - rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents, - dma_pages[0]); - if (rc) - goto out_maperr; - - mr->mr_handle = mr->fmr.fm_mr->rkey; - mr->mr_length = len; - mr->mr_offset = dma_pages[0] + pageoff; - - *out = mr; - return seg; - -out_dmamap_err: - pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", - mr->mr_sg, i); - rpcrdma_mr_put(mr); - return ERR_PTR(-EIO); - -out_maperr: - pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", - len, (unsigned long long)dma_pages[0], - pageoff, mr->mr_nents, rc); - rpcrdma_mr_unmap_and_put(mr); - return ERR_PTR(-EIO); -} - -/* Post Send WR containing the RPC Call message. - */ -static int -fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) -{ - return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL); -} - -/* Invalidate all memory regions that were registered for "req". - * - * Sleeps until it is safe for the host CPU to access the - * previously mapped memory regions. - * - * Caller ensures that @mrs is not empty before the call. This - * function empties the list. - */ -static void -fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) -{ - struct rpcrdma_mr *mr; - LIST_HEAD(unmap_list); - int rc; - - /* ORDER: Invalidate all of the req's MRs first - * - * ib_unmap_fmr() is slow, so use a single call instead - * of one call per mapped FMR. 
- */ - list_for_each_entry(mr, mrs, mr_list) { - dprintk("RPC: %s: unmapping fmr %p\n", - __func__, &mr->fmr); - trace_xprtrdma_mr_localinv(mr); - list_add_tail(&mr->fmr.fm_mr->list, &unmap_list); - } - r_xprt->rx_stats.local_inv_needed++; - rc = ib_unmap_fmr(&unmap_list); - if (rc) - goto out_release; - - /* ORDER: Now DMA unmap all of the req's MRs, and return - * them to the free MW list. - */ - while (!list_empty(mrs)) { - mr = rpcrdma_mr_pop(mrs); - list_del(&mr->fmr.fm_mr->list); - rpcrdma_mr_unmap_and_put(mr); - } - - return; - -out_release: - pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); - - while (!list_empty(mrs)) { - mr = rpcrdma_mr_pop(mrs); - list_del(&mr->fmr.fm_mr->list); - rpcrdma_mr_recycle(mr); - } -} - -const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { - .ro_map = fmr_op_map, - .ro_send = fmr_op_send, - .ro_unmap_sync = fmr_op_unmap_sync, - .ro_open = fmr_op_open, - .ro_maxpages = fmr_op_maxpages, - .ro_init_mr = fmr_op_init_mr, - .ro_release_mr = fmr_op_release_mr, - .ro_displayname = "fmr", - .ro_send_w_inv_ok = 0, -}; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 29798b65a40c..ef1759682604 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -397,12 +397,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) break; } /*FALLTHROUGH*/ - case RPCRDMA_MTHCAFMR: - if (fmr_is_supported(ia)) { - ia->ri_ops = &rpcrdma_fmr_memreg_ops; - break; - } - /*FALLTHROUGH*/ default: pr_err("rpcrdma: Device %s does not support memreg mode %d\n", ia->ri_device->name, xprt_rdma_memreg_strategy); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 99b7f8ea66b0..84f7bbecdd86 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -262,20 +262,12 @@ struct rpcrdma_frwr { }; }; -struct rpcrdma_fmr { - struct ib_fmr *fm_mr; - u64 *fm_physaddrs; -}; - struct rpcrdma_mr { struct list_head mr_list; struct scatterlist *mr_sg; int mr_nents; enum 
dma_data_direction mr_dir; - union { - struct rpcrdma_fmr fmr; - struct rpcrdma_frwr frwr; - }; + struct rpcrdma_frwr frwr; struct rpcrdma_xprt *mr_xprt; u32 mr_handle; u32 mr_length; @@ -490,7 +482,6 @@ struct rpcrdma_memreg_ops { const int ro_send_w_inv_ok; }; -extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; /* @@ -545,7 +536,6 @@ int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); void rpcrdma_ia_remove(struct rpcrdma_ia *ia); void rpcrdma_ia_close(struct rpcrdma_ia *); bool frwr_is_supported(struct rpcrdma_ia *); -bool fmr_is_supported(struct rpcrdma_ia *); /* * Endpoint calls - xprtrdma/verbs.c -- cgit v1.2.2 From 5f62412be3ff738c9575b28c1f4a9b010ac22316 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:01 -0500 Subject: xprtrdma: Remove rpcrdma_memreg_ops Clean up: Now that there is only FRWR, there is no need for a memory registration switch. The indirect calls to the memreg operations can be replaced with faster direct calls. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 131 ++++++++++++++++++++++++++-------------- net/sunrpc/xprtrdma/rpc_rdma.c | 14 ++--- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 22 +++---- net/sunrpc/xprtrdma/xprt_rdma.h | 48 ++++++--------- 5 files changed, 116 insertions(+), 101 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 16976b031865..fb0944d854c9 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -15,21 +15,21 @@ /* Normal operation * * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG - * Work Request (frwr_op_map). When the RDMA operation is finished, this + * Work Request (frwr_map). When the RDMA operation is finished, this * Memory Region is invalidated using a LOCAL_INV Work Request - * (frwr_op_unmap_sync). + * (frwr_unmap_sync). 
* * Typically these Work Requests are not signaled, and neither are RDMA * SEND Work Requests (with the exception of signaling occasionally to * prevent provider work queue overflows). This greatly reduces HCA * interrupt workload. * - * As an optimization, frwr_op_unmap marks MRs INVALID before the + * As an optimization, frwr_unmap marks MRs INVALID before the * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on * rb_mrs immediately so that no work (like managing a linked list * under a spinlock) is needed in the completion upcall. * - * But this means that frwr_op_map() can occasionally encounter an MR + * But this means that frwr_map() can occasionally encounter an MR * that is INVALID but the LOCAL_INV WR has not completed. Work Queue * ordering prevents a subsequent FAST_REG WR from executing against * that MR while it is still being invalidated. @@ -57,14 +57,14 @@ * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR * state, and the pending WR was flushed. * - * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered + * When frwr_map encounters FLUSHED and VALID MRs, they are recovered * with ib_dereg_mr and then are re-initialized. Because MR recovery * allocates fresh resources, it is deferred to a workqueue, and the * recovered MRs are placed back on the rb_mrs list when recovery is - * complete. frwr_op_map allocates another MR for the current RPC while + * complete. frwr_map allocates another MR for the current RPC while * the broken MR is reset. * - * To ensure that frwr_op_map doesn't encounter an MR that is marked + * To ensure that frwr_map doesn't encounter an MR that is marked * INVALID but that is about to be flushed due to a previous transport * disconnect, the transport connect worker attempts to drain all * pending send queue WRs before the transport is reconnected. 
@@ -80,8 +80,13 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif -bool -frwr_is_supported(struct rpcrdma_ia *ia) +/** + * frwr_is_supported - Check if device supports FRWR + * @ia: interface adapter to check + * + * Returns true if device supports FRWR, otherwise false + */ +bool frwr_is_supported(struct rpcrdma_ia *ia) { struct ib_device_attr *attrs = &ia->ri_device->attrs; @@ -97,8 +102,12 @@ out_not_supported: return false; } -static void -frwr_op_release_mr(struct rpcrdma_mr *mr) +/** + * frwr_release_mr - Destroy one MR + * @mr: MR allocated by frwr_init_mr + * + */ +void frwr_release_mr(struct rpcrdma_mr *mr) { int rc; @@ -132,11 +141,19 @@ frwr_mr_recycle_worker(struct work_struct *work) list_del(&mr->mr_all); r_xprt->rx_stats.mrs_recycled++; spin_unlock(&r_xprt->rx_buf.rb_mrlock); - frwr_op_release_mr(mr); + + frwr_release_mr(mr); } -static int -frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) +/** + * frwr_init_mr - Initialize one MR + * @ia: interface adapter + * @mr: generic MR to prepare for FRWR + * + * Returns zero if successful. Otherwise a negative errno + * is returned. + */ +int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) { unsigned int depth = ia->ri_max_frwr_depth; struct rpcrdma_frwr *frwr = &mr->frwr; @@ -172,7 +189,13 @@ out_list_err: return rc; } -/* On success, sets: +/** + * frwr_open - Prepare an endpoint for use with FRWR + * @ia: interface adapter this endpoint will use + * @ep: endpoint to prepare + * @cdata: transport parameters + * + * On success, sets: * ep->rep_attr.cap.max_send_wr * ep->rep_attr.cap.max_recv_wr * cdata->max_requests @@ -181,10 +204,11 @@ out_list_err: * And these FRWR-related fields: * ia->ri_max_frwr_depth * ia->ri_mrtype + * + * On failure, a negative errno is returned. 
*/ -static int -frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, - struct rpcrdma_create_data_internal *cdata) +int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, + struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr *attrs = &ia->ri_device->attrs; int max_qp_wr, depth, delta; @@ -258,11 +282,16 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, return 0; } -/* FRWR mode conveys a list of pages per chunk segment. The +/** + * frwr_maxpages - Compute size of largest payload + * @r_xprt: transport + * + * Returns maximum size of an RPC message, in pages. + * + * FRWR mode conveys a list of pages per chunk segment. The * maximum length of that list is the FRWR page list depth. */ -static size_t -frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) +size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; @@ -344,12 +373,24 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) trace_xprtrdma_wc_li_wake(wc, frwr); } -/* Post a REG_MR Work Request to register a memory region +/** + * frwr_map - Register a memory region + * @r_xprt: controlling transport + * @seg: memory region co-ordinates + * @nsegs: number of segments remaining + * @writing: true when RDMA Write will be used + * @out: initialized MR + * + * Prepare a REG_MR Work Request to register a memory region * for remote access via RDMA READ or RDMA WRITE. + * + * Returns the next segment or a negative errno pointer. + * On success, the prepared MR is planted in @out. 
*/ -static struct rpcrdma_mr_seg * -frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, struct rpcrdma_mr **out) +struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_mr_seg *seg, + int nsegs, bool writing, + struct rpcrdma_mr **out) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; @@ -434,14 +475,18 @@ out_mapmr_err: return ERR_PTR(-EIO); } -/* Post Send WR containing the RPC Call message. +/** + * frwr_send - post Send WR containing the RPC Call message + * @ia: interface adapter + * @req: Prepared RPC Call * * For FRMR, chain any FastReg WRs to the Send WR. Only a * single ib_post_send call is needed to register memory * and then post the Send WR. + * + * Returns the result of ib_post_send. */ -static int -frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) { struct ib_send_wr *post_wr; struct rpcrdma_mr *mr; @@ -468,10 +513,13 @@ frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) return ib_post_send(ia->ri_id->qp, post_wr, NULL); } -/* Handle a remotely invalidated mr on the @mrs list +/** + * frwr_reminv - handle a remotely invalidated mr on the @mrs list + * @rep: Received reply + * @mrs: list of MRs to check + * */ -static void -frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) +void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) { struct rpcrdma_mr *mr; @@ -485,7 +533,10 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) } } -/* Invalidate all memory regions that were registered for "req". +/** + * frwr_unmap_sync - invalidate memory regions that were registered for @req + * @r_xprt: controlling transport + * @mrs: list of MRs to process * * Sleeps until it is safe for the host CPU to access the * previously mapped memory regions. 
@@ -493,8 +544,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) * Caller ensures that @mrs is not empty before the call. This * function empties the list. */ -static void -frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) +void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) { struct ib_send_wr *first, **prev, *last; const struct ib_send_wr *bad_wr; @@ -577,16 +627,3 @@ out_release: rpcrdma_mr_recycle(mr); } } - -const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { - .ro_map = frwr_op_map, - .ro_send = frwr_op_send, - .ro_reminv = frwr_op_reminv, - .ro_unmap_sync = frwr_op_unmap_sync, - .ro_open = frwr_op_open, - .ro_maxpages = frwr_op_maxpages, - .ro_init_mr = frwr_op_init_mr, - .ro_release_mr = frwr_op_release_mr, - .ro_displayname = "frwr", - .ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK, -}; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 5738c9f02144..2a2023d320e7 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -356,8 +356,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, return nsegs; do { - seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - false, &mr); + seg = frwr_map(r_xprt, seg, nsegs, false, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); rpcrdma_mr_push(mr, &req->rl_registered); @@ -414,8 +413,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, nchunks = 0; do { - seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - true, &mr); + seg = frwr_map(r_xprt, seg, nsegs, true, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); rpcrdma_mr_push(mr, &req->rl_registered); @@ -472,8 +470,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, nchunks = 0; do { - seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - true, &mr); + seg = frwr_map(r_xprt, seg, nsegs, true, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); 
rpcrdma_mr_push(mr, &req->rl_registered); @@ -1262,8 +1259,7 @@ void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * RPC has relinquished all its Send Queue entries. */ if (!list_empty(&req->rl_registered)) - r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, - &req->rl_registered); + frwr_unmap_sync(r_xprt, &req->rl_registered); /* Ensure that any DMA mapped pages associated with * the Send of the RPC Call have been unmapped before @@ -1292,7 +1288,7 @@ void rpcrdma_deferred_completion(struct work_struct *work) trace_xprtrdma_defer_cmp(rep); if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) - r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered); + frwr_reminv(rep, &req->rl_registered); rpcrdma_release_rqst(r_xprt, req); rpcrdma_complete_rqst(rep); } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index d3f26c570067..89e11f95c747 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -399,7 +399,7 @@ xprt_setup_rdma(struct xprt_create *args) INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, xprt_rdma_connect_worker); - xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); + xprt->max_payload = frwr_maxpages(new_xprt); if (xprt->max_payload == 0) goto out4; xprt->max_payload <<= PAGE_SHIFT; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index ef1759682604..0cce7b23dff4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -289,10 +289,9 @@ disconnected: break; } - dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__, + dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__, rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), - ia->ri_device->name, ia->ri_ops->ro_displayname, - rdma_event_msg(event->event)); + ia->ri_device->name, rdma_event_msg(event->event)); return 0; } @@ -392,10 +391,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) switch (xprt_rdma_memreg_strategy) { case RPCRDMA_FRWR: - if (frwr_is_supported(ia)) { - ia->ri_ops 
= &rpcrdma_frwr_memreg_ops; + if (frwr_is_supported(ia)) break; - } /*FALLTHROUGH*/ default: pr_err("rpcrdma: Device %s does not support memreg mode %d\n", @@ -509,7 +506,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, } ia->ri_max_send_sges = max_sge; - rc = ia->ri_ops->ro_open(ia, ep, cdata); + rc = frwr_open(ia, ep, cdata); if (rc) return rc; @@ -567,7 +564,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Prepare RDMA-CM private message */ pmsg->cp_magic = rpcrdma_cmp_magic; pmsg->cp_version = RPCRDMA_CMP_VERSION; - pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok; + pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); ep->rep_remote_cma.private_data = pmsg; @@ -991,7 +988,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) if (!mr) break; - rc = ia->ri_ops->ro_init_mr(ia, mr); + rc = frwr_init_mr(ia, mr); if (rc) { kfree(mr); break; @@ -1171,7 +1168,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) { struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); - struct rpcrdma_ia *ia = rdmab_to_ia(buf); struct rpcrdma_mr *mr; unsigned int count; @@ -1187,7 +1183,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) if (!list_empty(&mr->mr_list)) list_del(&mr->mr_list); - ia->ri_ops->ro_release_mr(mr); + frwr_release_mr(mr); count++; spin_lock(&buf->rb_mrlock); } @@ -1381,7 +1377,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) * * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for * receiving the payload of RDMA RECV operations. During Long Calls - * or Replies they may be registered externally via ro_map. + * or Replies they may be registered externally via frwr_map. 
*/ struct rpcrdma_regbuf * rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, @@ -1472,7 +1468,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, --ep->rep_send_count; } - rc = ia->ri_ops->ro_send(ia, req); + rc = frwr_send(ia, req); trace_xprtrdma_post_send(req, rc); if (rc) return -ENOTCONN; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 84f7bbecdd86..c42a0036a0bd 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -66,7 +66,6 @@ * Interface Adapter -- one per transport instance */ struct rpcrdma_ia { - const struct rpcrdma_memreg_ops *ri_ops; struct ib_device *ri_device; struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; @@ -406,7 +405,6 @@ struct rpcrdma_buffer { struct workqueue_struct *rb_completion_wq; struct delayed_work rb_refresh_worker; }; -#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) /* rb_flags */ enum { @@ -456,34 +454,6 @@ struct rpcrdma_stats { unsigned long bcall_count; }; -/* - * Per-registration mode operations - */ -struct rpcrdma_xprt; -struct rpcrdma_memreg_ops { - struct rpcrdma_mr_seg * - (*ro_map)(struct rpcrdma_xprt *, - struct rpcrdma_mr_seg *, int, bool, - struct rpcrdma_mr **); - int (*ro_send)(struct rpcrdma_ia *ia, - struct rpcrdma_req *req); - void (*ro_reminv)(struct rpcrdma_rep *rep, - struct list_head *mrs); - void (*ro_unmap_sync)(struct rpcrdma_xprt *, - struct list_head *); - int (*ro_open)(struct rpcrdma_ia *, - struct rpcrdma_ep *, - struct rpcrdma_create_data_internal *); - size_t (*ro_maxpages)(struct rpcrdma_xprt *); - int (*ro_init_mr)(struct rpcrdma_ia *, - struct rpcrdma_mr *); - void (*ro_release_mr)(struct rpcrdma_mr *mr); - const char *ro_displayname; - const int ro_send_w_inv_ok; -}; - -extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; - /* * RPCRDMA transport -- encapsulates the structures above for * integration with RPC. 
@@ -535,7 +505,6 @@ extern unsigned int xprt_rdma_memreg_strategy; int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); void rpcrdma_ia_remove(struct rpcrdma_ia *ia); void rpcrdma_ia_close(struct rpcrdma_ia *); -bool frwr_is_supported(struct rpcrdma_ia *); /* * Endpoint calls - xprtrdma/verbs.c @@ -601,6 +570,23 @@ rpcrdma_data_dir(bool writing) return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; } +/* Memory registration calls xprtrdma/frwr_ops.c + */ +bool frwr_is_supported(struct rpcrdma_ia *); +int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, + struct rpcrdma_create_data_internal *cdata); +int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); +void frwr_release_mr(struct rpcrdma_mr *mr); +size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt); +struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_mr_seg *seg, + int nsegs, bool writing, + struct rpcrdma_mr **mr); +int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req); +void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); +void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, + struct list_head *mrs); + /* * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c */ -- cgit v1.2.2 From 0a93fbcb16e6b1f36780f9a20d6427f26cec761d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:07 -0500 Subject: xprtrdma: Plant XID in on-the-wire RDMA offset (FRWR) Place the associated RPC transaction's XID in the upper 32 bits of each RDMA segment's rdma_offset field. There are two reasons to do this: - The R_key only has 8 bits that are different from registration to registration. The XID adds more uniqueness to each RDMA segment to reduce the likelihood of a software bug on the server reading from or writing into memory it's not supposed to. - On-the-wire RDMA Read and Write requests do not otherwise carry any identifier that matches them up to an RPC. The XID in the upper 32 bits will act as an eye-catcher in network captures. 
Suggested-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 5 ++++- net/sunrpc/xprtrdma/rpc_rdma.c | 6 +++--- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index fb0944d854c9..97f88bbc9047 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -379,6 +379,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) * @seg: memory region co-ordinates * @nsegs: number of segments remaining * @writing: true when RDMA Write will be used + * @xid: XID of RPC using the registered memory * @out: initialized MR * * Prepare a REG_MR Work Request to register a memory region @@ -389,7 +390,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) */ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, + int nsegs, bool writing, u32 xid, struct rpcrdma_mr **out) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; @@ -444,6 +445,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, if (unlikely(n != mr->mr_nents)) goto out_mapmr_err; + ibmr->iova &= 0x00000000ffffffff; + ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32; key = (u8)(ibmr->rkey & 0x000000FF); ib_update_fast_reg_key(ibmr, ++key); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2a2023d320e7..3804fb30bdcf 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -356,7 +356,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, return nsegs; do { - seg = frwr_map(r_xprt, seg, nsegs, false, &mr); + seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); rpcrdma_mr_push(mr, &req->rl_registered); @@ -413,7 +413,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, 
nchunks = 0; do { - seg = frwr_map(r_xprt, seg, nsegs, true, &mr); + seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); rpcrdma_mr_push(mr, &req->rl_registered); @@ -470,7 +470,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, nchunks = 0; do { - seg = frwr_map(r_xprt, seg, nsegs, true, &mr); + seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); rpcrdma_mr_push(mr, &req->rl_registered); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c42a0036a0bd..ff4eab1c3bf1 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -580,7 +580,7 @@ void frwr_release_mr(struct rpcrdma_mr *mr); size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt); struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, - int nsegs, bool writing, + int nsegs, bool writing, u32 xid, struct rpcrdma_mr **mr); int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req); void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); -- cgit v1.2.2 From 15303d9ecd2f29168aea0b080bd1ec27c298da3f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:17 -0500 Subject: xprtrdma: Recognize XDRBUF_SPARSE_PAGES Commit 431f6eb3570f ("SUNRPC: Add a label for RPC calls that require allocation on receive") didn't update similar logic in rpc_rdma.c. I don't think this is a bug, per se; the commit just adds more careful checking for broken upper layer behavior. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 3804fb30bdcf..939f84adbbda 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -218,11 +218,12 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); page_base = offset_in_page(xdrbuf->page_base); while (len) { - if (unlikely(!*ppages)) { - /* XXX: Certain upper layer operations do - * not provide receive buffer pages. - */ - *ppages = alloc_page(GFP_ATOMIC); + /* ACL likes to be lazy in allocating pages - ACLs + * are small by default but can get huge. + */ + if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) { + if (!*ppages) + *ppages = alloc_page(GFP_ATOMIC); if (!*ppages) return -ENOBUFS; } -- cgit v1.2.2 From 889ee07f7ed26bb2cc525eb48f7f865bc407ef0b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:23 -0500 Subject: xprtrdma: Remove request_module from backchannel Since commit ffe1f0df5862 ("rpcrdma: Merge svcrdma and xprtrdma modules into one"), the forward and backchannel components are part of the same kernel module. A separate request_module() call in the backchannel code is no longer necessary. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 79a55fc540a6..e2704db2abcb 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -5,7 +5,6 @@ * Support for backward direction RPCs on RPC/RDMA. 
*/ -#include #include #include #include @@ -101,7 +100,6 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) goto out_free; r_xprt->rx_buf.rb_bc_srv_max_requests = reqs; - request_module("svcrdma"); trace_xprtrdma_cb_setup(r_xprt, reqs); return 0; -- cgit v1.2.2 From 236b0943d1f21335a0fc4324f3bcc455cf99dfb7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:28 -0500 Subject: xprtrdma: Expose transport header errors For better observability of parsing errors, return the error code generated in the decoders to the upper layer consumer. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 939f84adbbda..8de0b9fc975b 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1246,7 +1246,6 @@ out: out_badheader: trace_xprtrdma_reply_hdr(rep); r_xprt->rx_stats.bad_reply_count++; - status = -EIO; goto out; } -- cgit v1.2.2 From 92f4433e567a034d87e1e2c9e5402ff5f58b545b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:33 -0500 Subject: xprtrdma: Simplify locking that protects the rl_allreqs list Clean up: There's little chance of contention between the use of rb_lock and rb_reqslock, so merge the two. This avoids having to take both in some (possibly future) cases. Transport tear-down is already serialized, thus there is no need for locking at all when destroying rpcrdma_reqs. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 20 +++----------------- net/sunrpc/xprtrdma/verbs.c | 31 +++++++++++++++++-------------- net/sunrpc/xprtrdma/xprt_rdma.h | 7 +++---- 3 files changed, 23 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index e2704db2abcb..aae2eb1ea506 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -19,29 +19,16 @@ #undef RPCRDMA_BACKCHANNEL_DEBUG -static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, - struct rpc_rqst *rqst) -{ - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - - spin_lock(&buf->rb_reqslock); - list_del(&req->rl_all); - spin_unlock(&buf->rb_reqslock); - - rpcrdma_destroy_req(req); -} - static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, unsigned int count) { struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpcrdma_req *req; struct rpc_rqst *rqst; unsigned int i; for (i = 0; i < (count << 1); i++) { struct rpcrdma_regbuf *rb; - struct rpcrdma_req *req; size_t size; req = rpcrdma_create_req(r_xprt); @@ -67,7 +54,7 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt, return 0; out_fail: - rpcrdma_bc_free_rqst(r_xprt, rqst); + rpcrdma_req_destroy(req); return -ENOMEM; } @@ -225,7 +212,6 @@ drop_connection: */ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) { - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpc_rqst *rqst, *tmp; spin_lock(&xprt->bc_pa_lock); @@ -233,7 +219,7 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) list_del(&rqst->rq_bc_pa_list); spin_unlock(&xprt->bc_pa_lock); - rpcrdma_bc_free_rqst(r_xprt, rqst); + rpcrdma_req_destroy(rpcr_to_rdmar(rqst)); spin_lock(&xprt->bc_pa_lock); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 0cce7b23dff4..51e09ae1a81b 100644 --- 
a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1043,9 +1043,9 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) req->rl_buffer = buffer; INIT_LIST_HEAD(&req->rl_registered); - spin_lock(&buffer->rb_reqslock); + spin_lock(&buffer->rb_lock); list_add(&req->rl_all, &buffer->rb_allreqs); - spin_unlock(&buffer->rb_reqslock); + spin_unlock(&buffer->rb_lock); return req; } @@ -1113,7 +1113,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) INIT_LIST_HEAD(&buf->rb_send_bufs); INIT_LIST_HEAD(&buf->rb_allreqs); - spin_lock_init(&buf->rb_reqslock); for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; @@ -1154,9 +1153,18 @@ rpcrdma_destroy_rep(struct rpcrdma_rep *rep) kfree(rep); } +/** + * rpcrdma_req_destroy - Destroy an rpcrdma_req object + * @req: unused object to be destroyed + * + * This function assumes that the caller prevents concurrent device + * unload and transport tear-down. + */ void -rpcrdma_destroy_req(struct rpcrdma_req *req) +rpcrdma_req_destroy(struct rpcrdma_req *req) { + list_del(&req->rl_all); + rpcrdma_free_regbuf(req->rl_recvbuf); rpcrdma_free_regbuf(req->rl_sendbuf); rpcrdma_free_regbuf(req->rl_rdmabuf); @@ -1214,19 +1222,14 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) rpcrdma_destroy_rep(rep); } - spin_lock(&buf->rb_reqslock); - while (!list_empty(&buf->rb_allreqs)) { + while (!list_empty(&buf->rb_send_bufs)) { struct rpcrdma_req *req; - req = list_first_entry(&buf->rb_allreqs, - struct rpcrdma_req, rl_all); - list_del(&req->rl_all); - - spin_unlock(&buf->rb_reqslock); - rpcrdma_destroy_req(req); - spin_lock(&buf->rb_reqslock); + req = list_first_entry(&buf->rb_send_bufs, + struct rpcrdma_req, rl_list); + list_del(&req->rl_list); + rpcrdma_req_destroy(req); } - spin_unlock(&buf->rb_reqslock); rpcrdma_mrs_destroy(buf); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index ff4eab1c3bf1..a1cdc85898c7 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ 
b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -392,14 +392,13 @@ struct rpcrdma_buffer { spinlock_t rb_lock; /* protect buf lists */ struct list_head rb_send_bufs; struct list_head rb_recv_bufs; + struct list_head rb_allreqs; + unsigned long rb_flags; u32 rb_max_requests; u32 rb_credits; /* most recent credit grant */ u32 rb_bc_srv_max_requests; - spinlock_t rb_reqslock; /* protect rb_allreqs */ - struct list_head rb_allreqs; - u32 rb_bc_max_requests; struct workqueue_struct *rb_completion_wq; @@ -522,7 +521,7 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, * Buffer calls - xprtrdma/verbs.c */ struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); -void rpcrdma_destroy_req(struct rpcrdma_req *); +void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); -- cgit v1.2.2 From ddbb347f0c68a66cb20b78af4bc318b10d30e425 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:39 -0500 Subject: xprtrdma: Cull dprintk() call sites Clean up: Remove dprintk() call sites that report rare or impossible errors. Leave a few that display high-value low noise status information. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 3 --- net/sunrpc/xprtrdma/rpc_rdma.c | 17 ++++++++++------- net/sunrpc/xprtrdma/transport.c | 33 ++++----------------------------- net/sunrpc/xprtrdma/verbs.c | 34 +++++----------------------------- 4 files changed, 19 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index aae2eb1ea506..dea831ee05fc 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -235,9 +235,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpc_xprt *xprt = rqst->rq_xprt; - dprintk("RPC: %s: freeing rqst %p (req %p)\n", - __func__, rqst, req); - rpcrdma_recv_buffer_put(req->rl_reply); req->rl_reply = NULL; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 8de0b9fc975b..5a587698c885 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1186,17 +1186,20 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, p = xdr_inline_decode(xdr, 2 * sizeof(*p)); if (!p) break; - dprintk("RPC: %5u: %s: server reports version error (%u-%u)\n", - rqst->rq_task->tk_pid, __func__, - be32_to_cpup(p), be32_to_cpu(*(p + 1))); + dprintk("RPC: %s: server reports " + "version error (%u-%u), xid %08x\n", __func__, + be32_to_cpup(p), be32_to_cpu(*(p + 1)), + be32_to_cpu(rep->rr_xid)); break; case err_chunk: - dprintk("RPC: %5u: %s: server reports header decoding error\n", - rqst->rq_task->tk_pid, __func__); + dprintk("RPC: %s: server reports " + "header decoding error, xid %08x\n", __func__, + be32_to_cpu(rep->rr_xid)); break; default: - dprintk("RPC: %5u: %s: server reports unrecognized error %d\n", - rqst->rq_task->tk_pid, __func__, be32_to_cpup(p)); + dprintk("RPC: %s: server reports " + "unrecognized error %d, xid %08x\n", __func__, + be32_to_cpup(p), 
be32_to_cpu(rep->rr_xid)); } r_xprt->rx_stats.bad_reply_count++; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 89e11f95c747..6a57033a5846 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -318,17 +318,12 @@ xprt_setup_rdma(struct xprt_create *args) struct sockaddr *sap; int rc; - if (args->addrlen > sizeof(xprt->addr)) { - dprintk("RPC: %s: address too large\n", __func__); + if (args->addrlen > sizeof(xprt->addr)) return ERR_PTR(-EBADF); - } xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); - if (xprt == NULL) { - dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", - __func__); + if (!xprt) return ERR_PTR(-ENOMEM); - } /* 60 second timeout, no retries */ xprt->timeout = &xprt_rdma_default_timeout; @@ -445,8 +440,6 @@ void xprt_rdma_close(struct rpc_xprt *xprt) might_sleep(); - dprintk("RPC: %s: closing xprt %p\n", __func__, xprt); - /* Prevent marshaling and sending of new requests */ xprt_clear_connected(xprt); @@ -853,24 +846,15 @@ static struct xprt_class xprt_rdma = { void xprt_rdma_cleanup(void) { - int rc; - - dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); sunrpc_table_header = NULL; } #endif - rc = xprt_unregister_transport(&xprt_rdma); - if (rc) - dprintk("RPC: %s: xprt_unregister returned %i\n", - __func__, rc); - rc = xprt_unregister_transport(&xprt_rdma_bc); - if (rc) - dprintk("RPC: %s: xprt_unregister(bc) returned %i\n", - __func__, rc); + xprt_unregister_transport(&xprt_rdma); + xprt_unregister_transport(&xprt_rdma_bc); } int xprt_rdma_init(void) @@ -887,15 +871,6 @@ int xprt_rdma_init(void) return rc; } - dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); - - dprintk("Defaults:\n"); - dprintk("\tSlots %d\n" - "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", - xprt_rdma_slot_table_entries, - xprt_rdma_max_inline_read, 
xprt_rdma_max_inline_write); - dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy); - #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) sunrpc_table_header = register_sysctl_table(sunrpc_table); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 51e09ae1a81b..85c51b8c438d 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -309,22 +309,15 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler, xprt, RDMA_PS_TCP, IB_QPT_RC); - if (IS_ERR(id)) { - rc = PTR_ERR(id); - dprintk("RPC: %s: rdma_create_id() failed %i\n", - __func__, rc); + if (IS_ERR(id)) return id; - } ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->rx_xprt.addr, RDMA_RESOLVE_TIMEOUT); - if (rc) { - dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", - __func__, rc); + if (rc) goto out; - } rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); if (rc < 0) { trace_xprtrdma_conn_tout(xprt); @@ -337,11 +330,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); - if (rc) { - dprintk("RPC: %s: rdma_resolve_route() failed %i\n", - __func__, rc); + if (rc) goto out; - } rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); if (rc < 0) { trace_xprtrdma_conn_tout(xprt); @@ -540,8 +530,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, 1, IB_POLL_WORKQUEUE); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); - dprintk("RPC: %s: failed to create send CQ: %i\n", - __func__, rc); goto out1; } @@ -550,8 +538,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, 0, IB_POLL_WORKQUEUE); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); - dprintk("RPC: %s: failed to create recv CQ: %i\n", - __func__, rc); goto out2; } @@ -691,11 +677,8 @@ rpcrdma_ep_reconnect(struct 
rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, } err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); - if (err) { - dprintk("RPC: %s: rdma_create_qp returned %d\n", - __func__, err); + if (err) goto out_destroy; - } /* Atomically replace the transport's ID and QP. */ rc = 0; @@ -726,8 +709,6 @@ retry: dprintk("RPC: %s: connecting...\n", __func__); rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); if (rc) { - dprintk("RPC: %s: rdma_create_qp failed %i\n", - __func__, rc); rc = -ENETUNREACH; goto out_noupdate; } @@ -749,11 +730,8 @@ retry: rpcrdma_post_recvs(r_xprt, true); rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); - if (rc) { - dprintk("RPC: %s: rdma_connect() failed with %i\n", - __func__, rc); + if (rc) goto out; - } wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); if (ep->rep_connected <= 0) { @@ -1088,8 +1066,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) out_free: kfree(rep); out: - dprintk("RPC: %s: reply buffer %d alloc failed\n", - __func__, rc); return rc; } -- cgit v1.2.2 From 9bef848f44b4316fbe12e364eea527bd59fa1ed3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:44 -0500 Subject: xprtrdma: Remove unused fields from rpcrdma_ia Clean up. The last use of these fields was in commit 173b8f49b3af ("xprtrdma: Demote "connect" log messages"). 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/xprt_rdma.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index a1cdc85898c7..90422a66b806 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -80,8 +80,6 @@ struct rpcrdma_ia { bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; unsigned long ri_flags; - struct ib_qp_attr ri_qp_attr; - struct ib_qp_init_attr ri_qp_init_attr; }; enum { -- cgit v1.2.2 From aba11831794356ff58da69de46a125e6335eb9ca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:49 -0500 Subject: xprtrdma: Clean up of xprtrdma chunk trace points The chunk-related trace points capture nearly the same information as the MR-related trace points. Also, rename them so globbing can be used to enable or disable these trace points more easily. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 5a587698c885..54fbd70c661c 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, if (encode_read_segment(xdr, mr, pos) < 0) return -EMSGSIZE; - trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs); + trace_xprtrdma_chunk_read(rqst->rq_task, pos, mr, nsegs); r_xprt->rx_stats.read_chunk_count++; nsegs -= mr->mr_nents; } while (nsegs); @@ -422,7 +422,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; - trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs); + trace_xprtrdma_chunk_write(rqst->rq_task, mr, nsegs); r_xprt->rx_stats.write_chunk_count++; r_xprt->rx_stats.total_rdma_request 
+= mr->mr_length; nchunks++; @@ -479,7 +479,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; - trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs); + trace_xprtrdma_chunk_reply(rqst->rq_task, mr, nsegs); r_xprt->rx_stats.reply_chunk_count++; r_xprt->rx_stats.total_rdma_request += mr->mr_length; nchunks++; -- cgit v1.2.2 From ba217ec64aef91f40c3cbdbfb0ab3a4000782504 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 10:59:55 -0500 Subject: xprtrdma: Relocate the xprtrdma_mr_map trace points The mr_map trace points were capturing information about the previous use of the MR rather than about the segment that was just mapped. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 97f88bbc9047..1f508f4742f9 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -438,7 +438,6 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir); if (!mr->mr_nents) goto out_dmamap_err; - trace_xprtrdma_mr_map(mr); ibmr = frwr->fr_mr; n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); @@ -460,6 +459,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, mr->mr_handle = ibmr->rkey; mr->mr_length = ibmr->length; mr->mr_offset = ibmr->iova; + trace_xprtrdma_mr_map(mr); *out = mr; return seg; -- cgit v1.2.2 From 395069fc37e7a76280b176c0327d1ead6ca29838 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 11:00:00 -0500 Subject: xprtrdma: Add trace points for calls to transport switch methods Name them "trace_xprtrdma_op_*" so they can be easily enabled as a group. No trace point is added where the generic layer already has observability. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6a57033a5846..d94da3c57593 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -268,7 +268,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - trace_xprtrdma_inject_dsc(r_xprt); + trace_xprtrdma_op_inject_dsc(r_xprt); rdma_disconnect(r_xprt->rx_ia.ri_id); } @@ -284,7 +284,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - trace_xprtrdma_destroy(r_xprt); + trace_xprtrdma_op_destroy(r_xprt); cancel_delayed_work_sync(&r_xprt->rx_connect_worker); @@ -418,7 +418,7 @@ out3: out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: - trace_xprtrdma_destroy(new_xprt); + trace_xprtrdma_op_destroy(new_xprt); xprt_rdma_free_addresses(xprt); xprt_free(xprt); return ERR_PTR(rc); @@ -428,7 +428,8 @@ out1: * xprt_rdma_close - close a transport connection * @xprt: transport context * - * Called during transport shutdown, reconnect, or device removal. + * Called during autoclose or device removal. + * * Caller holds @xprt's send lock to prevent activity on this * transport while the connection is torn down. 
*/ @@ -440,6 +441,8 @@ void xprt_rdma_close(struct rpc_xprt *xprt) might_sleep(); + trace_xprtrdma_op_close(r_xprt); + /* Prevent marshaling and sending of new requests */ xprt_clear_connected(xprt); @@ -525,6 +528,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + trace_xprtrdma_op_connect(r_xprt); if (r_xprt->rx_ep.rep_connected != 0) { /* Reconnect */ schedule_delayed_work(&r_xprt->rx_connect_worker, @@ -659,11 +663,11 @@ xprt_rdma_allocate(struct rpc_task *task) rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base; - trace_xprtrdma_allocate(task, req); + trace_xprtrdma_op_allocate(task, req); return 0; out_fail: - trace_xprtrdma_allocate(task, NULL); + trace_xprtrdma_op_allocate(task, NULL); return -ENOMEM; } @@ -682,7 +686,7 @@ xprt_rdma_free(struct rpc_task *task) if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) rpcrdma_release_rqst(r_xprt, req); - trace_xprtrdma_rpc_done(task, req); + trace_xprtrdma_op_free(task, req); } /** -- cgit v1.2.2 From 53b2c1cb9b3cd901a200ddbbf08c77eabf1ab3e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 11:00:06 -0500 Subject: xprtrdma: Trace mapping, alloc, and dereg failures These are rare, but can be helpful at tracking down DMAR and other problems. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 12 ++++-------- net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 4 +++- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 1f508f4742f9..8a0f1a6e6927 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -113,8 +113,7 @@ void frwr_release_mr(struct rpcrdma_mr *mr) rc = ib_dereg_mr(mr->frwr.fr_mr); if (rc) - pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", - mr, rc); + trace_xprtrdma_frwr_dereg(mr, rc); kfree(mr->mr_sg); kfree(mr); } @@ -177,8 +176,7 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) out_mr_err: rc = PTR_ERR(frwr->fr_mr); - dprintk("RPC: %s: ib_alloc_mr status %i\n", - __func__, rc); + trace_xprtrdma_frwr_alloc(mr, rc); return rc; out_list_err: @@ -465,15 +463,13 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, return seg; out_dmamap_err: - pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", - mr->mr_sg, i); frwr->fr_state = FRWR_IS_INVALID; + trace_xprtrdma_frwr_sgerr(mr, i); rpcrdma_mr_put(mr); return ERR_PTR(-EIO); out_mapmr_err: - pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", - frwr->fr_mr, n, mr->mr_nents); + trace_xprtrdma_frwr_maperr(mr, n); rpcrdma_mr_recycle(mr); return ERR_PTR(-EIO); } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 54fbd70c661c..062aee97b070 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -665,7 +665,7 @@ out_mapping_overflow: out_mapping_err: rpcrdma_unmap_sendctx(sc); - pr_err("rpcrdma: Send mapping error\n"); + trace_xprtrdma_dma_maperr(sge[sge_no].addr); return false; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 85c51b8c438d..719b69c8bb81 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1392,8 
+1392,10 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) (void *)rb->rg_base, rdmab_length(rb), rb->rg_direction); - if (ib_dma_mapping_error(device, rdmab_addr(rb))) + if (ib_dma_mapping_error(device, rdmab_addr(rb))) { + trace_xprtrdma_dma_maperr(rdmab_addr(rb)); return false; + } rb->rg_device = device; rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; -- cgit v1.2.2 From acf0a39f4f277d0cb7178be5ec8a808c6c2bcd9c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 11:00:22 -0500 Subject: SUNRPC: Fix some kernel doc complaints Clean up some warnings observed when building with "make W=1". Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/auth_gss/gss_mech_switch.c | 2 +- net/sunrpc/backchannel_rqst.c | 2 +- net/sunrpc/xprtmultipath.c | 4 ++-- net/sunrpc/xprtsock.c | 2 ++ 4 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 16ac0f4cb7d8..379318dff534 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -244,7 +244,7 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) /** * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors - * @array: array to fill in + * @array_ptr: array to fill in * @size: size of "array" * * Returns the number of array items filled in, or a negative errno. diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index fa5ba6ed3197..ec451b8114b0 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -197,7 +197,7 @@ out_free: /** * xprt_destroy_backchannel - Destroys the backchannel preallocated structures. 
* @xprt: the transport holding the preallocated strucures - * @max_reqs the maximum number of preallocated structures to destroy + * @max_reqs: the maximum number of preallocated structures to destroy * * Since these structures may have been allocated by multiple calls * to xprt_setup_backchannel, we only destroy up to the maximum number diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index e2d64c7138c3..8394124126f8 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -383,7 +383,7 @@ void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, /** * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch * @xpi: pointer to rpc_xprt_iter - * @xps: pointer to a new rpc_xprt_switch or NULL + * @newswitch: pointer to a new rpc_xprt_switch or NULL * * Swaps out the existing xpi->xpi_xpswitch with a new value. */ @@ -401,7 +401,7 @@ struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi, /** * xprt_iter_destroy - Destroys the xprt iterator - * @xpi pointer to rpc_xprt_iter + * @xpi: pointer to rpc_xprt_iter */ void xprt_iter_destroy(struct rpc_xprt_iter *xpi) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a6870d3cb121..bd64d5a776a3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1596,6 +1596,7 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t /** * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport + * @xprt: controlling transport * @task: task that timed out * * Adjust the congestion window after a retransmit timeout has occurred. @@ -2253,6 +2254,7 @@ out: /** * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint + * @work: queued work item * * Invoked by a work queue tasklet. 
*/ -- cgit v1.2.2 From e0f86bc4f990edb56440640964fdcf3f3cf4e240 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 11:00:27 -0500 Subject: xprtrdma: Update comments in frwr_op_send Commit f2877623082b ("xprtrdma: Chain Send to FastReg WRs") was written before commit ce5b37178283 ("xprtrdma: Replace all usage of "frmr" with "frwr""), but was merged afterwards. Thus it still refers to FRMR and MWs. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 8a0f1a6e6927..35c8f62ad61e 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -479,7 +479,7 @@ out_mapmr_err: * @ia: interface adapter * @req: Prepared RPC Call * - * For FRMR, chain any FastReg WRs to the Send WR. Only a + * For FRWR, chain any FastReg WRs to the Send WR. Only a * single ib_post_send call is needed to register memory * and then post the Send WR. * @@ -507,7 +507,7 @@ int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) } /* If ib_post_send fails, the next ->send_request for - * @req will queue these MWs for recovery. + * @req will queue these MRs for recovery. */ return ib_post_send(ia->ri_id->qp, post_wr, NULL); } -- cgit v1.2.2 From 995d312a28cc2a6a5640ceb2dcbdfde37d050c07 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 11:00:32 -0500 Subject: xprtrdma: Replace outdated comment for rpcrdma_ep_post Since commit 7c8d9e7c8863 ("xprtrdma: Move Receive posting to Receive handler"), rpcrdma_ep_post is no longer responsible for posting Receive buffers. Update the documenting comment to reflect this change. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 719b69c8bb81..11976c31ba93 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1427,10 +1427,14 @@ rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) kfree(rb); } -/* - * Prepost any receive buffer, then post send. +/** + * rpcrdma_ep_post - Post WRs to a transport's Send Queue + * @ia: transport's device information + * @ep: transport's RDMA endpoint information + * @req: rpcrdma_req containing the Send WR to post * - * Receive buffer is donated to hardware, reclaimed upon recv completion. + * Returns 0 if the post was successful, otherwise -ENOTCONN + * is returned. */ int rpcrdma_ep_post(struct rpcrdma_ia *ia, -- cgit v1.2.2 From af65ed404c437684c9f58d0c37495abedcdfa3fc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 11:00:37 -0500 Subject: xprtrdma: Add documenting comment for rpcrdma_buffer_destroy Make a note of the function's dependency on an earlier ib_drain_qp. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 11976c31ba93..7749a2bf6887 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1177,6 +1177,14 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) dprintk("RPC: %s: released %u MRs\n", __func__, count); } +/** + * rpcrdma_buffer_destroy - Release all hw resources + * @buf: root control block for resources + * + * ORDERING: relies on a prior ib_drain_qp : + * - No more Send or Receive completions can occur + * - All MRs, reps, and reqs are returned to their free lists + */ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { -- cgit v1.2.2 From f85adb1bf59557909f86f71cf4b1e5a906bdb465 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Dec 2018 11:00:48 -0500 Subject: xprtrdma: Don't leak freed MRs Defensive clean up. Don't set frwr->fr_mr until we know that the scatterlist allocation has succeeded. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 35c8f62ad61e..6a561056b538 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -155,36 +155,39 @@ frwr_mr_recycle_worker(struct work_struct *work) int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) { unsigned int depth = ia->ri_max_frwr_depth; - struct rpcrdma_frwr *frwr = &mr->frwr; + struct scatterlist *sg; + struct ib_mr *frmr; int rc; - frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); - if (IS_ERR(frwr->fr_mr)) + frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); + if (IS_ERR(frmr)) goto out_mr_err; - mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL); - if (!mr->mr_sg) + sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL); + if (!sg) goto out_list_err; - frwr->fr_state = FRWR_IS_INVALID; + mr->frwr.fr_mr = frmr; + mr->frwr.fr_state = FRWR_IS_INVALID; mr->mr_dir = DMA_NONE; INIT_LIST_HEAD(&mr->mr_list); INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker); - sg_init_table(mr->mr_sg, depth); - init_completion(&frwr->fr_linv_done); + init_completion(&mr->frwr.fr_linv_done); + + sg_init_table(sg, depth); + mr->mr_sg = sg; return 0; out_mr_err: - rc = PTR_ERR(frwr->fr_mr); + rc = PTR_ERR(frmr); trace_xprtrdma_frwr_alloc(mr, rc); return rc; out_list_err: - rc = -ENOMEM; dprintk("RPC: %s: sg allocation failure\n", __func__); - ib_dereg_mr(frwr->fr_mr); - return rc; + ib_dereg_mr(frmr); + return -ENOMEM; } /** -- cgit v1.2.2 From 07e10308ee5da8e6132e0b737ece1c99dd651fb6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 7 Dec 2018 11:11:44 -0500 Subject: xprtrdma: Prevent leak of rpcrdma_rep objects If a reply has been processed but the RPC is later retransmitted anyway, the req->rl_reply field still contains the only pointer to the old 
rpcrdma rep. When the next reply comes in, the reply handler will stomp on the rl_reply field, leaking the old rep. A trace event is added to capture such leaks. This problem seems to be worsened by the restructuring of the RPC Call path in v4.20. Fully addressing this issue will require at least a re-architecture of the disconnect logic, which is not appropriate during -rc. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 062aee97b070..d18614e02b4e 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1350,6 +1350,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) } req = rpcr_to_rdmar(rqst); + if (req->rl_reply) { + trace_xprtrdma_leaked_rep(rqst, req->rl_reply); + rpcrdma_recv_buffer_put(req->rl_reply); + } req->rl_reply = rep; rep->rr_rqst = rqst; clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); -- cgit v1.2.2 From 81c88b18de1f11f70c97f28ced8d642c00bb3955 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Dec 2018 10:35:11 -0500 Subject: sunrpc: handle ENOMEM in rpcb_getport_async If we ignore the error we'll hit a null dereference a little later. Reported-by: syzbot+4b98281f2401ab849f4b@syzkaller.appspotmail.com Signed-off-by: J. 
Bruce Fields Signed-off-by: Anna Schumaker --- net/sunrpc/rpcb_clnt.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c7872bc13860..08b5fa4a2852 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -771,6 +771,12 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_3: map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); + if (!map->r_addr) { + status = -ENOMEM; + dprintk("RPC: %5u %s: no memory available\n", + task->tk_pid, __func__); + goto bailout_free_args; + } map->r_owner = ""; break; case RPCBVERS_2: @@ -793,6 +799,8 @@ void rpcb_getport_async(struct rpc_task *task) rpc_put_task(child); return; +bailout_free_args: + kfree(map); bailout_release_client: rpc_release_client(rpcb_clnt); bailout_nofree: -- cgit v1.2.2 From cb24e35b4fa8448e7ee963884958235b8de44f25 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Dec 2018 10:42:36 -0500 Subject: sunrpc: convert unnecessary GFP_ATOMIC to GFP_NOFS It's OK to sleep here, we just don't want to recurse into the filesystem as a writeout could be waiting on this. Future work: the documentation for GFP_NOFS says "Please try to avoid using this flag directly and instead use memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't recurse into the FS layer with a short explanation why. All allocation requests will inherit GFP_NOFS implicitly." But I'm not sure where to do this. Should the workqueue be arranging that for us in the case of workqueues created with WQ_MEM_RECLAIM? Reported-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields Signed-off-by: Anna Schumaker --- net/sunrpc/rpcb_clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 08b5fa4a2852..41a971ac1c63 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -752,7 +752,7 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); + map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS); if (!map) { status = -ENOMEM; dprintk("RPC: %5u %s: no memory available\n", @@ -770,7 +770,7 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_4: case RPCBVERS_3: map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; - map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS); if (!map->r_addr) { status = -ENOMEM; dprintk("RPC: %5u %s: no memory available\n", -- cgit v1.2.2 From 10e037d1e0d5d93cc057e4fad6911e481a462407 Mon Sep 17 00:00:00 2001 From: Santosh kumar pradhan Date: Wed, 19 Dec 2018 12:29:57 +0530 Subject: sunrpc: Add xprt after nfs4_test_session_trunk() Multipathing: In case of NFSv3, rpc_clnt_test_and_add_xprt() adds the xprt to xprt switch (i.e. xps) if rpc_call_null_helper() returns success. But in case of NFSv4.1, it needs to do EXCHANGEID to verify the path along with check for session trunking. Add the xprt in nfs4_test_session_trunk() only when nfs4_detect_session_trunking() returns success. Also release refcount held by rpc_clnt_setup_test_and_add_xprt(). 
Signed-off-by: Santosh kumar pradhan Tested-by: Suresh Jayaraman Reported-by: Aditya Agnihotri Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cad26f816d20..71d9599b5816 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2661,6 +2661,9 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */ xtest->add_xprt_test(clnt, xprt, xtest->data); + xprt_put(xprt); + xprt_switch_put(xps); + /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */ return 1; out_err: -- cgit v1.2.2 From 260f71eff493a844531629854c0935fa8de4fa2c Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Fri, 21 Dec 2018 10:59:36 -0500 Subject: sunrpc: convert to DEFINE_SHOW_ATTRIBUTE Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code. Signed-off-by: Yangtao Li Signed-off-by: Anna Schumaker --- net/sunrpc/rpc_pipe.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 4fda18d47e2c..69663681bf9d 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1266,7 +1266,7 @@ static const struct rpc_pipe_ops gssd_dummy_pipe_ops = { * that this file will be there and have a certain format. 
*/ static int -rpc_show_dummy_info(struct seq_file *m, void *v) +rpc_dummy_info_show(struct seq_file *m, void *v) { seq_printf(m, "RPC server: %s\n", utsname()->nodename); seq_printf(m, "service: foo (1) version 0\n"); @@ -1275,25 +1275,12 @@ rpc_show_dummy_info(struct seq_file *m, void *v) seq_printf(m, "port: 0\n"); return 0; } - -static int -rpc_dummy_info_open(struct inode *inode, struct file *file) -{ - return single_open(file, rpc_show_dummy_info, NULL); -} - -static const struct file_operations rpc_dummy_info_operations = { - .owner = THIS_MODULE, - .open = rpc_dummy_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info); static const struct rpc_filelist gssd_dummy_info_file[] = { [0] = { .name = "info", - .i_fop = &rpc_dummy_info_operations, + .i_fop = &rpc_dummy_info_fops, .mode = S_IFREG | 0400, }, }; -- cgit v1.2.2