pnfs: Proper delay for NFS4ERR_RECALLCONFLICT in layout_get_done

commit ed7e5423014ad89720fcf315c0b73f2c5d0c7bd2 upstream.

An NFS4ERR_RECALLCONFLICT is returned by the server from a GET_LAYOUT only when the server sent a RECALL due to that GET_LAYOUT, or the RECALL and GET_LAYOUT crossed on the wire. Either way, this means we want to wait at most until in-flight IO is finished and the RECALL can be satisfied.

So a proper wait here is more like 1/10 of a second, not 15 seconds like we have now. In case of a server bug we delay exponentially longer on each retry.

The current code severely degrades performance on very large files on most pnfs-objects layouts, because of how the map changes when the file has grown into the next raid group.

[Stable: This will patch back to 3.9. If there are earlier still-maintained trees, please tell me and I'll send a patch.]

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Boaz Harrosh <bharrosh@panasas.com> 2014-01-22 13:34:54 -0500
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2014-02-13 16:48:01 -0500
commit: 305a7c624df026dbdde23eb880549db7df0ad7b9 (patch)
tree: ae7c38a79a1b5e61d64d6bcba0626ad18354e1e9 /fs
parent: daab6e7df44ba7d4281379c6ac5780e10d133286 (diff)
1 files changed, 30 insertions, 4 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5aca7b400ad9..26e71bdb5b33 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6232,9 +6232,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
        struct nfs_server *server = NFS_SERVER(inode);
        struct pnfs_layout_hdr *lo;
        struct nfs4_state *state = NULL;
-        unsigned long timeo, giveup;
+        unsigned long timeo, now, giveup;
-        dprintk("--> %s\n", __func__);
+        dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
        if (!nfs41_sequence_done(task, &lgp->res.seq_res))
                goto out;
@@ -6242,12 +6242,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
        switch (task->tk_status) {
        case 0:
                goto out;
+        /*
+         * NFS4ERR_LAYOUTTRYLATER is a conflict with another client
+         * (or clients) writing to the same RAID stripe
+         */
        case -NFS4ERR_LAYOUTTRYLATER:
+        /*
+         * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall
+         * existing layout before getting a new one).
+         */
        case -NFS4ERR_RECALLCONFLICT:
                timeo = rpc_get_timeout(task->tk_client);
                giveup = lgp->args.timestamp + timeo;
-                if (time_after(giveup, jiffies))
+                now = jiffies;
-                        task->tk_status = -NFS4ERR_DELAY;
+                if (time_after(giveup, now)) {
+                        unsigned long delay;
+                        /* Delay for:
+                         * - Not less then NFS4_POLL_RETRY_MIN.
+                         * - One last time a jiffie before we give up
+                         * - exponential backoff (time_now minus start_attempt)
+                         */
+                        delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
+                                    min((giveup - now - 1),
+                                        now - lgp->args.timestamp));
+                        dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
+                                __func__, delay);
+                        rpc_delay(task, delay);
+                        task->tk_status = 0;
+                        rpc_restart_call_prepare(task);
+                        goto out; /* Do not call nfs4_async_handle_error() */
+                }
                break;
        case -NFS4ERR_EXPIRED:
        case -NFS4ERR_BAD_STATEID:
author	Boaz Harrosh <bharrosh@panasas.com>	2014-01-22 13:34:54 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2014-02-13 16:48:01 -0500
commit	305a7c624df026dbdde23eb880549db7df0ad7b9 (patch)
tree	ae7c38a79a1b5e61d64d6bcba0626ad18354e1e9 /fs
parent	daab6e7df44ba7d4281379c6ac5780e10d133286 (diff)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5aca7b400ad9..26e71bdb5b33 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6232,9 +6232,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
6232	struct nfs_server *server = NFS_SERVER(inode);	6232	struct nfs_server *server = NFS_SERVER(inode);
6233	struct pnfs_layout_hdr *lo;	6233	struct pnfs_layout_hdr *lo;
6234	struct nfs4_state *state = NULL;	6234	struct nfs4_state *state = NULL;
6235	unsigned long timeo, giveup;	6235	unsigned long timeo, now, giveup;
6236		6236
6237	dprintk("--> %s\n", __func__);	6237	dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
6238		6238
6239	if (!nfs41_sequence_done(task, &lgp->res.seq_res))	6239	if (!nfs41_sequence_done(task, &lgp->res.seq_res))
6240	goto out;	6240	goto out;
@@ -6242,12 +6242,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
6242	switch (task->tk_status) {	6242	switch (task->tk_status) {
6243	case 0:	6243	case 0:
6244	goto out;	6244	goto out;
		6245	/*
		6246	* NFS4ERR_LAYOUTTRYLATER is a conflict with another client
		6247	* (or clients) writing to the same RAID stripe
		6248	*/
6245	case -NFS4ERR_LAYOUTTRYLATER:	6249	case -NFS4ERR_LAYOUTTRYLATER:
		6250	/*
		6251	* NFS4ERR_RECALLCONFLICT is when conflict with self (must recall
		6252	* existing layout before getting a new one).
		6253	*/
6246	case -NFS4ERR_RECALLCONFLICT:	6254	case -NFS4ERR_RECALLCONFLICT:
6247	timeo = rpc_get_timeout(task->tk_client);	6255	timeo = rpc_get_timeout(task->tk_client);
6248	giveup = lgp->args.timestamp + timeo;	6256	giveup = lgp->args.timestamp + timeo;
6249	if (time_after(giveup, jiffies))	6257	now = jiffies;
6250	task->tk_status = -NFS4ERR_DELAY;	6258	if (time_after(giveup, now)) {
		6259	unsigned long delay;
		6260
		6261	/* Delay for:
		6262	* - Not less then NFS4_POLL_RETRY_MIN.
		6263	* - One last time a jiffie before we give up
		6264	* - exponential backoff (time_now minus start_attempt)
		6265	*/
		6266	delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
		6267	min((giveup - now - 1),
		6268	now - lgp->args.timestamp));
		6269
		6270	dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
		6271	__func__, delay);
		6272	rpc_delay(task, delay);
		6273	task->tk_status = 0;
		6274	rpc_restart_call_prepare(task);
		6275	goto out; /* Do not call nfs4_async_handle_error() */
		6276	}
6251	break;	6277	break;
6252	case -NFS4ERR_EXPIRED:	6278	case -NFS4ERR_EXPIRED:
6253	case -NFS4ERR_BAD_STATEID:	6279	case -NFS4ERR_BAD_STATEID: