aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorBoaz Harrosh <bharrosh@panasas.com>2014-01-22 13:34:54 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2014-02-13 16:48:01 -0500
commit305a7c624df026dbdde23eb880549db7df0ad7b9 (patch)
treeae7c38a79a1b5e61d64d6bcba0626ad18354e1e9 /fs
parentdaab6e7df44ba7d4281379c6ac5780e10d133286 (diff)
pnfs: Proper delay for NFS4ERR_RECALLCONFLICT in layout_get_done
commit ed7e5423014ad89720fcf315c0b73f2c5d0c7bd2 upstream. An NFS4ERR_RECALLCONFLICT is returned by the server from a GET_LAYOUT only when the server sent a RECALL due to that GET_LAYOUT, or when the RECALL and GET_LAYOUT crossed on the wire. Either way, this means we want to wait at most until in-flight IO is finished and the RECALL can be satisfied. So a proper wait here is more like 1/10 of a second, not 15 seconds like we have now. In case of a server bug we delay exponentially longer on each retry. Current code totally craps out performance of very large files on most pnfs-objects layouts, because of how the map changes when the file has grown into the next raid group. [Stable: This will patch back to 3.9. If there are earlier still-maintained trees, please tell me and I'll send a patch] Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/nfs4proc.c34
1 files changed, 30 insertions, 4 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5aca7b400ad9..26e71bdb5b33 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6232,9 +6232,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
6232 struct nfs_server *server = NFS_SERVER(inode); 6232 struct nfs_server *server = NFS_SERVER(inode);
6233 struct pnfs_layout_hdr *lo; 6233 struct pnfs_layout_hdr *lo;
6234 struct nfs4_state *state = NULL; 6234 struct nfs4_state *state = NULL;
6235 unsigned long timeo, giveup; 6235 unsigned long timeo, now, giveup;
6236 6236
6237 dprintk("--> %s\n", __func__); 6237 dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
6238 6238
6239 if (!nfs41_sequence_done(task, &lgp->res.seq_res)) 6239 if (!nfs41_sequence_done(task, &lgp->res.seq_res))
6240 goto out; 6240 goto out;
@@ -6242,12 +6242,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
6242 switch (task->tk_status) { 6242 switch (task->tk_status) {
6243 case 0: 6243 case 0:
6244 goto out; 6244 goto out;
6245 /*
6246 * NFS4ERR_LAYOUTTRYLATER is a conflict with another client
6247 * (or clients) writing to the same RAID stripe
6248 */
6245 case -NFS4ERR_LAYOUTTRYLATER: 6249 case -NFS4ERR_LAYOUTTRYLATER:
6250 /*
6251 * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall
6252 * existing layout before getting a new one).
6253 */
6246 case -NFS4ERR_RECALLCONFLICT: 6254 case -NFS4ERR_RECALLCONFLICT:
6247 timeo = rpc_get_timeout(task->tk_client); 6255 timeo = rpc_get_timeout(task->tk_client);
6248 giveup = lgp->args.timestamp + timeo; 6256 giveup = lgp->args.timestamp + timeo;
6249 if (time_after(giveup, jiffies)) 6257 now = jiffies;
6250 task->tk_status = -NFS4ERR_DELAY; 6258 if (time_after(giveup, now)) {
6259 unsigned long delay;
6260
6261 /* Delay for:
6262 * - Not less than NFS4_POLL_RETRY_MIN.
6263 * - One last time a jiffy before we give up
6264 * - exponential backoff (time_now minus start_attempt)
6265 */
6266 delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
6267 min((giveup - now - 1),
6268 now - lgp->args.timestamp));
6269
6270 dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
6271 __func__, delay);
6272 rpc_delay(task, delay);
6273 task->tk_status = 0;
6274 rpc_restart_call_prepare(task);
6275 goto out; /* Do not call nfs4_async_handle_error() */
6276 }
6251 break; 6277 break;
6252 case -NFS4ERR_EXPIRED: 6278 case -NFS4ERR_EXPIRED:
6253 case -NFS4ERR_BAD_STATEID: 6279 case -NFS4ERR_BAD_STATEID: