diff options
author | Trond Myklebust <trondmy@gmail.com> | 2019-04-07 13:59:08 -0400 |
---|---|---|
committer | Anna Schumaker <Anna.Schumaker@Netapp.com> | 2019-04-25 14:18:14 -0400 |
commit | 33344e0f7eaa2efbf9fcc55557d02e8603aa7012 (patch) | |
tree | d4d349e0fd2dfca232c8fc02b6d47ca1ac626caf /fs/nfs | |
parent | 28b1d3f5a772b705ca76df620eb9f686aa2d0b4c (diff) |
pNFS: Add tracking to limit the number of pNFS retries
When the client is reading or writing using pNFS, and hits an error
on the DS, then it typically sends a LAYOUTERROR and/or LAYOUTRETURN
to the MDS, before redirtying the failed pages, and going for a new
round of reads/writebacks. The problem is that if the server has no
way to fix the DS, this loop can repeat indefinitely, so we need a way
to interrupt it after a set number of attempts have been made.
This patch adds an optional module parameter, io_maxretrans, to the
flexfiles layout driver. It allows the admin to specify how many times
to retry the read/writeback process before failing with a fatal error.
The default behaviour (io_maxretrans=0) is to retry forever.
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/direct.c | 7 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 8 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 14 | ||||
-rw-r--r-- | fs/nfs/write.c | 5 |
4 files changed, 33 insertions, 1 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2d301a1a73e2..2436bd92bc00 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -663,6 +663,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | |||
663 | } | 663 | } |
664 | 664 | ||
665 | list_for_each_entry_safe(req, tmp, &reqs, wb_list) { | 665 | list_for_each_entry_safe(req, tmp, &reqs, wb_list) { |
666 | /* Bump the transmission count */ | ||
667 | req->wb_nio++; | ||
666 | if (!nfs_pageio_add_request(&desc, req)) { | 668 | if (!nfs_pageio_add_request(&desc, req)) { |
667 | nfs_list_move_request(req, &failed); | 669 | nfs_list_move_request(req, &failed); |
668 | spin_lock(&cinfo.inode->i_lock); | 670 | spin_lock(&cinfo.inode->i_lock); |
@@ -703,6 +705,11 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) | |||
703 | req = nfs_list_entry(data->pages.next); | 705 | req = nfs_list_entry(data->pages.next); |
704 | nfs_list_remove_request(req); | 706 | nfs_list_remove_request(req); |
705 | if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { | 707 | if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { |
708 | /* | ||
709 | * Despite the reboot, the write was successful, | ||
710 | * so reset wb_nio. | ||
711 | */ | ||
712 | req->wb_nio = 0; | ||
706 | /* Note the rewrite will go through mds */ | 713 | /* Note the rewrite will go through mds */ |
707 | nfs_mark_request_commit(req, NULL, &cinfo, 0); | 714 | nfs_mark_request_commit(req, NULL, &cinfo, 0); |
708 | } else | 715 | } else |
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 6673d4ff5a2a..9fdbcfd3e39d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c | |||
@@ -28,6 +28,8 @@ | |||
28 | #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) | 28 | #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) |
29 | #define FF_LAYOUTRETURN_MAXERR 20 | 29 | #define FF_LAYOUTRETURN_MAXERR 20 |
30 | 30 | ||
31 | static unsigned short io_maxretrans; | ||
32 | |||
31 | static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, | 33 | static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, |
32 | struct nfs_pgio_header *hdr); | 34 | struct nfs_pgio_header *hdr); |
33 | static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, | 35 | static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, |
@@ -925,6 +927,7 @@ retry: | |||
925 | pgm = &pgio->pg_mirrors[0]; | 927 | pgm = &pgio->pg_mirrors[0]; |
926 | pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; | 928 | pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; |
927 | 929 | ||
930 | pgio->pg_maxretrans = io_maxretrans; | ||
928 | return; | 931 | return; |
929 | out_nolseg: | 932 | out_nolseg: |
930 | if (pgio->pg_error < 0) | 933 | if (pgio->pg_error < 0) |
@@ -992,6 +995,7 @@ retry: | |||
992 | pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; | 995 | pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; |
993 | } | 996 | } |
994 | 997 | ||
998 | pgio->pg_maxretrans = io_maxretrans; | ||
995 | return; | 999 | return; |
996 | 1000 | ||
997 | out_mds: | 1001 | out_mds: |
@@ -2515,3 +2519,7 @@ MODULE_DESCRIPTION("The NFSv4 flexfile layout driver"); | |||
2515 | 2519 | ||
2516 | module_init(nfs4flexfilelayout_init); | 2520 | module_init(nfs4flexfilelayout_init); |
2517 | module_exit(nfs4flexfilelayout_exit); | 2521 | module_exit(nfs4flexfilelayout_exit); |
2522 | |||
2523 | module_param(io_maxretrans, ushort, 0644); | ||
2524 | MODULE_PARM_DESC(io_maxretrans, "The number of times the NFSv4.1 client " | ||
2525 | "retries an I/O request before returning an error. "); | ||
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b8301c40dd78..4a31284f411e 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -16,8 +16,8 @@ | |||
16 | #include <linux/nfs.h> | 16 | #include <linux/nfs.h> |
17 | #include <linux/nfs3.h> | 17 | #include <linux/nfs3.h> |
18 | #include <linux/nfs4.h> | 18 | #include <linux/nfs4.h> |
19 | #include <linux/nfs_page.h> | ||
20 | #include <linux/nfs_fs.h> | 19 | #include <linux/nfs_fs.h> |
20 | #include <linux/nfs_page.h> | ||
21 | #include <linux/nfs_mount.h> | 21 | #include <linux/nfs_mount.h> |
22 | #include <linux/export.h> | 22 | #include <linux/export.h> |
23 | 23 | ||
@@ -327,6 +327,7 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, | |||
327 | req->wb_bytes = count; | 327 | req->wb_bytes = count; |
328 | req->wb_context = get_nfs_open_context(ctx); | 328 | req->wb_context = get_nfs_open_context(ctx); |
329 | kref_init(&req->wb_kref); | 329 | kref_init(&req->wb_kref); |
330 | req->wb_nio = 0; | ||
330 | return req; | 331 | return req; |
331 | } | 332 | } |
332 | 333 | ||
@@ -370,6 +371,7 @@ nfs_create_subreq(struct nfs_page *req, struct nfs_page *last, | |||
370 | nfs_lock_request(ret); | 371 | nfs_lock_request(ret); |
371 | ret->wb_index = req->wb_index; | 372 | ret->wb_index = req->wb_index; |
372 | nfs_page_group_init(ret, last); | 373 | nfs_page_group_init(ret, last); |
374 | ret->wb_nio = req->wb_nio; | ||
373 | } | 375 | } |
374 | return ret; | 376 | return ret; |
375 | } | 377 | } |
@@ -724,6 +726,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
724 | desc->pg_mirrors_dynamic = NULL; | 726 | desc->pg_mirrors_dynamic = NULL; |
725 | desc->pg_mirrors = desc->pg_mirrors_static; | 727 | desc->pg_mirrors = desc->pg_mirrors_static; |
726 | nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); | 728 | nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); |
729 | desc->pg_maxretrans = 0; | ||
727 | } | 730 | } |
728 | 731 | ||
729 | /** | 732 | /** |
@@ -983,6 +986,15 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | |||
983 | return 0; | 986 | return 0; |
984 | mirror->pg_base = req->wb_pgbase; | 987 | mirror->pg_base = req->wb_pgbase; |
985 | } | 988 | } |
989 | |||
990 | if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) { | ||
991 | if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR) | ||
992 | desc->pg_error = -ETIMEDOUT; | ||
993 | else | ||
994 | desc->pg_error = -EIO; | ||
995 | return 0; | ||
996 | } | ||
997 | |||
986 | if (!nfs_can_coalesce_requests(prev, req, desc)) | 998 | if (!nfs_can_coalesce_requests(prev, req, desc)) |
987 | return 0; | 999 | return 0; |
988 | nfs_list_move_request(req, &mirror->pg_list); | 1000 | nfs_list_move_request(req, &mirror->pg_list); |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b9bcbd06a628..294604784f70 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1009,6 +1009,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) | |||
1009 | goto remove_req; | 1009 | goto remove_req; |
1010 | } | 1010 | } |
1011 | if (nfs_write_need_commit(hdr)) { | 1011 | if (nfs_write_need_commit(hdr)) { |
1012 | /* Reset wb_nio, since the write was successful. */ | ||
1013 | req->wb_nio = 0; | ||
1012 | memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); | 1014 | memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); |
1013 | nfs_mark_request_commit(req, hdr->lseg, &cinfo, | 1015 | nfs_mark_request_commit(req, hdr->lseg, &cinfo, |
1014 | hdr->pgio_mirror_idx); | 1016 | hdr->pgio_mirror_idx); |
@@ -1142,6 +1144,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, | |||
1142 | req->wb_bytes = end - req->wb_offset; | 1144 | req->wb_bytes = end - req->wb_offset; |
1143 | else | 1145 | else |
1144 | req->wb_bytes = rqend - req->wb_offset; | 1146 | req->wb_bytes = rqend - req->wb_offset; |
1147 | req->wb_nio = 0; | ||
1145 | return req; | 1148 | return req; |
1146 | out_flushme: | 1149 | out_flushme: |
1147 | /* | 1150 | /* |
@@ -1416,6 +1419,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, | |||
1416 | */ | 1419 | */ |
1417 | static void nfs_redirty_request(struct nfs_page *req) | 1420 | static void nfs_redirty_request(struct nfs_page *req) |
1418 | { | 1421 | { |
1422 | /* Bump the transmission count */ | ||
1423 | req->wb_nio++; | ||
1419 | nfs_mark_request_dirty(req); | 1424 | nfs_mark_request_dirty(req); |
1420 | set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); | 1425 | set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); |
1421 | nfs_end_page_writeback(req); | 1426 | nfs_end_page_writeback(req); |