diff options
author | NeilBrown <neilb@suse.de> | 2013-09-04 03:04:49 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2013-09-04 12:26:32 -0400 |
commit | ef1820f9be27b6ad158f433ab38002ab8131db4d (patch) | |
tree | 8b4b16d0c119bfe3baf7642ac6855bfd43550c05 /fs/nfs | |
parent | 40b5ea0c25669cb99ba7f4836437a7ebaba91408 (diff) |
NFSv4: Don't try to recover NFSv4 locks when they are lost.
When an NFSv4 client loses contact with the server it can lose any
locks that it holds.
Currently when it reconnects to the server it simply tries to reclaim
those locks. This might succeed even though some other client has
held and released a lock in the meantime. So the first client might
think the file is unchanged, but it isn't. This isn't good.
If, when recovery happens, the locks cannot be claimed because some
other client still holds the lock, then we get a message in the kernel
logs, but the client can still write. So two clients can both think
they have a lock and can both write at the same time. This is equally
not good.
There was a patch a while ago
http://comments.gmane.org/gmane.linux.nfs/41917
which tried to address some of this, but it didn't seem to go
anywhere. That patch would also send a signal to the process. That
might be useful but for now this patch just causes writes to fail.
For NFSv4 (unlike v2/v3) there is a strong link between the lock and
the write request so we can fairly easily fail any IO if the lock is
gone. While some applications might not expect this, it is still
safer than allowing the write to succeed.
Because this is a fairly big change in behaviour, a module parameter,
"recover_locks", is introduced which defaults to true (the current
behaviour) but can be set to "false" to tell the client not to try to
recover things that were lost.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/nfs3proc.c | 6 | ||||
-rw-r--r-- | fs/nfs/nfs4_fs.h | 1 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 34 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 14 | ||||
-rw-r--r-- | fs/nfs/proc.c | 6 | ||||
-rw-r--r-- | fs/nfs/read.c | 7 | ||||
-rw-r--r-- | fs/nfs/write.c | 7 |
7 files changed, 54 insertions, 21 deletions
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 1db588a3f08b..90cb10d7b693 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -826,9 +826,10 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message | |||
826 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; | 826 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; |
827 | } | 827 | } |
828 | 828 | ||
829 | static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | 829 | static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) |
830 | { | 830 | { |
831 | rpc_call_start(task); | 831 | rpc_call_start(task); |
832 | return 0; | ||
832 | } | 833 | } |
833 | 834 | ||
834 | static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) | 835 | static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) |
@@ -847,9 +848,10 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
847 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; | 848 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; |
848 | } | 849 | } |
849 | 850 | ||
850 | static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | 851 | static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) |
851 | { | 852 | { |
852 | rpc_call_start(task); | 853 | rpc_call_start(task); |
854 | return 0; | ||
853 | } | 855 | } |
854 | 856 | ||
855 | static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) | 857 | static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index af2d5bf043f0..64118316a407 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -133,6 +133,7 @@ struct nfs4_lock_state { | |||
133 | struct list_head ls_locks; /* Other lock stateids */ | 133 | struct list_head ls_locks; /* Other lock stateids */ |
134 | struct nfs4_state * ls_state; /* Pointer to open state */ | 134 | struct nfs4_state * ls_state; /* Pointer to open state */ |
135 | #define NFS_LOCK_INITIALIZED 0 | 135 | #define NFS_LOCK_INITIALIZED 0 |
136 | #define NFS_LOCK_LOST 1 | ||
136 | unsigned long ls_flags; | 137 | unsigned long ls_flags; |
137 | struct nfs_seqid_counter ls_seqid; | 138 | struct nfs_seqid_counter ls_seqid; |
138 | nfs4_stateid ls_stateid; | 139 | nfs4_stateid ls_stateid; |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0122919a311a..1eb694e0f305 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -4028,15 +4028,19 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message | |||
4028 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); | 4028 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); |
4029 | } | 4029 | } |
4030 | 4030 | ||
4031 | static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | 4031 | static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) |
4032 | { | 4032 | { |
4033 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), | 4033 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), |
4034 | &data->args.seq_args, | 4034 | &data->args.seq_args, |
4035 | &data->res.seq_res, | 4035 | &data->res.seq_res, |
4036 | task)) | 4036 | task)) |
4037 | return; | 4037 | return 0; |
4038 | nfs4_set_rw_stateid(&data->args.stateid, data->args.context, | 4038 | if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, |
4039 | data->args.lock_context, FMODE_READ); | 4039 | data->args.lock_context, FMODE_READ) == -EIO) |
4040 | return -EIO; | ||
4041 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) | ||
4042 | return -EIO; | ||
4043 | return 0; | ||
4040 | } | 4044 | } |
4041 | 4045 | ||
4042 | static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) | 4046 | static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) |
@@ -4112,15 +4116,19 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
4112 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); | 4116 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); |
4113 | } | 4117 | } |
4114 | 4118 | ||
4115 | static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | 4119 | static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) |
4116 | { | 4120 | { |
4117 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), | 4121 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), |
4118 | &data->args.seq_args, | 4122 | &data->args.seq_args, |
4119 | &data->res.seq_res, | 4123 | &data->res.seq_res, |
4120 | task)) | 4124 | task)) |
4121 | return; | 4125 | return 0; |
4122 | nfs4_set_rw_stateid(&data->args.stateid, data->args.context, | 4126 | if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, |
4123 | data->args.lock_context, FMODE_WRITE); | 4127 | data->args.lock_context, FMODE_WRITE) == -EIO) |
4128 | return -EIO; | ||
4129 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) | ||
4130 | return -EIO; | ||
4131 | return 0; | ||
4124 | } | 4132 | } |
4125 | 4133 | ||
4126 | static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) | 4134 | static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) |
@@ -5515,6 +5523,12 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request | |||
5515 | return err; | 5523 | return err; |
5516 | } | 5524 | } |
5517 | 5525 | ||
5526 | bool recover_locks = true; | ||
5527 | module_param(recover_locks, bool, 0644); | ||
5528 | MODULE_PARM_DESC(recover_locks, | ||
5529 | "If the server reports that a lock might be lost, " | ||
5530 | "try to recovery it risking corruption."); | ||
5531 | |||
5518 | static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) | 5532 | static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) |
5519 | { | 5533 | { |
5520 | struct nfs_server *server = NFS_SERVER(state->inode); | 5534 | struct nfs_server *server = NFS_SERVER(state->inode); |
@@ -5526,6 +5540,10 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request | |||
5526 | err = nfs4_set_lock_state(state, request); | 5540 | err = nfs4_set_lock_state(state, request); |
5527 | if (err != 0) | 5541 | if (err != 0) |
5528 | return err; | 5542 | return err; |
5543 | if (!recover_locks) { | ||
5544 | set_bit(NFS_LOCK_LOST, &request->fl_u.nfs4_fl.owner->ls_flags); | ||
5545 | return 0; | ||
5546 | } | ||
5529 | do { | 5547 | do { |
5530 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) | 5548 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) |
5531 | return 0; | 5549 | return 0; |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index da608ee8d5ff..cc14cbb78b73 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -969,7 +969,9 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, | |||
969 | fl_pid = lockowner->l_pid; | 969 | fl_pid = lockowner->l_pid; |
970 | spin_lock(&state->state_lock); | 970 | spin_lock(&state->state_lock); |
971 | lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); | 971 | lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); |
972 | if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { | 972 | if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) |
973 | ret = -EIO; | ||
974 | else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { | ||
973 | nfs4_stateid_copy(dst, &lsp->ls_stateid); | 975 | nfs4_stateid_copy(dst, &lsp->ls_stateid); |
974 | ret = 0; | 976 | ret = 0; |
975 | smp_rmb(); | 977 | smp_rmb(); |
@@ -1009,11 +1011,17 @@ static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) | |||
1009 | int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, | 1011 | int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, |
1010 | fmode_t fmode, const struct nfs_lockowner *lockowner) | 1012 | fmode_t fmode, const struct nfs_lockowner *lockowner) |
1011 | { | 1013 | { |
1012 | int ret = 0; | 1014 | int ret = nfs4_copy_lock_stateid(dst, state, lockowner); |
1015 | if (ret == -EIO) | ||
1016 | /* A lost lock - don't even consider delegations */ | ||
1017 | goto out; | ||
1013 | if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) | 1018 | if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) |
1014 | goto out; | 1019 | goto out; |
1015 | ret = nfs4_copy_lock_stateid(dst, state, lockowner); | ||
1016 | if (ret != -ENOENT) | 1020 | if (ret != -ENOENT) |
1021 | /* nfs4_copy_delegation_stateid() didn't over-write | ||
1022 | * dst, so it still has the lock stateid which we now | ||
1023 | * choose to use. | ||
1024 | */ | ||
1017 | goto out; | 1025 | goto out; |
1018 | ret = nfs4_copy_open_stateid(dst, state); | 1026 | ret = nfs4_copy_open_stateid(dst, state); |
1019 | out: | 1027 | out: |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c041c41f7a52..a8f57c728df5 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -623,9 +623,10 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message * | |||
623 | msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; | 623 | msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; |
624 | } | 624 | } |
625 | 625 | ||
626 | static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | 626 | static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) |
627 | { | 627 | { |
628 | rpc_call_start(task); | 628 | rpc_call_start(task); |
629 | return 0; | ||
629 | } | 630 | } |
630 | 631 | ||
631 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) | 632 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) |
@@ -644,9 +645,10 @@ static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message | |||
644 | msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; | 645 | msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; |
645 | } | 646 | } |
646 | 647 | ||
647 | static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | 648 | static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) |
648 | { | 649 | { |
649 | rpc_call_start(task); | 650 | rpc_call_start(task); |
651 | return 0; | ||
650 | } | 652 | } |
651 | 653 | ||
652 | static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) | 654 | static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 70a26c651f09..31db5c366b81 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -513,9 +513,10 @@ static void nfs_readpage_release_common(void *calldata) | |||
513 | void nfs_read_prepare(struct rpc_task *task, void *calldata) | 513 | void nfs_read_prepare(struct rpc_task *task, void *calldata) |
514 | { | 514 | { |
515 | struct nfs_read_data *data = calldata; | 515 | struct nfs_read_data *data = calldata; |
516 | NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); | 516 | int err; |
517 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) | 517 | err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); |
518 | rpc_exit(task, -EIO); | 518 | if (err) |
519 | rpc_exit(task, err); | ||
519 | } | 520 | } |
520 | 521 | ||
521 | static const struct rpc_call_ops nfs_read_common_ops = { | 522 | static const struct rpc_call_ops nfs_read_common_ops = { |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 94eb4504731a..379450c8d04b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1294,9 +1294,10 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); | |||
1294 | void nfs_write_prepare(struct rpc_task *task, void *calldata) | 1294 | void nfs_write_prepare(struct rpc_task *task, void *calldata) |
1295 | { | 1295 | { |
1296 | struct nfs_write_data *data = calldata; | 1296 | struct nfs_write_data *data = calldata; |
1297 | NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); | 1297 | int err; |
1298 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) | 1298 | err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); |
1299 | rpc_exit(task, -EIO); | 1299 | if (err) |
1300 | rpc_exit(task, err); | ||
1300 | } | 1301 | } |
1301 | 1302 | ||
1302 | void nfs_commit_prepare(struct rpc_task *task, void *calldata) | 1303 | void nfs_commit_prepare(struct rpc_task *task, void *calldata) |