From 96f287b0cf512ee537826943c15b0b8647472f70 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 3 Dec 2009 08:09:56 -0500
Subject: NFS: BKL removal from the mount code...

None of the code in nfs_umount_begin() or nfs_remount() has any BKL
dependency.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 90be551b80c1..f0188eaf3726 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -714,8 +714,6 @@ static void nfs_umount_begin(struct super_block *sb)
 	struct nfs_server *server;
 	struct rpc_clnt *rpc;
 
-	lock_kernel();
-
 	server = NFS_SB(sb);
 	/* -EIO all pending I/O */
 	rpc = server->client_acl;
@@ -724,8 +722,6 @@ static void nfs_umount_begin(struct super_block *sb)
 	rpc = server->client;
 	if (!IS_ERR(rpc))
 		rpc_killall_tasks(rpc);
-
-	unlock_kernel();
 }
 
 static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version)
@@ -1881,7 +1877,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	if (data == NULL)
 		return -ENOMEM;
 
-	lock_kernel();
 	/* fill out struct with values from existing mount */
 	data->flags = nfss->flags;
 	data->rsize = nfss->rsize;
@@ -1907,7 +1902,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	error = nfs_compare_remount_data(nfss, data);
 out:
 	kfree(data);
-	unlock_kernel();
 	return error;
 }
 
-- 
cgit v1.2.2


From d4e935bd67ca05db4119b67801d9ece6ae139f05 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 3 Dec 2009 15:58:33 -0500
Subject: The rpc server does not require that service threads take the BKL.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 293fa0528a6e..e66ec5d169f7 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -78,11 +78,6 @@ nfs4_callback_svc(void *vrqstp)
 
 	set_freezable();
 
-	/*
-	 * FIXME: do we really need to run this under the BKL? If so, please
-	 * add a comment about what it's intended to protect.
-	 */
-	lock_kernel();
 	while (!kthread_should_stop()) {
 		/*
 		 * Listen for a request on the socket
@@ -104,7 +99,6 @@ nfs4_callback_svc(void *vrqstp)
 		preverr = err;
 		svc_process(rqstp);
 	}
-	unlock_kernel();
 	return 0;
 }
 
@@ -160,11 +154,6 @@ nfs41_callback_svc(void *vrqstp)
 
 	set_freezable();
 
-	/*
-	 * FIXME: do we really need to run this under the BKL? If so, please
-	 * add a comment about what it's intended to protect.
-	 */
-	lock_kernel();
 	while (!kthread_should_stop()) {
 		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
 		spin_lock_bh(&serv->sv_cb_lock);
@@ -183,7 +172,6 @@ nfs41_callback_svc(void *vrqstp)
 		}
 		finish_wait(&serv->sv_cb_waitq, &wq);
 	}
-	unlock_kernel();
 	return 0;
 }
 
-- 
cgit v1.2.2


From ee671b016fbfc26d69c3fe02e28706222beb1149 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: NFS: convert proto= option to use netids rather than a protoname

Solaris uses netids as values for the proto= option, so that when
someone specifies "tcp6" they get traffic over TCP + IPv6. Until
recently, this has never really been an issue for Linux since it didn't
support NFS over IPv6. The netid and the protocol name were generally
always the same (modulo any strange configuration in /etc/netconfig).

The Solaris manpage documents their proto= option as:

    proto= _netid_ | rdma

This patch is intended to bring Linux closer to how the Solaris proto=
option works, by declaring a static netid mapping in the kernel and
converting the proto= and mountproto= options to follow it and display
the proper values in /proc/mounts.

Much of this functionality will need to be provided by a userspace
mount.nfs patch. Chuck Lever has a patch to change mount.nfs in
the same way. In principle, we could do *all* of this in userspace but
that would mean that the options in /proc/mounts may not match the
options used by userspace.

The alternative to the static mapping here is to add a mechanism to
upcall to userspace for netids. I'm not opposed to that option, but
it'll probably mean more overhead (and quite a bit more code). Rather
than shoot for that at first, I figured it was probably better to
start simply.

Comments welcome.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 81 insertions(+), 13 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f0188eaf3726..bfad74648754 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -175,14 +175,16 @@ static const match_table_t nfs_mount_option_tokens = {
 };
 
 enum {
-	Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma,
+	Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma,
 
 	Opt_xprt_err
 };
 
 static const match_table_t nfs_xprt_protocol_tokens = {
 	{ Opt_xprt_udp, "udp" },
+	{ Opt_xprt_udp6, "udp6" },
 	{ Opt_xprt_tcp, "tcp" },
+	{ Opt_xprt_tcp6, "tcp6" },
 	{ Opt_xprt_rdma, "rdma" },
 
 	{ Opt_xprt_err, NULL }
@@ -492,6 +494,45 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
 	return sec_flavours[i].str;
 }
 
+static void nfs_show_mountd_netid(struct seq_file *m, struct nfs_server *nfss,
+				  int showdefaults)
+{
+	struct sockaddr *sap = (struct sockaddr *) &nfss->mountd_address;
+
+	seq_printf(m, ",mountproto=");
+	switch (sap->sa_family) {
+	case AF_INET:
+		switch (nfss->mountd_protocol) {
+		case IPPROTO_UDP:
+			seq_printf(m, RPCBIND_NETID_UDP);
+			break;
+		case IPPROTO_TCP:
+			seq_printf(m, RPCBIND_NETID_TCP);
+			break;
+		default:
+			if (showdefaults)
+				seq_printf(m, "auto");
+		}
+		break;
+	case AF_INET6:
+		switch (nfss->mountd_protocol) {
+		case IPPROTO_UDP:
+			seq_printf(m, RPCBIND_NETID_UDP6);
+			break;
+		case IPPROTO_TCP:
+			seq_printf(m, RPCBIND_NETID_TCP6);
+			break;
+		default:
+			if (showdefaults)
+				seq_printf(m, "auto");
+		}
+		break;
+	default:
+		if (showdefaults)
+			seq_printf(m, "auto");
+	}
+}
+
 static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
 				    int showdefaults)
 {
@@ -518,17 +559,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
 	if (nfss->mountd_port || showdefaults)
 		seq_printf(m, ",mountport=%u", nfss->mountd_port);
 
-	switch (nfss->mountd_protocol) {
-	case IPPROTO_UDP:
-		seq_printf(m, ",mountproto=udp");
-		break;
-	case IPPROTO_TCP:
-		seq_printf(m, ",mountproto=tcp");
-		break;
-	default:
-		if (showdefaults)
-			seq_printf(m, ",mountproto=auto");
-	}
+	nfs_show_mountd_netid(m, nfss, showdefaults);
 }
 
 /*
@@ -578,7 +609,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 			seq_puts(m, nfs_infop->nostr);
 	}
 	seq_printf(m, ",proto=%s",
-		   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
+		   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
 	if (version == 4) {
 		if (nfss->port != NFS_PORT)
 			seq_printf(m, ",port=%u", nfss->port);
@@ -883,6 +914,8 @@ static int nfs_parse_mount_options(char *raw,
 {
 	char *p, *string, *secdata;
 	int rc, sloppy = 0, invalid_option = 0;
+	unsigned short protofamily = AF_UNSPEC;
+	unsigned short mountfamily = AF_UNSPEC;
 
 	if (!raw) {
 		dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
@@ -1228,12 +1261,17 @@ static int nfs_parse_mount_options(char *raw,
 			token = match_token(string,
 					    nfs_xprt_protocol_tokens, args);
 
+			protofamily = AF_INET;
 			switch (token) {
+			case Opt_xprt_udp6:
+				protofamily = AF_INET6;
 			case Opt_xprt_udp:
 				mnt->flags &= ~NFS_MOUNT_TCP;
 				mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
 				kfree(string);
 				break;
+			case Opt_xprt_tcp6:
+				protofamily = AF_INET6;
 			case Opt_xprt_tcp:
 				mnt->flags |= NFS_MOUNT_TCP;
 				mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
@@ -1261,10 +1299,15 @@ static int nfs_parse_mount_options(char *raw,
 					    nfs_xprt_protocol_tokens, args);
 			kfree(string);
 
+			mountfamily = AF_INET;
 			switch (token) {
+			case Opt_xprt_udp6:
+				mountfamily = AF_INET6;
 			case Opt_xprt_udp:
 				mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
 				break;
+			case Opt_xprt_tcp6:
+				mountfamily = AF_INET6;
 			case Opt_xprt_tcp:
 				mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
 				break;
@@ -1363,8 +1406,33 @@ static int nfs_parse_mount_options(char *raw,
 	if (!sloppy && invalid_option)
 		return 0;
 
+	/*
+	 * verify that any proto=/mountproto= options match the address
+	 * families in the addr=/mountaddr= options.
+	 */
+	if (protofamily != AF_UNSPEC &&
+	    protofamily != mnt->nfs_server.address.ss_family)
+		goto out_proto_mismatch;
+
+	if (mountfamily != AF_UNSPEC) {
+		if (mnt->mount_server.addrlen) {
+			if (mountfamily != mnt->mount_server.address.ss_family)
+				goto out_mountproto_mismatch;
+		} else {
+			if (mountfamily != mnt->nfs_server.address.ss_family)
+				goto out_mountproto_mismatch;
+		}
+	}
+
 	return 1;
 
+out_mountproto_mismatch:
+	printk(KERN_INFO "NFS: mount server address does not match mountproto= "
+			 "option\n");
+	return 0;
+out_proto_mismatch:
+	printk(KERN_INFO "NFS: server address does not match proto= option\n");
+	return 0;
 out_invalid_address:
 	printk(KERN_INFO "NFS: bad IP address specified: %s\n", p);
 	return 0;
-- 
cgit v1.2.2


From d250e190fb9b06f4c595eade88b3d0b705fb330a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: NFS: Display compressed (shorthand) IPv6 in /proc/mounts

Recent changes to snprintf() introduced the %pI6c formatter, which can
display an IPv6 address with standard shorthanding.  Use this new
formatter when displaying IPv6 server addresses in /proc/mounts.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index bfad74648754..837032731bb6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -546,7 +546,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
 	}
 	case AF_INET6: {
 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
-		seq_printf(m, ",mountaddr=%pI6", &sin6->sin6_addr);
+		seq_printf(m, ",mountaddr=%pI6c", &sin6->sin6_addr);
 		break;
 	}
 	default:
-- 
cgit v1.2.2


From dd47f96c077b4516727e497e4b6fd47a06778c0a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: NFS: Revert default r/wsize behavior

When the "rsize=" or "wsize=" mount options are not specified,
text-based mounts have slightly different behavior than legacy binary
mounts.  Text-based mounts use the smaller of the server's maximum
and the client's maximum, but binary mounts use the smaller of the
server's _preferred_ size and the client's maximum.

This difference is actually pretty subtle.  Most servers advertise
the same value as their maximum and their preferred transfer size, so
the end result is the same in most cases.

The reason for this difference is that for text-based mounts, if
r/wsize are not specified, they are set to the largest value supported
by the client.  For legacy mounts, the values are set to zero if these
options are not specified.

nfs_server_set_fsinfo() can negotiate the transfer size defaults
correctly in any case.  There's no need to specify any particular
value as default in the text-based option parsing logic.

Note that nfs4 doesn't use nfs_server_set_fsinfo(), but the mount.nfs4
command does set rsize and wsize to 0 if the user didn't specify these
options.  So, make the same change for text-based NFSv4 mounts.

Thanks to James Pearson <james-p@moving-picture.com> for reporting and
diagnosing the problem.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 837032731bb6..ce907efc5508 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -761,8 +761,6 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (data) {
-		data->rsize		= NFS_MAX_FILE_IO_SIZE;
-		data->wsize		= NFS_MAX_FILE_IO_SIZE;
 		data->acregmin		= NFS_DEF_ACREGMIN;
 		data->acregmax		= NFS_DEF_ACREGMAX;
 		data->acdirmin		= NFS_DEF_ACDIRMIN;
-- 
cgit v1.2.2


From 9c4c761a629caa5572c1a29a8288416070d5d6b7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: NFSv4.1: Handle NFSv4.1 session errors in the lock recovery code

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4state.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs/nfs')

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2ef4fecf3984..3004089e97b1 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -877,6 +877,10 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
 			case -NFS4ERR_EXPIRED:
 			case -NFS4ERR_NO_GRACE:
 			case -NFS4ERR_STALE_CLIENTID:
+			case -NFS4ERR_BADSESSION:
+			case -NFS4ERR_BADSLOT:
+			case -NFS4ERR_BAD_HIGH_SLOT:
+			case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
 				goto out;
 			default:
 				printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
@@ -959,6 +963,10 @@ restart:
 			case -NFS4ERR_NO_GRACE:
 				nfs4_state_mark_reclaim_nograce(sp->so_client, state);
 			case -NFS4ERR_STALE_CLIENTID:
+			case -NFS4ERR_BADSESSION:
+			case -NFS4ERR_BADSLOT:
+			case -NFS4ERR_BAD_HIGH_SLOT:
+			case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
 				goto out_err;
 		}
 		nfs4_put_open_state(state);
-- 
cgit v1.2.2


From e48de5ec25b37d42292c876c1d3337766aae89bd Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: nfs: remove unnecessary check from nfs_rename()

VFS already checks if both source and target are directories.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7cb298525eef..b5fae1953e9d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1601,13 +1601,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * silly-rename. If the silly-rename succeeds, the
 	 * copied dentry is hashed and becomes the new target.
 	 */
-	if (!new_inode)
-		goto go_ahead;
-	if (S_ISDIR(new_inode->i_mode)) {
-		error = -EISDIR;
-		if (!S_ISDIR(old_inode->i_mode))
-			goto out;
-	} else if (atomic_read(&new_dentry->d_count) > 2) {
+	if (new_inode && !S_ISDIR(new_inode->i_mode) &&
+	    atomic_read(&new_dentry->d_count) > 2) {
 		int err;
 		/* copy the target dentry's name */
 		dentry = d_alloc(new_dentry->d_parent,
@@ -1627,7 +1622,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			goto out;
 	}
 
-go_ahead:
 	/*
 	 * ... prune child dentries and writebacks if needed.
 	 */
-- 
cgit v1.2.2


From 28f79a1a695e7a5b00af3b6713b449e08581ffbb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: nfs: fix comments in nfs_rename()

Comments are wrong or out of date.  In particular, d_drop() doesn't
free the inode; it just unhashes the dentry.  And if the target is a
directory then it is not checked for being busy.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b5fae1953e9d..11d0c4cffffc 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1581,7 +1581,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	/*
 	 * To prevent any new references to the target during the rename,
-	 * we unhash the dentry and free the inode in advance.
+	 * we unhash the dentry in advance.
 	 */
 	if (!d_unhashed(new_dentry)) {
 		d_drop(new_dentry);
@@ -1594,12 +1594,10 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 atomic_read(&new_dentry->d_count));
 
 	/*
-	 * First check whether the target is busy ... we can't
-	 * safely do _any_ rename if the target is in use.
-	 *
-	 * For files, make a copy of the dentry and then do a 
-	 * silly-rename. If the silly-rename succeeds, the
-	 * copied dentry is hashed and becomes the new target.
+	 * For non-directories, check whether the target is busy and if so,
+	 * make a copy of the dentry and then do a silly-rename. If the
+	 * silly-rename succeeds, the copied dentry is hashed and becomes
+	 * the new target.
 	 */
 	if (new_inode && !S_ISDIR(new_inode->i_mode) &&
 	    atomic_read(&new_dentry->d_count) > 2) {
-- 
cgit v1.2.2


From 27226104e60964f21717e0f452cecd45c85a64c6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: nfs: dont unhash target if renaming a directory

Move unhashing the target to after the check for existence and being a
non-directory.

If renaming a directory then the VFS already unhashes the target if it
is not busy.  If it's busy then acquiring more references during the
rename makes no difference.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 56 +++++++++++++++++++++++++++++---------------------------
 1 file changed, 29 insertions(+), 27 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 11d0c4cffffc..76b7f539d76e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1579,15 +1579,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct dentry *dentry = NULL, *rehash = NULL;
 	int error = -EBUSY;
 
-	/*
-	 * To prevent any new references to the target during the rename,
-	 * we unhash the dentry in advance.
-	 */
-	if (!d_unhashed(new_dentry)) {
-		d_drop(new_dentry);
-		rehash = new_dentry;
-	}
-
 	dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
 		 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
 		 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
@@ -1599,25 +1590,36 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * silly-rename succeeds, the copied dentry is hashed and becomes
 	 * the new target.
 	 */
-	if (new_inode && !S_ISDIR(new_inode->i_mode) &&
-	    atomic_read(&new_dentry->d_count) > 2) {
-		int err;
-		/* copy the target dentry's name */
-		dentry = d_alloc(new_dentry->d_parent,
-				 &new_dentry->d_name);
-		if (!dentry)
-			goto out;
+	if (new_inode && !S_ISDIR(new_inode->i_mode)) {
+		/*
+		 * To prevent any new references to the target during the
+		 * rename, we unhash the dentry in advance.
+		 */
+		if (!d_unhashed(new_dentry)) {
+			d_drop(new_dentry);
+			rehash = new_dentry;
+		}
 
-		/* silly-rename the existing target ... */
-		err = nfs_sillyrename(new_dir, new_dentry);
-		if (!err) {
-			new_dentry = rehash = dentry;
-			new_inode = NULL;
-			/* instantiate the replacement target */
-			d_instantiate(new_dentry, NULL);
-		} else if (atomic_read(&new_dentry->d_count) > 1)
-			/* dentry still busy? */
-			goto out;
+		if (atomic_read(&new_dentry->d_count) > 2) {
+			int err;
+
+			/* copy the target dentry's name */
+			dentry = d_alloc(new_dentry->d_parent,
+					 &new_dentry->d_name);
+			if (!dentry)
+				goto out;
+
+			/* silly-rename the existing target ... */
+			err = nfs_sillyrename(new_dir, new_dentry);
+			if (!err) {
+				new_dentry = rehash = dentry;
+				new_inode = NULL;
+				/* instantiate the replacement target */
+				d_instantiate(new_dentry, NULL);
+			} else if (atomic_read(&new_dentry->d_count) > 1)
+				/* dentry still busy? */
+				goto out;
+		}
 	}
 
 	/*
-- 
cgit v1.2.2


From 24e93025ee434a58d35e5abb283c5bcc9a13e477 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: nfs: clean up sillyrenaming in nfs_rename()

The d_instantiate(new_dentry, NULL) is superfluous, the dentry is
already negative.  Rehashing this dummy dentry isn't needed either,
d_move() works fine on an unhashed target.

The re-checking for busy after a failed nfs_sillyrename() is bogus
too: new_dentry->d_count < 2 would be a bug here.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 76b7f539d76e..2c5ace4f00a7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1611,14 +1611,11 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 			/* silly-rename the existing target ... */
 			err = nfs_sillyrename(new_dir, new_dentry);
-			if (!err) {
-				new_dentry = rehash = dentry;
-				new_inode = NULL;
-				/* instantiate the replacement target */
-				d_instantiate(new_dentry, NULL);
-			} else if (atomic_read(&new_dentry->d_count) > 1)
-				/* dentry still busy? */
+			if (err)
 				goto out;
+
+			new_dentry = dentry;
+			new_inode = NULL;
 		}
 	}
 
-- 
cgit v1.2.2


From 44ed3556bad809797f7b06a4a88918fd8a23d6fe Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 3 Dec 2009 15:58:56 -0500
Subject: NFS4ERR_FILE_OPEN handling in Linux/NFS

NFS4ERR_FILE_OPEN is returned by the server when an operation cannot be
performed because the file is currently open and local (to the server)
semantics prohibit the operation while the file is open.
A typical case is a RENAME operation on an MS-Windows platform, which
prevents rename while the file is open.

While it is possible that such a condition is transitory, it is also
very possible that the file will be held open for an extended period
of time thus preventing the operation.

The current behaviour of Linux/NFS is to retry the operation
indefinitely.  This is not appropriate - we do not expect a rename to
take an arbitrary amount of time to complete.

Rather, an error should be returned.  The most obvious error code
would be EBUSY, which is legal at least for 'rename' and 'unlink',
and accurately captures the reason for the error.

This patch allows a few retries until about 2 seconds have elapsed,
then returns EBUSY.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs/nfs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 741a562177fc..40da0d5bc5fc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -275,6 +275,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
 			/* FALLTHROUGH */
 #endif /* !defined(CONFIG_NFS_V4_1) */
 		case -NFS4ERR_FILE_OPEN:
+			if (exception->timeout > HZ) {
+				/* We have retried a decent amount, time to
+				 * fail
+				 */
+				ret = -EBUSY;
+				break;
+			}
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
 			ret = nfs4_delay(server->client, &exception->timeout);
-- 
cgit v1.2.2