about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Steven Whitehouse <swhiteho@redhat.com> 2006-09-25 12:26:59 -0400
committer Steven Whitehouse <swhiteho@redhat.com> 2006-09-25 12:26:59 -0400
commit 363e065c02b1273364d5356711a83e7f548fc0c8 (patch)
tree 0df0e65da403ade33ade580c2770c97437b1b1af /fs
parent 907b9bceb41fa46beae93f79cc4a2247df502c0f (diff)
parent 7c250413e5b7c3dfae89354725b70c76d7621395 (diff)
[GFS2] Fix up merge of Linus' kernel into GFS2
This fixes up a couple of conflicts when merging up with Linus' latest kernel. This will hopefully allow GFS2 to be more easily merged into forthcoming -mm and FC kernels due to the "one line per header" format now used for the kernel headers.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

Conflicts:
	include/linux/Kbuild
	include/linux/kernel.h
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 8
-rw-r--r-- fs/affs/affs.h 1
-rw-r--r-- fs/affs/super.c 1
-rw-r--r-- fs/autofs/autofs_i.h 2
-rw-r--r-- fs/autofs/inode.c 1
-rw-r--r-- fs/autofs4/autofs_i.h 2
-rw-r--r-- fs/autofs4/inode.c 1
-rw-r--r-- fs/cifs/CHANGES 7
-rw-r--r-- fs/cifs/cifs_fs_sb.h 2
-rw-r--r-- fs/cifs/cifsfs.c 2
-rw-r--r-- fs/cifs/cifsfs.h 2
-rw-r--r-- fs/cifs/cifspdu.h 2
-rw-r--r-- fs/cifs/connect.c 47
-rw-r--r-- fs/cifs/dir.c 23
-rw-r--r-- fs/cifs/file.c 3
-rw-r--r-- fs/cifs/xattr.c 2
-rw-r--r-- fs/configfs/dir.c 32
-rw-r--r-- fs/dcache.c 164
-rw-r--r-- fs/hpfs/hpfs_fn.h 1
-rw-r--r-- fs/hpfs/super.c 1
-rw-r--r-- fs/jffs2/jffs2_fs_i.h 4
-rw-r--r-- fs/lockd/clntproc.c 10
-rw-r--r-- fs/lockd/host.c 51
-rw-r--r-- fs/lockd/mon.c 41
-rw-r--r-- fs/namei.c 6
-rw-r--r-- fs/nfs/Makefile 6
-rw-r--r-- fs/nfs/callback.c 31
-rw-r--r-- fs/nfs/callback.h 7
-rw-r--r-- fs/nfs/callback_proc.c 13
-rw-r--r-- fs/nfs/client.c 1448
-rw-r--r-- fs/nfs/delegation.c 35
-rw-r--r-- fs/nfs/delegation.h 10
-rw-r--r-- fs/nfs/dir.c 341
-rw-r--r-- fs/nfs/file.c 4
-rw-r--r-- fs/nfs/getroot.c 311
-rw-r--r-- fs/nfs/idmap.c 45
-rw-r--r-- fs/nfs/inode.c 46
-rw-r--r-- fs/nfs/internal.h 105
-rw-r--r-- fs/nfs/mount_clnt.c 30
-rw-r--r-- fs/nfs/namespace.c 34
-rw-r--r-- fs/nfs/nfs2xdr.c 21
-rw-r--r-- fs/nfs/nfs3proc.c 42
-rw-r--r-- fs/nfs/nfs3xdr.c 7
-rw-r--r-- fs/nfs/nfs4_fs.h 78
-rw-r--r-- fs/nfs/nfs4namespace.c 118
-rw-r--r-- fs/nfs/nfs4proc.c 218
-rw-r--r-- fs/nfs/nfs4renewd.c 20
-rw-r--r-- fs/nfs/nfs4state.c 174
-rw-r--r-- fs/nfs/nfs4xdr.c 50
-rw-r--r-- fs/nfs/proc.c 41
-rw-r--r-- fs/nfs/read.c 21
-rw-r--r-- fs/nfs/super.c 1421
-rw-r--r-- fs/nfs/write.c 11
-rw-r--r-- fs/nfsd/nfs4callback.c 66
-rw-r--r-- fs/nfsd/nfs4recover.c 21
-rw-r--r-- fs/ocfs2/Makefile 1
-rw-r--r-- fs/ocfs2/alloc.c 28
-rw-r--r-- fs/ocfs2/aops.c 83
-rw-r--r-- fs/ocfs2/buffer_head_io.c 95
-rw-r--r-- fs/ocfs2/buffer_head_io.h 2
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c 8
-rw-r--r-- fs/ocfs2/cluster/tcp_internal.h 8
-rw-r--r-- fs/ocfs2/dcache.c 359
-rw-r--r-- fs/ocfs2/dcache.h 27
-rw-r--r-- fs/ocfs2/dir.c 28
-rw-r--r-- fs/ocfs2/dlm/dlmapi.h 1
-rw-r--r-- fs/ocfs2/dlm/dlmast.c 16
-rw-r--r-- fs/ocfs2/dlm/dlmcommon.h 1
-rw-r--r-- fs/ocfs2/dlm/dlmlock.c 10
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c 4
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c 3
-rw-r--r-- fs/ocfs2/dlm/userdlm.c 81
-rw-r--r-- fs/ocfs2/dlm/userdlm.h 1
-rw-r--r-- fs/ocfs2/dlmglue.c 1103
-rw-r--r-- fs/ocfs2/dlmglue.h 22
-rw-r--r-- fs/ocfs2/export.c 8
-rw-r--r-- fs/ocfs2/file.c 3
-rw-r--r-- fs/ocfs2/inode.c 188
-rw-r--r-- fs/ocfs2/inode.h 11
-rw-r--r-- fs/ocfs2/ioctl.c 136
-rw-r--r-- fs/ocfs2/ioctl.h 16
-rw-r--r-- fs/ocfs2/journal.c 3
-rw-r--r-- fs/ocfs2/namei.c 148
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 24
-rw-r--r-- fs/ocfs2/ocfs2_lockid.h 25
-rw-r--r-- fs/ocfs2/super.c 6
-rw-r--r-- fs/ocfs2/sysfile.c 6
-rw-r--r-- fs/ocfs2/uptodate.c 21
-rw-r--r-- fs/ocfs2/uptodate.h 2
-rw-r--r-- fs/ocfs2/vote.c 180
-rw-r--r-- fs/ocfs2/vote.h 5
-rw-r--r-- fs/openpromfs/inode.c 2
92 files changed, 5097 insertions, 2759 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index ddc7462ddb56..ca9affd676ae 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -326,8 +326,8 @@ source "fs/xfs/Kconfig"
326source "fs/gfs2/Kconfig" 326source "fs/gfs2/Kconfig"
327 327
328config OCFS2_FS 328config OCFS2_FS
329 tristate "OCFS2 file system support (EXPERIMENTAL)" 329 tristate "OCFS2 file system support"
330 depends on NET && SYSFS && EXPERIMENTAL 330 depends on NET && SYSFS
331 select CONFIGFS_FS 331 select CONFIGFS_FS
332 select JBD 332 select JBD
333 select CRC32 333 select CRC32
@@ -1472,8 +1472,8 @@ config NFS_V4
1472 If unsure, say N. 1472 If unsure, say N.
1473 1473
1474config NFS_DIRECTIO 1474config NFS_DIRECTIO
1475 bool "Allow direct I/O on NFS files (EXPERIMENTAL)" 1475 bool "Allow direct I/O on NFS files"
1476 depends on NFS_FS && EXPERIMENTAL 1476 depends on NFS_FS
1477 help 1477 help
1478 This option enables applications to perform uncached I/O on files 1478 This option enables applications to perform uncached I/O on files
1479 in NFS file systems using the O_DIRECT open() flag. When O_DIRECT 1479 in NFS file systems using the O_DIRECT open() flag. When O_DIRECT
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0ddd4cc0d1a0..1dc8438ef389 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -1,7 +1,6 @@
1#include <linux/types.h> 1#include <linux/types.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/buffer_head.h> 3#include <linux/buffer_head.h>
4#include <linux/affs_fs.h>
5#include <linux/amigaffs.h> 4#include <linux/amigaffs.h>
6 5
7/* AmigaOS allows file names with up to 30 characters length. 6/* AmigaOS allows file names with up to 30 characters length.
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5200f4938df0..17352011ab67 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -14,6 +14,7 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/statfs.h> 15#include <linux/statfs.h>
16#include <linux/parser.h> 16#include <linux/parser.h>
17#include <linux/magic.h>
17#include "affs.h" 18#include "affs.h"
18 19
19extern struct timezone sys_tz; 20extern struct timezone sys_tz;
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index a62327f1bdff..c7700d9b3f96 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -37,8 +37,6 @@
37#define DPRINTK(D) ((void)0) 37#define DPRINTK(D) ((void)0)
38#endif 38#endif
39 39
40#define AUTOFS_SUPER_MAGIC 0x0187
41
42/* 40/*
43 * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the 41 * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
44 * kernel will keep the negative response cached for up to the time given 42 * kernel will keep the negative response cached for up to the time given
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 65e5ed42190e..af2efbbb5d76 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -16,6 +16,7 @@
16#include <linux/file.h> 16#include <linux/file.h>
17#include <linux/parser.h> 17#include <linux/parser.h>
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/magic.h>
19#include "autofs_i.h" 20#include "autofs_i.h"
20#include <linux/module.h> 21#include <linux/module.h>
21 22
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d6603d02304c..480ab178cba5 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -40,8 +40,6 @@
40#define DPRINTK(fmt,args...) do {} while(0) 40#define DPRINTK(fmt,args...) do {} while(0)
41#endif 41#endif
42 42
43#define AUTOFS_SUPER_MAGIC 0x0187
44
45/* Unified info structure. This is pointed to by both the dentry and 43/* Unified info structure. This is pointed to by both the dentry and
46 inode structures. Each file in the filesystem has an instance of this 44 inode structures. Each file in the filesystem has an instance of this
47 structure. It holds a reference to the dentry, so dentries are never 45 structure. It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index fde78b110ddd..11a6a9ae51b7 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,6 +19,7 @@
19#include <linux/parser.h> 19#include <linux/parser.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/smp_lock.h> 21#include <linux/smp_lock.h>
22#include <linux/magic.h>
22#include "autofs_i.h" 23#include "autofs_i.h"
23#include <linux/module.h> 24#include <linux/module.h>
24 25
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 0feb3bd49cb8..1eb9a2ec0a3b 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,7 @@
1Version 1.46
2------------
3Support deep tree mounts. Better support OS/2, Win9x (DOS) time stamps.
4
1Version 1.45 5Version 1.45
2------------ 6------------
3Do not time out lockw calls when using posix extensions. Do not 7Do not time out lockw calls when using posix extensions. Do not
@@ -6,7 +10,8 @@ on requests on other threads. Improve POSIX locking emulation,
6(lock cancel now works, and unlock of merged range works even 10(lock cancel now works, and unlock of merged range works even
7to Windows servers now). Fix oops on mount to lanman servers 11to Windows servers now). Fix oops on mount to lanman servers
8(win9x, os/2 etc.) when null password. Do not send listxattr 12(win9x, os/2 etc.) when null password. Do not send listxattr
9(SMB to query all EAs) if nouser_xattr specified. 13(SMB to query all EAs) if nouser_xattr specified. Fix SE Linux
14problem (instantiate inodes/dentries in right order for readdir).
10 15
11Version 1.44 16Version 1.44
12------------ 17------------
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ad58eb0c4d6d..fd1e52ebcee6 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -40,5 +40,7 @@ struct cifs_sb_info {
40 mode_t mnt_file_mode; 40 mode_t mnt_file_mode;
41 mode_t mnt_dir_mode; 41 mode_t mnt_dir_mode;
42 int mnt_cifs_flags; 42 int mnt_cifs_flags;
43 int prepathlen;
44 char * prepath;
43}; 45};
44#endif /* _CIFS_FS_SB_H */ 46#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3cd750029be2..c3ef1c0d0e68 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -189,7 +189,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
189 buf->f_files = 0; /* undefined */ 189 buf->f_files = 0; /* undefined */
190 buf->f_ffree = 0; /* unlimited */ 190 buf->f_ffree = 0; /* unlimited */
191 191
192#ifdef CONFIG_CIFS_EXPERIMENTAL
193/* BB we could add a second check for a QFS Unix capability bit */ 192/* BB we could add a second check for a QFS Unix capability bit */
194/* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */ 193/* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */
195 if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS & 194 if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS &
@@ -199,7 +198,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
199 /* Only need to call the old QFSInfo if failed 198 /* Only need to call the old QFSInfo if failed
200 on newer one */ 199 on newer one */
201 if(rc) 200 if(rc)
202#endif /* CIFS_EXPERIMENTAL */
203 rc = CIFSSMBQFSInfo(xid, pTcon, buf); 201 rc = CIFSSMBQFSInfo(xid, pTcon, buf);
204 202
205 /* Old Windows servers do not support level 103, retry with level 203 /* Old Windows servers do not support level 103, retry with level
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 39ee8ef3bdeb..bea875d9a46a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
100extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 100extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
101extern int cifs_ioctl (struct inode * inode, struct file * filep, 101extern int cifs_ioctl (struct inode * inode, struct file * filep,
102 unsigned int command, unsigned long arg); 102 unsigned int command, unsigned long arg);
103#define CIFS_VERSION "1.45" 103#define CIFS_VERSION "1.46"
104#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 86239023545b..81df2bf8e75a 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1344,6 +1344,7 @@ struct smb_t2_rsp {
1344#define SMB_QUERY_ATTR_FLAGS 0x206 /* append,immutable etc. */ 1344#define SMB_QUERY_ATTR_FLAGS 0x206 /* append,immutable etc. */
1345#define SMB_QUERY_POSIX_PERMISSION 0x207 1345#define SMB_QUERY_POSIX_PERMISSION 0x207
1346#define SMB_QUERY_POSIX_LOCK 0x208 1346#define SMB_QUERY_POSIX_LOCK 0x208
1347/* #define SMB_POSIX_OPEN 0x209 */
1347#define SMB_QUERY_FILE_INTERNAL_INFO 0x3ee 1348#define SMB_QUERY_FILE_INTERNAL_INFO 0x3ee
1348#define SMB_QUERY_FILE_ACCESS_INFO 0x3f0 1349#define SMB_QUERY_FILE_ACCESS_INFO 0x3f0
1349#define SMB_QUERY_FILE_NAME_INFO2 0x3f1 /* 0x30 bytes */ 1350#define SMB_QUERY_FILE_NAME_INFO2 0x3f1 /* 0x30 bytes */
@@ -1363,6 +1364,7 @@ struct smb_t2_rsp {
1363#define SMB_SET_XATTR 0x205 1364#define SMB_SET_XATTR 0x205
1364#define SMB_SET_ATTR_FLAGS 0x206 /* append, immutable etc. */ 1365#define SMB_SET_ATTR_FLAGS 0x206 /* append, immutable etc. */
1365#define SMB_SET_POSIX_LOCK 0x208 1366#define SMB_SET_POSIX_LOCK 0x208
1367#define SMB_POSIX_OPEN 0x209
1366#define SMB_SET_FILE_BASIC_INFO2 0x3ec 1368#define SMB_SET_FILE_BASIC_INFO2 0x3ec
1367#define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */ 1369#define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */
1368#define SMB_FILE_ALL_INFO2 0x3fa 1370#define SMB_FILE_ALL_INFO2 0x3fa
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5d394c726860..0e9ba0b9d71e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -89,6 +89,7 @@ struct smb_vol {
89 unsigned int wsize; 89 unsigned int wsize;
90 unsigned int sockopt; 90 unsigned int sockopt;
91 unsigned short int port; 91 unsigned short int port;
92 char * prepath;
92}; 93};
93 94
94static int ipv4_connect(struct sockaddr_in *psin_server, 95static int ipv4_connect(struct sockaddr_in *psin_server,
@@ -993,6 +994,28 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
993 printk(KERN_WARNING "CIFS: domain name too long\n"); 994 printk(KERN_WARNING "CIFS: domain name too long\n");
994 return 1; 995 return 1;
995 } 996 }
997 } else if (strnicmp(data, "prefixpath", 10) == 0) {
998 if (!value || !*value) {
999 printk(KERN_WARNING
1000 "CIFS: invalid path prefix\n");
1001 return 1; /* needs_arg; */
1002 }
1003 if ((temp_len = strnlen(value, 1024)) < 1024) {
1004 if(value[0] != '/')
1005 temp_len++; /* missing leading slash */
1006 vol->prepath = kmalloc(temp_len+1,GFP_KERNEL);
1007 if(vol->prepath == NULL)
1008 return 1;
1009 if(value[0] != '/') {
1010 vol->prepath[0] = '/';
1011 strcpy(vol->prepath+1,value);
1012 } else
1013 strcpy(vol->prepath,value);
1014 cFYI(1,("prefix path %s",vol->prepath));
1015 } else {
1016 printk(KERN_WARNING "CIFS: prefix too long\n");
1017 return 1;
1018 }
996 } else if (strnicmp(data, "iocharset", 9) == 0) { 1019 } else if (strnicmp(data, "iocharset", 9) == 0) {
997 if (!value || !*value) { 1020 if (!value || !*value) {
998 printk(KERN_WARNING "CIFS: invalid iocharset specified\n"); 1021 printk(KERN_WARNING "CIFS: invalid iocharset specified\n");
@@ -1605,6 +1628,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1605 if (cifs_parse_mount_options(mount_data, devname, &volume_info)) { 1628 if (cifs_parse_mount_options(mount_data, devname, &volume_info)) {
1606 kfree(volume_info.UNC); 1629 kfree(volume_info.UNC);
1607 kfree(volume_info.password); 1630 kfree(volume_info.password);
1631 kfree(volume_info.prepath);
1608 FreeXid(xid); 1632 FreeXid(xid);
1609 return -EINVAL; 1633 return -EINVAL;
1610 } 1634 }
@@ -1619,6 +1643,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1619 locations such as env variables and files on disk */ 1643 locations such as env variables and files on disk */
1620 kfree(volume_info.UNC); 1644 kfree(volume_info.UNC);
1621 kfree(volume_info.password); 1645 kfree(volume_info.password);
1646 kfree(volume_info.prepath);
1622 FreeXid(xid); 1647 FreeXid(xid);
1623 return -EINVAL; 1648 return -EINVAL;
1624 } 1649 }
@@ -1639,6 +1664,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1639 /* we failed translating address */ 1664 /* we failed translating address */
1640 kfree(volume_info.UNC); 1665 kfree(volume_info.UNC);
1641 kfree(volume_info.password); 1666 kfree(volume_info.password);
1667 kfree(volume_info.prepath);
1642 FreeXid(xid); 1668 FreeXid(xid);
1643 return -EINVAL; 1669 return -EINVAL;
1644 } 1670 }
@@ -1651,6 +1677,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1651 cERROR(1,("Connecting to DFS root not implemented yet")); 1677 cERROR(1,("Connecting to DFS root not implemented yet"));
1652 kfree(volume_info.UNC); 1678 kfree(volume_info.UNC);
1653 kfree(volume_info.password); 1679 kfree(volume_info.password);
1680 kfree(volume_info.prepath);
1654 FreeXid(xid); 1681 FreeXid(xid);
1655 return -EINVAL; 1682 return -EINVAL;
1656 } else /* which servers DFS root would we conect to */ { 1683 } else /* which servers DFS root would we conect to */ {
@@ -1658,6 +1685,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1658 ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified")); 1685 ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified"));
1659 kfree(volume_info.UNC); 1686 kfree(volume_info.UNC);
1660 kfree(volume_info.password); 1687 kfree(volume_info.password);
1688 kfree(volume_info.prepath);
1661 FreeXid(xid); 1689 FreeXid(xid);
1662 return -EINVAL; 1690 return -EINVAL;
1663 } 1691 }
@@ -1672,6 +1700,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1672 cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset)); 1700 cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset));
1673 kfree(volume_info.UNC); 1701 kfree(volume_info.UNC);
1674 kfree(volume_info.password); 1702 kfree(volume_info.password);
1703 kfree(volume_info.prepath);
1675 FreeXid(xid); 1704 FreeXid(xid);
1676 return -ELIBACC; 1705 return -ELIBACC;
1677 } 1706 }
@@ -1688,6 +1717,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1688 else { 1717 else {
1689 kfree(volume_info.UNC); 1718 kfree(volume_info.UNC);
1690 kfree(volume_info.password); 1719 kfree(volume_info.password);
1720 kfree(volume_info.prepath);
1691 FreeXid(xid); 1721 FreeXid(xid);
1692 return -EINVAL; 1722 return -EINVAL;
1693 } 1723 }
@@ -1710,6 +1740,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1710 sock_release(csocket); 1740 sock_release(csocket);
1711 kfree(volume_info.UNC); 1741 kfree(volume_info.UNC);
1712 kfree(volume_info.password); 1742 kfree(volume_info.password);
1743 kfree(volume_info.prepath);
1713 FreeXid(xid); 1744 FreeXid(xid);
1714 return rc; 1745 return rc;
1715 } 1746 }
@@ -1720,6 +1751,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1720 sock_release(csocket); 1751 sock_release(csocket);
1721 kfree(volume_info.UNC); 1752 kfree(volume_info.UNC);
1722 kfree(volume_info.password); 1753 kfree(volume_info.password);
1754 kfree(volume_info.prepath);
1723 FreeXid(xid); 1755 FreeXid(xid);
1724 return rc; 1756 return rc;
1725 } else { 1757 } else {
@@ -1744,6 +1776,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1744 sock_release(csocket); 1776 sock_release(csocket);
1745 kfree(volume_info.UNC); 1777 kfree(volume_info.UNC);
1746 kfree(volume_info.password); 1778 kfree(volume_info.password);
1779 kfree(volume_info.prepath);
1747 FreeXid(xid); 1780 FreeXid(xid);
1748 return rc; 1781 return rc;
1749 } 1782 }
@@ -1831,6 +1864,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1831 /* Windows ME may prefer this */ 1864 /* Windows ME may prefer this */
1832 cFYI(1,("readsize set to minimum 2048")); 1865 cFYI(1,("readsize set to minimum 2048"));
1833 } 1866 }
1867 /* calculate prepath */
1868 cifs_sb->prepath = volume_info.prepath;
1869 if(cifs_sb->prepath) {
1870 cifs_sb->prepathlen = strlen(cifs_sb->prepath);
1871 cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb);
1872 volume_info.prepath = NULL;
1873 } else
1874 cifs_sb->prepathlen = 0;
1834 cifs_sb->mnt_uid = volume_info.linux_uid; 1875 cifs_sb->mnt_uid = volume_info.linux_uid;
1835 cifs_sb->mnt_gid = volume_info.linux_gid; 1876 cifs_sb->mnt_gid = volume_info.linux_gid;
1836 cifs_sb->mnt_file_mode = volume_info.file_mode; 1877 cifs_sb->mnt_file_mode = volume_info.file_mode;
@@ -2008,6 +2049,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2008 the password ptr is put in the new session structure (in which case the 2049 the password ptr is put in the new session structure (in which case the
2009 password will be freed at unmount time) */ 2050 password will be freed at unmount time) */
2010 kfree(volume_info.UNC); 2051 kfree(volume_info.UNC);
2052 kfree(volume_info.prepath);
2011 FreeXid(xid); 2053 FreeXid(xid);
2012 return rc; 2054 return rc;
2013} 2055}
@@ -3195,6 +3237,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3195 int xid; 3237 int xid;
3196 struct cifsSesInfo *ses = NULL; 3238 struct cifsSesInfo *ses = NULL;
3197 struct task_struct *cifsd_task; 3239 struct task_struct *cifsd_task;
3240 char * tmp;
3198 3241
3199 xid = GetXid(); 3242 xid = GetXid();
3200 3243
@@ -3228,6 +3271,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3228 } 3271 }
3229 3272
3230 cifs_sb->tcon = NULL; 3273 cifs_sb->tcon = NULL;
3274 tmp = cifs_sb->prepath;
3275 cifs_sb->prepathlen = 0;
3276 cifs_sb->prepath = NULL;
3277 kfree(tmp);
3231 if (ses) 3278 if (ses)
3232 schedule_timeout_interruptible(msecs_to_jiffies(500)); 3279 schedule_timeout_interruptible(msecs_to_jiffies(500));
3233 if (ses) 3280 if (ses)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 914239d53634..66b825ade3e1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -46,7 +46,8 @@ char *
46build_path_from_dentry(struct dentry *direntry) 46build_path_from_dentry(struct dentry *direntry)
47{ 47{
48 struct dentry *temp; 48 struct dentry *temp;
49 int namelen = 0; 49 int namelen;
50 int pplen;
50 char *full_path; 51 char *full_path;
51 char dirsep; 52 char dirsep;
52 53
@@ -56,7 +57,9 @@ build_path_from_dentry(struct dentry *direntry)
56 when the server crashed */ 57 when the server crashed */
57 58
58 dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb)); 59 dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
60 pplen = CIFS_SB(direntry->d_sb)->prepathlen;
59cifs_bp_rename_retry: 61cifs_bp_rename_retry:
62 namelen = pplen;
60 for (temp = direntry; !IS_ROOT(temp);) { 63 for (temp = direntry; !IS_ROOT(temp);) {
61 namelen += (1 + temp->d_name.len); 64 namelen += (1 + temp->d_name.len);
62 temp = temp->d_parent; 65 temp = temp->d_parent;
@@ -70,7 +73,6 @@ cifs_bp_rename_retry:
70 if(full_path == NULL) 73 if(full_path == NULL)
71 return full_path; 74 return full_path;
72 full_path[namelen] = 0; /* trailing null */ 75 full_path[namelen] = 0; /* trailing null */
73
74 for (temp = direntry; !IS_ROOT(temp);) { 76 for (temp = direntry; !IS_ROOT(temp);) {
75 namelen -= 1 + temp->d_name.len; 77 namelen -= 1 + temp->d_name.len;
76 if (namelen < 0) { 78 if (namelen < 0) {
@@ -79,7 +81,7 @@ cifs_bp_rename_retry:
79 full_path[namelen] = dirsep; 81 full_path[namelen] = dirsep;
80 strncpy(full_path + namelen + 1, temp->d_name.name, 82 strncpy(full_path + namelen + 1, temp->d_name.name,
81 temp->d_name.len); 83 temp->d_name.len);
82 cFYI(0, (" name: %s ", full_path + namelen)); 84 cFYI(0, ("name: %s", full_path + namelen));
83 } 85 }
84 temp = temp->d_parent; 86 temp = temp->d_parent;
85 if(temp == NULL) { 87 if(temp == NULL) {
@@ -88,18 +90,23 @@ cifs_bp_rename_retry:
88 return NULL; 90 return NULL;
89 } 91 }
90 } 92 }
91 if (namelen != 0) { 93 if (namelen != pplen) {
92 cERROR(1, 94 cERROR(1,
93 ("We did not end path lookup where we expected namelen is %d", 95 ("did not end path lookup where expected namelen is %d",
94 namelen)); 96 namelen));
95 /* presumably this is only possible if we were racing with a rename 97 /* presumably this is only possible if racing with a rename
96 of one of the parent directories (we can not lock the dentries 98 of one of the parent directories (we can not lock the dentries
97 above us to prevent this, but retrying should be harmless) */ 99 above us to prevent this, but retrying should be harmless) */
98 kfree(full_path); 100 kfree(full_path);
99 namelen = 0;
100 goto cifs_bp_rename_retry; 101 goto cifs_bp_rename_retry;
101 } 102 }
102 103 /* DIR_SEP already set for byte 0 / vs \ but not for
104 subsequent slashes in prepath which currently must
105 be entered the right way - not sure if there is an alternative
106 since the '\' is a valid posix character so we can not switch
107 those safely to '/' if any are found in the middle of the prepath */
108 /* BB test paths to Windows with '/' in the midst of prepath */
109 strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen);
103 return full_path; 110 return full_path;
104} 111}
105 112
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e9c5ba9084fc..ddb012a68023 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -752,6 +752,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
752 int stored_rc = 0; 752 int stored_rc = 0;
753 struct cifsLockInfo *li, *tmp; 753 struct cifsLockInfo *li, *tmp;
754 754
755 rc = 0;
755 down(&fid->lock_sem); 756 down(&fid->lock_sem);
756 list_for_each_entry_safe(li, tmp, &fid->llist, llist) { 757 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
757 if (pfLock->fl_start <= li->offset && 758 if (pfLock->fl_start <= li->offset &&
@@ -766,7 +767,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
766 kfree(li); 767 kfree(li);
767 } 768 }
768 } 769 }
769 up(&fid->lock_sem); 770 up(&fid->lock_sem);
770 } 771 }
771 } 772 }
772 773
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 067648b7179b..18fcec190f8b 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -269,7 +269,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
269 rc = CIFSSMBGetCIFSACL(xid, pTcon, fid, 269 rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
270 ea_value, buf_size, 270 ea_value, buf_size,
271 ACL_TYPE_ACCESS); 271 ACL_TYPE_ACCESS);
272 CIFSSMBClose(xid, pTcon, fid) 272 CIFSSMBClose(xid, pTcon, fid);
273 } 273 }
274 } */ /* BB enable after fixing up return data */ 274 } */ /* BB enable after fixing up return data */
275 275
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index df025453dd97..816e8ef64560 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -86,6 +86,32 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
86 return sd; 86 return sd;
87} 87}
88 88
89/*
90 *
91 * Return -EEXIST if there is already a configfs element with the same
92 * name for the same parent.
93 *
94 * called with parent inode's i_mutex held
95 */
96int configfs_dirent_exists(struct configfs_dirent *parent_sd,
97 const unsigned char *new)
98{
99 struct configfs_dirent * sd;
100
101 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
102 if (sd->s_element) {
103 const unsigned char *existing = configfs_get_name(sd);
104 if (strcmp(existing, new))
105 continue;
106 else
107 return -EEXIST;
108 }
109 }
110
111 return 0;
112}
113
114
89int configfs_make_dirent(struct configfs_dirent * parent_sd, 115int configfs_make_dirent(struct configfs_dirent * parent_sd,
90 struct dentry * dentry, void * element, 116 struct dentry * dentry, void * element,
91 umode_t mode, int type) 117 umode_t mode, int type)
@@ -136,8 +162,10 @@ static int create_dir(struct config_item * k, struct dentry * p,
136 int error; 162 int error;
137 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 163 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
138 164
139 error = configfs_make_dirent(p->d_fsdata, d, k, mode, 165 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
140 CONFIGFS_DIR); 166 if (!error)
167 error = configfs_make_dirent(p->d_fsdata, d, k, mode,
168 CONFIGFS_DIR);
141 if (!error) { 169 if (!error) {
142 error = configfs_create(d, mode, init_dir); 170 error = configfs_create(d, mode, init_dir);
143 if (!error) { 171 if (!error) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 1b4a3a34ec57..17b392a2049e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -828,17 +828,19 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
828 * (or otherwise set) by the caller to indicate that it is now 828 * (or otherwise set) by the caller to indicate that it is now
829 * in use by the dcache. 829 * in use by the dcache.
830 */ 830 */
831struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) 831static struct dentry *__d_instantiate_unique(struct dentry *entry,
832 struct inode *inode)
832{ 833{
833 struct dentry *alias; 834 struct dentry *alias;
834 int len = entry->d_name.len; 835 int len = entry->d_name.len;
835 const char *name = entry->d_name.name; 836 const char *name = entry->d_name.name;
836 unsigned int hash = entry->d_name.hash; 837 unsigned int hash = entry->d_name.hash;
837 838
838 BUG_ON(!list_empty(&entry->d_alias)); 839 if (!inode) {
839 spin_lock(&dcache_lock); 840 entry->d_inode = NULL;
840 if (!inode) 841 return NULL;
841 goto do_negative; 842 }
843
842 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 844 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
843 struct qstr *qstr = &alias->d_name; 845 struct qstr *qstr = &alias->d_name;
844 846
@@ -851,19 +853,35 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
851 if (memcmp(qstr->name, name, len)) 853 if (memcmp(qstr->name, name, len))
852 continue; 854 continue;
853 dget_locked(alias); 855 dget_locked(alias);
854 spin_unlock(&dcache_lock);
855 BUG_ON(!d_unhashed(alias));
856 iput(inode);
857 return alias; 856 return alias;
858 } 857 }
858
859 list_add(&entry->d_alias, &inode->i_dentry); 859 list_add(&entry->d_alias, &inode->i_dentry);
860do_negative:
861 entry->d_inode = inode; 860 entry->d_inode = inode;
862 fsnotify_d_instantiate(entry, inode); 861 fsnotify_d_instantiate(entry, inode);
863 spin_unlock(&dcache_lock);
864 security_d_instantiate(entry, inode);
865 return NULL; 862 return NULL;
866} 863}
864
865struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
866{
867 struct dentry *result;
868
869 BUG_ON(!list_empty(&entry->d_alias));
870
871 spin_lock(&dcache_lock);
872 result = __d_instantiate_unique(entry, inode);
873 spin_unlock(&dcache_lock);
874
875 if (!result) {
876 security_d_instantiate(entry, inode);
877 return NULL;
878 }
879
880 BUG_ON(!d_unhashed(result));
881 iput(inode);
882 return result;
883}
884
867EXPORT_SYMBOL(d_instantiate_unique); 885EXPORT_SYMBOL(d_instantiate_unique);
868 886
869/** 887/**
@@ -1235,6 +1253,11 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1235 hlist_add_head_rcu(&entry->d_hash, list); 1253 hlist_add_head_rcu(&entry->d_hash, list);
1236} 1254}
1237 1255
1256static void _d_rehash(struct dentry * entry)
1257{
1258 __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
1259}
1260
1238/** 1261/**
1239 * d_rehash - add an entry back to the hash 1262 * d_rehash - add an entry back to the hash
1240 * @entry: dentry to add to the hash 1263 * @entry: dentry to add to the hash
@@ -1244,11 +1267,9 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1244 1267
1245void d_rehash(struct dentry * entry) 1268void d_rehash(struct dentry * entry)
1246{ 1269{
1247 struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
1248
1249 spin_lock(&dcache_lock); 1270 spin_lock(&dcache_lock);
1250 spin_lock(&entry->d_lock); 1271 spin_lock(&entry->d_lock);
1251 __d_rehash(entry, list); 1272 _d_rehash(entry);
1252 spin_unlock(&entry->d_lock); 1273 spin_unlock(&entry->d_lock);
1253 spin_unlock(&dcache_lock); 1274 spin_unlock(&dcache_lock);
1254} 1275}
@@ -1386,6 +1407,120 @@ already_unhashed:
1386 spin_unlock(&dcache_lock); 1407 spin_unlock(&dcache_lock);
1387} 1408}
1388 1409
1410/*
1411 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1412 * named dentry in place of the dentry to be replaced.
1413 */
1414static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1415{
1416 struct dentry *dparent, *aparent;
1417
1418 switch_names(dentry, anon);
1419 do_switch(dentry->d_name.len, anon->d_name.len);
1420 do_switch(dentry->d_name.hash, anon->d_name.hash);
1421
1422 dparent = dentry->d_parent;
1423 aparent = anon->d_parent;
1424
1425 dentry->d_parent = (aparent == anon) ? dentry : aparent;
1426 list_del(&dentry->d_u.d_child);
1427 if (!IS_ROOT(dentry))
1428 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
1429 else
1430 INIT_LIST_HEAD(&dentry->d_u.d_child);
1431
1432 anon->d_parent = (dparent == dentry) ? anon : dparent;
1433 list_del(&anon->d_u.d_child);
1434 if (!IS_ROOT(anon))
1435 list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
1436 else
1437 INIT_LIST_HEAD(&anon->d_u.d_child);
1438
1439 anon->d_flags &= ~DCACHE_DISCONNECTED;
1440}
1441
1442/**
1443 * d_materialise_unique - introduce an inode into the tree
1444 * @dentry: candidate dentry
1445 * @inode: inode to bind to the dentry, to which aliases may be attached
1446 *
1447 * Introduces an dentry into the tree, substituting an extant disconnected
1448 * root directory alias in its place if there is one
1449 */
1450struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1451{
1452 struct dentry *alias, *actual;
1453
1454 BUG_ON(!d_unhashed(dentry));
1455
1456 spin_lock(&dcache_lock);
1457
1458 if (!inode) {
1459 actual = dentry;
1460 dentry->d_inode = NULL;
1461 goto found_lock;
1462 }
1463
1464 /* See if a disconnected directory already exists as an anonymous root
1465 * that we should splice into the tree instead */
1466 if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
1467 spin_lock(&alias->d_lock);
1468
1469 /* Is this a mountpoint that we could splice into our tree? */
1470 if (IS_ROOT(alias))
1471 goto connect_mountpoint;
1472
1473 if (alias->d_name.len == dentry->d_name.len &&
1474 alias->d_parent == dentry->d_parent &&
1475 memcmp(alias->d_name.name,
1476 dentry->d_name.name,
1477 dentry->d_name.len) == 0)
1478 goto replace_with_alias;
1479
1480 spin_unlock(&alias->d_lock);
1481
1482 /* Doh! Seem to be aliasing directories for some reason... */
1483 dput(alias);
1484 }
1485
1486 /* Add a unique reference */
1487 actual = __d_instantiate_unique(dentry, inode);
1488 if (!actual)
1489 actual = dentry;
1490 else if (unlikely(!d_unhashed(actual)))
1491 goto shouldnt_be_hashed;
1492
1493found_lock:
1494 spin_lock(&actual->d_lock);
1495found:
1496 _d_rehash(actual);
1497 spin_unlock(&actual->d_lock);
1498 spin_unlock(&dcache_lock);
1499
1500 if (actual == dentry) {
1501 security_d_instantiate(dentry, inode);
1502 return NULL;
1503 }
1504
1505 iput(inode);
1506 return actual;
1507
1508 /* Convert the anonymous/root alias into an ordinary dentry */
1509connect_mountpoint:
1510 __d_materialise_dentry(dentry, alias);
1511
1512 /* Replace the candidate dentry with the alias in the tree */
1513replace_with_alias:
1514 __d_drop(alias);
1515 actual = alias;
1516 goto found;
1517
1518shouldnt_be_hashed:
1519 spin_unlock(&dcache_lock);
1520 BUG();
1521 goto shouldnt_be_hashed;
1522}
1523
1389/** 1524/**
1390 * d_path - return the path of a dentry 1525 * d_path - return the path of a dentry
1391 * @dentry: dentry to report 1526 * @dentry: dentry to report
@@ -1784,6 +1919,7 @@ EXPORT_SYMBOL(d_instantiate);
1784EXPORT_SYMBOL(d_invalidate); 1919EXPORT_SYMBOL(d_invalidate);
1785EXPORT_SYMBOL(d_lookup); 1920EXPORT_SYMBOL(d_lookup);
1786EXPORT_SYMBOL(d_move); 1921EXPORT_SYMBOL(d_move);
1922EXPORT_SYMBOL_GPL(d_materialise_unique);
1787EXPORT_SYMBOL(d_path); 1923EXPORT_SYMBOL(d_path);
1788EXPORT_SYMBOL(d_prune_aliases); 1924EXPORT_SYMBOL(d_prune_aliases);
1789EXPORT_SYMBOL(d_rehash); 1925EXPORT_SYMBOL(d_rehash);
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index f687d54ed442..32ab51e42b96 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -12,7 +12,6 @@
12#include <linux/mutex.h> 12#include <linux/mutex.h>
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/hpfs_fs.h>
16#include <linux/slab.h> 15#include <linux/slab.h>
17#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
18 17
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f798480a363f..8fe51c343786 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -11,6 +11,7 @@
11#include <linux/parser.h> 11#include <linux/parser.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/statfs.h> 13#include <linux/statfs.h>
14#include <linux/magic.h>
14 15
15/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ 16/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
16 17
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 2e0cc8e00b85..3a566077ac95 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -41,11 +41,7 @@ struct jffs2_inode_info {
41 41
42 uint16_t flags; 42 uint16_t flags;
43 uint8_t usercompr; 43 uint8_t usercompr;
44#if !defined (__ECOS)
45#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
46 struct inode vfs_inode; 44 struct inode vfs_inode;
47#endif
48#endif
49#ifdef CONFIG_JFFS2_FS_POSIX_ACL 45#ifdef CONFIG_JFFS2_FS_POSIX_ACL
50 struct posix_acl *i_acl_access; 46 struct posix_acl *i_acl_access;
51 struct posix_acl *i_acl_default; 47 struct posix_acl *i_acl_default;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 89ba0df14c22..50dbb67ae0c4 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -151,11 +151,13 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
151int 151int
152nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) 152nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
153{ 153{
154 struct rpc_clnt *client = NFS_CLIENT(inode);
155 struct sockaddr_in addr;
154 struct nlm_host *host; 156 struct nlm_host *host;
155 struct nlm_rqst *call; 157 struct nlm_rqst *call;
156 sigset_t oldset; 158 sigset_t oldset;
157 unsigned long flags; 159 unsigned long flags;
158 int status, proto, vers; 160 int status, vers;
159 161
160 vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1; 162 vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
161 if (NFS_PROTO(inode)->version > 3) { 163 if (NFS_PROTO(inode)->version > 3) {
@@ -163,10 +165,8 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
163 return -ENOLCK; 165 return -ENOLCK;
164 } 166 }
165 167
166 /* Retrieve transport protocol from NFS client */ 168 rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
167 proto = NFS_CLIENT(inode)->cl_xprt->prot; 169 host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers);
168
169 host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
170 if (host == NULL) 170 if (host == NULL)
171 return -ENOLCK; 171 return -ENOLCK;
172 172
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 38b0e8a1aec0..703fb038c813 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -26,7 +26,6 @@
26#define NLM_HOST_REBIND (60 * HZ) 26#define NLM_HOST_REBIND (60 * HZ)
27#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ) 27#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
28#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ) 28#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ)
29#define NLM_HOST_ADDR(sv) (&(sv)->s_nlmclnt->cl_xprt->addr)
30 29
31static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH]; 30static struct nlm_host * nlm_hosts[NLM_HOST_NRHASH];
32static unsigned long next_gc; 31static unsigned long next_gc;
@@ -167,7 +166,6 @@ struct rpc_clnt *
167nlm_bind_host(struct nlm_host *host) 166nlm_bind_host(struct nlm_host *host)
168{ 167{
169 struct rpc_clnt *clnt; 168 struct rpc_clnt *clnt;
170 struct rpc_xprt *xprt;
171 169
172 dprintk("lockd: nlm_bind_host(%08x)\n", 170 dprintk("lockd: nlm_bind_host(%08x)\n",
173 (unsigned)ntohl(host->h_addr.sin_addr.s_addr)); 171 (unsigned)ntohl(host->h_addr.sin_addr.s_addr));
@@ -179,7 +177,6 @@ nlm_bind_host(struct nlm_host *host)
179 * RPC rebind is required 177 * RPC rebind is required
180 */ 178 */
181 if ((clnt = host->h_rpcclnt) != NULL) { 179 if ((clnt = host->h_rpcclnt) != NULL) {
182 xprt = clnt->cl_xprt;
183 if (time_after_eq(jiffies, host->h_nextrebind)) { 180 if (time_after_eq(jiffies, host->h_nextrebind)) {
184 rpc_force_rebind(clnt); 181 rpc_force_rebind(clnt);
185 host->h_nextrebind = jiffies + NLM_HOST_REBIND; 182 host->h_nextrebind = jiffies + NLM_HOST_REBIND;
@@ -187,31 +184,37 @@ nlm_bind_host(struct nlm_host *host)
187 host->h_nextrebind - jiffies); 184 host->h_nextrebind - jiffies);
188 } 185 }
189 } else { 186 } else {
190 xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL); 187 unsigned long increment = nlmsvc_timeout * HZ;
191 if (IS_ERR(xprt)) 188 struct rpc_timeout timeparms = {
192 goto forgetit; 189 .to_initval = increment,
193 190 .to_increment = increment,
194 xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); 191 .to_maxval = increment * 6UL,
195 xprt->resvport = 1; /* NLM requires a reserved port */ 192 .to_retries = 5U,
196 193 };
197 /* Existing NLM servers accept AUTH_UNIX only */ 194 struct rpc_create_args args = {
198 clnt = rpc_new_client(xprt, host->h_name, &nlm_program, 195 .protocol = host->h_proto,
199 host->h_version, RPC_AUTH_UNIX); 196 .address = (struct sockaddr *)&host->h_addr,
200 if (IS_ERR(clnt)) 197 .addrsize = sizeof(host->h_addr),
201 goto forgetit; 198 .timeout = &timeparms,
202 clnt->cl_autobind = 1; /* turn on pmap queries */ 199 .servername = host->h_name,
203 clnt->cl_softrtry = 1; /* All queries are soft */ 200 .program = &nlm_program,
204 201 .version = host->h_version,
205 host->h_rpcclnt = clnt; 202 .authflavor = RPC_AUTH_UNIX,
203 .flags = (RPC_CLNT_CREATE_HARDRTRY |
204 RPC_CLNT_CREATE_AUTOBIND),
205 };
206
207 clnt = rpc_create(&args);
208 if (!IS_ERR(clnt))
209 host->h_rpcclnt = clnt;
210 else {
211 printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
212 clnt = NULL;
213 }
206 } 214 }
207 215
208 mutex_unlock(&host->h_mutex); 216 mutex_unlock(&host->h_mutex);
209 return clnt; 217 return clnt;
210
211forgetit:
212 printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
213 mutex_unlock(&host->h_mutex);
214 return NULL;
215} 218}
216 219
217/* 220/*
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3fc683f46b3e..5954dcb497e4 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -109,30 +109,23 @@ nsm_unmonitor(struct nlm_host *host)
109static struct rpc_clnt * 109static struct rpc_clnt *
110nsm_create(void) 110nsm_create(void)
111{ 111{
112 struct rpc_xprt *xprt; 112 struct sockaddr_in sin = {
113 struct rpc_clnt *clnt; 113 .sin_family = AF_INET,
114 struct sockaddr_in sin; 114 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
115 115 .sin_port = 0,
116 sin.sin_family = AF_INET; 116 };
117 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 117 struct rpc_create_args args = {
118 sin.sin_port = 0; 118 .protocol = IPPROTO_UDP,
119 119 .address = (struct sockaddr *)&sin,
120 xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL); 120 .addrsize = sizeof(sin),
121 if (IS_ERR(xprt)) 121 .servername = "localhost",
122 return (struct rpc_clnt *)xprt; 122 .program = &nsm_program,
123 xprt->resvport = 1; /* NSM requires a reserved port */ 123 .version = SM_VERSION,
124 124 .authflavor = RPC_AUTH_NULL,
125 clnt = rpc_create_client(xprt, "localhost", 125 .flags = (RPC_CLNT_CREATE_ONESHOT),
126 &nsm_program, SM_VERSION, 126 };
127 RPC_AUTH_NULL); 127
128 if (IS_ERR(clnt)) 128 return rpc_create(&args);
129 goto out_err;
130 clnt->cl_softrtry = 1;
131 clnt->cl_oneshot = 1;
132 return clnt;
133
134out_err:
135 return clnt;
136} 129}
137 130
138/* 131/*
diff --git a/fs/namei.c b/fs/namei.c
index 432d6bc6fab0..6b591c01b09f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2370,7 +2370,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2370 dput(new_dentry); 2370 dput(new_dentry);
2371 } 2371 }
2372 if (!error) 2372 if (!error)
2373 d_move(old_dentry,new_dentry); 2373 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2374 d_move(old_dentry,new_dentry);
2374 return error; 2375 return error;
2375} 2376}
2376 2377
@@ -2393,8 +2394,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2393 else 2394 else
2394 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2395 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2395 if (!error) { 2396 if (!error) {
2396 /* The following d_move() should become unconditional */ 2397 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2397 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
2398 d_move(old_dentry, new_dentry); 2398 d_move(old_dentry, new_dentry);
2399 } 2399 }
2400 if (target) 2400 if (target)
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 0b572a0c1967..f4580b44eef4 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,9 +4,9 @@
4 4
5obj-$(CONFIG_NFS_FS) += nfs.o 5obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
8 proc.o read.o symlink.o unlink.o write.o \ 8 pagelist.o proc.o read.o symlink.o unlink.o \
9 namespace.o 9 write.o namespace.o
10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o 10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fe0a6b8ac149..a3ee11364db0 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -19,6 +19,7 @@
19 19
20#include "nfs4_fs.h" 20#include "nfs4_fs.h"
21#include "callback.h" 21#include "callback.h"
22#include "internal.h"
22 23
23#define NFSDBG_FACILITY NFSDBG_CALLBACK 24#define NFSDBG_FACILITY NFSDBG_CALLBACK
24 25
@@ -36,6 +37,21 @@ static struct svc_program nfs4_callback_program;
36 37
37unsigned int nfs_callback_set_tcpport; 38unsigned int nfs_callback_set_tcpport;
38unsigned short nfs_callback_tcpport; 39unsigned short nfs_callback_tcpport;
40static const int nfs_set_port_min = 0;
41static const int nfs_set_port_max = 65535;
42
43static int param_set_port(const char *val, struct kernel_param *kp)
44{
45 char *endp;
46 int num = simple_strtol(val, &endp, 0);
47 if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
48 return -EINVAL;
49 *((int *)kp->arg) = num;
50 return 0;
51}
52
53module_param_call(callback_tcpport, param_set_port, param_get_int,
54 &nfs_callback_set_tcpport, 0644);
39 55
40/* 56/*
41 * This is the callback kernel thread. 57 * This is the callback kernel thread.
@@ -134,10 +150,8 @@ out_err:
134/* 150/*
135 * Kill the server process if it is not already up. 151 * Kill the server process if it is not already up.
136 */ 152 */
137int nfs_callback_down(void) 153void nfs_callback_down(void)
138{ 154{
139 int ret = 0;
140
141 lock_kernel(); 155 lock_kernel();
142 mutex_lock(&nfs_callback_mutex); 156 mutex_lock(&nfs_callback_mutex);
143 nfs_callback_info.users--; 157 nfs_callback_info.users--;
@@ -149,20 +163,19 @@ int nfs_callback_down(void)
149 } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0); 163 } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
150 mutex_unlock(&nfs_callback_mutex); 164 mutex_unlock(&nfs_callback_mutex);
151 unlock_kernel(); 165 unlock_kernel();
152 return ret;
153} 166}
154 167
155static int nfs_callback_authenticate(struct svc_rqst *rqstp) 168static int nfs_callback_authenticate(struct svc_rqst *rqstp)
156{ 169{
157 struct in_addr *addr = &rqstp->rq_addr.sin_addr; 170 struct sockaddr_in *addr = &rqstp->rq_addr;
158 struct nfs4_client *clp; 171 struct nfs_client *clp;
159 172
160 /* Don't talk to strangers */ 173 /* Don't talk to strangers */
161 clp = nfs4_find_client(addr); 174 clp = nfs_find_client(addr, 4);
162 if (clp == NULL) 175 if (clp == NULL)
163 return SVC_DROP; 176 return SVC_DROP;
164 dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr)); 177 dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr));
165 nfs4_put_client(clp); 178 nfs_put_client(clp);
166 switch (rqstp->rq_authop->flavour) { 179 switch (rqstp->rq_authop->flavour) {
167 case RPC_AUTH_NULL: 180 case RPC_AUTH_NULL:
168 if (rqstp->rq_proc != CB_NULL) 181 if (rqstp->rq_proc != CB_NULL)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b252e7fe53a5..5676163d26e8 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -62,8 +62,13 @@ struct cb_recallargs {
62extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); 62extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
63extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); 63extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
64 64
65#ifdef CONFIG_NFS_V4
65extern int nfs_callback_up(void); 66extern int nfs_callback_up(void);
66extern int nfs_callback_down(void); 67extern void nfs_callback_down(void);
68#else
69#define nfs_callback_up() (0)
70#define nfs_callback_down() do {} while(0)
71#endif
67 72
68extern unsigned int nfs_callback_set_tcpport; 73extern unsigned int nfs_callback_set_tcpport;
69extern unsigned short nfs_callback_tcpport; 74extern unsigned short nfs_callback_tcpport;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7719483ecdfc..97cf8f71451f 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -10,19 +10,20 @@
10#include "nfs4_fs.h" 10#include "nfs4_fs.h"
11#include "callback.h" 11#include "callback.h"
12#include "delegation.h" 12#include "delegation.h"
13#include "internal.h"
13 14
14#define NFSDBG_FACILITY NFSDBG_CALLBACK 15#define NFSDBG_FACILITY NFSDBG_CALLBACK
15 16
16unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) 17unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
17{ 18{
18 struct nfs4_client *clp; 19 struct nfs_client *clp;
19 struct nfs_delegation *delegation; 20 struct nfs_delegation *delegation;
20 struct nfs_inode *nfsi; 21 struct nfs_inode *nfsi;
21 struct inode *inode; 22 struct inode *inode;
22 23
23 res->bitmap[0] = res->bitmap[1] = 0; 24 res->bitmap[0] = res->bitmap[1] = 0;
24 res->status = htonl(NFS4ERR_BADHANDLE); 25 res->status = htonl(NFS4ERR_BADHANDLE);
25 clp = nfs4_find_client(&args->addr->sin_addr); 26 clp = nfs_find_client(args->addr, 4);
26 if (clp == NULL) 27 if (clp == NULL)
27 goto out; 28 goto out;
28 inode = nfs_delegation_find_inode(clp, &args->fh); 29 inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -48,7 +49,7 @@ out_iput:
48 up_read(&nfsi->rwsem); 49 up_read(&nfsi->rwsem);
49 iput(inode); 50 iput(inode);
50out_putclient: 51out_putclient:
51 nfs4_put_client(clp); 52 nfs_put_client(clp);
52out: 53out:
53 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); 54 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
54 return res->status; 55 return res->status;
@@ -56,12 +57,12 @@ out:
56 57
57unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) 58unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
58{ 59{
59 struct nfs4_client *clp; 60 struct nfs_client *clp;
60 struct inode *inode; 61 struct inode *inode;
61 unsigned res; 62 unsigned res;
62 63
63 res = htonl(NFS4ERR_BADHANDLE); 64 res = htonl(NFS4ERR_BADHANDLE);
64 clp = nfs4_find_client(&args->addr->sin_addr); 65 clp = nfs_find_client(args->addr, 4);
65 if (clp == NULL) 66 if (clp == NULL)
66 goto out; 67 goto out;
67 inode = nfs_delegation_find_inode(clp, &args->fh); 68 inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
80 } 81 }
81 iput(inode); 82 iput(inode);
82out_putclient: 83out_putclient:
83 nfs4_put_client(clp); 84 nfs_put_client(clp);
84out: 85out:
85 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); 86 dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
86 return res; 87 return res;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
new file mode 100644
index 000000000000..ec1938d4b814
--- /dev/null
+++ b/fs/nfs/client.c
@@ -0,0 +1,1448 @@
1/* client.c: NFS client sharing and management code
2 *
3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12
13#include <linux/config.h>
14#include <linux/module.h>
15#include <linux/init.h>
16
17#include <linux/time.h>
18#include <linux/kernel.h>
19#include <linux/mm.h>
20#include <linux/string.h>
21#include <linux/stat.h>
22#include <linux/errno.h>
23#include <linux/unistd.h>
24#include <linux/sunrpc/clnt.h>
25#include <linux/sunrpc/stats.h>
26#include <linux/sunrpc/metrics.h>
27#include <linux/nfs_fs.h>
28#include <linux/nfs_mount.h>
29#include <linux/nfs4_mount.h>
30#include <linux/lockd/bind.h>
31#include <linux/smp_lock.h>
32#include <linux/seq_file.h>
33#include <linux/mount.h>
34#include <linux/nfs_idmap.h>
35#include <linux/vfs.h>
36#include <linux/inet.h>
37#include <linux/nfs_xdr.h>
38
39#include <asm/system.h>
40
41#include "nfs4_fs.h"
42#include "callback.h"
43#include "delegation.h"
44#include "iostat.h"
45#include "internal.h"
46
47#define NFSDBG_FACILITY NFSDBG_CLIENT
48
49static DEFINE_SPINLOCK(nfs_client_lock);
50static LIST_HEAD(nfs_client_list);
51static LIST_HEAD(nfs_volume_list);
52static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
53
54/*
55 * RPC cruft for NFS
56 */
57static struct rpc_version *nfs_version[5] = {
58 [2] = &nfs_version2,
59#ifdef CONFIG_NFS_V3
60 [3] = &nfs_version3,
61#endif
62#ifdef CONFIG_NFS_V4
63 [4] = &nfs_version4,
64#endif
65};
66
67struct rpc_program nfs_program = {
68 .name = "nfs",
69 .number = NFS_PROGRAM,
70 .nrvers = ARRAY_SIZE(nfs_version),
71 .version = nfs_version,
72 .stats = &nfs_rpcstat,
73 .pipe_dir_name = "/nfs",
74};
75
76struct rpc_stat nfs_rpcstat = {
77 .program = &nfs_program
78};
79
80
81#ifdef CONFIG_NFS_V3_ACL
82static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
83static struct rpc_version * nfsacl_version[] = {
84 [3] = &nfsacl_version3,
85};
86
87struct rpc_program nfsacl_program = {
88 .name = "nfsacl",
89 .number = NFS_ACL_PROGRAM,
90 .nrvers = ARRAY_SIZE(nfsacl_version),
91 .version = nfsacl_version,
92 .stats = &nfsacl_rpcstat,
93};
94#endif /* CONFIG_NFS_V3_ACL */
95
96/*
97 * Allocate a shared client record
98 *
99 * Since these are allocated/deallocated very rarely, we don't
100 * bother putting them in a slab cache...
101 */
102static struct nfs_client *nfs_alloc_client(const char *hostname,
103 const struct sockaddr_in *addr,
104 int nfsversion)
105{
106 struct nfs_client *clp;
107 int error;
108
109 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
110 goto error_0;
111
112 error = rpciod_up();
113 if (error < 0) {
114 dprintk("%s: couldn't start rpciod! Error = %d\n",
115 __FUNCTION__, error);
116 goto error_1;
117 }
118 __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
119
120 if (nfsversion == 4) {
121 if (nfs_callback_up() < 0)
122 goto error_2;
123 __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
124 }
125
126 atomic_set(&clp->cl_count, 1);
127 clp->cl_cons_state = NFS_CS_INITING;
128
129 clp->cl_nfsversion = nfsversion;
130 memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
131
132 if (hostname) {
133 clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
134 if (!clp->cl_hostname)
135 goto error_3;
136 }
137
138 INIT_LIST_HEAD(&clp->cl_superblocks);
139 clp->cl_rpcclient = ERR_PTR(-EINVAL);
140
141#ifdef CONFIG_NFS_V4
142 init_rwsem(&clp->cl_sem);
143 INIT_LIST_HEAD(&clp->cl_delegations);
144 INIT_LIST_HEAD(&clp->cl_state_owners);
145 INIT_LIST_HEAD(&clp->cl_unused);
146 spin_lock_init(&clp->cl_lock);
147 INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
148 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
149 clp->cl_boot_time = CURRENT_TIME;
150 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
151#endif
152
153 return clp;
154
155error_3:
156 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
157 nfs_callback_down();
158error_2:
159 rpciod_down();
160 __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
161error_1:
162 kfree(clp);
163error_0:
164 return NULL;
165}
166
167static void nfs4_shutdown_client(struct nfs_client *clp)
168{
169#ifdef CONFIG_NFS_V4
170 if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
171 nfs4_kill_renewd(clp);
172 while (!list_empty(&clp->cl_unused)) {
173 struct nfs4_state_owner *sp;
174
175 sp = list_entry(clp->cl_unused.next,
176 struct nfs4_state_owner,
177 so_list);
178 list_del(&sp->so_list);
179 kfree(sp);
180 }
181 BUG_ON(!list_empty(&clp->cl_state_owners));
182 if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
183 nfs_idmap_delete(clp);
184#endif
185}
186
187/*
188 * Destroy a shared client record
189 */
190static void nfs_free_client(struct nfs_client *clp)
191{
192 dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
193
194 nfs4_shutdown_client(clp);
195
196 /* -EIO all pending I/O */
197 if (!IS_ERR(clp->cl_rpcclient))
198 rpc_shutdown_client(clp->cl_rpcclient);
199
200 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
201 nfs_callback_down();
202
203 if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
204 rpciod_down();
205
206 kfree(clp->cl_hostname);
207 kfree(clp);
208
209 dprintk("<-- nfs_free_client()\n");
210}
211
212/*
213 * Release a reference to a shared client record
214 */
215void nfs_put_client(struct nfs_client *clp)
216{
217 if (!clp)
218 return;
219
220 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
221
222 if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
223 list_del(&clp->cl_share_link);
224 spin_unlock(&nfs_client_lock);
225
226 BUG_ON(!list_empty(&clp->cl_superblocks));
227
228 nfs_free_client(clp);
229 }
230}
231
232/*
233 * Find a client by address
234 * - caller must hold nfs_client_lock
235 */
236static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
237{
238 struct nfs_client *clp;
239
240 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
241 /* Different NFS versions cannot share the same nfs_client */
242 if (clp->cl_nfsversion != nfsversion)
243 continue;
244
245 if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
246 sizeof(clp->cl_addr.sin_addr)) != 0)
247 continue;
248
249 if (clp->cl_addr.sin_port == addr->sin_port)
250 goto found;
251 }
252
253 return NULL;
254
255found:
256 atomic_inc(&clp->cl_count);
257 return clp;
258}
259
260/*
261 * Find a client by IP address and protocol version
262 * - returns NULL if no such client
263 */
264struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
265{
266 struct nfs_client *clp;
267
268 spin_lock(&nfs_client_lock);
269 clp = __nfs_find_client(addr, nfsversion);
270 spin_unlock(&nfs_client_lock);
271
272 BUG_ON(clp && clp->cl_cons_state == 0);
273
274 return clp;
275}
276
277/*
278 * Look up a client by IP address and protocol version
279 * - creates a new record if one doesn't yet exist
280 */
281static struct nfs_client *nfs_get_client(const char *hostname,
282 const struct sockaddr_in *addr,
283 int nfsversion)
284{
285 struct nfs_client *clp, *new = NULL;
286 int error;
287
288 dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
289 hostname ?: "", NIPQUAD(addr->sin_addr),
290 addr->sin_port, nfsversion);
291
292 /* see if the client already exists */
293 do {
294 spin_lock(&nfs_client_lock);
295
296 clp = __nfs_find_client(addr, nfsversion);
297 if (clp)
298 goto found_client;
299 if (new)
300 goto install_client;
301
302 spin_unlock(&nfs_client_lock);
303
304 new = nfs_alloc_client(hostname, addr, nfsversion);
305 } while (new);
306
307 return ERR_PTR(-ENOMEM);
308
309 /* install a new client and return with it unready */
310install_client:
311 clp = new;
312 list_add(&clp->cl_share_link, &nfs_client_list);
313 spin_unlock(&nfs_client_lock);
314 dprintk("--> nfs_get_client() = %p [new]\n", clp);
315 return clp;
316
317 /* found an existing client
318 * - make sure it's ready before returning
319 */
320found_client:
321 spin_unlock(&nfs_client_lock);
322
323 if (new)
324 nfs_free_client(new);
325
326 if (clp->cl_cons_state == NFS_CS_INITING) {
327 DECLARE_WAITQUEUE(myself, current);
328
329 add_wait_queue(&nfs_client_active_wq, &myself);
330
331 for (;;) {
332 set_current_state(TASK_INTERRUPTIBLE);
333 if (signal_pending(current) ||
334 clp->cl_cons_state > NFS_CS_READY)
335 break;
336 schedule();
337 }
338
339 remove_wait_queue(&nfs_client_active_wq, &myself);
340
341 if (signal_pending(current)) {
342 nfs_put_client(clp);
343 return ERR_PTR(-ERESTARTSYS);
344 }
345 }
346
347 if (clp->cl_cons_state < NFS_CS_READY) {
348 error = clp->cl_cons_state;
349 nfs_put_client(clp);
350 return ERR_PTR(error);
351 }
352
353 BUG_ON(clp->cl_cons_state != NFS_CS_READY);
354
355 dprintk("--> nfs_get_client() = %p [share]\n", clp);
356 return clp;
357}
358
359/*
360 * Mark a server as ready or failed
361 */
362static void nfs_mark_client_ready(struct nfs_client *clp, int state)
363{
364 clp->cl_cons_state = state;
365 wake_up_all(&nfs_client_active_wq);
366}
367
368/*
369 * Initialise the timeout values for a connection
370 */
371static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
372 unsigned int timeo, unsigned int retrans)
373{
374 to->to_initval = timeo * HZ / 10;
375 to->to_retries = retrans;
376 if (!to->to_retries)
377 to->to_retries = 2;
378
379 switch (proto) {
380 case IPPROTO_TCP:
381 if (!to->to_initval)
382 to->to_initval = 60 * HZ;
383 if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
384 to->to_initval = NFS_MAX_TCP_TIMEOUT;
385 to->to_increment = to->to_initval;
386 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
387 to->to_exponential = 0;
388 break;
389 case IPPROTO_UDP:
390 default:
391 if (!to->to_initval)
392 to->to_initval = 11 * HZ / 10;
393 if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
394 to->to_initval = NFS_MAX_UDP_TIMEOUT;
395 to->to_maxval = NFS_MAX_UDP_TIMEOUT;
396 to->to_exponential = 1;
397 break;
398 }
399}
400
401/*
402 * Create an RPC client handle
403 */
404static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
405 unsigned int timeo,
406 unsigned int retrans,
407 rpc_authflavor_t flavor)
408{
409 struct rpc_timeout timeparms;
410 struct rpc_clnt *clnt = NULL;
411 struct rpc_create_args args = {
412 .protocol = proto,
413 .address = (struct sockaddr *)&clp->cl_addr,
414 .addrsize = sizeof(clp->cl_addr),
415 .timeout = &timeparms,
416 .servername = clp->cl_hostname,
417 .program = &nfs_program,
418 .version = clp->rpc_ops->version,
419 .authflavor = flavor,
420 };
421
422 if (!IS_ERR(clp->cl_rpcclient))
423 return 0;
424
425 nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
426 clp->retrans_timeo = timeparms.to_initval;
427 clp->retrans_count = timeparms.to_retries;
428
429 clnt = rpc_create(&args);
430 if (IS_ERR(clnt)) {
431 dprintk("%s: cannot create RPC client. Error = %ld\n",
432 __FUNCTION__, PTR_ERR(clnt));
433 return PTR_ERR(clnt);
434 }
435
436 clp->cl_rpcclient = clnt;
437 return 0;
438}
439
440/*
441 * Version 2 or 3 client destruction
442 */
443static void nfs_destroy_server(struct nfs_server *server)
444{
445 if (!IS_ERR(server->client_acl))
446 rpc_shutdown_client(server->client_acl);
447
448 if (!(server->flags & NFS_MOUNT_NONLM))
449 lockd_down(); /* release rpc.lockd */
450}
451
452/*
453 * Version 2 or 3 lockd setup
454 */
455static int nfs_start_lockd(struct nfs_server *server)
456{
457 int error = 0;
458
459 if (server->nfs_client->cl_nfsversion > 3)
460 goto out;
461 if (server->flags & NFS_MOUNT_NONLM)
462 goto out;
463 error = lockd_up();
464 if (error < 0)
465 server->flags |= NFS_MOUNT_NONLM;
466 else
467 server->destroy = nfs_destroy_server;
468out:
469 return error;
470}
471
472/*
473 * Initialise an NFSv3 ACL client connection
474 */
475#ifdef CONFIG_NFS_V3_ACL
476static void nfs_init_server_aclclient(struct nfs_server *server)
477{
478 if (server->nfs_client->cl_nfsversion != 3)
479 goto out_noacl;
480 if (server->flags & NFS_MOUNT_NOACL)
481 goto out_noacl;
482
483 server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
484 if (IS_ERR(server->client_acl))
485 goto out_noacl;
486
487 /* No errors! Assume that Sun nfsacls are supported */
488 server->caps |= NFS_CAP_ACLS;
489 return;
490
491out_noacl:
492 server->caps &= ~NFS_CAP_ACLS;
493}
494#else
495static inline void nfs_init_server_aclclient(struct nfs_server *server)
496{
497 server->flags &= ~NFS_MOUNT_NOACL;
498 server->caps &= ~NFS_CAP_ACLS;
499}
500#endif
501
502/*
503 * Create a general RPC client
504 */
505static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
506{
507 struct nfs_client *clp = server->nfs_client;
508
509 server->client = rpc_clone_client(clp->cl_rpcclient);
510 if (IS_ERR(server->client)) {
511 dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
512 return PTR_ERR(server->client);
513 }
514
515 if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
516 struct rpc_auth *auth;
517
518 auth = rpcauth_create(pseudoflavour, server->client);
519 if (IS_ERR(auth)) {
520 dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
521 return PTR_ERR(auth);
522 }
523 }
524 server->client->cl_softrtry = 0;
525 if (server->flags & NFS_MOUNT_SOFT)
526 server->client->cl_softrtry = 1;
527
528 server->client->cl_intr = 0;
529 if (server->flags & NFS4_MOUNT_INTR)
530 server->client->cl_intr = 1;
531
532 return 0;
533}
534
535/*
536 * Initialise an NFS2 or NFS3 client
537 */
538static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
539{
540 int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
541 int error;
542
543 if (clp->cl_cons_state == NFS_CS_READY) {
544 /* the client is already initialised */
545 dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
546 return 0;
547 }
548
549 /* Check NFS protocol revision and initialize RPC op vector */
550 clp->rpc_ops = &nfs_v2_clientops;
551#ifdef CONFIG_NFS_V3
552 if (clp->cl_nfsversion == 3)
553 clp->rpc_ops = &nfs_v3_clientops;
554#endif
555 /*
556 * Create a client RPC handle for doing FSSTAT with UNIX auth only
557 * - RFC 2623, sec 2.3.2
558 */
559 error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
560 RPC_AUTH_UNIX);
561 if (error < 0)
562 goto error;
563 nfs_mark_client_ready(clp, NFS_CS_READY);
564 return 0;
565
566error:
567 nfs_mark_client_ready(clp, error);
568 dprintk("<-- nfs_init_client() = xerror %d\n", error);
569 return error;
570}
571
572/*
573 * Create a version 2 or 3 client
574 */
575static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
576{
577 struct nfs_client *clp;
578 int error, nfsvers = 2;
579
580 dprintk("--> nfs_init_server()\n");
581
582#ifdef CONFIG_NFS_V3
583 if (data->flags & NFS_MOUNT_VER3)
584 nfsvers = 3;
585#endif
586
587 /* Allocate or find a client reference we can use */
588 clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
589 if (IS_ERR(clp)) {
590 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
591 return PTR_ERR(clp);
592 }
593
594 error = nfs_init_client(clp, data);
595 if (error < 0)
596 goto error;
597
598 server->nfs_client = clp;
599
600 /* Initialise the client representation from the mount data */
601 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
602
603 if (data->rsize)
604 server->rsize = nfs_block_size(data->rsize, NULL);
605 if (data->wsize)
606 server->wsize = nfs_block_size(data->wsize, NULL);
607
608 server->acregmin = data->acregmin * HZ;
609 server->acregmax = data->acregmax * HZ;
610 server->acdirmin = data->acdirmin * HZ;
611 server->acdirmax = data->acdirmax * HZ;
612
613 /* Start lockd here, before we might error out */
614 error = nfs_start_lockd(server);
615 if (error < 0)
616 goto error;
617
618 error = nfs_init_server_rpcclient(server, data->pseudoflavor);
619 if (error < 0)
620 goto error;
621
622 server->namelen = data->namlen;
623 /* Create a client RPC handle for the NFSv3 ACL management interface */
624 nfs_init_server_aclclient(server);
625 if (clp->cl_nfsversion == 3) {
626 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
627 server->namelen = NFS3_MAXNAMLEN;
628 server->caps |= NFS_CAP_READDIRPLUS;
629 } else {
630 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
631 server->namelen = NFS2_MAXNAMLEN;
632 }
633
634 dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
635 return 0;
636
637error:
638 server->nfs_client = NULL;
639 nfs_put_client(clp);
640 dprintk("<-- nfs_init_server() = xerror %d\n", error);
641 return error;
642}
643
644/*
645 * Load up the server record from information gained in an fsinfo record
646 */
647static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
648{
649 unsigned long max_rpc_payload;
650
651 /* Work out a lot of parameters */
652 if (server->rsize == 0)
653 server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
654 if (server->wsize == 0)
655 server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
656
657 if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
658 server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
659 if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
660 server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
661
662 max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
663 if (server->rsize > max_rpc_payload)
664 server->rsize = max_rpc_payload;
665 if (server->rsize > NFS_MAX_FILE_IO_SIZE)
666 server->rsize = NFS_MAX_FILE_IO_SIZE;
667 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
668 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
669
670 if (server->wsize > max_rpc_payload)
671 server->wsize = max_rpc_payload;
672 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
673 server->wsize = NFS_MAX_FILE_IO_SIZE;
674 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
675 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
676
677 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
678 if (server->dtsize > PAGE_CACHE_SIZE)
679 server->dtsize = PAGE_CACHE_SIZE;
680 if (server->dtsize > server->rsize)
681 server->dtsize = server->rsize;
682
683 if (server->flags & NFS_MOUNT_NOAC) {
684 server->acregmin = server->acregmax = 0;
685 server->acdirmin = server->acdirmax = 0;
686 }
687
688 server->maxfilesize = fsinfo->maxfilesize;
689
690 /* We're airborne Set socket buffersize */
691 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
692}
693
694/*
695 * Probe filesystem information, including the FSID on v2/v3
696 */
697static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
698{
699 struct nfs_fsinfo fsinfo;
700 struct nfs_client *clp = server->nfs_client;
701 int error;
702
703 dprintk("--> nfs_probe_fsinfo()\n");
704
705 if (clp->rpc_ops->set_capabilities != NULL) {
706 error = clp->rpc_ops->set_capabilities(server, mntfh);
707 if (error < 0)
708 goto out_error;
709 }
710
711 fsinfo.fattr = fattr;
712 nfs_fattr_init(fattr);
713 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
714 if (error < 0)
715 goto out_error;
716
717 nfs_server_set_fsinfo(server, &fsinfo);
718
719 /* Get some general file system info */
720 if (server->namelen == 0) {
721 struct nfs_pathconf pathinfo;
722
723 pathinfo.fattr = fattr;
724 nfs_fattr_init(fattr);
725
726 if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0)
727 server->namelen = pathinfo.max_namelen;
728 }
729
730 dprintk("<-- nfs_probe_fsinfo() = 0\n");
731 return 0;
732
733out_error:
734 dprintk("nfs_probe_fsinfo: error = %d\n", -error);
735 return error;
736}
737
738/*
739 * Copy useful information when duplicating a server record
740 */
741static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
742{
743 target->flags = source->flags;
744 target->acregmin = source->acregmin;
745 target->acregmax = source->acregmax;
746 target->acdirmin = source->acdirmin;
747 target->acdirmax = source->acdirmax;
748 target->caps = source->caps;
749}
750
751/*
752 * Allocate and initialise a server record
753 */
754static struct nfs_server *nfs_alloc_server(void)
755{
756 struct nfs_server *server;
757
758 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
759 if (!server)
760 return NULL;
761
762 server->client = server->client_acl = ERR_PTR(-EINVAL);
763
764 /* Zero out the NFS state stuff */
765 INIT_LIST_HEAD(&server->client_link);
766 INIT_LIST_HEAD(&server->master_link);
767
768 server->io_stats = nfs_alloc_iostats();
769 if (!server->io_stats) {
770 kfree(server);
771 return NULL;
772 }
773
774 return server;
775}
776
777/*
778 * Free up a server record
779 */
780void nfs_free_server(struct nfs_server *server)
781{
782 dprintk("--> nfs_free_server()\n");
783
784 spin_lock(&nfs_client_lock);
785 list_del(&server->client_link);
786 list_del(&server->master_link);
787 spin_unlock(&nfs_client_lock);
788
789 if (server->destroy != NULL)
790 server->destroy(server);
791 if (!IS_ERR(server->client))
792 rpc_shutdown_client(server->client);
793
794 nfs_put_client(server->nfs_client);
795
796 nfs_free_iostats(server->io_stats);
797 kfree(server);
798 nfs_release_automount_timer();
799 dprintk("<-- nfs_free_server()\n");
800}
801
802/*
803 * Create a version 2 or 3 volume record
804 * - keyed on server and FSID
805 */
806struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
807 struct nfs_fh *mntfh)
808{
809 struct nfs_server *server;
810 struct nfs_fattr fattr;
811 int error;
812
813 server = nfs_alloc_server();
814 if (!server)
815 return ERR_PTR(-ENOMEM);
816
817 /* Get a client representation */
818 error = nfs_init_server(server, data);
819 if (error < 0)
820 goto error;
821
822 BUG_ON(!server->nfs_client);
823 BUG_ON(!server->nfs_client->rpc_ops);
824 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
825
826 /* Probe the root fh to retrieve its FSID */
827 error = nfs_probe_fsinfo(server, mntfh, &fattr);
828 if (error < 0)
829 goto error;
830 if (!(fattr.valid & NFS_ATTR_FATTR)) {
831 error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
832 if (error < 0) {
833 dprintk("nfs_create_server: getattr error = %d\n", -error);
834 goto error;
835 }
836 }
837 memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
838
839 dprintk("Server FSID: %llx:%llx\n",
840 (unsigned long long) server->fsid.major,
841 (unsigned long long) server->fsid.minor);
842
843 BUG_ON(!server->nfs_client);
844 BUG_ON(!server->nfs_client->rpc_ops);
845 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
846
847 spin_lock(&nfs_client_lock);
848 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
849 list_add_tail(&server->master_link, &nfs_volume_list);
850 spin_unlock(&nfs_client_lock);
851
852 server->mount_time = jiffies;
853 return server;
854
855error:
856 nfs_free_server(server);
857 return ERR_PTR(error);
858}
859
860#ifdef CONFIG_NFS_V4
861/*
862 * Initialise an NFS4 client record
863 */
864static int nfs4_init_client(struct nfs_client *clp,
865 int proto, int timeo, int retrans,
866 rpc_authflavor_t authflavour)
867{
868 int error;
869
870 if (clp->cl_cons_state == NFS_CS_READY) {
871 /* the client is initialised already */
872 dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
873 return 0;
874 }
875
876 /* Check NFS protocol revision and initialize RPC op vector */
877 clp->rpc_ops = &nfs_v4_clientops;
878
879 error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour);
880 if (error < 0)
881 goto error;
882
883 error = nfs_idmap_new(clp);
884 if (error < 0) {
885 dprintk("%s: failed to create idmapper. Error = %d\n",
886 __FUNCTION__, error);
887 goto error;
888 }
889 __set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
890
891 nfs_mark_client_ready(clp, NFS_CS_READY);
892 return 0;
893
894error:
895 nfs_mark_client_ready(clp, error);
896 dprintk("<-- nfs4_init_client() = xerror %d\n", error);
897 return error;
898}
899
900/*
901 * Set up an NFS4 client
902 */
903static int nfs4_set_client(struct nfs_server *server,
904 const char *hostname, const struct sockaddr_in *addr,
905 rpc_authflavor_t authflavour,
906 int proto, int timeo, int retrans)
907{
908 struct nfs_client *clp;
909 int error;
910
911 dprintk("--> nfs4_set_client()\n");
912
913 /* Allocate or find a client reference we can use */
914 clp = nfs_get_client(hostname, addr, 4);
915 if (IS_ERR(clp)) {
916 error = PTR_ERR(clp);
917 goto error;
918 }
919 error = nfs4_init_client(clp, proto, timeo, retrans, authflavour);
920 if (error < 0)
921 goto error_put;
922
923 server->nfs_client = clp;
924 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
925 return 0;
926
927error_put:
928 nfs_put_client(clp);
929error:
930 dprintk("<-- nfs4_set_client() = xerror %d\n", error);
931 return error;
932}
933
934/*
935 * Create a version 4 volume record
936 */
937static int nfs4_init_server(struct nfs_server *server,
938 const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
939{
940 int error;
941
942 dprintk("--> nfs4_init_server()\n");
943
944 /* Initialise the client representation from the mount data */
945 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
946 server->caps |= NFS_CAP_ATOMIC_OPEN;
947
948 if (data->rsize)
949 server->rsize = nfs_block_size(data->rsize, NULL);
950 if (data->wsize)
951 server->wsize = nfs_block_size(data->wsize, NULL);
952
953 server->acregmin = data->acregmin * HZ;
954 server->acregmax = data->acregmax * HZ;
955 server->acdirmin = data->acdirmin * HZ;
956 server->acdirmax = data->acdirmax * HZ;
957
958 error = nfs_init_server_rpcclient(server, authflavour);
959
960 /* Done */
961 dprintk("<-- nfs4_init_server() = %d\n", error);
962 return error;
963}
964
965/*
966 * Create a version 4 volume record
967 * - keyed on server and FSID
968 */
969struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
970 const char *hostname,
971 const struct sockaddr_in *addr,
972 const char *mntpath,
973 const char *ip_addr,
974 rpc_authflavor_t authflavour,
975 struct nfs_fh *mntfh)
976{
977 struct nfs_fattr fattr;
978 struct nfs_server *server;
979 int error;
980
981 dprintk("--> nfs4_create_server()\n");
982
983 server = nfs_alloc_server();
984 if (!server)
985 return ERR_PTR(-ENOMEM);
986
987 /* Get a client record */
988 error = nfs4_set_client(server, hostname, addr, authflavour,
989 data->proto, data->timeo, data->retrans);
990 if (error < 0)
991 goto error;
992
993 /* set up the general RPC client */
994 error = nfs4_init_server(server, data, authflavour);
995 if (error < 0)
996 goto error;
997
998 BUG_ON(!server->nfs_client);
999 BUG_ON(!server->nfs_client->rpc_ops);
1000 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1001
1002 /* Probe the root fh to retrieve its FSID */
1003 error = nfs4_path_walk(server, mntfh, mntpath);
1004 if (error < 0)
1005 goto error;
1006
1007 dprintk("Server FSID: %llx:%llx\n",
1008 (unsigned long long) server->fsid.major,
1009 (unsigned long long) server->fsid.minor);
1010 dprintk("Mount FH: %d\n", mntfh->size);
1011
1012 error = nfs_probe_fsinfo(server, mntfh, &fattr);
1013 if (error < 0)
1014 goto error;
1015
1016 BUG_ON(!server->nfs_client);
1017 BUG_ON(!server->nfs_client->rpc_ops);
1018 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1019
1020 spin_lock(&nfs_client_lock);
1021 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1022 list_add_tail(&server->master_link, &nfs_volume_list);
1023 spin_unlock(&nfs_client_lock);
1024
1025 server->mount_time = jiffies;
1026 dprintk("<-- nfs4_create_server() = %p\n", server);
1027 return server;
1028
1029error:
1030 nfs_free_server(server);
1031 dprintk("<-- nfs4_create_server() = error %d\n", error);
1032 return ERR_PTR(error);
1033}
1034
1035/*
1036 * Create an NFS4 referral server record
1037 */
1038struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1039 struct nfs_fh *fh)
1040{
1041 struct nfs_client *parent_client;
1042 struct nfs_server *server, *parent_server;
1043 struct nfs_fattr fattr;
1044 int error;
1045
1046 dprintk("--> nfs4_create_referral_server()\n");
1047
1048 server = nfs_alloc_server();
1049 if (!server)
1050 return ERR_PTR(-ENOMEM);
1051
1052 parent_server = NFS_SB(data->sb);
1053 parent_client = parent_server->nfs_client;
1054
1055 /* Get a client representation.
1056 * Note: NFSv4 always uses TCP, */
1057 error = nfs4_set_client(server, data->hostname, data->addr,
1058 data->authflavor,
1059 parent_server->client->cl_xprt->prot,
1060 parent_client->retrans_timeo,
1061 parent_client->retrans_count);
1062 if (error < 0)
1063 goto error;
1064
1065 /* Initialise the client representation from the parent server */
1066 nfs_server_copy_userdata(server, parent_server);
1067 server->caps |= NFS_CAP_ATOMIC_OPEN;
1068
1069 error = nfs_init_server_rpcclient(server, data->authflavor);
1070 if (error < 0)
1071 goto error;
1072
1073 BUG_ON(!server->nfs_client);
1074 BUG_ON(!server->nfs_client->rpc_ops);
1075 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1076
1077 /* probe the filesystem info for this server filesystem */
1078 error = nfs_probe_fsinfo(server, fh, &fattr);
1079 if (error < 0)
1080 goto error;
1081
1082 dprintk("Referral FSID: %llx:%llx\n",
1083 (unsigned long long) server->fsid.major,
1084 (unsigned long long) server->fsid.minor);
1085
1086 spin_lock(&nfs_client_lock);
1087 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1088 list_add_tail(&server->master_link, &nfs_volume_list);
1089 spin_unlock(&nfs_client_lock);
1090
1091 server->mount_time = jiffies;
1092
1093 dprintk("<-- nfs_create_referral_server() = %p\n", server);
1094 return server;
1095
1096error:
1097 nfs_free_server(server);
1098 dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
1099 return ERR_PTR(error);
1100}
1101
1102#endif /* CONFIG_NFS_V4 */
1103
1104/*
1105 * Clone an NFS2, NFS3 or NFS4 server record
1106 */
1107struct nfs_server *nfs_clone_server(struct nfs_server *source,
1108 struct nfs_fh *fh,
1109 struct nfs_fattr *fattr)
1110{
1111 struct nfs_server *server;
1112 struct nfs_fattr fattr_fsinfo;
1113 int error;
1114
1115 dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
1116 (unsigned long long) fattr->fsid.major,
1117 (unsigned long long) fattr->fsid.minor);
1118
1119 server = nfs_alloc_server();
1120 if (!server)
1121 return ERR_PTR(-ENOMEM);
1122
1123 /* Copy data from the source */
1124 server->nfs_client = source->nfs_client;
1125 atomic_inc(&server->nfs_client->cl_count);
1126 nfs_server_copy_userdata(server, source);
1127
1128 server->fsid = fattr->fsid;
1129
1130 error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
1131 if (error < 0)
1132 goto out_free_server;
1133 if (!IS_ERR(source->client_acl))
1134 nfs_init_server_aclclient(server);
1135
1136 /* probe the filesystem info for this server filesystem */
1137 error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
1138 if (error < 0)
1139 goto out_free_server;
1140
1141 dprintk("Cloned FSID: %llx:%llx\n",
1142 (unsigned long long) server->fsid.major,
1143 (unsigned long long) server->fsid.minor);
1144
1145 error = nfs_start_lockd(server);
1146 if (error < 0)
1147 goto out_free_server;
1148
1149 spin_lock(&nfs_client_lock);
1150 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1151 list_add_tail(&server->master_link, &nfs_volume_list);
1152 spin_unlock(&nfs_client_lock);
1153
1154 server->mount_time = jiffies;
1155
1156 dprintk("<-- nfs_clone_server() = %p\n", server);
1157 return server;
1158
1159out_free_server:
1160 nfs_free_server(server);
1161 dprintk("<-- nfs_clone_server() = error %d\n", error);
1162 return ERR_PTR(error);
1163}
1164
1165#ifdef CONFIG_PROC_FS
1166static struct proc_dir_entry *proc_fs_nfs;
1167
1168static int nfs_server_list_open(struct inode *inode, struct file *file);
1169static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
1170static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
1171static void nfs_server_list_stop(struct seq_file *p, void *v);
1172static int nfs_server_list_show(struct seq_file *m, void *v);
1173
1174static struct seq_operations nfs_server_list_ops = {
1175 .start = nfs_server_list_start,
1176 .next = nfs_server_list_next,
1177 .stop = nfs_server_list_stop,
1178 .show = nfs_server_list_show,
1179};
1180
1181static struct file_operations nfs_server_list_fops = {
1182 .open = nfs_server_list_open,
1183 .read = seq_read,
1184 .llseek = seq_lseek,
1185 .release = seq_release,
1186};
1187
1188static int nfs_volume_list_open(struct inode *inode, struct file *file);
1189static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
1190static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
1191static void nfs_volume_list_stop(struct seq_file *p, void *v);
1192static int nfs_volume_list_show(struct seq_file *m, void *v);
1193
1194static struct seq_operations nfs_volume_list_ops = {
1195 .start = nfs_volume_list_start,
1196 .next = nfs_volume_list_next,
1197 .stop = nfs_volume_list_stop,
1198 .show = nfs_volume_list_show,
1199};
1200
1201static struct file_operations nfs_volume_list_fops = {
1202 .open = nfs_volume_list_open,
1203 .read = seq_read,
1204 .llseek = seq_lseek,
1205 .release = seq_release,
1206};
1207
1208/*
1209 * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
1210 * we're dealing
1211 */
1212static int nfs_server_list_open(struct inode *inode, struct file *file)
1213{
1214 struct seq_file *m;
1215 int ret;
1216
1217 ret = seq_open(file, &nfs_server_list_ops);
1218 if (ret < 0)
1219 return ret;
1220
1221 m = file->private_data;
1222 m->private = PDE(inode)->data;
1223
1224 return 0;
1225}
1226
1227/*
1228 * set up the iterator to start reading from the server list and return the first item
1229 */
1230static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1231{
1232 struct list_head *_p;
1233 loff_t pos = *_pos;
1234
1235 /* lock the list against modification */
1236 spin_lock(&nfs_client_lock);
1237
1238 /* allow for the header line */
1239 if (!pos)
1240 return SEQ_START_TOKEN;
1241 pos--;
1242
1243 /* find the n'th element in the list */
1244 list_for_each(_p, &nfs_client_list)
1245 if (!pos--)
1246 break;
1247
1248 return _p != &nfs_client_list ? _p : NULL;
1249}
1250
1251/*
1252 * move to next server
1253 */
1254static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1255{
1256 struct list_head *_p;
1257
1258 (*pos)++;
1259
1260 _p = v;
1261 _p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next;
1262
1263 return _p != &nfs_client_list ? _p : NULL;
1264}
1265
1266/*
1267 * clean up after reading from the transports list
1268 */
1269static void nfs_server_list_stop(struct seq_file *p, void *v)
1270{
1271 spin_unlock(&nfs_client_lock);
1272}
1273
1274/*
1275 * display a header line followed by a load of call lines
1276 */
1277static int nfs_server_list_show(struct seq_file *m, void *v)
1278{
1279 struct nfs_client *clp;
1280
1281 /* display header on line 1 */
1282 if (v == SEQ_START_TOKEN) {
1283 seq_puts(m, "NV SERVER PORT USE HOSTNAME\n");
1284 return 0;
1285 }
1286
1287 /* display one transport per line on subsequent lines */
1288 clp = list_entry(v, struct nfs_client, cl_share_link);
1289
1290 seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
1291 clp->cl_nfsversion,
1292 NIPQUAD(clp->cl_addr.sin_addr),
1293 ntohs(clp->cl_addr.sin_port),
1294 atomic_read(&clp->cl_count),
1295 clp->cl_hostname);
1296
1297 return 0;
1298}
1299
1300/*
1301 * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
1302 */
1303static int nfs_volume_list_open(struct inode *inode, struct file *file)
1304{
1305 struct seq_file *m;
1306 int ret;
1307
1308 ret = seq_open(file, &nfs_volume_list_ops);
1309 if (ret < 0)
1310 return ret;
1311
1312 m = file->private_data;
1313 m->private = PDE(inode)->data;
1314
1315 return 0;
1316}
1317
1318/*
1319 * set up the iterator to start reading from the volume list and return the first item
1320 */
1321static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1322{
1323 struct list_head *_p;
1324 loff_t pos = *_pos;
1325
1326 /* lock the list against modification */
1327 spin_lock(&nfs_client_lock);
1328
1329 /* allow for the header line */
1330 if (!pos)
1331 return SEQ_START_TOKEN;
1332 pos--;
1333
1334 /* find the n'th element in the list */
1335 list_for_each(_p, &nfs_volume_list)
1336 if (!pos--)
1337 break;
1338
1339 return _p != &nfs_volume_list ? _p : NULL;
1340}
1341
1342/*
1343 * move to next volume
1344 */
1345static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1346{
1347 struct list_head *_p;
1348
1349 (*pos)++;
1350
1351 _p = v;
1352 _p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next;
1353
1354 return _p != &nfs_volume_list ? _p : NULL;
1355}
1356
1357/*
1358 * clean up after reading from the transports list
1359 */
1360static void nfs_volume_list_stop(struct seq_file *p, void *v)
1361{
1362 spin_unlock(&nfs_client_lock);
1363}
1364
1365/*
1366 * display a header line followed by a load of call lines
1367 */
1368static int nfs_volume_list_show(struct seq_file *m, void *v)
1369{
1370 struct nfs_server *server;
1371 struct nfs_client *clp;
1372 char dev[8], fsid[17];
1373
1374 /* display header on line 1 */
1375 if (v == SEQ_START_TOKEN) {
1376 seq_puts(m, "NV SERVER PORT DEV FSID\n");
1377 return 0;
1378 }
1379 /* display one transport per line on subsequent lines */
1380 server = list_entry(v, struct nfs_server, master_link);
1381 clp = server->nfs_client;
1382
1383 snprintf(dev, 8, "%u:%u",
1384 MAJOR(server->s_dev), MINOR(server->s_dev));
1385
1386 snprintf(fsid, 17, "%llx:%llx",
1387 (unsigned long long) server->fsid.major,
1388 (unsigned long long) server->fsid.minor);
1389
1390 seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
1391 clp->cl_nfsversion,
1392 NIPQUAD(clp->cl_addr.sin_addr),
1393 ntohs(clp->cl_addr.sin_port),
1394 dev,
1395 fsid);
1396
1397 return 0;
1398}
1399
1400/*
1401 * initialise the /proc/fs/nfsfs/ directory
1402 */
1403int __init nfs_fs_proc_init(void)
1404{
1405 struct proc_dir_entry *p;
1406
1407 proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs);
1408 if (!proc_fs_nfs)
1409 goto error_0;
1410
1411 proc_fs_nfs->owner = THIS_MODULE;
1412
1413 /* a file of servers with which we're dealing */
1414 p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs);
1415 if (!p)
1416 goto error_1;
1417
1418 p->proc_fops = &nfs_server_list_fops;
1419 p->owner = THIS_MODULE;
1420
1421 /* a file of volumes that we have mounted */
1422 p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs);
1423 if (!p)
1424 goto error_2;
1425
1426 p->proc_fops = &nfs_volume_list_fops;
1427 p->owner = THIS_MODULE;
1428 return 0;
1429
1430error_2:
1431 remove_proc_entry("servers", proc_fs_nfs);
1432error_1:
1433 remove_proc_entry("nfsfs", proc_root_fs);
1434error_0:
1435 return -ENOMEM;
1436}
1437
1438/*
1439 * clean up the /proc/fs/nfsfs/ directory
1440 */
1441void nfs_fs_proc_exit(void)
1442{
1443 remove_proc_entry("volumes", proc_fs_nfs);
1444 remove_proc_entry("servers", proc_fs_nfs);
1445 remove_proc_entry("nfsfs", proc_root_fs);
1446}
1447
1448#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 9540a316c05e..57133678db16 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -18,6 +18,7 @@
18 18
19#include "nfs4_fs.h" 19#include "nfs4_fs.h"
20#include "delegation.h" 20#include "delegation.h"
21#include "internal.h"
21 22
22static struct nfs_delegation *nfs_alloc_delegation(void) 23static struct nfs_delegation *nfs_alloc_delegation(void)
23{ 24{
@@ -52,7 +53,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
52 case -NFS4ERR_EXPIRED: 53 case -NFS4ERR_EXPIRED:
53 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 54 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
54 case -NFS4ERR_STALE_CLIENTID: 55 case -NFS4ERR_STALE_CLIENTID:
55 nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state); 56 nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
56 goto out_err; 57 goto out_err;
57 } 58 }
58 } 59 }
@@ -114,7 +115,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
114 */ 115 */
115int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 116int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
116{ 117{
117 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; 118 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
118 struct nfs_inode *nfsi = NFS_I(inode); 119 struct nfs_inode *nfsi = NFS_I(inode);
119 struct nfs_delegation *delegation; 120 struct nfs_delegation *delegation;
120 int status = 0; 121 int status = 0;
@@ -145,7 +146,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
145 sizeof(delegation->stateid)) != 0 || 146 sizeof(delegation->stateid)) != 0 ||
146 delegation->type != nfsi->delegation->type) { 147 delegation->type != nfsi->delegation->type) {
147 printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n", 148 printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
148 __FUNCTION__, NIPQUAD(clp->cl_addr)); 149 __FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr));
149 status = -EIO; 150 status = -EIO;
150 } 151 }
151 } 152 }
@@ -176,7 +177,7 @@ static void nfs_msync_inode(struct inode *inode)
176 */ 177 */
177int __nfs_inode_return_delegation(struct inode *inode) 178int __nfs_inode_return_delegation(struct inode *inode)
178{ 179{
179 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; 180 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
180 struct nfs_inode *nfsi = NFS_I(inode); 181 struct nfs_inode *nfsi = NFS_I(inode);
181 struct nfs_delegation *delegation; 182 struct nfs_delegation *delegation;
182 int res = 0; 183 int res = 0;
@@ -208,7 +209,7 @@ int __nfs_inode_return_delegation(struct inode *inode)
208 */ 209 */
209void nfs_return_all_delegations(struct super_block *sb) 210void nfs_return_all_delegations(struct super_block *sb)
210{ 211{
211 struct nfs4_client *clp = NFS_SB(sb)->nfs4_state; 212 struct nfs_client *clp = NFS_SB(sb)->nfs_client;
212 struct nfs_delegation *delegation; 213 struct nfs_delegation *delegation;
213 struct inode *inode; 214 struct inode *inode;
214 215
@@ -232,7 +233,7 @@ restart:
232 233
233int nfs_do_expire_all_delegations(void *ptr) 234int nfs_do_expire_all_delegations(void *ptr)
234{ 235{
235 struct nfs4_client *clp = ptr; 236 struct nfs_client *clp = ptr;
236 struct nfs_delegation *delegation; 237 struct nfs_delegation *delegation;
237 struct inode *inode; 238 struct inode *inode;
238 239
@@ -254,11 +255,11 @@ restart:
254 } 255 }
255out: 256out:
256 spin_unlock(&clp->cl_lock); 257 spin_unlock(&clp->cl_lock);
257 nfs4_put_client(clp); 258 nfs_put_client(clp);
258 module_put_and_exit(0); 259 module_put_and_exit(0);
259} 260}
260 261
261void nfs_expire_all_delegations(struct nfs4_client *clp) 262void nfs_expire_all_delegations(struct nfs_client *clp)
262{ 263{
263 struct task_struct *task; 264 struct task_struct *task;
264 265
@@ -266,17 +267,17 @@ void nfs_expire_all_delegations(struct nfs4_client *clp)
266 atomic_inc(&clp->cl_count); 267 atomic_inc(&clp->cl_count);
267 task = kthread_run(nfs_do_expire_all_delegations, clp, 268 task = kthread_run(nfs_do_expire_all_delegations, clp,
268 "%u.%u.%u.%u-delegreturn", 269 "%u.%u.%u.%u-delegreturn",
269 NIPQUAD(clp->cl_addr)); 270 NIPQUAD(clp->cl_addr.sin_addr));
270 if (!IS_ERR(task)) 271 if (!IS_ERR(task))
271 return; 272 return;
272 nfs4_put_client(clp); 273 nfs_put_client(clp);
273 module_put(THIS_MODULE); 274 module_put(THIS_MODULE);
274} 275}
275 276
276/* 277/*
277 * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. 278 * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
278 */ 279 */
279void nfs_handle_cb_pathdown(struct nfs4_client *clp) 280void nfs_handle_cb_pathdown(struct nfs_client *clp)
280{ 281{
281 struct nfs_delegation *delegation; 282 struct nfs_delegation *delegation;
282 struct inode *inode; 283 struct inode *inode;
@@ -299,7 +300,7 @@ restart:
299 300
300struct recall_threadargs { 301struct recall_threadargs {
301 struct inode *inode; 302 struct inode *inode;
302 struct nfs4_client *clp; 303 struct nfs_client *clp;
303 const nfs4_stateid *stateid; 304 const nfs4_stateid *stateid;
304 305
305 struct completion started; 306 struct completion started;
@@ -310,7 +311,7 @@ static int recall_thread(void *data)
310{ 311{
311 struct recall_threadargs *args = (struct recall_threadargs *)data; 312 struct recall_threadargs *args = (struct recall_threadargs *)data;
312 struct inode *inode = igrab(args->inode); 313 struct inode *inode = igrab(args->inode);
313 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; 314 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
314 struct nfs_inode *nfsi = NFS_I(inode); 315 struct nfs_inode *nfsi = NFS_I(inode);
315 struct nfs_delegation *delegation; 316 struct nfs_delegation *delegation;
316 317
@@ -371,7 +372,7 @@ out_module_put:
371/* 372/*
372 * Retrieve the inode associated with a delegation 373 * Retrieve the inode associated with a delegation
373 */ 374 */
374struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle) 375struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
375{ 376{
376 struct nfs_delegation *delegation; 377 struct nfs_delegation *delegation;
377 struct inode *res = NULL; 378 struct inode *res = NULL;
@@ -389,7 +390,7 @@ struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nf
389/* 390/*
390 * Mark all delegations as needing to be reclaimed 391 * Mark all delegations as needing to be reclaimed
391 */ 392 */
392void nfs_delegation_mark_reclaim(struct nfs4_client *clp) 393void nfs_delegation_mark_reclaim(struct nfs_client *clp)
393{ 394{
394 struct nfs_delegation *delegation; 395 struct nfs_delegation *delegation;
395 spin_lock(&clp->cl_lock); 396 spin_lock(&clp->cl_lock);
@@ -401,7 +402,7 @@ void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
401/* 402/*
402 * Reap all unclaimed delegations after reboot recovery is done 403 * Reap all unclaimed delegations after reboot recovery is done
403 */ 404 */
404void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) 405void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
405{ 406{
406 struct nfs_delegation *delegation, *n; 407 struct nfs_delegation *delegation, *n;
407 LIST_HEAD(head); 408 LIST_HEAD(head);
@@ -423,7 +424,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
423 424
424int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) 425int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
425{ 426{
426 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; 427 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
427 struct nfs_inode *nfsi = NFS_I(inode); 428 struct nfs_inode *nfsi = NFS_I(inode);
428 struct nfs_delegation *delegation; 429 struct nfs_delegation *delegation;
429 int res = 0; 430 int res = 0;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3858694652fa..2cfd4b24c7fe 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
29int __nfs_inode_return_delegation(struct inode *inode); 29int __nfs_inode_return_delegation(struct inode *inode);
30int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); 30int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
31 31
32struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); 32struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
33void nfs_return_all_delegations(struct super_block *sb); 33void nfs_return_all_delegations(struct super_block *sb);
34void nfs_expire_all_delegations(struct nfs4_client *clp); 34void nfs_expire_all_delegations(struct nfs_client *clp);
35void nfs_handle_cb_pathdown(struct nfs4_client *clp); 35void nfs_handle_cb_pathdown(struct nfs_client *clp);
36 36
37void nfs_delegation_mark_reclaim(struct nfs4_client *clp); 37void nfs_delegation_mark_reclaim(struct nfs_client *clp);
38void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); 38void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
39 39
40/* NFSv4 delegation-related procedures */ 40/* NFSv4 delegation-related procedures */
41int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); 41int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e7ffb4deb3e5..7432f1a43f3d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -30,7 +30,9 @@
30#include <linux/nfs_mount.h> 30#include <linux/nfs_mount.h>
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/smp_lock.h> 32#include <linux/smp_lock.h>
33#include <linux/pagevec.h>
33#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/mount.h>
34 36
35#include "nfs4_fs.h" 37#include "nfs4_fs.h"
36#include "delegation.h" 38#include "delegation.h"
@@ -870,14 +872,14 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
870 return (nd->intent.open.flags & O_EXCL) != 0; 872 return (nd->intent.open.flags & O_EXCL) != 0;
871} 873}
872 874
873static inline int nfs_reval_fsid(struct inode *dir, 875static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
874 struct nfs_fh *fh, struct nfs_fattr *fattr) 876 struct nfs_fh *fh, struct nfs_fattr *fattr)
875{ 877{
876 struct nfs_server *server = NFS_SERVER(dir); 878 struct nfs_server *server = NFS_SERVER(dir);
877 879
878 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) 880 if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
879 /* Revalidate fsid on root dir */ 881 /* Revalidate fsid on root dir */
880 return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); 882 return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
881 return 0; 883 return 0;
882} 884}
883 885
@@ -902,9 +904,15 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
902 904
903 lock_kernel(); 905 lock_kernel();
904 906
905 /* If we're doing an exclusive create, optimize away the lookup */ 907 /*
906 if (nfs_is_exclusive_create(dir, nd)) 908 * If we're doing an exclusive create, optimize away the lookup
907 goto no_entry; 909 * but don't hash the dentry.
910 */
911 if (nfs_is_exclusive_create(dir, nd)) {
912 d_instantiate(dentry, NULL);
913 res = NULL;
914 goto out_unlock;
915 }
908 916
909 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); 917 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
910 if (error == -ENOENT) 918 if (error == -ENOENT)
@@ -913,7 +921,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
913 res = ERR_PTR(error); 921 res = ERR_PTR(error);
914 goto out_unlock; 922 goto out_unlock;
915 } 923 }
916 error = nfs_reval_fsid(dir, &fhandle, &fattr); 924 error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
917 if (error < 0) { 925 if (error < 0) {
918 res = ERR_PTR(error); 926 res = ERR_PTR(error);
919 goto out_unlock; 927 goto out_unlock;
@@ -922,8 +930,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
922 res = (struct dentry *)inode; 930 res = (struct dentry *)inode;
923 if (IS_ERR(res)) 931 if (IS_ERR(res))
924 goto out_unlock; 932 goto out_unlock;
933
925no_entry: 934no_entry:
926 res = d_add_unique(dentry, inode); 935 res = d_materialise_unique(dentry, inode);
927 if (res != NULL) 936 if (res != NULL)
928 dentry = res; 937 dentry = res;
929 nfs_renew_times(dentry); 938 nfs_renew_times(dentry);
@@ -1117,11 +1126,13 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
1117 dput(dentry); 1126 dput(dentry);
1118 return NULL; 1127 return NULL;
1119 } 1128 }
1120 alias = d_add_unique(dentry, inode); 1129
1130 alias = d_materialise_unique(dentry, inode);
1121 if (alias != NULL) { 1131 if (alias != NULL) {
1122 dput(dentry); 1132 dput(dentry);
1123 dentry = alias; 1133 dentry = alias;
1124 } 1134 }
1135
1125 nfs_renew_times(dentry); 1136 nfs_renew_times(dentry);
1126 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1137 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1127 return dentry; 1138 return dentry;
@@ -1143,23 +1154,22 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1143 struct inode *dir = dentry->d_parent->d_inode; 1154 struct inode *dir = dentry->d_parent->d_inode;
1144 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1155 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
1145 if (error) 1156 if (error)
1146 goto out_err; 1157 return error;
1147 } 1158 }
1148 if (!(fattr->valid & NFS_ATTR_FATTR)) { 1159 if (!(fattr->valid & NFS_ATTR_FATTR)) {
1149 struct nfs_server *server = NFS_SB(dentry->d_sb); 1160 struct nfs_server *server = NFS_SB(dentry->d_sb);
1150 error = server->rpc_ops->getattr(server, fhandle, fattr); 1161 error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
1151 if (error < 0) 1162 if (error < 0)
1152 goto out_err; 1163 return error;
1153 } 1164 }
1154 inode = nfs_fhget(dentry->d_sb, fhandle, fattr); 1165 inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
1155 error = PTR_ERR(inode); 1166 error = PTR_ERR(inode);
1156 if (IS_ERR(inode)) 1167 if (IS_ERR(inode))
1157 goto out_err; 1168 return error;
1158 d_instantiate(dentry, inode); 1169 d_instantiate(dentry, inode);
1170 if (d_unhashed(dentry))
1171 d_rehash(dentry);
1159 return 0; 1172 return 0;
1160out_err:
1161 d_drop(dentry);
1162 return error;
1163} 1173}
1164 1174
1165/* 1175/*
@@ -1440,48 +1450,82 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1440 return error; 1450 return error;
1441} 1451}
1442 1452
1443static int 1453/*
1444nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 1454 * To create a symbolic link, most file systems instantiate a new inode,
1455 * add a page to it containing the path, then write it out to the disk
1456 * using prepare_write/commit_write.
1457 *
1458 * Unfortunately the NFS client can't create the in-core inode first
1459 * because it needs a file handle to create an in-core inode (see
1460 * fs/nfs/inode.c:nfs_fhget). We only have a file handle *after* the
1461 * symlink request has completed on the server.
1462 *
1463 * So instead we allocate a raw page, copy the symname into it, then do
1464 * the SYMLINK request with the page as the buffer. If it succeeds, we
1465 * now have a new file handle and can instantiate an in-core NFS inode
1466 * and move the raw page into its mapping.
1467 */
1468static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1445{ 1469{
1470 struct pagevec lru_pvec;
1471 struct page *page;
1472 char *kaddr;
1446 struct iattr attr; 1473 struct iattr attr;
1447 struct nfs_fattr sym_attr; 1474 unsigned int pathlen = strlen(symname);
1448 struct nfs_fh sym_fh;
1449 struct qstr qsymname;
1450 int error; 1475 int error;
1451 1476
1452 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, 1477 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
1453 dir->i_ino, dentry->d_name.name, symname); 1478 dir->i_ino, dentry->d_name.name, symname);
1454 1479
1455#ifdef NFS_PARANOIA 1480 if (pathlen > PAGE_SIZE)
1456if (dentry->d_inode) 1481 return -ENAMETOOLONG;
1457printk("nfs_proc_symlink: %s/%s not negative!\n",
1458dentry->d_parent->d_name.name, dentry->d_name.name);
1459#endif
1460 /*
1461 * Fill in the sattr for the call.
1462 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
1463 */
1464 attr.ia_valid = ATTR_MODE;
1465 attr.ia_mode = S_IFLNK | S_IRWXUGO;
1466 1482
1467 qsymname.name = symname; 1483 attr.ia_mode = S_IFLNK | S_IRWXUGO;
1468 qsymname.len = strlen(symname); 1484 attr.ia_valid = ATTR_MODE;
1469 1485
1470 lock_kernel(); 1486 lock_kernel();
1487
1488 page = alloc_page(GFP_KERNEL);
1489 if (!page) {
1490 unlock_kernel();
1491 return -ENOMEM;
1492 }
1493
1494 kaddr = kmap_atomic(page, KM_USER0);
1495 memcpy(kaddr, symname, pathlen);
1496 if (pathlen < PAGE_SIZE)
1497 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
1498 kunmap_atomic(kaddr, KM_USER0);
1499
1471 nfs_begin_data_update(dir); 1500 nfs_begin_data_update(dir);
1472 error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, 1501 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
1473 &attr, &sym_fh, &sym_attr);
1474 nfs_end_data_update(dir); 1502 nfs_end_data_update(dir);
1475 if (!error) { 1503 if (error != 0) {
1476 error = nfs_instantiate(dentry, &sym_fh, &sym_attr); 1504 dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
1477 } else { 1505 dir->i_sb->s_id, dir->i_ino,
1478 if (error == -EEXIST) 1506 dentry->d_name.name, symname, error);
1479 printk("nfs_proc_symlink: %s/%s already exists??\n",
1480 dentry->d_parent->d_name.name, dentry->d_name.name);
1481 d_drop(dentry); 1507 d_drop(dentry);
1508 __free_page(page);
1509 unlock_kernel();
1510 return error;
1482 } 1511 }
1512
1513 /*
1514 * No big deal if we can't add this page to the page cache here.
1515 * READLINK will get the missing page from the server if needed.
1516 */
1517 pagevec_init(&lru_pvec, 0);
1518 if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
1519 GFP_KERNEL)) {
1520 if (!pagevec_add(&lru_pvec, page))
1521 __pagevec_lru_add(&lru_pvec);
1522 SetPageUptodate(page);
1523 unlock_page(page);
1524 } else
1525 __free_page(page);
1526
1483 unlock_kernel(); 1527 unlock_kernel();
1484 return error; 1528 return 0;
1485} 1529}
1486 1530
1487static int 1531static int
@@ -1625,8 +1669,7 @@ out:
1625 if (rehash) 1669 if (rehash)
1626 d_rehash(rehash); 1670 d_rehash(rehash);
1627 if (!error) { 1671 if (!error) {
1628 if (!S_ISDIR(old_inode->i_mode)) 1672 d_move(old_dentry, new_dentry);
1629 d_move(old_dentry, new_dentry);
1630 nfs_renew_times(new_dentry); 1673 nfs_renew_times(new_dentry);
1631 nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); 1674 nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
1632 } 1675 }
@@ -1638,35 +1681,211 @@ out:
1638 return error; 1681 return error;
1639} 1682}
1640 1683
1684static DEFINE_SPINLOCK(nfs_access_lru_lock);
1685static LIST_HEAD(nfs_access_lru_list);
1686static atomic_long_t nfs_access_nr_entries;
1687
1688static void nfs_access_free_entry(struct nfs_access_entry *entry)
1689{
1690 put_rpccred(entry->cred);
1691 kfree(entry);
1692 smp_mb__before_atomic_dec();
1693 atomic_long_dec(&nfs_access_nr_entries);
1694 smp_mb__after_atomic_dec();
1695}
1696
1697int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
1698{
1699 LIST_HEAD(head);
1700 struct nfs_inode *nfsi;
1701 struct nfs_access_entry *cache;
1702
1703 spin_lock(&nfs_access_lru_lock);
1704restart:
1705 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
1706 struct inode *inode;
1707
1708 if (nr_to_scan-- == 0)
1709 break;
1710 inode = igrab(&nfsi->vfs_inode);
1711 if (inode == NULL)
1712 continue;
1713 spin_lock(&inode->i_lock);
1714 if (list_empty(&nfsi->access_cache_entry_lru))
1715 goto remove_lru_entry;
1716 cache = list_entry(nfsi->access_cache_entry_lru.next,
1717 struct nfs_access_entry, lru);
1718 list_move(&cache->lru, &head);
1719 rb_erase(&cache->rb_node, &nfsi->access_cache);
1720 if (!list_empty(&nfsi->access_cache_entry_lru))
1721 list_move_tail(&nfsi->access_cache_inode_lru,
1722 &nfs_access_lru_list);
1723 else {
1724remove_lru_entry:
1725 list_del_init(&nfsi->access_cache_inode_lru);
1726 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
1727 }
1728 spin_unlock(&inode->i_lock);
1729 iput(inode);
1730 goto restart;
1731 }
1732 spin_unlock(&nfs_access_lru_lock);
1733 while (!list_empty(&head)) {
1734 cache = list_entry(head.next, struct nfs_access_entry, lru);
1735 list_del(&cache->lru);
1736 nfs_access_free_entry(cache);
1737 }
1738 return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
1739}
1740
1741static void __nfs_access_zap_cache(struct inode *inode)
1742{
1743 struct nfs_inode *nfsi = NFS_I(inode);
1744 struct rb_root *root_node = &nfsi->access_cache;
1745 struct rb_node *n, *dispose = NULL;
1746 struct nfs_access_entry *entry;
1747
1748 /* Unhook entries from the cache */
1749 while ((n = rb_first(root_node)) != NULL) {
1750 entry = rb_entry(n, struct nfs_access_entry, rb_node);
1751 rb_erase(n, root_node);
1752 list_del(&entry->lru);
1753 n->rb_left = dispose;
1754 dispose = n;
1755 }
1756 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
1757 spin_unlock(&inode->i_lock);
1758
1759 /* Now kill them all! */
1760 while (dispose != NULL) {
1761 n = dispose;
1762 dispose = n->rb_left;
1763 nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
1764 }
1765}
1766
1767void nfs_access_zap_cache(struct inode *inode)
1768{
1769 /* Remove from global LRU init */
1770 if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
1771 spin_lock(&nfs_access_lru_lock);
1772 list_del_init(&NFS_I(inode)->access_cache_inode_lru);
1773 spin_unlock(&nfs_access_lru_lock);
1774 }
1775
1776 spin_lock(&inode->i_lock);
1777 /* This will release the spinlock */
1778 __nfs_access_zap_cache(inode);
1779}
1780
1781static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
1782{
1783 struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
1784 struct nfs_access_entry *entry;
1785
1786 while (n != NULL) {
1787 entry = rb_entry(n, struct nfs_access_entry, rb_node);
1788
1789 if (cred < entry->cred)
1790 n = n->rb_left;
1791 else if (cred > entry->cred)
1792 n = n->rb_right;
1793 else
1794 return entry;
1795 }
1796 return NULL;
1797}
1798
1641int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) 1799int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
1642{ 1800{
1643 struct nfs_inode *nfsi = NFS_I(inode); 1801 struct nfs_inode *nfsi = NFS_I(inode);
1644 struct nfs_access_entry *cache = &nfsi->cache_access; 1802 struct nfs_access_entry *cache;
1803 int err = -ENOENT;
1645 1804
1646 if (cache->cred != cred 1805 spin_lock(&inode->i_lock);
1647 || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) 1806 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
1648 || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)) 1807 goto out_zap;
1649 return -ENOENT; 1808 cache = nfs_access_search_rbtree(inode, cred);
1650 memcpy(res, cache, sizeof(*res)); 1809 if (cache == NULL)
1651 return 0; 1810 goto out;
1811 if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
1812 goto out_stale;
1813 res->jiffies = cache->jiffies;
1814 res->cred = cache->cred;
1815 res->mask = cache->mask;
1816 list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
1817 err = 0;
1818out:
1819 spin_unlock(&inode->i_lock);
1820 return err;
1821out_stale:
1822 rb_erase(&cache->rb_node, &nfsi->access_cache);
1823 list_del(&cache->lru);
1824 spin_unlock(&inode->i_lock);
1825 nfs_access_free_entry(cache);
1826 return -ENOENT;
1827out_zap:
1828 /* This will release the spinlock */
1829 __nfs_access_zap_cache(inode);
1830 return -ENOENT;
1652} 1831}
1653 1832
1654void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) 1833static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
1655{ 1834{
1656 struct nfs_inode *nfsi = NFS_I(inode); 1835 struct nfs_inode *nfsi = NFS_I(inode);
1657 struct nfs_access_entry *cache = &nfsi->cache_access; 1836 struct rb_root *root_node = &nfsi->access_cache;
1837 struct rb_node **p = &root_node->rb_node;
1838 struct rb_node *parent = NULL;
1839 struct nfs_access_entry *entry;
1658 1840
1659 if (cache->cred != set->cred) {
1660 if (cache->cred)
1661 put_rpccred(cache->cred);
1662 cache->cred = get_rpccred(set->cred);
1663 }
1664 /* FIXME: replace current access_cache BKL reliance with inode->i_lock */
1665 spin_lock(&inode->i_lock); 1841 spin_lock(&inode->i_lock);
1666 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; 1842 while (*p != NULL) {
1843 parent = *p;
1844 entry = rb_entry(parent, struct nfs_access_entry, rb_node);
1845
1846 if (set->cred < entry->cred)
1847 p = &parent->rb_left;
1848 else if (set->cred > entry->cred)
1849 p = &parent->rb_right;
1850 else
1851 goto found;
1852 }
1853 rb_link_node(&set->rb_node, parent, p);
1854 rb_insert_color(&set->rb_node, root_node);
1855 list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
1667 spin_unlock(&inode->i_lock); 1856 spin_unlock(&inode->i_lock);
1857 return;
1858found:
1859 rb_replace_node(parent, &set->rb_node, root_node);
1860 list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
1861 list_del(&entry->lru);
1862 spin_unlock(&inode->i_lock);
1863 nfs_access_free_entry(entry);
1864}
1865
1866void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
1867{
1868 struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
1869 if (cache == NULL)
1870 return;
1871 RB_CLEAR_NODE(&cache->rb_node);
1668 cache->jiffies = set->jiffies; 1872 cache->jiffies = set->jiffies;
1873 cache->cred = get_rpccred(set->cred);
1669 cache->mask = set->mask; 1874 cache->mask = set->mask;
1875
1876 nfs_access_add_rbtree(inode, cache);
1877
1878 /* Update accounting */
1879 smp_mb__before_atomic_inc();
1880 atomic_long_inc(&nfs_access_nr_entries);
1881 smp_mb__after_atomic_inc();
1882
1883 /* Add inode to global LRU list */
1884 if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
1885 spin_lock(&nfs_access_lru_lock);
1886 list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
1887 spin_unlock(&nfs_access_lru_lock);
1888 }
1670} 1889}
1671 1890
1672static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) 1891static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 48e892880d5b..be997d649127 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -111,7 +111,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
111 111
112 nfs_inc_stats(inode, NFSIOS_VFSOPEN); 112 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
113 lock_kernel(); 113 lock_kernel();
114 res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); 114 res = NFS_PROTO(inode)->file_open(inode, filp);
115 unlock_kernel(); 115 unlock_kernel();
116 return res; 116 return res;
117} 117}
@@ -157,7 +157,7 @@ force_reval:
157static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) 157static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
158{ 158{
159 /* origin == SEEK_END => we must revalidate the cached file length */ 159 /* origin == SEEK_END => we must revalidate the cached file length */
160 if (origin == 2) { 160 if (origin == SEEK_END) {
161 struct inode *inode = filp->f_mapping->host; 161 struct inode *inode = filp->f_mapping->host;
162 int retval = nfs_revalidate_file_size(inode, filp); 162 int retval = nfs_revalidate_file_size(inode, filp);
163 if (retval < 0) 163 if (retval < 0)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
new file mode 100644
index 000000000000..76b08ae9ed82
--- /dev/null
+++ b/fs/nfs/getroot.c
@@ -0,0 +1,311 @@
1/* getroot.c: get the root dentry for an NFS mount
2 *
3 * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/config.h>
13#include <linux/module.h>
14#include <linux/init.h>
15
16#include <linux/time.h>
17#include <linux/kernel.h>
18#include <linux/mm.h>
19#include <linux/string.h>
20#include <linux/stat.h>
21#include <linux/errno.h>
22#include <linux/unistd.h>
23#include <linux/sunrpc/clnt.h>
24#include <linux/sunrpc/stats.h>
25#include <linux/nfs_fs.h>
26#include <linux/nfs_mount.h>
27#include <linux/nfs4_mount.h>
28#include <linux/lockd/bind.h>
29#include <linux/smp_lock.h>
30#include <linux/seq_file.h>
31#include <linux/mount.h>
32#include <linux/nfs_idmap.h>
33#include <linux/vfs.h>
34#include <linux/namei.h>
35#include <linux/namespace.h>
36#include <linux/security.h>
37
38#include <asm/system.h>
39#include <asm/uaccess.h>
40
41#include "nfs4_fs.h"
42#include "delegation.h"
43#include "internal.h"
44
45#define NFSDBG_FACILITY NFSDBG_CLIENT
46#define NFS_PARANOIA 1
47
48/*
49 * get an NFS2/NFS3 root dentry from the root filehandle
50 */
51struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
52{
53 struct nfs_server *server = NFS_SB(sb);
54 struct nfs_fsinfo fsinfo;
55 struct nfs_fattr fattr;
56 struct dentry *mntroot;
57 struct inode *inode;
58 int error;
59
60 /* create a dummy root dentry with dummy inode for this superblock */
61 if (!sb->s_root) {
62 struct nfs_fh dummyfh;
63 struct dentry *root;
64 struct inode *iroot;
65
66 memset(&dummyfh, 0, sizeof(dummyfh));
67 memset(&fattr, 0, sizeof(fattr));
68 nfs_fattr_init(&fattr);
69 fattr.valid = NFS_ATTR_FATTR;
70 fattr.type = NFDIR;
71 fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
72 fattr.nlink = 2;
73
74 iroot = nfs_fhget(sb, &dummyfh, &fattr);
75 if (IS_ERR(iroot))
76 return ERR_PTR(PTR_ERR(iroot));
77
78 root = d_alloc_root(iroot);
79 if (!root) {
80 iput(iroot);
81 return ERR_PTR(-ENOMEM);
82 }
83
84 sb->s_root = root;
85 }
86
87 /* get the actual root for this mount */
88 fsinfo.fattr = &fattr;
89
90 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
91 if (error < 0) {
92 dprintk("nfs_get_root: getattr error = %d\n", -error);
93 return ERR_PTR(error);
94 }
95
96 inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
97 if (IS_ERR(inode)) {
98 dprintk("nfs_get_root: get root inode failed\n");
99 return ERR_PTR(PTR_ERR(inode));
100 }
101
102 /* root dentries normally start off anonymous and get spliced in later
103 * if the dentry tree reaches them; however if the dentry already
104 * exists, we'll pick it up at this point and use it as the root
105 */
106 mntroot = d_alloc_anon(inode);
107 if (!mntroot) {
108 iput(inode);
109 dprintk("nfs_get_root: get root dentry failed\n");
110 return ERR_PTR(-ENOMEM);
111 }
112
113 security_d_instantiate(mntroot, inode);
114
115 if (!mntroot->d_op)
116 mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
117
118 return mntroot;
119}
120
121#ifdef CONFIG_NFS_V4
122
123/*
124 * Do a simple pathwalk from the root FH of the server to the nominated target
125 * of the mountpoint
126 * - give error on symlinks
127 * - give error on ".." occurring in the path
128 * - follow traversals
129 */
130int nfs4_path_walk(struct nfs_server *server,
131 struct nfs_fh *mntfh,
132 const char *path)
133{
134 struct nfs_fsinfo fsinfo;
135 struct nfs_fattr fattr;
136 struct nfs_fh lastfh;
137 struct qstr name;
138 int ret;
139 //int referral_count = 0;
140
141 dprintk("--> nfs4_path_walk(,,%s)\n", path);
142
143 fsinfo.fattr = &fattr;
144 nfs_fattr_init(&fattr);
145
146 if (*path++ != '/') {
147 dprintk("nfs4_get_root: Path does not begin with a slash\n");
148 return -EINVAL;
149 }
150
151 /* Start by getting the root filehandle from the server */
152 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
153 if (ret < 0) {
154 dprintk("nfs4_get_root: getroot error = %d\n", -ret);
155 return ret;
156 }
157
158 if (fattr.type != NFDIR) {
159 printk(KERN_ERR "nfs4_get_root:"
160 " getroot encountered non-directory\n");
161 return -ENOTDIR;
162 }
163
164 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
165 printk(KERN_ERR "nfs4_get_root:"
166 " getroot obtained referral\n");
167 return -EREMOTE;
168 }
169
170next_component:
171 dprintk("Next: %s\n", path);
172
173 /* extract the next bit of the path */
174 if (!*path)
175 goto path_walk_complete;
176
177 name.name = path;
178 while (*path && *path != '/')
179 path++;
180 name.len = path - (const char *) name.name;
181
182eat_dot_dir:
183 while (*path == '/')
184 path++;
185
186 if (path[0] == '.' && (path[1] == '/' || !path[1])) {
187 path += 2;
188 goto eat_dot_dir;
189 }
190
191 if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
192 ) {
193 printk(KERN_ERR "nfs4_get_root:"
194 " Mount path contains reference to \"..\"\n");
195 return -EINVAL;
196 }
197
198 /* lookup the next FH in the sequence */
199 memcpy(&lastfh, mntfh, sizeof(lastfh));
200
201 dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
202
203 ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
204 mntfh, &fattr);
205 if (ret < 0) {
206 dprintk("nfs4_get_root: getroot error = %d\n", -ret);
207 return ret;
208 }
209
210 if (fattr.type != NFDIR) {
211 printk(KERN_ERR "nfs4_get_root:"
212 " lookupfh encountered non-directory\n");
213 return -ENOTDIR;
214 }
215
216 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
217 printk(KERN_ERR "nfs4_get_root:"
218 " lookupfh obtained referral\n");
219 return -EREMOTE;
220 }
221
222 goto next_component;
223
224path_walk_complete:
225 memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
226 dprintk("<-- nfs4_path_walk() = 0\n");
227 return 0;
228}
229
230/*
231 * get an NFS4 root dentry from the root filehandle
232 */
233struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
234{
235 struct nfs_server *server = NFS_SB(sb);
236 struct nfs_fattr fattr;
237 struct dentry *mntroot;
238 struct inode *inode;
239 int error;
240
241 dprintk("--> nfs4_get_root()\n");
242
243 /* create a dummy root dentry with dummy inode for this superblock */
244 if (!sb->s_root) {
245 struct nfs_fh dummyfh;
246 struct dentry *root;
247 struct inode *iroot;
248
249 memset(&dummyfh, 0, sizeof(dummyfh));
250 memset(&fattr, 0, sizeof(fattr));
251 nfs_fattr_init(&fattr);
252 fattr.valid = NFS_ATTR_FATTR;
253 fattr.type = NFDIR;
254 fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
255 fattr.nlink = 2;
256
257 iroot = nfs_fhget(sb, &dummyfh, &fattr);
258 if (IS_ERR(iroot))
259 return ERR_PTR(PTR_ERR(iroot));
260
261 root = d_alloc_root(iroot);
262 if (!root) {
263 iput(iroot);
264 return ERR_PTR(-ENOMEM);
265 }
266
267 sb->s_root = root;
268 }
269
270 /* get the info about the server and filesystem */
271 error = nfs4_server_capabilities(server, mntfh);
272 if (error < 0) {
273 dprintk("nfs_get_root: getcaps error = %d\n",
274 -error);
275 return ERR_PTR(error);
276 }
277
278 /* get the actual root for this mount */
279 error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
280 if (error < 0) {
281 dprintk("nfs_get_root: getattr error = %d\n", -error);
282 return ERR_PTR(error);
283 }
284
285 inode = nfs_fhget(sb, mntfh, &fattr);
286 if (IS_ERR(inode)) {
287 dprintk("nfs_get_root: get root inode failed\n");
288 return ERR_PTR(PTR_ERR(inode));
289 }
290
291 /* root dentries normally start off anonymous and get spliced in later
292 * if the dentry tree reaches them; however if the dentry already
293 * exists, we'll pick it up at this point and use it as the root
294 */
295 mntroot = d_alloc_anon(inode);
296 if (!mntroot) {
297 iput(inode);
298 dprintk("nfs_get_root: get root dentry failed\n");
299 return ERR_PTR(-ENOMEM);
300 }
301
302 security_d_instantiate(mntroot, inode);
303
304 if (!mntroot->d_op)
305 mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
306
307 dprintk("<-- nfs4_get_root()\n");
308 return mntroot;
309}
310
311#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 07a5dd57646e..82ad7110a1c0 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -57,6 +57,20 @@
57/* Default cache timeout is 10 minutes */ 57/* Default cache timeout is 10 minutes */
58unsigned int nfs_idmap_cache_timeout = 600 * HZ; 58unsigned int nfs_idmap_cache_timeout = 600 * HZ;
59 59
60static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
61{
62 char *endp;
63 int num = simple_strtol(val, &endp, 0);
64 int jif = num * HZ;
65 if (endp == val || *endp || num < 0 || jif < num)
66 return -EINVAL;
67 *((int *)kp->arg) = jif;
68 return 0;
69}
70
71module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
72 &nfs_idmap_cache_timeout, 0644);
73
60struct idmap_hashent { 74struct idmap_hashent {
61 unsigned long ih_expires; 75 unsigned long ih_expires;
62 __u32 ih_id; 76 __u32 ih_id;
@@ -70,7 +84,6 @@ struct idmap_hashtable {
70}; 84};
71 85
72struct idmap { 86struct idmap {
73 char idmap_path[48];
74 struct dentry *idmap_dentry; 87 struct dentry *idmap_dentry;
75 wait_queue_head_t idmap_wq; 88 wait_queue_head_t idmap_wq;
76 struct idmap_msg idmap_im; 89 struct idmap_msg idmap_im;
@@ -94,24 +107,23 @@ static struct rpc_pipe_ops idmap_upcall_ops = {
94 .destroy_msg = idmap_pipe_destroy_msg, 107 .destroy_msg = idmap_pipe_destroy_msg,
95}; 108};
96 109
97void 110int
98nfs_idmap_new(struct nfs4_client *clp) 111nfs_idmap_new(struct nfs_client *clp)
99{ 112{
100 struct idmap *idmap; 113 struct idmap *idmap;
114 int error;
101 115
102 if (clp->cl_idmap != NULL) 116 BUG_ON(clp->cl_idmap != NULL);
103 return;
104 if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
105 return;
106 117
107 snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), 118 if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
108 "%s/idmap", clp->cl_rpcclient->cl_pathname); 119 return -ENOMEM;
109 120
110 idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, 121 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
111 idmap, &idmap_upcall_ops, 0); 122 idmap, &idmap_upcall_ops, 0);
112 if (IS_ERR(idmap->idmap_dentry)) { 123 if (IS_ERR(idmap->idmap_dentry)) {
124 error = PTR_ERR(idmap->idmap_dentry);
113 kfree(idmap); 125 kfree(idmap);
114 return; 126 return error;
115 } 127 }
116 128
117 mutex_init(&idmap->idmap_lock); 129 mutex_init(&idmap->idmap_lock);
@@ -121,10 +133,11 @@ nfs_idmap_new(struct nfs4_client *clp)
121 idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; 133 idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
122 134
123 clp->cl_idmap = idmap; 135 clp->cl_idmap = idmap;
136 return 0;
124} 137}
125 138
126void 139void
127nfs_idmap_delete(struct nfs4_client *clp) 140nfs_idmap_delete(struct nfs_client *clp)
128{ 141{
129 struct idmap *idmap = clp->cl_idmap; 142 struct idmap *idmap = clp->cl_idmap;
130 143
@@ -477,27 +490,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
477 return (hash); 490 return (hash);
478} 491}
479 492
480int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) 493int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
481{ 494{
482 struct idmap *idmap = clp->cl_idmap; 495 struct idmap *idmap = clp->cl_idmap;
483 496
484 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); 497 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
485} 498}
486 499
487int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) 500int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
488{ 501{
489 struct idmap *idmap = clp->cl_idmap; 502 struct idmap *idmap = clp->cl_idmap;
490 503
491 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 504 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
492} 505}
493 506
494int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) 507int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf)
495{ 508{
496 struct idmap *idmap = clp->cl_idmap; 509 struct idmap *idmap = clp->cl_idmap;
497 510
498 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 511 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
499} 512}
500int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) 513int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf)
501{ 514{
502 struct idmap *idmap = clp->cl_idmap; 515 struct idmap *idmap = clp->cl_idmap;
503 516
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d349fb2245da..e8c143d182c4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -76,19 +76,14 @@ int nfs_write_inode(struct inode *inode, int sync)
76 76
77void nfs_clear_inode(struct inode *inode) 77void nfs_clear_inode(struct inode *inode)
78{ 78{
79 struct nfs_inode *nfsi = NFS_I(inode);
80 struct rpc_cred *cred;
81
82 /* 79 /*
83 * The following should never happen... 80 * The following should never happen...
84 */ 81 */
85 BUG_ON(nfs_have_writebacks(inode)); 82 BUG_ON(nfs_have_writebacks(inode));
86 BUG_ON (!list_empty(&nfsi->open_files)); 83 BUG_ON(!list_empty(&NFS_I(inode)->open_files));
84 BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
87 nfs_zap_acl_cache(inode); 85 nfs_zap_acl_cache(inode);
88 cred = nfsi->cache_access.cred; 86 nfs_access_zap_cache(inode);
89 if (cred)
90 put_rpccred(cred);
91 BUG_ON(atomic_read(&nfsi->data_updates) != 0);
92} 87}
93 88
94/** 89/**
@@ -242,13 +237,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
242 /* Why so? Because we want revalidate for devices/FIFOs, and 237 /* Why so? Because we want revalidate for devices/FIFOs, and
243 * that's precisely what we have in nfs_file_inode_operations. 238 * that's precisely what we have in nfs_file_inode_operations.
244 */ 239 */
245 inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; 240 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
246 if (S_ISREG(inode->i_mode)) { 241 if (S_ISREG(inode->i_mode)) {
247 inode->i_fop = &nfs_file_operations; 242 inode->i_fop = &nfs_file_operations;
248 inode->i_data.a_ops = &nfs_file_aops; 243 inode->i_data.a_ops = &nfs_file_aops;
249 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; 244 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
250 } else if (S_ISDIR(inode->i_mode)) { 245 } else if (S_ISDIR(inode->i_mode)) {
251 inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; 246 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
252 inode->i_fop = &nfs_dir_operations; 247 inode->i_fop = &nfs_dir_operations;
253 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) 248 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
254 && fattr->size <= NFS_LIMIT_READDIRPLUS) 249 && fattr->size <= NFS_LIMIT_READDIRPLUS)
@@ -290,7 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
290 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 285 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
291 nfsi->attrtimeo_timestamp = jiffies; 286 nfsi->attrtimeo_timestamp = jiffies;
292 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 287 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
293 nfsi->cache_access.cred = NULL; 288 nfsi->access_cache = RB_ROOT;
294 289
295 unlock_new_inode(inode); 290 unlock_new_inode(inode);
296 } else 291 } else
@@ -722,13 +717,11 @@ void nfs_end_data_update(struct inode *inode)
722{ 717{
723 struct nfs_inode *nfsi = NFS_I(inode); 718 struct nfs_inode *nfsi = NFS_I(inode);
724 719
725 if (!nfs_have_delegation(inode, FMODE_READ)) { 720 /* Directories: invalidate page cache */
726 /* Directories and symlinks: invalidate page cache */ 721 if (S_ISDIR(inode->i_mode)) {
727 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) { 722 spin_lock(&inode->i_lock);
728 spin_lock(&inode->i_lock); 723 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
729 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 724 spin_unlock(&inode->i_lock);
730 spin_unlock(&inode->i_lock);
731 }
732 } 725 }
733 nfsi->cache_change_attribute = jiffies; 726 nfsi->cache_change_attribute = jiffies;
734 atomic_dec(&nfsi->data_updates); 727 atomic_dec(&nfsi->data_updates);
@@ -847,6 +840,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
847 * 840 *
848 * After an operation that has changed the inode metadata, mark the 841 * After an operation that has changed the inode metadata, mark the
849 * attribute cache as being invalid, then try to update it. 842 * attribute cache as being invalid, then try to update it.
843 *
844 * NB: if the server didn't return any post op attributes, this
845 * function will force the retrieval of attributes before the next
846 * NFS request. Thus it should be used only for operations that
847 * are expected to change one or more attributes, to avoid
848 * unnecessary NFS requests and trips through nfs_update_inode().
850 */ 849 */
851int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) 850int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
852{ 851{
@@ -1025,7 +1024,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1025 out_fileid: 1024 out_fileid:
1026 printk(KERN_ERR "NFS: server %s error: fileid changed\n" 1025 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
1027 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n", 1026 "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
1028 NFS_SERVER(inode)->hostname, inode->i_sb->s_id, 1027 NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
1029 (long long)nfsi->fileid, (long long)fattr->fileid); 1028 (long long)nfsi->fileid, (long long)fattr->fileid);
1030 goto out_err; 1029 goto out_err;
1031} 1030}
@@ -1109,6 +1108,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
1109 INIT_LIST_HEAD(&nfsi->dirty); 1108 INIT_LIST_HEAD(&nfsi->dirty);
1110 INIT_LIST_HEAD(&nfsi->commit); 1109 INIT_LIST_HEAD(&nfsi->commit);
1111 INIT_LIST_HEAD(&nfsi->open_files); 1110 INIT_LIST_HEAD(&nfsi->open_files);
1111 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1112 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1112 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1113 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1113 atomic_set(&nfsi->data_updates, 0); 1114 atomic_set(&nfsi->data_updates, 0);
1114 nfsi->ndirty = 0; 1115 nfsi->ndirty = 0;
@@ -1144,6 +1145,10 @@ static int __init init_nfs_fs(void)
1144{ 1145{
1145 int err; 1146 int err;
1146 1147
1148 err = nfs_fs_proc_init();
1149 if (err)
1150 goto out5;
1151
1147 err = nfs_init_nfspagecache(); 1152 err = nfs_init_nfspagecache();
1148 if (err) 1153 if (err)
1149 goto out4; 1154 goto out4;
@@ -1184,6 +1189,8 @@ out2:
1184out3: 1189out3:
1185 nfs_destroy_nfspagecache(); 1190 nfs_destroy_nfspagecache();
1186out4: 1191out4:
1192 nfs_fs_proc_exit();
1193out5:
1187 return err; 1194 return err;
1188} 1195}
1189 1196
@@ -1198,6 +1205,7 @@ static void __exit exit_nfs_fs(void)
1198 rpc_proc_unregister("nfs"); 1205 rpc_proc_unregister("nfs");
1199#endif 1206#endif
1200 unregister_nfs_fs(); 1207 unregister_nfs_fs();
1208 nfs_fs_proc_exit();
1201} 1209}
1202 1210
1203/* Not quite true; I just maintain it */ 1211/* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e4f4e5def0fc..bea0b016bd70 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,6 +4,18 @@
4 4
5#include <linux/mount.h> 5#include <linux/mount.h>
6 6
7struct nfs_string;
8struct nfs_mount_data;
9struct nfs4_mount_data;
10
11/* Maximum number of readahead requests
12 * FIXME: this should really be a sysctl so that users may tune it to suit
13 * their needs. People that do NFS over a slow network, might for
14 * instance want to reduce it to something closer to 1 for improved
15 * interactive response.
16 */
17#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
18
7struct nfs_clone_mount { 19struct nfs_clone_mount {
8 const struct super_block *sb; 20 const struct super_block *sb;
9 const struct dentry *dentry; 21 const struct dentry *dentry;
@@ -15,7 +27,40 @@ struct nfs_clone_mount {
15 rpc_authflavor_t authflavor; 27 rpc_authflavor_t authflavor;
16}; 28};
17 29
18/* namespace-nfs4.c */ 30/* client.c */
31extern struct rpc_program nfs_program;
32
33extern void nfs_put_client(struct nfs_client *);
34extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
35extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
36 struct nfs_fh *);
37extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
38 const char *,
39 const struct sockaddr_in *,
40 const char *,
41 const char *,
42 rpc_authflavor_t,
43 struct nfs_fh *);
44extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
45 struct nfs_fh *);
46extern void nfs_free_server(struct nfs_server *server);
47extern struct nfs_server *nfs_clone_server(struct nfs_server *,
48 struct nfs_fh *,
49 struct nfs_fattr *);
50#ifdef CONFIG_PROC_FS
51extern int __init nfs_fs_proc_init(void);
52extern void nfs_fs_proc_exit(void);
53#else
54static inline int nfs_fs_proc_init(void)
55{
56 return 0;
57}
58static inline void nfs_fs_proc_exit(void)
59{
60}
61#endif
62
63/* nfs4namespace.c */
19#ifdef CONFIG_NFS_V4 64#ifdef CONFIG_NFS_V4
20extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); 65extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
21#else 66#else
@@ -46,6 +91,7 @@ extern void nfs_destroy_directcache(void);
46#endif 91#endif
47 92
48/* nfs2xdr.c */ 93/* nfs2xdr.c */
94extern int nfs_stat_to_errno(int);
49extern struct rpc_procinfo nfs_procedures[]; 95extern struct rpc_procinfo nfs_procedures[];
50extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); 96extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
51 97
@@ -54,8 +100,9 @@ extern struct rpc_procinfo nfs3_procedures[];
54extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); 100extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
55 101
56/* nfs4xdr.c */ 102/* nfs4xdr.c */
57extern int nfs_stat_to_errno(int); 103#ifdef CONFIG_NFS_V4
58extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); 104extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
105#endif
59 106
60/* nfs4proc.c */ 107/* nfs4proc.c */
61#ifdef CONFIG_NFS_V4 108#ifdef CONFIG_NFS_V4
@@ -66,6 +113,9 @@ extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
66 struct page *page); 113 struct page *page);
67#endif 114#endif
68 115
116/* dir.c */
117extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
118
69/* inode.c */ 119/* inode.c */
70extern struct inode *nfs_alloc_inode(struct super_block *sb); 120extern struct inode *nfs_alloc_inode(struct super_block *sb);
71extern void nfs_destroy_inode(struct inode *); 121extern void nfs_destroy_inode(struct inode *);
@@ -76,10 +126,10 @@ extern void nfs4_clear_inode(struct inode *);
76#endif 126#endif
77 127
78/* super.c */ 128/* super.c */
79extern struct file_system_type nfs_referral_nfs4_fs_type; 129extern struct file_system_type nfs_xdev_fs_type;
80extern struct file_system_type clone_nfs_fs_type;
81#ifdef CONFIG_NFS_V4 130#ifdef CONFIG_NFS_V4
82extern struct file_system_type clone_nfs4_fs_type; 131extern struct file_system_type nfs4_xdev_fs_type;
132extern struct file_system_type nfs4_referral_fs_type;
83#endif 133#endif
84 134
85extern struct rpc_stat nfs_rpcstat; 135extern struct rpc_stat nfs_rpcstat;
@@ -88,30 +138,30 @@ extern int __init register_nfs_fs(void);
88extern void __exit unregister_nfs_fs(void); 138extern void __exit unregister_nfs_fs(void);
89 139
90/* namespace.c */ 140/* namespace.c */
91extern char *nfs_path(const char *base, const struct dentry *dentry, 141extern char *nfs_path(const char *base,
142 const struct dentry *droot,
143 const struct dentry *dentry,
92 char *buffer, ssize_t buflen); 144 char *buffer, ssize_t buflen);
93 145
94/* 146/* getroot.c */
95 * Determine the mount path as a string 147extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
96 */
97static inline char *
98nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
99{
100#ifdef CONFIG_NFS_V4 148#ifdef CONFIG_NFS_V4
101 return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); 149extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
102#else 150
103 return NULL; 151extern int nfs4_path_walk(struct nfs_server *server,
152 struct nfs_fh *mntfh,
153 const char *path);
104#endif 154#endif
105}
106 155
107/* 156/*
108 * Determine the device name as a string 157 * Determine the device name as a string
109 */ 158 */
110static inline char *nfs_devname(const struct vfsmount *mnt_parent, 159static inline char *nfs_devname(const struct vfsmount *mnt_parent,
111 const struct dentry *dentry, 160 const struct dentry *dentry,
112 char *buffer, ssize_t buflen) 161 char *buffer, ssize_t buflen)
113{ 162{
114 return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); 163 return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root,
164 dentry, buffer, buflen);
115} 165}
116 166
117/* 167/*
@@ -167,20 +217,3 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
167 if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) 217 if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
168 sb->s_maxbytes = MAX_LFS_FILESIZE; 218 sb->s_maxbytes = MAX_LFS_FILESIZE;
169} 219}
170
171/*
172 * Check if the string represents a "valid" IPv4 address
173 */
174static inline int valid_ipaddr4(const char *buf)
175{
176 int rc, count, in[4];
177
178 rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
179 if (rc != 4)
180 return -EINVAL;
181 for (count = 0; count < 4; count++) {
182 if (in[count] > 255)
183 return -EINVAL;
184 }
185 return 0;
186}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 445abb4d4214..d507b021207f 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -14,7 +14,6 @@
14#include <linux/net.h> 14#include <linux/net.h>
15#include <linux/in.h> 15#include <linux/in.h>
16#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
17#include <linux/sunrpc/xprt.h>
18#include <linux/sunrpc/sched.h> 17#include <linux/sunrpc/sched.h>
19#include <linux/nfs_fs.h> 18#include <linux/nfs_fs.h>
20 19
@@ -77,22 +76,19 @@ static struct rpc_clnt *
77mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, 76mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
78 int protocol) 77 int protocol)
79{ 78{
80 struct rpc_xprt *xprt; 79 struct rpc_create_args args = {
81 struct rpc_clnt *clnt; 80 .protocol = protocol,
82 81 .address = (struct sockaddr *)srvaddr,
83 xprt = xprt_create_proto(protocol, srvaddr, NULL); 82 .addrsize = sizeof(*srvaddr),
84 if (IS_ERR(xprt)) 83 .servername = hostname,
85 return (struct rpc_clnt *)xprt; 84 .program = &mnt_program,
86 85 .version = version,
87 clnt = rpc_create_client(xprt, hostname, 86 .authflavor = RPC_AUTH_UNIX,
88 &mnt_program, version, 87 .flags = (RPC_CLNT_CREATE_ONESHOT |
89 RPC_AUTH_UNIX); 88 RPC_CLNT_CREATE_INTR),
90 if (!IS_ERR(clnt)) { 89 };
91 clnt->cl_softrtry = 1; 90
92 clnt->cl_oneshot = 1; 91 return rpc_create(&args);
93 clnt->cl_intr = 1;
94 }
95 return clnt;
96} 92}
97 93
98/* 94/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 86b3169c8cac..77b00684894d 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -2,6 +2,7 @@
2 * linux/fs/nfs/namespace.c 2 * linux/fs/nfs/namespace.c
3 * 3 *
4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> 4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
5 * - Modified by David Howells <dhowells@redhat.com>
5 * 6 *
6 * NFS namespace 7 * NFS namespace
7 */ 8 */
@@ -28,6 +29,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
28/* 29/*
29 * nfs_path - reconstruct the path given an arbitrary dentry 30 * nfs_path - reconstruct the path given an arbitrary dentry
30 * @base - arbitrary string to prepend to the path 31 * @base - arbitrary string to prepend to the path
32 * @droot - pointer to root dentry for mountpoint
31 * @dentry - pointer to dentry 33 * @dentry - pointer to dentry
32 * @buffer - result buffer 34 * @buffer - result buffer
33 * @buflen - length of buffer 35 * @buflen - length of buffer
@@ -38,7 +40,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
38 * This is mainly for use in figuring out the path on the 40 * This is mainly for use in figuring out the path on the
39 * server side when automounting on top of an existing partition. 41 * server side when automounting on top of an existing partition.
40 */ 42 */
41char *nfs_path(const char *base, const struct dentry *dentry, 43char *nfs_path(const char *base,
44 const struct dentry *droot,
45 const struct dentry *dentry,
42 char *buffer, ssize_t buflen) 46 char *buffer, ssize_t buflen)
43{ 47{
44 char *end = buffer+buflen; 48 char *end = buffer+buflen;
@@ -47,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry,
47 *--end = '\0'; 51 *--end = '\0';
48 buflen--; 52 buflen--;
49 spin_lock(&dcache_lock); 53 spin_lock(&dcache_lock);
50 while (!IS_ROOT(dentry)) { 54 while (!IS_ROOT(dentry) && dentry != droot) {
51 namelen = dentry->d_name.len; 55 namelen = dentry->d_name.len;
52 buflen -= namelen + 1; 56 buflen -= namelen + 1;
53 if (buflen < 0) 57 if (buflen < 0)
@@ -96,15 +100,18 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
96 struct nfs_fattr fattr; 100 struct nfs_fattr fattr;
97 int err; 101 int err;
98 102
103 dprintk("--> nfs_follow_mountpoint()\n");
104
99 BUG_ON(IS_ROOT(dentry)); 105 BUG_ON(IS_ROOT(dentry));
100 dprintk("%s: enter\n", __FUNCTION__); 106 dprintk("%s: enter\n", __FUNCTION__);
101 dput(nd->dentry); 107 dput(nd->dentry);
102 nd->dentry = dget(dentry); 108 nd->dentry = dget(dentry);
103 if (d_mountpoint(nd->dentry)) 109
104 goto out_follow;
105 /* Look it up again */ 110 /* Look it up again */
106 parent = dget_parent(nd->dentry); 111 parent = dget_parent(nd->dentry);
107 err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); 112 err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
113 &nd->dentry->d_name,
114 &fh, &fattr);
108 dput(parent); 115 dput(parent);
109 if (err != 0) 116 if (err != 0)
110 goto out_err; 117 goto out_err;
@@ -132,6 +139,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
132 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); 139 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
133out: 140out:
134 dprintk("%s: done, returned %d\n", __FUNCTION__, err); 141 dprintk("%s: done, returned %d\n", __FUNCTION__, err);
142
143 dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
135 return ERR_PTR(err); 144 return ERR_PTR(err);
136out_err: 145out_err:
137 path_release(nd); 146 path_release(nd);
@@ -172,22 +181,23 @@ void nfs_release_automount_timer(void)
172/* 181/*
173 * Clone a mountpoint of the appropriate type 182 * Clone a mountpoint of the appropriate type
174 */ 183 */
175static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname, 184static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
185 const char *devname,
176 struct nfs_clone_mount *mountdata) 186 struct nfs_clone_mount *mountdata)
177{ 187{
178#ifdef CONFIG_NFS_V4 188#ifdef CONFIG_NFS_V4
179 struct vfsmount *mnt = NULL; 189 struct vfsmount *mnt = NULL;
180 switch (server->rpc_ops->version) { 190 switch (server->nfs_client->cl_nfsversion) {
181 case 2: 191 case 2:
182 case 3: 192 case 3:
183 mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); 193 mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
184 break; 194 break;
185 case 4: 195 case 4:
186 mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata); 196 mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata);
187 } 197 }
188 return mnt; 198 return mnt;
189#else 199#else
190 return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata); 200 return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
191#endif 201#endif
192} 202}
193 203
@@ -213,6 +223,8 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
213 char *page = (char *) __get_free_page(GFP_USER); 223 char *page = (char *) __get_free_page(GFP_USER);
214 char *devname; 224 char *devname;
215 225
226 dprintk("--> nfs_do_submount()\n");
227
216 dprintk("%s: submounting on %s/%s\n", __FUNCTION__, 228 dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
217 dentry->d_parent->d_name.name, 229 dentry->d_parent->d_name.name,
218 dentry->d_name.name); 230 dentry->d_name.name);
@@ -227,5 +239,7 @@ free_page:
227 free_page((unsigned long)page); 239 free_page((unsigned long)page);
228out: 240out:
229 dprintk("%s: done\n", __FUNCTION__); 241 dprintk("%s: done\n", __FUNCTION__);
242
243 dprintk("<-- nfs_do_submount() = %p\n", mnt);
230 return mnt; 244 return mnt;
231} 245}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 67391eef6b93..b49501fc0a79 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -51,7 +51,7 @@
51#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz) 51#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz)
52#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz) 52#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz)
53#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz) 53#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz)
54#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz) 54#define NFS_symlinkargs_sz (NFS_diropargs_sz+1+NFS_sattr_sz)
55#define NFS_readdirargs_sz (NFS_fhandle_sz+2) 55#define NFS_readdirargs_sz (NFS_fhandle_sz+2)
56 56
57#define NFS_attrstat_sz (1+NFS_fattr_sz) 57#define NFS_attrstat_sz (1+NFS_fattr_sz)
@@ -351,11 +351,26 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
351static int 351static int
352nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args) 352nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
353{ 353{
354 struct xdr_buf *sndbuf = &req->rq_snd_buf;
355 size_t pad;
356
354 p = xdr_encode_fhandle(p, args->fromfh); 357 p = xdr_encode_fhandle(p, args->fromfh);
355 p = xdr_encode_array(p, args->fromname, args->fromlen); 358 p = xdr_encode_array(p, args->fromname, args->fromlen);
356 p = xdr_encode_array(p, args->topath, args->tolen); 359 *p++ = htonl(args->pathlen);
360 sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
361
362 xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
363
364 /*
365 * xdr_encode_pages may have added a few bytes to ensure the
366 * pathname ends on a 4-byte boundary. Start encoding the
367 * attributes after the pad bytes.
368 */
369 pad = sndbuf->tail->iov_len;
370 if (pad > 0)
371 p++;
357 p = xdr_encode_sattr(p, args->sattr); 372 p = xdr_encode_sattr(p, args->sattr);
358 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 373 sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
359 return 0; 374 return 0;
360} 375}
361 376
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7143b1f82cea..f8688eaa0001 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
81} 81}
82 82
83/* 83/*
84 * Bare-bones access to getattr: this is for nfs_read_super. 84 * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb
85 */ 85 */
86static int 86static int
87nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 87nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
90 int status; 90 int status;
91 91
92 status = do_proc_get_root(server->client, fhandle, info); 92 status = do_proc_get_root(server->client, fhandle, info);
93 if (status && server->client_sys != server->client) 93 if (status && server->nfs_client->cl_rpcclient != server->client)
94 status = do_proc_get_root(server->client_sys, fhandle, info); 94 status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info);
95 return status; 95 return status;
96} 96}
97 97
@@ -544,23 +544,23 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
544} 544}
545 545
546static int 546static int
547nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, 547nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
548 struct iattr *sattr, struct nfs_fh *fhandle, 548 unsigned int len, struct iattr *sattr)
549 struct nfs_fattr *fattr)
550{ 549{
551 struct nfs_fattr dir_attr; 550 struct nfs_fh fhandle;
551 struct nfs_fattr fattr, dir_attr;
552 struct nfs3_symlinkargs arg = { 552 struct nfs3_symlinkargs arg = {
553 .fromfh = NFS_FH(dir), 553 .fromfh = NFS_FH(dir),
554 .fromname = name->name, 554 .fromname = dentry->d_name.name,
555 .fromlen = name->len, 555 .fromlen = dentry->d_name.len,
556 .topath = path->name, 556 .pages = &page,
557 .tolen = path->len, 557 .pathlen = len,
558 .sattr = sattr 558 .sattr = sattr
559 }; 559 };
560 struct nfs3_diropres res = { 560 struct nfs3_diropres res = {
561 .dir_attr = &dir_attr, 561 .dir_attr = &dir_attr,
562 .fh = fhandle, 562 .fh = &fhandle,
563 .fattr = fattr 563 .fattr = &fattr
564 }; 564 };
565 struct rpc_message msg = { 565 struct rpc_message msg = {
566 .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK], 566 .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK],
@@ -569,13 +569,19 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
569 }; 569 };
570 int status; 570 int status;
571 571
572 if (path->len > NFS3_MAXPATHLEN) 572 if (len > NFS3_MAXPATHLEN)
573 return -ENAMETOOLONG; 573 return -ENAMETOOLONG;
574 dprintk("NFS call symlink %s -> %s\n", name->name, path->name); 574
575 dprintk("NFS call symlink %s\n", dentry->d_name.name);
576
575 nfs_fattr_init(&dir_attr); 577 nfs_fattr_init(&dir_attr);
576 nfs_fattr_init(fattr); 578 nfs_fattr_init(&fattr);
577 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 579 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
578 nfs_post_op_update_inode(dir, &dir_attr); 580 nfs_post_op_update_inode(dir, &dir_attr);
581 if (status != 0)
582 goto out;
583 status = nfs_instantiate(dentry, &fhandle, &fattr);
584out:
579 dprintk("NFS reply symlink: %d\n", status); 585 dprintk("NFS reply symlink: %d\n", status);
580 return status; 586 return status;
581} 587}
@@ -785,7 +791,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
785 791
786 dprintk("NFS call fsinfo\n"); 792 dprintk("NFS call fsinfo\n");
787 nfs_fattr_init(info->fattr); 793 nfs_fattr_init(info->fattr);
788 status = rpc_call_sync(server->client_sys, &msg, 0); 794 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
789 dprintk("NFS reply fsinfo: %d\n", status); 795 dprintk("NFS reply fsinfo: %d\n", status);
790 return status; 796 return status;
791} 797}
@@ -886,7 +892,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
886 return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); 892 return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
887} 893}
888 894
889struct nfs_rpc_ops nfs_v3_clientops = { 895const struct nfs_rpc_ops nfs_v3_clientops = {
890 .version = 3, /* protocol version */ 896 .version = 3, /* protocol version */
891 .dentry_ops = &nfs_dentry_operations, 897 .dentry_ops = &nfs_dentry_operations,
892 .dir_inode_ops = &nfs3_dir_inode_operations, 898 .dir_inode_ops = &nfs3_dir_inode_operations,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0250269e9753..16556fa4effb 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -56,7 +56,7 @@
56#define NFS3_writeargs_sz (NFS3_fh_sz+5) 56#define NFS3_writeargs_sz (NFS3_fh_sz+5)
57#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) 57#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
58#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) 58#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
59#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz) 59#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+1+NFS3_sattr_sz)
60#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz) 60#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz)
61#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz) 61#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz)
62#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz) 62#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz)
@@ -398,8 +398,11 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args
398 p = xdr_encode_fhandle(p, args->fromfh); 398 p = xdr_encode_fhandle(p, args->fromfh);
399 p = xdr_encode_array(p, args->fromname, args->fromlen); 399 p = xdr_encode_array(p, args->fromname, args->fromlen);
400 p = xdr_encode_sattr(p, args->sattr); 400 p = xdr_encode_sattr(p, args->sattr);
401 p = xdr_encode_array(p, args->topath, args->tolen); 401 *p++ = htonl(args->pathlen);
402 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 402 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
403
404 /* Copy the page */
405 xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
403 return 0; 406 return 0;
404} 407}
405 408
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9a102860df37..61095fe4b5ca 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -43,55 +43,6 @@ enum nfs4_client_state {
43}; 43};
44 44
45/* 45/*
46 * The nfs4_client identifies our client state to the server.
47 */
48struct nfs4_client {
49 struct list_head cl_servers; /* Global list of servers */
50 struct in_addr cl_addr; /* Server identifier */
51 u64 cl_clientid; /* constant */
52 nfs4_verifier cl_confirm;
53 unsigned long cl_state;
54
55 u32 cl_lockowner_id;
56
57 /*
58 * The following rwsem ensures exclusive access to the server
59 * while we recover the state following a lease expiration.
60 */
61 struct rw_semaphore cl_sem;
62
63 struct list_head cl_delegations;
64 struct list_head cl_state_owners;
65 struct list_head cl_unused;
66 int cl_nunused;
67 spinlock_t cl_lock;
68 atomic_t cl_count;
69
70 struct rpc_clnt * cl_rpcclient;
71
72 struct list_head cl_superblocks; /* List of nfs_server structs */
73
74 unsigned long cl_lease_time;
75 unsigned long cl_last_renewal;
76 struct work_struct cl_renewd;
77 struct work_struct cl_recoverd;
78
79 struct rpc_wait_queue cl_rpcwaitq;
80
81 /* used for the setclientid verifier */
82 struct timespec cl_boot_time;
83
84 /* idmapper */
85 struct idmap * cl_idmap;
86
87 /* Our own IP address, as a null-terminated string.
88 * This is used to generate the clientid, and the callback address.
89 */
90 char cl_ipaddr[16];
91 unsigned char cl_id_uniquifier;
92};
93
94/*
95 * struct rpc_sequence ensures that RPC calls are sent in the exact 46 * struct rpc_sequence ensures that RPC calls are sent in the exact
96 * order that they appear on the list. 47 * order that they appear on the list.
97 */ 48 */
@@ -127,7 +78,7 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
127struct nfs4_state_owner { 78struct nfs4_state_owner {
128 spinlock_t so_lock; 79 spinlock_t so_lock;
129 struct list_head so_list; /* per-clientid list of state_owners */ 80 struct list_head so_list; /* per-clientid list of state_owners */
130 struct nfs4_client *so_client; 81 struct nfs_client *so_client;
131 u32 so_id; /* 32-bit identifier, unique */ 82 u32 so_id; /* 32-bit identifier, unique */
132 atomic_t so_count; 83 atomic_t so_count;
133 84
@@ -210,10 +161,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
210 161
211/* nfs4proc.c */ 162/* nfs4proc.c */
212extern int nfs4_map_errors(int err); 163extern int nfs4_map_errors(int err);
213extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *); 164extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
214extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *); 165extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
215extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *); 166extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
216extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *); 167extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
217extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); 168extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
218extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 169extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
219extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 170extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -231,19 +182,14 @@ extern const u32 nfs4_fsinfo_bitmap[2];
231extern const u32 nfs4_fs_locations_bitmap[2]; 182extern const u32 nfs4_fs_locations_bitmap[2];
232 183
233/* nfs4renewd.c */ 184/* nfs4renewd.c */
234extern void nfs4_schedule_state_renewal(struct nfs4_client *); 185extern void nfs4_schedule_state_renewal(struct nfs_client *);
235extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); 186extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
236extern void nfs4_kill_renewd(struct nfs4_client *); 187extern void nfs4_kill_renewd(struct nfs_client *);
237extern void nfs4_renew_state(void *); 188extern void nfs4_renew_state(void *);
238 189
239/* nfs4state.c */ 190/* nfs4state.c */
240extern void init_nfsv4_state(struct nfs_server *); 191struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
241extern void destroy_nfsv4_state(struct nfs_server *); 192extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
242extern struct nfs4_client *nfs4_get_client(struct in_addr *);
243extern void nfs4_put_client(struct nfs4_client *clp);
244extern struct nfs4_client *nfs4_find_client(struct in_addr *);
245struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp);
246extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
247 193
248extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 194extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
249extern void nfs4_put_state_owner(struct nfs4_state_owner *); 195extern void nfs4_put_state_owner(struct nfs4_state_owner *);
@@ -252,7 +198,7 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
252extern void nfs4_put_open_state(struct nfs4_state *); 198extern void nfs4_put_open_state(struct nfs4_state *);
253extern void nfs4_close_state(struct nfs4_state *, mode_t); 199extern void nfs4_close_state(struct nfs4_state *, mode_t);
254extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); 200extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
255extern void nfs4_schedule_state_recovery(struct nfs4_client *); 201extern void nfs4_schedule_state_recovery(struct nfs_client *);
256extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 202extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
257extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 203extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
258extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 204extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
@@ -276,10 +222,6 @@ extern struct svc_version nfs4_callback_version1;
276 222
277#else 223#else
278 224
279#define init_nfsv4_state(server) do { } while (0)
280#define destroy_nfsv4_state(server) do { } while (0)
281#define nfs4_put_state_owner(inode, owner) do { } while (0)
282#define nfs4_put_open_state(state) do { } while (0)
283#define nfs4_close_state(a, b) do { } while (0) 225#define nfs4_close_state(a, b) do { } while (0)
284 226
285#endif /* CONFIG_NFS_V4 */ 227#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index ea38d27b74e6..24e47f3bbd17 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -2,6 +2,7 @@
2 * linux/fs/nfs/nfs4namespace.c 2 * linux/fs/nfs/nfs4namespace.c
3 * 3 *
4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com> 4 * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
5 * - Modified by David Howells <dhowells@redhat.com>
5 * 6 *
6 * NFSv4 namespace 7 * NFSv4 namespace
7 */ 8 */
@@ -23,7 +24,7 @@
23/* 24/*
24 * Check if fs_root is valid 25 * Check if fs_root is valid
25 */ 26 */
26static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname, 27static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
27 char *buffer, ssize_t buflen) 28 char *buffer, ssize_t buflen)
28{ 29{
29 char *end = buffer + buflen; 30 char *end = buffer + buflen;
@@ -34,7 +35,7 @@ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
34 35
35 n = pathname->ncomponents; 36 n = pathname->ncomponents;
36 while (--n >= 0) { 37 while (--n >= 0) {
37 struct nfs4_string *component = &pathname->components[n]; 38 const struct nfs4_string *component = &pathname->components[n];
38 buflen -= component->len + 1; 39 buflen -= component->len + 1;
39 if (buflen < 0) 40 if (buflen < 0)
40 goto Elong; 41 goto Elong;
@@ -47,6 +48,68 @@ Elong:
47 return ERR_PTR(-ENAMETOOLONG); 48 return ERR_PTR(-ENAMETOOLONG);
48} 49}
49 50
51/*
52 * Determine the mount path as a string
53 */
54static char *nfs4_path(const struct vfsmount *mnt_parent,
55 const struct dentry *dentry,
56 char *buffer, ssize_t buflen)
57{
58 const char *srvpath;
59
60 srvpath = strchr(mnt_parent->mnt_devname, ':');
61 if (srvpath)
62 srvpath++;
63 else
64 srvpath = mnt_parent->mnt_devname;
65
66 return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
67}
68
69/*
70 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
71 * believe to be the server path to this dentry
72 */
73static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
74 const struct dentry *dentry,
75 const struct nfs4_fs_locations *locations,
76 char *page, char *page2)
77{
78 const char *path, *fs_path;
79
80 path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE);
81 if (IS_ERR(path))
82 return PTR_ERR(path);
83
84 fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
85 if (IS_ERR(fs_path))
86 return PTR_ERR(fs_path);
87
88 if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
89 dprintk("%s: path %s does not begin with fsroot %s\n",
90 __FUNCTION__, path, fs_path);
91 return -ENOENT;
92 }
93
94 return 0;
95}
96
97/*
98 * Check if the string represents a "valid" IPv4 address
99 */
100static inline int valid_ipaddr4(const char *buf)
101{
102 int rc, count, in[4];
103
104 rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
105 if (rc != 4)
106 return -EINVAL;
107 for (count = 0; count < 4; count++) {
108 if (in[count] > 255)
109 return -EINVAL;
110 }
111 return 0;
112}
50 113
51/** 114/**
52 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error 115 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
@@ -60,7 +123,7 @@ Elong:
60 */ 123 */
61static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, 124static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
62 const struct dentry *dentry, 125 const struct dentry *dentry,
63 struct nfs4_fs_locations *locations) 126 const struct nfs4_fs_locations *locations)
64{ 127{
65 struct vfsmount *mnt = ERR_PTR(-ENOENT); 128 struct vfsmount *mnt = ERR_PTR(-ENOENT);
66 struct nfs_clone_mount mountdata = { 129 struct nfs_clone_mount mountdata = {
@@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
68 .dentry = dentry, 131 .dentry = dentry,
69 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, 132 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
70 }; 133 };
71 char *page, *page2; 134 char *page = NULL, *page2 = NULL;
72 char *path, *fs_path;
73 char *devname; 135 char *devname;
74 int loc, s; 136 int loc, s, error;
75 137
76 if (locations == NULL || locations->nlocations <= 0) 138 if (locations == NULL || locations->nlocations <= 0)
77 goto out; 139 goto out;
@@ -79,36 +141,30 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
79 dprintk("%s: referral at %s/%s\n", __FUNCTION__, 141 dprintk("%s: referral at %s/%s\n", __FUNCTION__,
80 dentry->d_parent->d_name.name, dentry->d_name.name); 142 dentry->d_parent->d_name.name, dentry->d_name.name);
81 143
82 /* Ensure fs path is a prefix of current dentry path */
83 page = (char *) __get_free_page(GFP_USER); 144 page = (char *) __get_free_page(GFP_USER);
84 if (page == NULL) 145 if (!page)
85 goto out; 146 goto out;
147
86 page2 = (char *) __get_free_page(GFP_USER); 148 page2 = (char *) __get_free_page(GFP_USER);
87 if (page2 == NULL) 149 if (!page2)
88 goto out; 150 goto out;
89 151
90 path = nfs4_path(dentry, page, PAGE_SIZE); 152 /* Ensure fs path is a prefix of current dentry path */
91 if (IS_ERR(path)) 153 error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2);
92 goto out_free; 154 if (error < 0) {
93 155 mnt = ERR_PTR(error);
94 fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); 156 goto out;
95 if (IS_ERR(fs_path))
96 goto out_free;
97
98 if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
99 dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path);
100 goto out_free;
101 } 157 }
102 158
103 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); 159 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
104 if (IS_ERR(devname)) { 160 if (IS_ERR(devname)) {
105 mnt = (struct vfsmount *)devname; 161 mnt = (struct vfsmount *)devname;
106 goto out_free; 162 goto out;
107 } 163 }
108 164
109 loc = 0; 165 loc = 0;
110 while (loc < locations->nlocations && IS_ERR(mnt)) { 166 while (loc < locations->nlocations && IS_ERR(mnt)) {
111 struct nfs4_fs_location *location = &locations->locations[loc]; 167 const struct nfs4_fs_location *location = &locations->locations[loc];
112 char *mnt_path; 168 char *mnt_path;
113 169
114 if (location == NULL || location->nservers <= 0 || 170 if (location == NULL || location->nservers <= 0 ||
@@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
140 addr.sin_port = htons(NFS_PORT); 196 addr.sin_port = htons(NFS_PORT);
141 mountdata.addr = &addr; 197 mountdata.addr = &addr;
142 198
143 mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata); 199 mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata);
144 if (!IS_ERR(mnt)) { 200 if (!IS_ERR(mnt)) {
145 break; 201 break;
146 } 202 }
@@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
149 loc++; 205 loc++;
150 } 206 }
151 207
152out_free:
153 free_page((unsigned long)page);
154 free_page((unsigned long)page2);
155out: 208out:
209 free_page((unsigned long) page);
210 free_page((unsigned long) page2);
156 dprintk("%s: done\n", __FUNCTION__); 211 dprintk("%s: done\n", __FUNCTION__);
157 return mnt; 212 return mnt;
158} 213}
@@ -165,7 +220,7 @@ out:
165 */ 220 */
166struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 221struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
167{ 222{
168 struct vfsmount *mnt = ERR_PTR(-ENOENT); 223 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
169 struct dentry *parent; 224 struct dentry *parent;
170 struct nfs4_fs_locations *fs_locations = NULL; 225 struct nfs4_fs_locations *fs_locations = NULL;
171 struct page *page; 226 struct page *page;
@@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
183 goto out_free; 238 goto out_free;
184 239
185 /* Get locations */ 240 /* Get locations */
241 mnt = ERR_PTR(-ENOENT);
242
186 parent = dget_parent(dentry); 243 parent = dget_parent(dentry);
187 dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name); 244 dprintk("%s: getting locations for %s/%s\n",
245 __FUNCTION__, parent->d_name.name, dentry->d_name.name);
246
188 err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page); 247 err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
189 dput(parent); 248 dput(parent);
190 if (err != 0 || fs_locations->nlocations <= 0 || 249 if (err != 0 ||
250 fs_locations->nlocations <= 0 ||
191 fs_locations->fs_path.ncomponents <= 0) 251 fs_locations->fs_path.ncomponents <= 0)
192 goto out_free; 252 goto out_free;
193 253
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b14145b7b87f..47c7e6e3910d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,7 +55,7 @@
55 55
56#define NFSDBG_FACILITY NFSDBG_PROC 56#define NFSDBG_FACILITY NFSDBG_PROC
57 57
58#define NFS4_POLL_RETRY_MIN (1*HZ) 58#define NFS4_POLL_RETRY_MIN (HZ/10)
59#define NFS4_POLL_RETRY_MAX (15*HZ) 59#define NFS4_POLL_RETRY_MAX (15*HZ)
60 60
61struct nfs4_opendata; 61struct nfs4_opendata;
@@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf
64static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); 64static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); 65static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); 66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp); 67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
68 68
69/* Prevent leaks of NFSv4 errors into userland */ 69/* Prevent leaks of NFSv4 errors into userland */
70int nfs4_map_errors(int err) 70int nfs4_map_errors(int err)
@@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
195 195
196static void renew_lease(const struct nfs_server *server, unsigned long timestamp) 196static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
197{ 197{
198 struct nfs4_client *clp = server->nfs4_state; 198 struct nfs_client *clp = server->nfs_client;
199 spin_lock(&clp->cl_lock); 199 spin_lock(&clp->cl_lock);
200 if (time_before(clp->cl_last_renewal,timestamp)) 200 if (time_before(clp->cl_last_renewal,timestamp))
201 clp->cl_last_renewal = timestamp; 201 clp->cl_last_renewal = timestamp;
@@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
252 atomic_inc(&sp->so_count); 252 atomic_inc(&sp->so_count);
253 p->o_arg.fh = NFS_FH(dir); 253 p->o_arg.fh = NFS_FH(dir);
254 p->o_arg.open_flags = flags, 254 p->o_arg.open_flags = flags,
255 p->o_arg.clientid = server->nfs4_state->cl_clientid; 255 p->o_arg.clientid = server->nfs_client->cl_clientid;
256 p->o_arg.id = sp->so_id; 256 p->o_arg.id = sp->so_id;
257 p->o_arg.name = &dentry->d_name; 257 p->o_arg.name = &dentry->d_name;
258 p->o_arg.server = server; 258 p->o_arg.server = server;
@@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
550 case -NFS4ERR_STALE_STATEID: 550 case -NFS4ERR_STALE_STATEID:
551 case -NFS4ERR_EXPIRED: 551 case -NFS4ERR_EXPIRED:
552 /* Don't recall a delegation if it was lost */ 552 /* Don't recall a delegation if it was lost */
553 nfs4_schedule_state_recovery(server->nfs4_state); 553 nfs4_schedule_state_recovery(server->nfs_client);
554 return err; 554 return err;
555 } 555 }
556 err = nfs4_handle_exception(server, err, &exception); 556 err = nfs4_handle_exception(server, err, &exception);
@@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
758 } 758 }
759 nfs_confirm_seqid(&data->owner->so_seqid, 0); 759 nfs_confirm_seqid(&data->owner->so_seqid, 0);
760 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) 760 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
761 return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); 761 return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
762 return 0; 762 return 0;
763} 763}
764 764
@@ -792,11 +792,18 @@ out:
792 792
793int nfs4_recover_expired_lease(struct nfs_server *server) 793int nfs4_recover_expired_lease(struct nfs_server *server)
794{ 794{
795 struct nfs4_client *clp = server->nfs4_state; 795 struct nfs_client *clp = server->nfs_client;
796 int ret;
796 797
797 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 798 for (;;) {
799 ret = nfs4_wait_clnt_recover(server->client, clp);
800 if (ret != 0)
801 return ret;
802 if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
803 break;
798 nfs4_schedule_state_recovery(clp); 804 nfs4_schedule_state_recovery(clp);
799 return nfs4_wait_clnt_recover(server->client, clp); 805 }
806 return 0;
800} 807}
801 808
802/* 809/*
@@ -867,7 +874,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
867{ 874{
868 struct nfs_delegation *delegation; 875 struct nfs_delegation *delegation;
869 struct nfs_server *server = NFS_SERVER(inode); 876 struct nfs_server *server = NFS_SERVER(inode);
870 struct nfs4_client *clp = server->nfs4_state; 877 struct nfs_client *clp = server->nfs_client;
871 struct nfs_inode *nfsi = NFS_I(inode); 878 struct nfs_inode *nfsi = NFS_I(inode);
872 struct nfs4_state_owner *sp = NULL; 879 struct nfs4_state_owner *sp = NULL;
873 struct nfs4_state *state = NULL; 880 struct nfs4_state *state = NULL;
@@ -953,7 +960,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
953 struct nfs4_state_owner *sp; 960 struct nfs4_state_owner *sp;
954 struct nfs4_state *state = NULL; 961 struct nfs4_state *state = NULL;
955 struct nfs_server *server = NFS_SERVER(dir); 962 struct nfs_server *server = NFS_SERVER(dir);
956 struct nfs4_client *clp = server->nfs4_state; 963 struct nfs_client *clp = server->nfs_client;
957 struct nfs4_opendata *opendata; 964 struct nfs4_opendata *opendata;
958 int status; 965 int status;
959 966
@@ -1133,7 +1140,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1133 break; 1140 break;
1134 case -NFS4ERR_STALE_STATEID: 1141 case -NFS4ERR_STALE_STATEID:
1135 case -NFS4ERR_EXPIRED: 1142 case -NFS4ERR_EXPIRED:
1136 nfs4_schedule_state_recovery(server->nfs4_state); 1143 nfs4_schedule_state_recovery(server->nfs_client);
1137 break; 1144 break;
1138 default: 1145 default:
1139 if (nfs4_async_handle_error(task, server) == -EAGAIN) { 1146 if (nfs4_async_handle_error(task, server) == -EAGAIN) {
@@ -1268,7 +1275,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1268 BUG_ON(nd->intent.open.flags & O_CREAT); 1275 BUG_ON(nd->intent.open.flags & O_CREAT);
1269 } 1276 }
1270 1277
1271 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); 1278 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1272 if (IS_ERR(cred)) 1279 if (IS_ERR(cred))
1273 return (struct dentry *)cred; 1280 return (struct dentry *)cred;
1274 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); 1281 state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
@@ -1291,7 +1298,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1291 struct rpc_cred *cred; 1298 struct rpc_cred *cred;
1292 struct nfs4_state *state; 1299 struct nfs4_state *state;
1293 1300
1294 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); 1301 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1295 if (IS_ERR(cred)) 1302 if (IS_ERR(cred))
1296 return PTR_ERR(cred); 1303 return PTR_ERR(cred);
1297 state = nfs4_open_delegated(dentry->d_inode, openflags, cred); 1304 state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
@@ -1393,70 +1400,19 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
1393 return err; 1400 return err;
1394} 1401}
1395 1402
1403/*
1404 * get the file handle for the "/" directory on the server
1405 */
1396static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 1406static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
1397 struct nfs_fsinfo *info) 1407 struct nfs_fsinfo *info)
1398{ 1408{
1399 struct nfs_fattr * fattr = info->fattr;
1400 unsigned char * p;
1401 struct qstr q;
1402 struct nfs4_lookup_arg args = {
1403 .dir_fh = fhandle,
1404 .name = &q,
1405 .bitmask = nfs4_fattr_bitmap,
1406 };
1407 struct nfs4_lookup_res res = {
1408 .server = server,
1409 .fattr = fattr,
1410 .fh = fhandle,
1411 };
1412 struct rpc_message msg = {
1413 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
1414 .rpc_argp = &args,
1415 .rpc_resp = &res,
1416 };
1417 int status; 1409 int status;
1418 1410
1419 /*
1420 * Now we do a separate LOOKUP for each component of the mount path.
1421 * The LOOKUPs are done separately so that we can conveniently
1422 * catch an ERR_WRONGSEC if it occurs along the way...
1423 */
1424 status = nfs4_lookup_root(server, fhandle, info); 1411 status = nfs4_lookup_root(server, fhandle, info);
1425 if (status)
1426 goto out;
1427
1428 p = server->mnt_path;
1429 for (;;) {
1430 struct nfs4_exception exception = { };
1431
1432 while (*p == '/')
1433 p++;
1434 if (!*p)
1435 break;
1436 q.name = p;
1437 while (*p && (*p != '/'))
1438 p++;
1439 q.len = p - q.name;
1440
1441 do {
1442 nfs_fattr_init(fattr);
1443 status = nfs4_handle_exception(server,
1444 rpc_call_sync(server->client, &msg, 0),
1445 &exception);
1446 } while (exception.retry);
1447 if (status == 0)
1448 continue;
1449 if (status == -ENOENT) {
1450 printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
1451 printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
1452 }
1453 break;
1454 }
1455 if (status == 0) 1412 if (status == 0)
1456 status = nfs4_server_capabilities(server, fhandle); 1413 status = nfs4_server_capabilities(server, fhandle);
1457 if (status == 0) 1414 if (status == 0)
1458 status = nfs4_do_fsinfo(server, fhandle, info); 1415 status = nfs4_do_fsinfo(server, fhandle, info);
1459out:
1460 return nfs4_map_errors(status); 1416 return nfs4_map_errors(status);
1461} 1417}
1462 1418
@@ -1565,7 +1521,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1565 1521
1566 nfs_fattr_init(fattr); 1522 nfs_fattr_init(fattr);
1567 1523
1568 cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); 1524 cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
1569 if (IS_ERR(cred)) 1525 if (IS_ERR(cred))
1570 return PTR_ERR(cred); 1526 return PTR_ERR(cred);
1571 1527
@@ -1583,6 +1539,52 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1583 return status; 1539 return status;
1584} 1540}
1585 1541
1542static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1543 struct qstr *name, struct nfs_fh *fhandle,
1544 struct nfs_fattr *fattr)
1545{
1546 int status;
1547 struct nfs4_lookup_arg args = {
1548 .bitmask = server->attr_bitmask,
1549 .dir_fh = dirfh,
1550 .name = name,
1551 };
1552 struct nfs4_lookup_res res = {
1553 .server = server,
1554 .fattr = fattr,
1555 .fh = fhandle,
1556 };
1557 struct rpc_message msg = {
1558 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
1559 .rpc_argp = &args,
1560 .rpc_resp = &res,
1561 };
1562
1563 nfs_fattr_init(fattr);
1564
1565 dprintk("NFS call lookupfh %s\n", name->name);
1566 status = rpc_call_sync(server->client, &msg, 0);
1567 dprintk("NFS reply lookupfh: %d\n", status);
1568 if (status == -NFS4ERR_MOVED)
1569 status = -EREMOTE;
1570 return status;
1571}
1572
1573static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
1574 struct qstr *name, struct nfs_fh *fhandle,
1575 struct nfs_fattr *fattr)
1576{
1577 struct nfs4_exception exception = { };
1578 int err;
1579 do {
1580 err = nfs4_handle_exception(server,
1581 _nfs4_proc_lookupfh(server, dirfh, name,
1582 fhandle, fattr),
1583 &exception);
1584 } while (exception.retry);
1585 return err;
1586}
1587
1586static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name, 1588static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
1587 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 1589 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
1588{ 1590{
@@ -1881,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1881 struct rpc_cred *cred; 1883 struct rpc_cred *cred;
1882 int status = 0; 1884 int status = 0;
1883 1885
1884 cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); 1886 cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
1885 if (IS_ERR(cred)) { 1887 if (IS_ERR(cred)) {
1886 status = PTR_ERR(cred); 1888 status = PTR_ERR(cred);
1887 goto out; 1889 goto out;
@@ -2089,24 +2091,24 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n
2089 return err; 2091 return err;
2090} 2092}
2091 2093
2092static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name, 2094static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
2093 struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, 2095 struct page *page, unsigned int len, struct iattr *sattr)
2094 struct nfs_fattr *fattr)
2095{ 2096{
2096 struct nfs_server *server = NFS_SERVER(dir); 2097 struct nfs_server *server = NFS_SERVER(dir);
2097 struct nfs_fattr dir_fattr; 2098 struct nfs_fh fhandle;
2099 struct nfs_fattr fattr, dir_fattr;
2098 struct nfs4_create_arg arg = { 2100 struct nfs4_create_arg arg = {
2099 .dir_fh = NFS_FH(dir), 2101 .dir_fh = NFS_FH(dir),
2100 .server = server, 2102 .server = server,
2101 .name = name, 2103 .name = &dentry->d_name,
2102 .attrs = sattr, 2104 .attrs = sattr,
2103 .ftype = NF4LNK, 2105 .ftype = NF4LNK,
2104 .bitmask = server->attr_bitmask, 2106 .bitmask = server->attr_bitmask,
2105 }; 2107 };
2106 struct nfs4_create_res res = { 2108 struct nfs4_create_res res = {
2107 .server = server, 2109 .server = server,
2108 .fh = fhandle, 2110 .fh = &fhandle,
2109 .fattr = fattr, 2111 .fattr = &fattr,
2110 .dir_fattr = &dir_fattr, 2112 .dir_fattr = &dir_fattr,
2111 }; 2113 };
2112 struct rpc_message msg = { 2114 struct rpc_message msg = {
@@ -2116,29 +2118,32 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
2116 }; 2118 };
2117 int status; 2119 int status;
2118 2120
2119 if (path->len > NFS4_MAXPATHLEN) 2121 if (len > NFS4_MAXPATHLEN)
2120 return -ENAMETOOLONG; 2122 return -ENAMETOOLONG;
2121 arg.u.symlink = path; 2123
2122 nfs_fattr_init(fattr); 2124 arg.u.symlink.pages = &page;
2125 arg.u.symlink.len = len;
2126 nfs_fattr_init(&fattr);
2123 nfs_fattr_init(&dir_fattr); 2127 nfs_fattr_init(&dir_fattr);
2124 2128
2125 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 2129 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
2126 if (!status) 2130 if (!status) {
2127 update_changeattr(dir, &res.dir_cinfo); 2131 update_changeattr(dir, &res.dir_cinfo);
2128 nfs_post_op_update_inode(dir, res.dir_fattr); 2132 nfs_post_op_update_inode(dir, res.dir_fattr);
2133 status = nfs_instantiate(dentry, &fhandle, &fattr);
2134 }
2129 return status; 2135 return status;
2130} 2136}
2131 2137
2132static int nfs4_proc_symlink(struct inode *dir, struct qstr *name, 2138static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
2133 struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle, 2139 struct page *page, unsigned int len, struct iattr *sattr)
2134 struct nfs_fattr *fattr)
2135{ 2140{
2136 struct nfs4_exception exception = { }; 2141 struct nfs4_exception exception = { };
2137 int err; 2142 int err;
2138 do { 2143 do {
2139 err = nfs4_handle_exception(NFS_SERVER(dir), 2144 err = nfs4_handle_exception(NFS_SERVER(dir),
2140 _nfs4_proc_symlink(dir, name, path, sattr, 2145 _nfs4_proc_symlink(dir, dentry, page,
2141 fhandle, fattr), 2146 len, sattr),
2142 &exception); 2147 &exception);
2143 } while (exception.retry); 2148 } while (exception.retry);
2144 return err; 2149 return err;
@@ -2521,7 +2526,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
2521 */ 2526 */
2522static void nfs4_renew_done(struct rpc_task *task, void *data) 2527static void nfs4_renew_done(struct rpc_task *task, void *data)
2523{ 2528{
2524 struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; 2529 struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp;
2525 unsigned long timestamp = (unsigned long)data; 2530 unsigned long timestamp = (unsigned long)data;
2526 2531
2527 if (task->tk_status < 0) { 2532 if (task->tk_status < 0) {
@@ -2543,7 +2548,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
2543 .rpc_call_done = nfs4_renew_done, 2548 .rpc_call_done = nfs4_renew_done,
2544}; 2549};
2545 2550
2546int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred) 2551int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
2547{ 2552{
2548 struct rpc_message msg = { 2553 struct rpc_message msg = {
2549 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], 2554 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2555,7 +2560,7 @@ int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
2555 &nfs4_renew_ops, (void *)jiffies); 2560 &nfs4_renew_ops, (void *)jiffies);
2556} 2561}
2557 2562
2558int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred) 2563int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
2559{ 2564{
2560 struct rpc_message msg = { 2565 struct rpc_message msg = {
2561 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], 2566 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2770,7 +2775,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
2770 return -EOPNOTSUPP; 2775 return -EOPNOTSUPP;
2771 nfs_inode_return_delegation(inode); 2776 nfs_inode_return_delegation(inode);
2772 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 2777 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
2773 ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); 2778 ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
2774 if (ret == 0) 2779 if (ret == 0)
2775 nfs4_write_cached_acl(inode, buf, buflen); 2780 nfs4_write_cached_acl(inode, buf, buflen);
2776 return ret; 2781 return ret;
@@ -2791,7 +2796,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
2791static int 2796static int
2792nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) 2797nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
2793{ 2798{
2794 struct nfs4_client *clp = server->nfs4_state; 2799 struct nfs_client *clp = server->nfs_client;
2795 2800
2796 if (!clp || task->tk_status >= 0) 2801 if (!clp || task->tk_status >= 0)
2797 return 0; 2802 return 0;
@@ -2828,7 +2833,7 @@ static int nfs4_wait_bit_interruptible(void *word)
2828 return 0; 2833 return 0;
2829} 2834}
2830 2835
2831static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) 2836static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
2832{ 2837{
2833 sigset_t oldset; 2838 sigset_t oldset;
2834 int res; 2839 int res;
@@ -2871,7 +2876,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
2871 */ 2876 */
2872int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) 2877int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
2873{ 2878{
2874 struct nfs4_client *clp = server->nfs4_state; 2879 struct nfs_client *clp = server->nfs_client;
2875 int ret = errorcode; 2880 int ret = errorcode;
2876 2881
2877 exception->retry = 0; 2882 exception->retry = 0;
@@ -2886,6 +2891,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
2886 if (ret == 0) 2891 if (ret == 0)
2887 exception->retry = 1; 2892 exception->retry = 1;
2888 break; 2893 break;
2894 case -NFS4ERR_FILE_OPEN:
2889 case -NFS4ERR_GRACE: 2895 case -NFS4ERR_GRACE:
2890 case -NFS4ERR_DELAY: 2896 case -NFS4ERR_DELAY:
2891 ret = nfs4_delay(server->client, &exception->timeout); 2897 ret = nfs4_delay(server->client, &exception->timeout);
@@ -2898,7 +2904,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
2898 return nfs4_map_errors(ret); 2904 return nfs4_map_errors(ret);
2899} 2905}
2900 2906
2901int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) 2907int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
2902{ 2908{
2903 nfs4_verifier sc_verifier; 2909 nfs4_verifier sc_verifier;
2904 struct nfs4_setclientid setclientid = { 2910 struct nfs4_setclientid setclientid = {
@@ -2922,7 +2928,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
2922 for(;;) { 2928 for(;;) {
2923 setclientid.sc_name_len = scnprintf(setclientid.sc_name, 2929 setclientid.sc_name_len = scnprintf(setclientid.sc_name,
2924 sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", 2930 sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
2925 clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr), 2931 clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
2926 cred->cr_ops->cr_name, 2932 cred->cr_ops->cr_name,
2927 clp->cl_id_uniquifier); 2933 clp->cl_id_uniquifier);
2928 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, 2934 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
@@ -2945,7 +2951,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
2945 return status; 2951 return status;
2946} 2952}
2947 2953
2948static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) 2954static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
2949{ 2955{
2950 struct nfs_fsinfo fsinfo; 2956 struct nfs_fsinfo fsinfo;
2951 struct rpc_message msg = { 2957 struct rpc_message msg = {
@@ -2969,7 +2975,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cr
2969 return status; 2975 return status;
2970} 2976}
2971 2977
2972int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) 2978int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
2973{ 2979{
2974 long timeout; 2980 long timeout;
2975 int err; 2981 int err;
@@ -3077,7 +3083,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
3077 switch (err) { 3083 switch (err) {
3078 case -NFS4ERR_STALE_STATEID: 3084 case -NFS4ERR_STALE_STATEID:
3079 case -NFS4ERR_EXPIRED: 3085 case -NFS4ERR_EXPIRED:
3080 nfs4_schedule_state_recovery(server->nfs4_state); 3086 nfs4_schedule_state_recovery(server->nfs_client);
3081 case 0: 3087 case 0:
3082 return 0; 3088 return 0;
3083 } 3089 }
@@ -3106,7 +3112,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3106{ 3112{
3107 struct inode *inode = state->inode; 3113 struct inode *inode = state->inode;
3108 struct nfs_server *server = NFS_SERVER(inode); 3114 struct nfs_server *server = NFS_SERVER(inode);
3109 struct nfs4_client *clp = server->nfs4_state; 3115 struct nfs_client *clp = server->nfs_client;
3110 struct nfs_lockt_args arg = { 3116 struct nfs_lockt_args arg = {
3111 .fh = NFS_FH(inode), 3117 .fh = NFS_FH(inode),
3112 .fl = request, 3118 .fl = request,
@@ -3231,7 +3237,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3231 break; 3237 break;
3232 case -NFS4ERR_STALE_STATEID: 3238 case -NFS4ERR_STALE_STATEID:
3233 case -NFS4ERR_EXPIRED: 3239 case -NFS4ERR_EXPIRED:
3234 nfs4_schedule_state_recovery(calldata->server->nfs4_state); 3240 nfs4_schedule_state_recovery(calldata->server->nfs_client);
3235 break; 3241 break;
3236 default: 3242 default:
3237 if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { 3243 if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
@@ -3343,7 +3349,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3343 if (p->arg.lock_seqid == NULL) 3349 if (p->arg.lock_seqid == NULL)
3344 goto out_free; 3350 goto out_free;
3345 p->arg.lock_stateid = &lsp->ls_stateid; 3351 p->arg.lock_stateid = &lsp->ls_stateid;
3346 p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid; 3352 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
3347 p->arg.lock_owner.id = lsp->ls_id; 3353 p->arg.lock_owner.id = lsp->ls_id;
3348 p->lsp = lsp; 3354 p->lsp = lsp;
3349 atomic_inc(&lsp->ls_count); 3355 atomic_inc(&lsp->ls_count);
@@ -3513,7 +3519,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
3513 3519
3514static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 3520static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
3515{ 3521{
3516 struct nfs4_client *clp = state->owner->so_client; 3522 struct nfs_client *clp = state->owner->so_client;
3517 unsigned char fl_flags = request->fl_flags; 3523 unsigned char fl_flags = request->fl_flags;
3518 int status; 3524 int status;
3519 3525
@@ -3715,7 +3721,7 @@ static struct inode_operations nfs4_file_inode_operations = {
3715 .listxattr = nfs4_listxattr, 3721 .listxattr = nfs4_listxattr,
3716}; 3722};
3717 3723
3718struct nfs_rpc_ops nfs_v4_clientops = { 3724const struct nfs_rpc_ops nfs_v4_clientops = {
3719 .version = 4, /* protocol version */ 3725 .version = 4, /* protocol version */
3720 .dentry_ops = &nfs4_dentry_operations, 3726 .dentry_ops = &nfs4_dentry_operations,
3721 .dir_inode_ops = &nfs4_dir_inode_operations, 3727 .dir_inode_ops = &nfs4_dir_inode_operations,
@@ -3723,6 +3729,7 @@ struct nfs_rpc_ops nfs_v4_clientops = {
3723 .getroot = nfs4_proc_get_root, 3729 .getroot = nfs4_proc_get_root,
3724 .getattr = nfs4_proc_getattr, 3730 .getattr = nfs4_proc_getattr,
3725 .setattr = nfs4_proc_setattr, 3731 .setattr = nfs4_proc_setattr,
3732 .lookupfh = nfs4_proc_lookupfh,
3726 .lookup = nfs4_proc_lookup, 3733 .lookup = nfs4_proc_lookup,
3727 .access = nfs4_proc_access, 3734 .access = nfs4_proc_access,
3728 .readlink = nfs4_proc_readlink, 3735 .readlink = nfs4_proc_readlink,
@@ -3743,6 +3750,7 @@ struct nfs_rpc_ops nfs_v4_clientops = {
3743 .statfs = nfs4_proc_statfs, 3750 .statfs = nfs4_proc_statfs,
3744 .fsinfo = nfs4_proc_fsinfo, 3751 .fsinfo = nfs4_proc_fsinfo,
3745 .pathconf = nfs4_proc_pathconf, 3752 .pathconf = nfs4_proc_pathconf,
3753 .set_capabilities = nfs4_server_capabilities,
3746 .decode_dirent = nfs4_decode_dirent, 3754 .decode_dirent = nfs4_decode_dirent,
3747 .read_setup = nfs4_proc_read_setup, 3755 .read_setup = nfs4_proc_read_setup,
3748 .read_done = nfs4_read_done, 3756 .read_done = nfs4_read_done,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 5d764d8e6d8a..7b6df1852e75 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -61,7 +61,7 @@
61void 61void
62nfs4_renew_state(void *data) 62nfs4_renew_state(void *data)
63{ 63{
64 struct nfs4_client *clp = (struct nfs4_client *)data; 64 struct nfs_client *clp = (struct nfs_client *)data;
65 struct rpc_cred *cred; 65 struct rpc_cred *cred;
66 long lease, timeout; 66 long lease, timeout;
67 unsigned long last, now; 67 unsigned long last, now;
@@ -108,7 +108,7 @@ out:
108 108
109/* Must be called with clp->cl_sem locked for writes */ 109/* Must be called with clp->cl_sem locked for writes */
110void 110void
111nfs4_schedule_state_renewal(struct nfs4_client *clp) 111nfs4_schedule_state_renewal(struct nfs_client *clp)
112{ 112{
113 long timeout; 113 long timeout;
114 114
@@ -121,32 +121,20 @@ nfs4_schedule_state_renewal(struct nfs4_client *clp)
121 __FUNCTION__, (timeout + HZ - 1) / HZ); 121 __FUNCTION__, (timeout + HZ - 1) / HZ);
122 cancel_delayed_work(&clp->cl_renewd); 122 cancel_delayed_work(&clp->cl_renewd);
123 schedule_delayed_work(&clp->cl_renewd, timeout); 123 schedule_delayed_work(&clp->cl_renewd, timeout);
124 set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
124 spin_unlock(&clp->cl_lock); 125 spin_unlock(&clp->cl_lock);
125} 126}
126 127
127void 128void
128nfs4_renewd_prepare_shutdown(struct nfs_server *server) 129nfs4_renewd_prepare_shutdown(struct nfs_server *server)
129{ 130{
130 struct nfs4_client *clp = server->nfs4_state;
131
132 if (!clp)
133 return;
134 flush_scheduled_work(); 131 flush_scheduled_work();
135 down_write(&clp->cl_sem);
136 if (!list_empty(&server->nfs4_siblings))
137 list_del_init(&server->nfs4_siblings);
138 up_write(&clp->cl_sem);
139} 132}
140 133
141/* Must be called with clp->cl_sem locked for writes */
142void 134void
143nfs4_kill_renewd(struct nfs4_client *clp) 135nfs4_kill_renewd(struct nfs_client *clp)
144{ 136{
145 down_read(&clp->cl_sem); 137 down_read(&clp->cl_sem);
146 if (!list_empty(&clp->cl_superblocks)) {
147 up_read(&clp->cl_sem);
148 return;
149 }
150 cancel_delayed_work(&clp->cl_renewd); 138 cancel_delayed_work(&clp->cl_renewd);
151 up_read(&clp->cl_sem); 139 up_read(&clp->cl_sem);
152 flush_scheduled_work(); 140 flush_scheduled_work();
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 090a36b07a22..5fffbdfa971f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -50,149 +50,15 @@
50#include "nfs4_fs.h" 50#include "nfs4_fs.h"
51#include "callback.h" 51#include "callback.h"
52#include "delegation.h" 52#include "delegation.h"
53#include "internal.h"
53 54
54#define OPENOWNER_POOL_SIZE 8 55#define OPENOWNER_POOL_SIZE 8
55 56
56const nfs4_stateid zero_stateid; 57const nfs4_stateid zero_stateid;
57 58
58static DEFINE_SPINLOCK(state_spinlock);
59static LIST_HEAD(nfs4_clientid_list); 59static LIST_HEAD(nfs4_clientid_list);
60 60
61void 61static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
62init_nfsv4_state(struct nfs_server *server)
63{
64 server->nfs4_state = NULL;
65 INIT_LIST_HEAD(&server->nfs4_siblings);
66}
67
68void
69destroy_nfsv4_state(struct nfs_server *server)
70{
71 kfree(server->mnt_path);
72 server->mnt_path = NULL;
73 if (server->nfs4_state) {
74 nfs4_put_client(server->nfs4_state);
75 server->nfs4_state = NULL;
76 }
77}
78
79/*
80 * nfs4_get_client(): returns an empty client structure
81 * nfs4_put_client(): drops reference to client structure
82 *
83 * Since these are allocated/deallocated very rarely, we don't
84 * bother putting them in a slab cache...
85 */
86static struct nfs4_client *
87nfs4_alloc_client(struct in_addr *addr)
88{
89 struct nfs4_client *clp;
90
91 if (nfs_callback_up() < 0)
92 return NULL;
93 if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
94 nfs_callback_down();
95 return NULL;
96 }
97 memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
98 init_rwsem(&clp->cl_sem);
99 INIT_LIST_HEAD(&clp->cl_delegations);
100 INIT_LIST_HEAD(&clp->cl_state_owners);
101 INIT_LIST_HEAD(&clp->cl_unused);
102 spin_lock_init(&clp->cl_lock);
103 atomic_set(&clp->cl_count, 1);
104 INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
105 INIT_LIST_HEAD(&clp->cl_superblocks);
106 rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
107 clp->cl_rpcclient = ERR_PTR(-EINVAL);
108 clp->cl_boot_time = CURRENT_TIME;
109 clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
110 return clp;
111}
112
113static void
114nfs4_free_client(struct nfs4_client *clp)
115{
116 struct nfs4_state_owner *sp;
117
118 while (!list_empty(&clp->cl_unused)) {
119 sp = list_entry(clp->cl_unused.next,
120 struct nfs4_state_owner,
121 so_list);
122 list_del(&sp->so_list);
123 kfree(sp);
124 }
125 BUG_ON(!list_empty(&clp->cl_state_owners));
126 nfs_idmap_delete(clp);
127 if (!IS_ERR(clp->cl_rpcclient))
128 rpc_shutdown_client(clp->cl_rpcclient);
129 kfree(clp);
130 nfs_callback_down();
131}
132
133static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
134{
135 struct nfs4_client *clp;
136 list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
137 if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
138 atomic_inc(&clp->cl_count);
139 return clp;
140 }
141 }
142 return NULL;
143}
144
145struct nfs4_client *nfs4_find_client(struct in_addr *addr)
146{
147 struct nfs4_client *clp;
148 spin_lock(&state_spinlock);
149 clp = __nfs4_find_client(addr);
150 spin_unlock(&state_spinlock);
151 return clp;
152}
153
154struct nfs4_client *
155nfs4_get_client(struct in_addr *addr)
156{
157 struct nfs4_client *clp, *new = NULL;
158
159 spin_lock(&state_spinlock);
160 for (;;) {
161 clp = __nfs4_find_client(addr);
162 if (clp != NULL)
163 break;
164 clp = new;
165 if (clp != NULL) {
166 list_add(&clp->cl_servers, &nfs4_clientid_list);
167 new = NULL;
168 break;
169 }
170 spin_unlock(&state_spinlock);
171 new = nfs4_alloc_client(addr);
172 spin_lock(&state_spinlock);
173 if (new == NULL)
174 break;
175 }
176 spin_unlock(&state_spinlock);
177 if (new)
178 nfs4_free_client(new);
179 return clp;
180}
181
182void
183nfs4_put_client(struct nfs4_client *clp)
184{
185 if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
186 return;
187 list_del(&clp->cl_servers);
188 spin_unlock(&state_spinlock);
189 BUG_ON(!list_empty(&clp->cl_superblocks));
190 rpc_wake_up(&clp->cl_rpcwaitq);
191 nfs4_kill_renewd(clp);
192 nfs4_free_client(clp);
193}
194
195static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
196{ 62{
197 int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, 63 int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
198 nfs_callback_tcpport, cred); 64 nfs_callback_tcpport, cred);
@@ -204,13 +70,13 @@ static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
204} 70}
205 71
206u32 72u32
207nfs4_alloc_lockowner_id(struct nfs4_client *clp) 73nfs4_alloc_lockowner_id(struct nfs_client *clp)
208{ 74{
209 return clp->cl_lockowner_id ++; 75 return clp->cl_lockowner_id ++;
210} 76}
211 77
212static struct nfs4_state_owner * 78static struct nfs4_state_owner *
213nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred) 79nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
214{ 80{
215 struct nfs4_state_owner *sp = NULL; 81 struct nfs4_state_owner *sp = NULL;
216 82
@@ -224,7 +90,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
224 return sp; 90 return sp;
225} 91}
226 92
227struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp) 93struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
228{ 94{
229 struct nfs4_state_owner *sp; 95 struct nfs4_state_owner *sp;
230 struct rpc_cred *cred = NULL; 96 struct rpc_cred *cred = NULL;
@@ -238,7 +104,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
238 return cred; 104 return cred;
239} 105}
240 106
241struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp) 107struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
242{ 108{
243 struct nfs4_state_owner *sp; 109 struct nfs4_state_owner *sp;
244 110
@@ -251,7 +117,7 @@ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
251} 117}
252 118
253static struct nfs4_state_owner * 119static struct nfs4_state_owner *
254nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) 120nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
255{ 121{
256 struct nfs4_state_owner *sp, *res = NULL; 122 struct nfs4_state_owner *sp, *res = NULL;
257 123
@@ -294,7 +160,7 @@ nfs4_alloc_state_owner(void)
294void 160void
295nfs4_drop_state_owner(struct nfs4_state_owner *sp) 161nfs4_drop_state_owner(struct nfs4_state_owner *sp)
296{ 162{
297 struct nfs4_client *clp = sp->so_client; 163 struct nfs_client *clp = sp->so_client;
298 spin_lock(&clp->cl_lock); 164 spin_lock(&clp->cl_lock);
299 list_del_init(&sp->so_list); 165 list_del_init(&sp->so_list);
300 spin_unlock(&clp->cl_lock); 166 spin_unlock(&clp->cl_lock);
@@ -306,7 +172,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
306 */ 172 */
307struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) 173struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
308{ 174{
309 struct nfs4_client *clp = server->nfs4_state; 175 struct nfs_client *clp = server->nfs_client;
310 struct nfs4_state_owner *sp, *new; 176 struct nfs4_state_owner *sp, *new;
311 177
312 get_rpccred(cred); 178 get_rpccred(cred);
@@ -337,7 +203,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
337 */ 203 */
338void nfs4_put_state_owner(struct nfs4_state_owner *sp) 204void nfs4_put_state_owner(struct nfs4_state_owner *sp)
339{ 205{
340 struct nfs4_client *clp = sp->so_client; 206 struct nfs_client *clp = sp->so_client;
341 struct rpc_cred *cred = sp->so_cred; 207 struct rpc_cred *cred = sp->so_cred;
342 208
343 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) 209 if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
@@ -540,7 +406,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
540static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) 406static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
541{ 407{
542 struct nfs4_lock_state *lsp; 408 struct nfs4_lock_state *lsp;
543 struct nfs4_client *clp = state->owner->so_client; 409 struct nfs_client *clp = state->owner->so_client;
544 410
545 lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); 411 lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
546 if (lsp == NULL) 412 if (lsp == NULL)
@@ -752,7 +618,7 @@ out:
752 618
753static int reclaimer(void *); 619static int reclaimer(void *);
754 620
755static inline void nfs4_clear_recover_bit(struct nfs4_client *clp) 621static inline void nfs4_clear_recover_bit(struct nfs_client *clp)
756{ 622{
757 smp_mb__before_clear_bit(); 623 smp_mb__before_clear_bit();
758 clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); 624 clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state);
@@ -764,25 +630,25 @@ static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
764/* 630/*
765 * State recovery routine 631 * State recovery routine
766 */ 632 */
767static void nfs4_recover_state(struct nfs4_client *clp) 633static void nfs4_recover_state(struct nfs_client *clp)
768{ 634{
769 struct task_struct *task; 635 struct task_struct *task;
770 636
771 __module_get(THIS_MODULE); 637 __module_get(THIS_MODULE);
772 atomic_inc(&clp->cl_count); 638 atomic_inc(&clp->cl_count);
773 task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim", 639 task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
774 NIPQUAD(clp->cl_addr)); 640 NIPQUAD(clp->cl_addr.sin_addr));
775 if (!IS_ERR(task)) 641 if (!IS_ERR(task))
776 return; 642 return;
777 nfs4_clear_recover_bit(clp); 643 nfs4_clear_recover_bit(clp);
778 nfs4_put_client(clp); 644 nfs_put_client(clp);
779 module_put(THIS_MODULE); 645 module_put(THIS_MODULE);
780} 646}
781 647
782/* 648/*
783 * Schedule a state recovery attempt 649 * Schedule a state recovery attempt
784 */ 650 */
785void nfs4_schedule_state_recovery(struct nfs4_client *clp) 651void nfs4_schedule_state_recovery(struct nfs_client *clp)
786{ 652{
787 if (!clp) 653 if (!clp)
788 return; 654 return;
@@ -879,7 +745,7 @@ out_err:
879 return status; 745 return status;
880} 746}
881 747
882static void nfs4_state_mark_reclaim(struct nfs4_client *clp) 748static void nfs4_state_mark_reclaim(struct nfs_client *clp)
883{ 749{
884 struct nfs4_state_owner *sp; 750 struct nfs4_state_owner *sp;
885 struct nfs4_state *state; 751 struct nfs4_state *state;
@@ -903,7 +769,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
903 769
904static int reclaimer(void *ptr) 770static int reclaimer(void *ptr)
905{ 771{
906 struct nfs4_client *clp = ptr; 772 struct nfs_client *clp = ptr;
907 struct nfs4_state_owner *sp; 773 struct nfs4_state_owner *sp;
908 struct nfs4_state_recovery_ops *ops; 774 struct nfs4_state_recovery_ops *ops;
909 struct rpc_cred *cred; 775 struct rpc_cred *cred;
@@ -970,12 +836,12 @@ out:
970 if (status == -NFS4ERR_CB_PATH_DOWN) 836 if (status == -NFS4ERR_CB_PATH_DOWN)
971 nfs_handle_cb_pathdown(clp); 837 nfs_handle_cb_pathdown(clp);
972 nfs4_clear_recover_bit(clp); 838 nfs4_clear_recover_bit(clp);
973 nfs4_put_client(clp); 839 nfs_put_client(clp);
974 module_put_and_exit(0); 840 module_put_and_exit(0);
975 return 0; 841 return 0;
976out_error: 842out_error:
977 printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", 843 printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
978 NIPQUAD(clp->cl_addr.s_addr), -status); 844 NIPQUAD(clp->cl_addr.sin_addr), -status);
979 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 845 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
980 goto out; 846 goto out;
981} 847}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 730ec8fb31c6..3dd413f52da1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -58,7 +58,7 @@
58/* Mapping from NFS error code to "errno" error code. */ 58/* Mapping from NFS error code to "errno" error code. */
59#define errno_NFSERR_IO EIO 59#define errno_NFSERR_IO EIO
60 60
61static int nfs_stat_to_errno(int); 61static int nfs4_stat_to_errno(int);
62 62
63/* NFSv4 COMPOUND tags are only wanted for debugging purposes */ 63/* NFSv4 COMPOUND tags are only wanted for debugging purposes */
64#ifdef DEBUG 64#ifdef DEBUG
@@ -128,7 +128,7 @@ static int nfs_stat_to_errno(int);
128#define decode_link_maxsz (op_decode_hdr_maxsz + 5) 128#define decode_link_maxsz (op_decode_hdr_maxsz + 5)
129#define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 129#define encode_symlink_maxsz (op_encode_hdr_maxsz + \
130 1 + nfs4_name_maxsz + \ 130 1 + nfs4_name_maxsz + \
131 nfs4_path_maxsz + \ 131 1 + \
132 nfs4_fattr_maxsz) 132 nfs4_fattr_maxsz)
133#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) 133#define decode_symlink_maxsz (op_decode_hdr_maxsz + 8)
134#define encode_create_maxsz (op_encode_hdr_maxsz + \ 134#define encode_create_maxsz (op_encode_hdr_maxsz + \
@@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
529 if (iap->ia_valid & ATTR_MODE) 529 if (iap->ia_valid & ATTR_MODE)
530 len += 4; 530 len += 4;
531 if (iap->ia_valid & ATTR_UID) { 531 if (iap->ia_valid & ATTR_UID) {
532 owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name); 532 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
533 if (owner_namelen < 0) { 533 if (owner_namelen < 0) {
534 printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", 534 printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
535 iap->ia_uid); 535 iap->ia_uid);
@@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
541 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 541 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
542 } 542 }
543 if (iap->ia_valid & ATTR_GID) { 543 if (iap->ia_valid & ATTR_GID) {
544 owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group); 544 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
545 if (owner_grouplen < 0) { 545 if (owner_grouplen < 0) {
546 printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", 546 printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
547 iap->ia_gid); 547 iap->ia_gid);
@@ -673,9 +673,9 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c
673 673
674 switch (create->ftype) { 674 switch (create->ftype) {
675 case NF4LNK: 675 case NF4LNK:
676 RESERVE_SPACE(4 + create->u.symlink->len); 676 RESERVE_SPACE(4);
677 WRITE32(create->u.symlink->len); 677 WRITE32(create->u.symlink.len);
678 WRITEMEM(create->u.symlink->name, create->u.symlink->len); 678 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
679 break; 679 break;
680 680
681 case NF4BLK: case NF4CHR: 681 case NF4BLK: case NF4CHR:
@@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con
1160 return 0; 1160 return 0;
1161} 1161}
1162 1162
1163static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid) 1163static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid)
1164{ 1164{
1165 uint32_t *p; 1165 uint32_t *p;
1166 1166
@@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
1246 return 0; 1246 return 0;
1247} 1247}
1248 1248
1249static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state) 1249static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state)
1250{ 1250{
1251 uint32_t *p; 1251 uint32_t *p;
1252 1252
@@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const str
1945/* 1945/*
1946 * a RENEW request 1946 * a RENEW request
1947 */ 1947 */
1948static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) 1948static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
1949{ 1949{
1950 struct xdr_stream xdr; 1950 struct xdr_stream xdr;
1951 struct compound_hdr hdr = { 1951 struct compound_hdr hdr = {
@@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nf
1975/* 1975/*
1976 * a SETCLIENTID_CONFIRM request 1976 * a SETCLIENTID_CONFIRM request
1977 */ 1977 */
1978static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) 1978static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
1979{ 1979{
1980 struct xdr_stream xdr; 1980 struct xdr_stream xdr;
1981 struct compound_hdr hdr = { 1981 struct compound_hdr hdr = {
@@ -2127,12 +2127,12 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
2127 } 2127 }
2128 READ32(nfserr); 2128 READ32(nfserr);
2129 if (nfserr != NFS_OK) 2129 if (nfserr != NFS_OK)
2130 return -nfs_stat_to_errno(nfserr); 2130 return -nfs4_stat_to_errno(nfserr);
2131 return 0; 2131 return 0;
2132} 2132}
2133 2133
2134/* Dummy routine */ 2134/* Dummy routine */
2135static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) 2135static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
2136{ 2136{
2137 uint32_t *p; 2137 uint32_t *p;
2138 unsigned int strlen; 2138 unsigned int strlen;
@@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
2636 return 0; 2636 return 0;
2637} 2637}
2638 2638
2639static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid) 2639static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid)
2640{ 2640{
2641 uint32_t len, *p; 2641 uint32_t len, *p;
2642 2642
@@ -2660,7 +2660,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2660 return 0; 2660 return 0;
2661} 2661}
2662 2662
2663static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid) 2663static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid)
2664{ 2664{
2665 uint32_t len, *p; 2665 uint32_t len, *p;
2666 2666
@@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
3051 fattr->mode |= fmode; 3051 fattr->mode |= fmode;
3052 if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0) 3052 if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
3053 goto xdr_error; 3053 goto xdr_error;
3054 if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0) 3054 if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0)
3055 goto xdr_error; 3055 goto xdr_error;
3056 if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0) 3056 if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0)
3057 goto xdr_error; 3057 goto xdr_error;
3058 if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0) 3058 if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
3059 goto xdr_error; 3059 goto xdr_error;
@@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3254 if (decode_space_limit(xdr, &res->maxsize) < 0) 3254 if (decode_space_limit(xdr, &res->maxsize) < 0)
3255 return -EIO; 3255 return -EIO;
3256 } 3256 }
3257 return decode_ace(xdr, NULL, res->server->nfs4_state); 3257 return decode_ace(xdr, NULL, res->server->nfs_client);
3258} 3258}
3259 3259
3260static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3260static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3565,7 +3565,7 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
3565 return 0; 3565 return 0;
3566} 3566}
3567 3567
3568static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) 3568static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
3569{ 3569{
3570 uint32_t *p; 3570 uint32_t *p;
3571 uint32_t opnum; 3571 uint32_t opnum;
@@ -3598,7 +3598,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
3598 READ_BUF(len); 3598 READ_BUF(len);
3599 return -NFSERR_CLID_INUSE; 3599 return -NFSERR_CLID_INUSE;
3600 } else 3600 } else
3601 return -nfs_stat_to_errno(nfserr); 3601 return -nfs4_stat_to_errno(nfserr);
3602 3602
3603 return 0; 3603 return 0;
3604} 3604}
@@ -4256,7 +4256,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsi
4256 if (!status) 4256 if (!status)
4257 status = decode_fsinfo(&xdr, fsinfo); 4257 status = decode_fsinfo(&xdr, fsinfo);
4258 if (!status) 4258 if (!status)
4259 status = -nfs_stat_to_errno(hdr.status); 4259 status = -nfs4_stat_to_errno(hdr.status);
4260 return status; 4260 return status;
4261} 4261}
4262 4262
@@ -4335,7 +4335,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
4335 * a SETCLIENTID request 4335 * a SETCLIENTID request
4336 */ 4336 */
4337static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, 4337static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
4338 struct nfs4_client *clp) 4338 struct nfs_client *clp)
4339{ 4339{
4340 struct xdr_stream xdr; 4340 struct xdr_stream xdr;
4341 struct compound_hdr hdr; 4341 struct compound_hdr hdr;
@@ -4346,7 +4346,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
4346 if (!status) 4346 if (!status)
4347 status = decode_setclientid(&xdr, clp); 4347 status = decode_setclientid(&xdr, clp);
4348 if (!status) 4348 if (!status)
4349 status = -nfs_stat_to_errno(hdr.status); 4349 status = -nfs4_stat_to_errno(hdr.status);
4350 return status; 4350 return status;
4351} 4351}
4352 4352
@@ -4368,7 +4368,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
4368 if (!status) 4368 if (!status)
4369 status = decode_fsinfo(&xdr, fsinfo); 4369 status = decode_fsinfo(&xdr, fsinfo);
4370 if (!status) 4370 if (!status)
4371 status = -nfs_stat_to_errno(hdr.status); 4371 status = -nfs4_stat_to_errno(hdr.status);
4372 return status; 4372 return status;
4373} 4373}
4374 4374
@@ -4521,7 +4521,7 @@ static struct {
4521 * This one is used jointly by NFSv2 and NFSv3. 4521 * This one is used jointly by NFSv2 and NFSv3.
4522 */ 4522 */
4523static int 4523static int
4524nfs_stat_to_errno(int stat) 4524nfs4_stat_to_errno(int stat)
4525{ 4525{
4526 int i; 4526 int i;
4527 for (i = 0; nfs_errtbl[i].stat != -1; i++) { 4527 for (i = 0; nfs_errtbl[i].stat != -1; i++) {
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b3899ea3229e..630e50647bbb 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
66 66
67 dprintk("%s: call getattr\n", __FUNCTION__); 67 dprintk("%s: call getattr\n", __FUNCTION__);
68 nfs_fattr_init(fattr); 68 nfs_fattr_init(fattr);
69 status = rpc_call_sync(server->client_sys, &msg, 0); 69 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
70 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); 70 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
71 if (status) 71 if (status)
72 return status; 72 return status;
73 dprintk("%s: call statfs\n", __FUNCTION__); 73 dprintk("%s: call statfs\n", __FUNCTION__);
74 msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; 74 msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
75 msg.rpc_resp = &fsinfo; 75 msg.rpc_resp = &fsinfo;
76 status = rpc_call_sync(server->client_sys, &msg, 0); 76 status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
77 dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); 77 dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
78 if (status) 78 if (status)
79 return status; 79 return status;
@@ -425,16 +425,17 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
425} 425}
426 426
427static int 427static int
428nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path, 428nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
429 struct iattr *sattr, struct nfs_fh *fhandle, 429 unsigned int len, struct iattr *sattr)
430 struct nfs_fattr *fattr)
431{ 430{
431 struct nfs_fh fhandle;
432 struct nfs_fattr fattr;
432 struct nfs_symlinkargs arg = { 433 struct nfs_symlinkargs arg = {
433 .fromfh = NFS_FH(dir), 434 .fromfh = NFS_FH(dir),
434 .fromname = name->name, 435 .fromname = dentry->d_name.name,
435 .fromlen = name->len, 436 .fromlen = dentry->d_name.len,
436 .topath = path->name, 437 .pages = &page,
437 .tolen = path->len, 438 .pathlen = len,
438 .sattr = sattr 439 .sattr = sattr
439 }; 440 };
440 struct rpc_message msg = { 441 struct rpc_message msg = {
@@ -443,13 +444,25 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
443 }; 444 };
444 int status; 445 int status;
445 446
446 if (path->len > NFS2_MAXPATHLEN) 447 if (len > NFS2_MAXPATHLEN)
447 return -ENAMETOOLONG; 448 return -ENAMETOOLONG;
448 dprintk("NFS call symlink %s -> %s\n", name->name, path->name); 449
449 nfs_fattr_init(fattr); 450 dprintk("NFS call symlink %s\n", dentry->d_name.name);
450 fhandle->size = 0; 451
451 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 452 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
452 nfs_mark_for_revalidate(dir); 453 nfs_mark_for_revalidate(dir);
454
455 /*
456 * V2 SYMLINK requests don't return any attributes. Setting the
457 * filehandle size to zero indicates to nfs_instantiate that it
458 * should fill in the data with a LOOKUP call on the wire.
459 */
460 if (status == 0) {
461 nfs_fattr_init(&fattr);
462 fhandle.size = 0;
463 status = nfs_instantiate(dentry, &fhandle, &fattr);
464 }
465
453 dprintk("NFS reply symlink: %d\n", status); 466 dprintk("NFS reply symlink: %d\n", status);
454 return status; 467 return status;
455} 468}
@@ -671,7 +684,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
671} 684}
672 685
673 686
674struct nfs_rpc_ops nfs_v2_clientops = { 687const struct nfs_rpc_ops nfs_v2_clientops = {
675 .version = 2, /* protocol version */ 688 .version = 2, /* protocol version */
676 .dentry_ops = &nfs_dentry_operations, 689 .dentry_ops = &nfs_dentry_operations,
677 .dir_inode_ops = &nfs_dir_inode_operations, 690 .dir_inode_ops = &nfs_dir_inode_operations,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f0aff824a291..69f1549da2b9 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -171,7 +171,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
171 rdata->args.offset = page_offset(page) + rdata->args.pgbase; 171 rdata->args.offset = page_offset(page) + rdata->args.pgbase;
172 172
173 dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", 173 dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
174 NFS_SERVER(inode)->hostname, 174 NFS_SERVER(inode)->nfs_client->cl_hostname,
175 inode->i_sb->s_id, 175 inode->i_sb->s_id,
176 (long long)NFS_FILEID(inode), 176 (long long)NFS_FILEID(inode),
177 (unsigned long long)rdata->args.pgbase, 177 (unsigned long long)rdata->args.pgbase,
@@ -568,8 +568,13 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
568 568
569 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); 569 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
570 570
571 /* Is this a short read? */ 571 if (task->tk_status < 0) {
572 if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { 572 if (task->tk_status == -ESTALE) {
573 set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
574 nfs_mark_for_revalidate(data->inode);
575 }
576 } else if (resp->count < argp->count && !resp->eof) {
577 /* This is a short read! */
573 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 578 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
574 /* Has the server at least made some progress? */ 579 /* Has the server at least made some progress? */
575 if (resp->count != 0) { 580 if (resp->count != 0) {
@@ -616,6 +621,10 @@ int nfs_readpage(struct file *file, struct page *page)
616 if (error) 621 if (error)
617 goto out_error; 622 goto out_error;
618 623
624 error = -ESTALE;
625 if (NFS_STALE(inode))
626 goto out_error;
627
619 if (file == NULL) { 628 if (file == NULL) {
620 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 629 ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
621 if (ctx == NULL) 630 if (ctx == NULL)
@@ -678,7 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
678 }; 687 };
679 struct inode *inode = mapping->host; 688 struct inode *inode = mapping->host;
680 struct nfs_server *server = NFS_SERVER(inode); 689 struct nfs_server *server = NFS_SERVER(inode);
681 int ret; 690 int ret = -ESTALE;
682 691
683 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 692 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
684 inode->i_sb->s_id, 693 inode->i_sb->s_id,
@@ -686,6 +695,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
686 nr_pages); 695 nr_pages);
687 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); 696 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
688 697
698 if (NFS_STALE(inode))
699 goto out;
700
689 if (filp == NULL) { 701 if (filp == NULL) {
690 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 702 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
691 if (desc.ctx == NULL) 703 if (desc.ctx == NULL)
@@ -701,6 +713,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
701 ret = err; 713 ret = err;
702 } 714 }
703 put_nfs_open_context(desc.ctx); 715 put_nfs_open_context(desc.ctx);
716out:
704 return ret; 717 return ret;
705} 718}
706 719
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e8a9bee74d9d..e8d40030cab4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -13,6 +13,11 @@
13 * 13 *
14 * Split from inode.c by David Howells <dhowells@redhat.com> 14 * Split from inode.c by David Howells <dhowells@redhat.com>
15 * 15 *
16 * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a
17 * particular server are held in the same superblock
18 * - NFS superblocks can have several effective roots to the dentry tree
19 * - directory type roots are spliced into the tree when a path from one root reaches the root
20 * of another (see nfs_lookup())
16 */ 21 */
17 22
18#include <linux/config.h> 23#include <linux/config.h>
@@ -52,66 +57,12 @@
52 57
53#define NFSDBG_FACILITY NFSDBG_VFS 58#define NFSDBG_FACILITY NFSDBG_VFS
54 59
55/* Maximum number of readahead requests
56 * FIXME: this should really be a sysctl so that users may tune it to suit
57 * their needs. People that do NFS over a slow network, might for
58 * instance want to reduce it to something closer to 1 for improved
59 * interactive response.
60 */
61#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
62
63/*
64 * RPC cruft for NFS
65 */
66static struct rpc_version * nfs_version[] = {
67 NULL,
68 NULL,
69 &nfs_version2,
70#if defined(CONFIG_NFS_V3)
71 &nfs_version3,
72#elif defined(CONFIG_NFS_V4)
73 NULL,
74#endif
75#if defined(CONFIG_NFS_V4)
76 &nfs_version4,
77#endif
78};
79
80static struct rpc_program nfs_program = {
81 .name = "nfs",
82 .number = NFS_PROGRAM,
83 .nrvers = ARRAY_SIZE(nfs_version),
84 .version = nfs_version,
85 .stats = &nfs_rpcstat,
86 .pipe_dir_name = "/nfs",
87};
88
89struct rpc_stat nfs_rpcstat = {
90 .program = &nfs_program
91};
92
93
94#ifdef CONFIG_NFS_V3_ACL
95static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
96static struct rpc_version * nfsacl_version[] = {
97 [3] = &nfsacl_version3,
98};
99
100struct rpc_program nfsacl_program = {
101 .name = "nfsacl",
102 .number = NFS_ACL_PROGRAM,
103 .nrvers = ARRAY_SIZE(nfsacl_version),
104 .version = nfsacl_version,
105 .stats = &nfsacl_rpcstat,
106};
107#endif /* CONFIG_NFS_V3_ACL */
108
109static void nfs_umount_begin(struct vfsmount *, int); 60static void nfs_umount_begin(struct vfsmount *, int);
110static int nfs_statfs(struct dentry *, struct kstatfs *); 61static int nfs_statfs(struct dentry *, struct kstatfs *);
111static int nfs_show_options(struct seq_file *, struct vfsmount *); 62static int nfs_show_options(struct seq_file *, struct vfsmount *);
112static int nfs_show_stats(struct seq_file *, struct vfsmount *); 63static int nfs_show_stats(struct seq_file *, struct vfsmount *);
113static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 64static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
114static int nfs_clone_nfs_sb(struct file_system_type *fs_type, 65static int nfs_xdev_get_sb(struct file_system_type *fs_type,
115 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 66 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
116static void nfs_kill_super(struct super_block *); 67static void nfs_kill_super(struct super_block *);
117 68
@@ -120,15 +71,15 @@ static struct file_system_type nfs_fs_type = {
120 .name = "nfs", 71 .name = "nfs",
121 .get_sb = nfs_get_sb, 72 .get_sb = nfs_get_sb,
122 .kill_sb = nfs_kill_super, 73 .kill_sb = nfs_kill_super,
123 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 74 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
124}; 75};
125 76
126struct file_system_type clone_nfs_fs_type = { 77struct file_system_type nfs_xdev_fs_type = {
127 .owner = THIS_MODULE, 78 .owner = THIS_MODULE,
128 .name = "nfs", 79 .name = "nfs",
129 .get_sb = nfs_clone_nfs_sb, 80 .get_sb = nfs_xdev_get_sb,
130 .kill_sb = nfs_kill_super, 81 .kill_sb = nfs_kill_super,
131 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 82 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
132}; 83};
133 84
134static struct super_operations nfs_sops = { 85static struct super_operations nfs_sops = {
@@ -145,10 +96,10 @@ static struct super_operations nfs_sops = {
145#ifdef CONFIG_NFS_V4 96#ifdef CONFIG_NFS_V4
146static int nfs4_get_sb(struct file_system_type *fs_type, 97static int nfs4_get_sb(struct file_system_type *fs_type,
147 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 98 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
148static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, 99static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
149 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 100 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
150static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, 101static int nfs4_referral_get_sb(struct file_system_type *fs_type,
151 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 102 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
152static void nfs4_kill_super(struct super_block *sb); 103static void nfs4_kill_super(struct super_block *sb);
153 104
154static struct file_system_type nfs4_fs_type = { 105static struct file_system_type nfs4_fs_type = {
@@ -156,23 +107,23 @@ static struct file_system_type nfs4_fs_type = {
156 .name = "nfs4", 107 .name = "nfs4",
157 .get_sb = nfs4_get_sb, 108 .get_sb = nfs4_get_sb,
158 .kill_sb = nfs4_kill_super, 109 .kill_sb = nfs4_kill_super,
159 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 110 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
160}; 111};
161 112
162struct file_system_type clone_nfs4_fs_type = { 113struct file_system_type nfs4_xdev_fs_type = {
163 .owner = THIS_MODULE, 114 .owner = THIS_MODULE,
164 .name = "nfs4", 115 .name = "nfs4",
165 .get_sb = nfs_clone_nfs4_sb, 116 .get_sb = nfs4_xdev_get_sb,
166 .kill_sb = nfs4_kill_super, 117 .kill_sb = nfs4_kill_super,
167 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 118 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
168}; 119};
169 120
170struct file_system_type nfs_referral_nfs4_fs_type = { 121struct file_system_type nfs4_referral_fs_type = {
171 .owner = THIS_MODULE, 122 .owner = THIS_MODULE,
172 .name = "nfs4", 123 .name = "nfs4",
173 .get_sb = nfs_referral_nfs4_sb, 124 .get_sb = nfs4_referral_get_sb,
174 .kill_sb = nfs4_kill_super, 125 .kill_sb = nfs4_kill_super,
175 .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 126 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
176}; 127};
177 128
178static struct super_operations nfs4_sops = { 129static struct super_operations nfs4_sops = {
@@ -187,39 +138,7 @@ static struct super_operations nfs4_sops = {
187}; 138};
188#endif 139#endif
189 140
190#ifdef CONFIG_NFS_V4 141static struct shrinker *acl_shrinker;
191static const int nfs_set_port_min = 0;
192static const int nfs_set_port_max = 65535;
193
194static int param_set_port(const char *val, struct kernel_param *kp)
195{
196 char *endp;
197 int num = simple_strtol(val, &endp, 0);
198 if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
199 return -EINVAL;
200 *((int *)kp->arg) = num;
201 return 0;
202}
203
204module_param_call(callback_tcpport, param_set_port, param_get_int,
205 &nfs_callback_set_tcpport, 0644);
206#endif
207
208#ifdef CONFIG_NFS_V4
209static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
210{
211 char *endp;
212 int num = simple_strtol(val, &endp, 0);
213 int jif = num * HZ;
214 if (endp == val || *endp || num < 0 || jif < num)
215 return -EINVAL;
216 *((int *)kp->arg) = jif;
217 return 0;
218}
219
220module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
221 &nfs_idmap_cache_timeout, 0644);
222#endif
223 142
224/* 143/*
225 * Register the NFS filesystems 144 * Register the NFS filesystems
@@ -240,6 +159,7 @@ int __init register_nfs_fs(void)
240 if (ret < 0) 159 if (ret < 0)
241 goto error_2; 160 goto error_2;
242#endif 161#endif
162 acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
243 return 0; 163 return 0;
244 164
245#ifdef CONFIG_NFS_V4 165#ifdef CONFIG_NFS_V4
@@ -257,6 +177,8 @@ error_0:
257 */ 177 */
258void __exit unregister_nfs_fs(void) 178void __exit unregister_nfs_fs(void)
259{ 179{
180 if (acl_shrinker != NULL)
181 remove_shrinker(acl_shrinker);
260#ifdef CONFIG_NFS_V4 182#ifdef CONFIG_NFS_V4
261 unregister_filesystem(&nfs4_fs_type); 183 unregister_filesystem(&nfs4_fs_type);
262 nfs_unregister_sysctl(); 184 nfs_unregister_sysctl();
@@ -269,11 +191,10 @@ void __exit unregister_nfs_fs(void)
269 */ 191 */
270static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) 192static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
271{ 193{
272 struct super_block *sb = dentry->d_sb; 194 struct nfs_server *server = NFS_SB(dentry->d_sb);
273 struct nfs_server *server = NFS_SB(sb);
274 unsigned char blockbits; 195 unsigned char blockbits;
275 unsigned long blockres; 196 unsigned long blockres;
276 struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode); 197 struct nfs_fh *fh = NFS_FH(dentry->d_inode);
277 struct nfs_fattr fattr; 198 struct nfs_fattr fattr;
278 struct nfs_fsstat res = { 199 struct nfs_fsstat res = {
279 .fattr = &fattr, 200 .fattr = &fattr,
@@ -282,7 +203,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
282 203
283 lock_kernel(); 204 lock_kernel();
284 205
285 error = server->rpc_ops->statfs(server, rootfh, &res); 206 error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
286 buf->f_type = NFS_SUPER_MAGIC; 207 buf->f_type = NFS_SUPER_MAGIC;
287 if (error < 0) 208 if (error < 0)
288 goto out_err; 209 goto out_err;
@@ -292,7 +213,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
292 * case where f_frsize != f_bsize. Eventually we want to 213 * case where f_frsize != f_bsize. Eventually we want to
293 * report the value of wtmult in this field. 214 * report the value of wtmult in this field.
294 */ 215 */
295 buf->f_frsize = sb->s_blocksize; 216 buf->f_frsize = dentry->d_sb->s_blocksize;
296 217
297 /* 218 /*
298 * On most *nix systems, f_blocks, f_bfree, and f_bavail 219 * On most *nix systems, f_blocks, f_bfree, and f_bavail
@@ -301,8 +222,8 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
301 * thus historically Linux's sys_statfs reports these 222 * thus historically Linux's sys_statfs reports these
302 * fields in units of f_bsize. 223 * fields in units of f_bsize.
303 */ 224 */
304 buf->f_bsize = sb->s_blocksize; 225 buf->f_bsize = dentry->d_sb->s_blocksize;
305 blockbits = sb->s_blocksize_bits; 226 blockbits = dentry->d_sb->s_blocksize_bits;
306 blockres = (1 << blockbits) - 1; 227 blockres = (1 << blockbits) - 1;
307 buf->f_blocks = (res.tbytes + blockres) >> blockbits; 228 buf->f_blocks = (res.tbytes + blockres) >> blockbits;
308 buf->f_bfree = (res.fbytes + blockres) >> blockbits; 229 buf->f_bfree = (res.fbytes + blockres) >> blockbits;
@@ -323,9 +244,12 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
323 244
324} 245}
325 246
247/*
248 * Map the security flavour number to a name
249 */
326static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) 250static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
327{ 251{
328 static struct { 252 static const struct {
329 rpc_authflavor_t flavour; 253 rpc_authflavor_t flavour;
330 const char *str; 254 const char *str;
331 } sec_flavours[] = { 255 } sec_flavours[] = {
@@ -356,10 +280,10 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
356 */ 280 */
357static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) 281static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
358{ 282{
359 static struct proc_nfs_info { 283 static const struct proc_nfs_info {
360 int flag; 284 int flag;
361 char *str; 285 const char *str;
362 char *nostr; 286 const char *nostr;
363 } nfs_info[] = { 287 } nfs_info[] = {
364 { NFS_MOUNT_SOFT, ",soft", ",hard" }, 288 { NFS_MOUNT_SOFT, ",soft", ",hard" },
365 { NFS_MOUNT_INTR, ",intr", "" }, 289 { NFS_MOUNT_INTR, ",intr", "" },
@@ -369,11 +293,12 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
369 { NFS_MOUNT_NOACL, ",noacl", "" }, 293 { NFS_MOUNT_NOACL, ",noacl", "" },
370 { 0, NULL, NULL } 294 { 0, NULL, NULL }
371 }; 295 };
372 struct proc_nfs_info *nfs_infop; 296 const struct proc_nfs_info *nfs_infop;
297 struct nfs_client *clp = nfss->nfs_client;
373 char buf[12]; 298 char buf[12];
374 char *proto; 299 const char *proto;
375 300
376 seq_printf(m, ",vers=%d", nfss->rpc_ops->version); 301 seq_printf(m, ",vers=%d", clp->rpc_ops->version);
377 seq_printf(m, ",rsize=%d", nfss->rsize); 302 seq_printf(m, ",rsize=%d", nfss->rsize);
378 seq_printf(m, ",wsize=%d", nfss->wsize); 303 seq_printf(m, ",wsize=%d", nfss->wsize);
379 if (nfss->acregmin != 3*HZ || showdefaults) 304 if (nfss->acregmin != 3*HZ || showdefaults)
@@ -402,8 +327,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
402 proto = buf; 327 proto = buf;
403 } 328 }
404 seq_printf(m, ",proto=%s", proto); 329 seq_printf(m, ",proto=%s", proto);
405 seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); 330 seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
406 seq_printf(m, ",retrans=%u", nfss->retrans_count); 331 seq_printf(m, ",retrans=%u", clp->retrans_count);
407 seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); 332 seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
408} 333}
409 334
@@ -417,7 +342,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
417 nfs_show_mount_options(m, nfss, 0); 342 nfs_show_mount_options(m, nfss, 0);
418 343
419 seq_puts(m, ",addr="); 344 seq_puts(m, ",addr=");
420 seq_escape(m, nfss->hostname, " \t\n\\"); 345 seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
421 346
422 return 0; 347 return 0;
423} 348}
@@ -454,7 +379,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
454 seq_printf(m, ",namelen=%d", nfss->namelen); 379 seq_printf(m, ",namelen=%d", nfss->namelen);
455 380
456#ifdef CONFIG_NFS_V4 381#ifdef CONFIG_NFS_V4
457 if (nfss->rpc_ops->version == 4) { 382 if (nfss->nfs_client->cl_nfsversion == 4) {
458 seq_printf(m, "\n\tnfsv4:\t"); 383 seq_printf(m, "\n\tnfsv4:\t");
459 seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); 384 seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
460 seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); 385 seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
@@ -501,782 +426,353 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
501 426
502/* 427/*
503 * Begin unmount by attempting to remove all automounted mountpoints we added 428 * Begin unmount by attempting to remove all automounted mountpoints we added
504 * in response to traversals 429 * in response to xdev traversals and referrals
505 */ 430 */
506static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) 431static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
507{ 432{
508 struct nfs_server *server;
509 struct rpc_clnt *rpc;
510
511 shrink_submounts(vfsmnt, &nfs_automount_list); 433 shrink_submounts(vfsmnt, &nfs_automount_list);
512 if (!(flags & MNT_FORCE))
513 return;
514 /* -EIO all pending I/O */
515 server = NFS_SB(vfsmnt->mnt_sb);
516 rpc = server->client;
517 if (!IS_ERR(rpc))
518 rpc_killall_tasks(rpc);
519 rpc = server->client_acl;
520 if (!IS_ERR(rpc))
521 rpc_killall_tasks(rpc);
522} 434}
523 435
524/* 436/*
525 * Obtain the root inode of the file system. 437 * Validate the NFS2/NFS3 mount data
438 * - fills in the mount root filehandle
526 */ 439 */
527static struct inode * 440static int nfs_validate_mount_data(struct nfs_mount_data *data,
528nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) 441 struct nfs_fh *mntfh)
529{ 442{
530 struct nfs_server *server = NFS_SB(sb); 443 if (data == NULL) {
531 int error; 444 dprintk("%s: missing data argument\n", __FUNCTION__);
532 445 return -EINVAL;
533 error = server->rpc_ops->getroot(server, rootfh, fsinfo);
534 if (error < 0) {
535 dprintk("nfs_get_root: getattr error = %d\n", -error);
536 return ERR_PTR(error);
537 } 446 }
538 447
539 server->fsid = fsinfo->fattr->fsid; 448 if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
540 return nfs_fhget(sb, rootfh, fsinfo->fattr); 449 dprintk("%s: bad mount version\n", __FUNCTION__);
541} 450 return -EINVAL;
542 451 }
543/*
544 * Do NFS version-independent mount processing, and sanity checking
545 */
546static int
547nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
548{
549 struct nfs_server *server;
550 struct inode *root_inode;
551 struct nfs_fattr fattr;
552 struct nfs_fsinfo fsinfo = {
553 .fattr = &fattr,
554 };
555 struct nfs_pathconf pathinfo = {
556 .fattr = &fattr,
557 };
558 int no_root_error = 0;
559 unsigned long max_rpc_payload;
560
561 /* We probably want something more informative here */
562 snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
563
564 server = NFS_SB(sb);
565 452
566 sb->s_magic = NFS_SUPER_MAGIC; 453 switch (data->version) {
454 case 1:
455 data->namlen = 0;
456 case 2:
457 data->bsize = 0;
458 case 3:
459 if (data->flags & NFS_MOUNT_VER3) {
460 dprintk("%s: mount structure version %d does not support NFSv3\n",
461 __FUNCTION__,
462 data->version);
463 return -EINVAL;
464 }
465 data->root.size = NFS2_FHSIZE;
466 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
467 case 4:
468 if (data->flags & NFS_MOUNT_SECFLAVOUR) {
469 dprintk("%s: mount structure version %d does not support strong security\n",
470 __FUNCTION__,
471 data->version);
472 return -EINVAL;
473 }
474 case 5:
475 memset(data->context, 0, sizeof(data->context));
476 }
567 477
568 server->io_stats = nfs_alloc_iostats(); 478 /* Set the pseudoflavor */
569 if (server->io_stats == NULL) 479 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
570 return -ENOMEM; 480 data->pseudoflavor = RPC_AUTH_UNIX;
571 481
572 root_inode = nfs_get_root(sb, &server->fh, &fsinfo); 482#ifndef CONFIG_NFS_V3
573 /* Did getting the root inode fail? */ 483 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
574 if (IS_ERR(root_inode)) { 484 if (data->flags & NFS_MOUNT_VER3) {
575 no_root_error = PTR_ERR(root_inode); 485 dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
576 goto out_no_root; 486 return -EPROTONOSUPPORT;
577 }
578 sb->s_root = d_alloc_root(root_inode);
579 if (!sb->s_root) {
580 no_root_error = -ENOMEM;
581 goto out_no_root;
582 } 487 }
583 sb->s_root->d_op = server->rpc_ops->dentry_ops; 488#endif /* CONFIG_NFS_V3 */
584
585 /* mount time stamp, in seconds */
586 server->mount_time = jiffies;
587
588 /* Get some general file system info */
589 if (server->namelen == 0 &&
590 server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
591 server->namelen = pathinfo.max_namelen;
592 /* Work out a lot of parameters */
593 if (server->rsize == 0)
594 server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
595 if (server->wsize == 0)
596 server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
597
598 if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
599 server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
600 if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
601 server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
602
603 max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
604 if (server->rsize > max_rpc_payload)
605 server->rsize = max_rpc_payload;
606 if (server->rsize > NFS_MAX_FILE_IO_SIZE)
607 server->rsize = NFS_MAX_FILE_IO_SIZE;
608 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
609
610 if (server->wsize > max_rpc_payload)
611 server->wsize = max_rpc_payload;
612 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
613 server->wsize = NFS_MAX_FILE_IO_SIZE;
614 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
615 489
616 if (sb->s_blocksize == 0) 490 /* We now require that the mount process passes the remote address */
617 sb->s_blocksize = nfs_block_bits(server->wsize, 491 if (data->addr.sin_addr.s_addr == INADDR_ANY) {
618 &sb->s_blocksize_bits); 492 dprintk("%s: mount program didn't pass remote address!\n",
619 server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); 493 __FUNCTION__);
620 494 return -EINVAL;
621 server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
622 if (server->dtsize > PAGE_CACHE_SIZE)
623 server->dtsize = PAGE_CACHE_SIZE;
624 if (server->dtsize > server->rsize)
625 server->dtsize = server->rsize;
626
627 if (server->flags & NFS_MOUNT_NOAC) {
628 server->acregmin = server->acregmax = 0;
629 server->acdirmin = server->acdirmax = 0;
630 sb->s_flags |= MS_SYNCHRONOUS;
631 } 495 }
632 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
633 496
634 nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); 497 /* Prepare the root filehandle */
498 if (data->flags & NFS_MOUNT_VER3)
499 mntfh->size = data->root.size;
500 else
501 mntfh->size = NFS2_FHSIZE;
502
503 if (mntfh->size > sizeof(mntfh->data)) {
504 dprintk("%s: invalid root filehandle\n", __FUNCTION__);
505 return -EINVAL;
506 }
635 507
636 server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; 508 memcpy(mntfh->data, data->root.data, mntfh->size);
637 server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; 509 if (mntfh->size < sizeof(mntfh->data))
510 memset(mntfh->data + mntfh->size, 0,
511 sizeof(mntfh->data) - mntfh->size);
638 512
639 /* We're airborne Set socket buffersize */
640 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
641 return 0; 513 return 0;
642 /* Yargs. It didn't work out. */
643out_no_root:
644 dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
645 if (!IS_ERR(root_inode))
646 iput(root_inode);
647 return no_root_error;
648} 514}
649 515
650/* 516/*
651 * Initialise the timeout values for a connection 517 * Initialise the common bits of the superblock
652 */ 518 */
653static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans) 519static inline void nfs_initialise_sb(struct super_block *sb)
654{ 520{
655 to->to_initval = timeo * HZ / 10; 521 struct nfs_server *server = NFS_SB(sb);
656 to->to_retries = retrans;
657 if (!to->to_retries)
658 to->to_retries = 2;
659
660 switch (proto) {
661 case IPPROTO_TCP:
662 if (!to->to_initval)
663 to->to_initval = 60 * HZ;
664 if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
665 to->to_initval = NFS_MAX_TCP_TIMEOUT;
666 to->to_increment = to->to_initval;
667 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
668 to->to_exponential = 0;
669 break;
670 case IPPROTO_UDP:
671 default:
672 if (!to->to_initval)
673 to->to_initval = 11 * HZ / 10;
674 if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
675 to->to_initval = NFS_MAX_UDP_TIMEOUT;
676 to->to_maxval = NFS_MAX_UDP_TIMEOUT;
677 to->to_exponential = 1;
678 break;
679 }
680}
681 522
682/* 523 sb->s_magic = NFS_SUPER_MAGIC;
683 * Create an RPC client handle.
684 */
685static struct rpc_clnt *
686nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
687{
688 struct rpc_timeout timeparms;
689 struct rpc_xprt *xprt = NULL;
690 struct rpc_clnt *clnt = NULL;
691 int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
692
693 nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
694
695 server->retrans_timeo = timeparms.to_initval;
696 server->retrans_count = timeparms.to_retries;
697
698 /* create transport and client */
699 xprt = xprt_create_proto(proto, &server->addr, &timeparms);
700 if (IS_ERR(xprt)) {
701 dprintk("%s: cannot create RPC transport. Error = %ld\n",
702 __FUNCTION__, PTR_ERR(xprt));
703 return (struct rpc_clnt *)xprt;
704 }
705 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
706 server->rpc_ops->version, data->pseudoflavor);
707 if (IS_ERR(clnt)) {
708 dprintk("%s: cannot create RPC client. Error = %ld\n",
709 __FUNCTION__, PTR_ERR(xprt));
710 goto out_fail;
711 }
712 524
713 clnt->cl_intr = 1; 525 /* We probably want something more informative here */
714 clnt->cl_softrtry = 1; 526 snprintf(sb->s_id, sizeof(sb->s_id),
527 "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
528
529 if (sb->s_blocksize == 0)
530 sb->s_blocksize = nfs_block_bits(server->wsize,
531 &sb->s_blocksize_bits);
715 532
716 return clnt; 533 if (server->flags & NFS_MOUNT_NOAC)
534 sb->s_flags |= MS_SYNCHRONOUS;
717 535
718out_fail: 536 nfs_super_set_maxbytes(sb, server->maxfilesize);
719 return clnt;
720} 537}
721 538
722/* 539/*
723 * Clone a server record 540 * Finish setting up an NFS2/3 superblock
724 */ 541 */
725static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data) 542static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
726{ 543{
727 struct nfs_server *server = NFS_SB(sb); 544 struct nfs_server *server = NFS_SB(sb);
728 struct nfs_server *parent = NFS_SB(data->sb);
729 struct inode *root_inode;
730 struct nfs_fsinfo fsinfo;
731 void *err = ERR_PTR(-ENOMEM);
732
733 sb->s_op = data->sb->s_op;
734 sb->s_blocksize = data->sb->s_blocksize;
735 sb->s_blocksize_bits = data->sb->s_blocksize_bits;
736 sb->s_maxbytes = data->sb->s_maxbytes;
737
738 server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
739 server->io_stats = nfs_alloc_iostats();
740 if (server->io_stats == NULL)
741 goto out;
742
743 server->client = rpc_clone_client(parent->client);
744 if (IS_ERR((err = server->client)))
745 goto out;
746
747 if (!IS_ERR(parent->client_sys)) {
748 server->client_sys = rpc_clone_client(parent->client_sys);
749 if (IS_ERR((err = server->client_sys)))
750 goto out;
751 }
752 if (!IS_ERR(parent->client_acl)) {
753 server->client_acl = rpc_clone_client(parent->client_acl);
754 if (IS_ERR((err = server->client_acl)))
755 goto out;
756 }
757 root_inode = nfs_fhget(sb, data->fh, data->fattr);
758 if (!root_inode)
759 goto out;
760 sb->s_root = d_alloc_root(root_inode);
761 if (!sb->s_root)
762 goto out_put_root;
763 fsinfo.fattr = data->fattr;
764 if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
765 nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
766 sb->s_root->d_op = server->rpc_ops->dentry_ops;
767 sb->s_flags |= MS_ACTIVE;
768 return server;
769out_put_root:
770 iput(root_inode);
771out:
772 return err;
773}
774 545
775/* 546 sb->s_blocksize_bits = 0;
776 * Copy an existing superblock and attach revised data 547 sb->s_blocksize = 0;
777 */ 548 if (data->bsize)
778static int nfs_clone_generic_sb(struct nfs_clone_mount *data, 549 sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
779 struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
780 struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *),
781 struct vfsmount *mnt)
782{
783 struct nfs_server *server;
784 struct nfs_server *parent = NFS_SB(data->sb);
785 struct super_block *sb = ERR_PTR(-EINVAL);
786 char *hostname;
787 int error = -ENOMEM;
788 int len;
789
790 server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
791 if (server == NULL)
792 goto out_err;
793 memcpy(server, parent, sizeof(*server));
794 hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
795 len = strlen(hostname) + 1;
796 server->hostname = kmalloc(len, GFP_KERNEL);
797 if (server->hostname == NULL)
798 goto free_server;
799 memcpy(server->hostname, hostname, len);
800 error = rpciod_up();
801 if (error != 0)
802 goto free_hostname;
803
804 sb = fill_sb(server, data);
805 if (IS_ERR(sb)) {
806 error = PTR_ERR(sb);
807 goto kill_rpciod;
808 }
809
810 if (sb->s_root)
811 goto out_rpciod_down;
812 550
813 server = fill_server(sb, data); 551 if (server->flags & NFS_MOUNT_VER3) {
814 if (IS_ERR(server)) { 552 /* The VFS shouldn't apply the umask to mode bits. We will do
815 error = PTR_ERR(server); 553 * so ourselves when necessary.
816 goto out_deactivate; 554 */
555 sb->s_flags |= MS_POSIXACL;
556 sb->s_time_gran = 1;
817 } 557 }
818 return simple_set_mnt(mnt, sb); 558
819out_deactivate: 559 sb->s_op = &nfs_sops;
820 up_write(&sb->s_umount); 560 nfs_initialise_sb(sb);
821 deactivate_super(sb);
822 return error;
823out_rpciod_down:
824 rpciod_down();
825 kfree(server->hostname);
826 kfree(server);
827 return simple_set_mnt(mnt, sb);
828kill_rpciod:
829 rpciod_down();
830free_hostname:
831 kfree(server->hostname);
832free_server:
833 kfree(server);
834out_err:
835 return error;
836} 561}
837 562
838/* 563/*
839 * Set up an NFS2/3 superblock 564 * Finish setting up a cloned NFS2/3 superblock
840 *
841 * The way this works is that the mount process passes a structure
842 * in the data argument which contains the server's IP address
843 * and the root file handle obtained from the server's mount
844 * daemon. We stash these away in the private superblock fields.
845 */ 565 */
846static int 566static void nfs_clone_super(struct super_block *sb,
847nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) 567 const struct super_block *old_sb)
848{ 568{
849 struct nfs_server *server; 569 struct nfs_server *server = NFS_SB(sb);
850 rpc_authflavor_t authflavor;
851 570
852 server = NFS_SB(sb); 571 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
853 sb->s_blocksize_bits = 0; 572 sb->s_blocksize = old_sb->s_blocksize;
854 sb->s_blocksize = 0; 573 sb->s_maxbytes = old_sb->s_maxbytes;
855 if (data->bsize)
856 sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
857 if (data->rsize)
858 server->rsize = nfs_block_size(data->rsize, NULL);
859 if (data->wsize)
860 server->wsize = nfs_block_size(data->wsize, NULL);
861 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
862
863 server->acregmin = data->acregmin*HZ;
864 server->acregmax = data->acregmax*HZ;
865 server->acdirmin = data->acdirmin*HZ;
866 server->acdirmax = data->acdirmax*HZ;
867
868 /* Start lockd here, before we might error out */
869 if (!(server->flags & NFS_MOUNT_NONLM))
870 lockd_up();
871
872 server->namelen = data->namlen;
873 server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
874 if (!server->hostname)
875 return -ENOMEM;
876 strcpy(server->hostname, data->hostname);
877
878 /* Check NFS protocol revision and initialize RPC op vector
879 * and file handle pool. */
880#ifdef CONFIG_NFS_V3
881 if (server->flags & NFS_MOUNT_VER3) {
882 server->rpc_ops = &nfs_v3_clientops;
883 server->caps |= NFS_CAP_READDIRPLUS;
884 } else {
885 server->rpc_ops = &nfs_v2_clientops;
886 }
887#else
888 server->rpc_ops = &nfs_v2_clientops;
889#endif
890 574
891 /* Fill in pseudoflavor for mount version < 5 */
892 if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
893 data->pseudoflavor = RPC_AUTH_UNIX;
894 authflavor = data->pseudoflavor; /* save for sb_init() */
895 /* XXX maybe we want to add a server->pseudoflavor field */
896
897 /* Create RPC client handles */
898 server->client = nfs_create_client(server, data);
899 if (IS_ERR(server->client))
900 return PTR_ERR(server->client);
901 /* RFC 2623, sec 2.3.2 */
902 if (authflavor != RPC_AUTH_UNIX) {
903 struct rpc_auth *auth;
904
905 server->client_sys = rpc_clone_client(server->client);
906 if (IS_ERR(server->client_sys))
907 return PTR_ERR(server->client_sys);
908 auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
909 if (IS_ERR(auth))
910 return PTR_ERR(auth);
911 } else {
912 atomic_inc(&server->client->cl_count);
913 server->client_sys = server->client;
914 }
915 if (server->flags & NFS_MOUNT_VER3) { 575 if (server->flags & NFS_MOUNT_VER3) {
916#ifdef CONFIG_NFS_V3_ACL 576 /* The VFS shouldn't apply the umask to mode bits. We will do
917 if (!(server->flags & NFS_MOUNT_NOACL)) { 577 * so ourselves when necessary.
918 server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
919 /* No errors! Assume that Sun nfsacls are supported */
920 if (!IS_ERR(server->client_acl))
921 server->caps |= NFS_CAP_ACLS;
922 }
923#else
924 server->flags &= ~NFS_MOUNT_NOACL;
925#endif /* CONFIG_NFS_V3_ACL */
926 /*
927 * The VFS shouldn't apply the umask to mode bits. We will
928 * do so ourselves when necessary.
929 */ 578 */
930 sb->s_flags |= MS_POSIXACL; 579 sb->s_flags |= MS_POSIXACL;
931 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
932 server->namelen = NFS3_MAXNAMLEN;
933 sb->s_time_gran = 1; 580 sb->s_time_gran = 1;
934 } else {
935 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
936 server->namelen = NFS2_MAXNAMLEN;
937 } 581 }
938 582
939 sb->s_op = &nfs_sops; 583 sb->s_op = old_sb->s_op;
940 return nfs_sb_init(sb, authflavor); 584 nfs_initialise_sb(sb);
941} 585}
942 586
943static int nfs_set_super(struct super_block *s, void *data) 587static int nfs_set_super(struct super_block *s, void *_server)
944{ 588{
945 s->s_fs_info = data; 589 struct nfs_server *server = _server;
946 return set_anon_super(s, data); 590 int ret;
591
592 s->s_fs_info = server;
593 ret = set_anon_super(s, server);
594 if (ret == 0)
595 server->s_dev = s->s_dev;
596 return ret;
947} 597}
948 598
949static int nfs_compare_super(struct super_block *sb, void *data) 599static int nfs_compare_super(struct super_block *sb, void *data)
950{ 600{
951 struct nfs_server *server = data; 601 struct nfs_server *server = data, *old = NFS_SB(sb);
952 struct nfs_server *old = NFS_SB(sb);
953 602
954 if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr) 603 if (old->nfs_client != server->nfs_client)
955 return 0; 604 return 0;
956 if (old->addr.sin_port != server->addr.sin_port) 605 if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
957 return 0; 606 return 0;
958 return !nfs_compare_fh(&old->fh, &server->fh); 607 return 1;
959} 608}
960 609
961static int nfs_get_sb(struct file_system_type *fs_type, 610static int nfs_get_sb(struct file_system_type *fs_type,
962 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 611 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
963{ 612{
964 int error;
965 struct nfs_server *server = NULL; 613 struct nfs_server *server = NULL;
966 struct super_block *s; 614 struct super_block *s;
967 struct nfs_fh *root; 615 struct nfs_fh mntfh;
968 struct nfs_mount_data *data = raw_data; 616 struct nfs_mount_data *data = raw_data;
617 struct dentry *mntroot;
618 int error;
969 619
970 error = -EINVAL; 620 /* Validate the mount data */
971 if (data == NULL) { 621 error = nfs_validate_mount_data(data, &mntfh);
972 dprintk("%s: missing data argument\n", __FUNCTION__); 622 if (error < 0)
973 goto out_err_noserver; 623 return error;
974 }
975 if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
976 dprintk("%s: bad mount version\n", __FUNCTION__);
977 goto out_err_noserver;
978 }
979 switch (data->version) {
980 case 1:
981 data->namlen = 0;
982 case 2:
983 data->bsize = 0;
984 case 3:
985 if (data->flags & NFS_MOUNT_VER3) {
986 dprintk("%s: mount structure version %d does not support NFSv3\n",
987 __FUNCTION__,
988 data->version);
989 goto out_err_noserver;
990 }
991 data->root.size = NFS2_FHSIZE;
992 memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
993 case 4:
994 if (data->flags & NFS_MOUNT_SECFLAVOUR) {
995 dprintk("%s: mount structure version %d does not support strong security\n",
996 __FUNCTION__,
997 data->version);
998 goto out_err_noserver;
999 }
1000 case 5:
1001 memset(data->context, 0, sizeof(data->context));
1002 }
1003#ifndef CONFIG_NFS_V3
1004 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
1005 error = -EPROTONOSUPPORT;
1006 if (data->flags & NFS_MOUNT_VER3) {
1007 dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
1008 goto out_err_noserver;
1009 }
1010#endif /* CONFIG_NFS_V3 */
1011 624
1012 error = -ENOMEM; 625 /* Get a volume representation */
1013 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); 626 server = nfs_create_server(data, &mntfh);
1014 if (!server) 627 if (IS_ERR(server)) {
628 error = PTR_ERR(server);
1015 goto out_err_noserver; 629 goto out_err_noserver;
1016 /* Zero out the NFS state stuff */
1017 init_nfsv4_state(server);
1018 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
1019
1020 root = &server->fh;
1021 if (data->flags & NFS_MOUNT_VER3)
1022 root->size = data->root.size;
1023 else
1024 root->size = NFS2_FHSIZE;
1025 error = -EINVAL;
1026 if (root->size > sizeof(root->data)) {
1027 dprintk("%s: invalid root filehandle\n", __FUNCTION__);
1028 goto out_err;
1029 }
1030 memcpy(root->data, data->root.data, root->size);
1031
1032 /* We now require that the mount process passes the remote address */
1033 memcpy(&server->addr, &data->addr, sizeof(server->addr));
1034 if (server->addr.sin_addr.s_addr == INADDR_ANY) {
1035 dprintk("%s: mount program didn't pass remote address!\n",
1036 __FUNCTION__);
1037 goto out_err;
1038 }
1039
1040 /* Fire up rpciod if not yet running */
1041 error = rpciod_up();
1042 if (error < 0) {
1043 dprintk("%s: couldn't start rpciod! Error = %d\n",
1044 __FUNCTION__, error);
1045 goto out_err;
1046 } 630 }
1047 631
632 /* Get a superblock - note that we may end up sharing one that already exists */
1048 s = sget(fs_type, nfs_compare_super, nfs_set_super, server); 633 s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
1049 if (IS_ERR(s)) { 634 if (IS_ERR(s)) {
1050 error = PTR_ERR(s); 635 error = PTR_ERR(s);
1051 goto out_err_rpciod; 636 goto out_err_nosb;
1052 } 637 }
1053 638
1054 if (s->s_root) 639 if (s->s_fs_info != server) {
1055 goto out_rpciod_down; 640 nfs_free_server(server);
641 server = NULL;
642 }
1056 643
1057 s->s_flags = flags; 644 if (!s->s_root) {
645 /* initial superblock/root creation */
646 s->s_flags = flags;
647 nfs_fill_super(s, data);
648 }
1058 649
1059 error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); 650 mntroot = nfs_get_root(s, &mntfh);
1060 if (error) { 651 if (IS_ERR(mntroot)) {
1061 up_write(&s->s_umount); 652 error = PTR_ERR(mntroot);
1062 deactivate_super(s); 653 goto error_splat_super;
1063 return error;
1064 } 654 }
1065 s->s_flags |= MS_ACTIVE;
1066 return simple_set_mnt(mnt, s);
1067 655
1068out_rpciod_down: 656 s->s_flags |= MS_ACTIVE;
1069 rpciod_down(); 657 mnt->mnt_sb = s;
1070 kfree(server); 658 mnt->mnt_root = mntroot;
1071 return simple_set_mnt(mnt, s); 659 return 0;
1072 660
1073out_err_rpciod: 661out_err_nosb:
1074 rpciod_down(); 662 nfs_free_server(server);
1075out_err:
1076 kfree(server);
1077out_err_noserver: 663out_err_noserver:
1078 return error; 664 return error;
665
666error_splat_super:
667 up_write(&s->s_umount);
668 deactivate_super(s);
669 return error;
1079} 670}
1080 671
672/*
673 * Destroy an NFS2/3 superblock
674 */
1081static void nfs_kill_super(struct super_block *s) 675static void nfs_kill_super(struct super_block *s)
1082{ 676{
1083 struct nfs_server *server = NFS_SB(s); 677 struct nfs_server *server = NFS_SB(s);
1084 678
1085 kill_anon_super(s); 679 kill_anon_super(s);
1086 680 nfs_free_server(server);
1087 if (!IS_ERR(server->client))
1088 rpc_shutdown_client(server->client);
1089 if (!IS_ERR(server->client_sys))
1090 rpc_shutdown_client(server->client_sys);
1091 if (!IS_ERR(server->client_acl))
1092 rpc_shutdown_client(server->client_acl);
1093
1094 if (!(server->flags & NFS_MOUNT_NONLM))
1095 lockd_down(); /* release rpc.lockd */
1096
1097 rpciod_down(); /* release rpciod */
1098
1099 nfs_free_iostats(server->io_stats);
1100 kfree(server->hostname);
1101 kfree(server);
1102 nfs_release_automount_timer();
1103}
1104
1105static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
1106{
1107 struct super_block *sb;
1108
1109 server->fsid = data->fattr->fsid;
1110 nfs_copy_fh(&server->fh, data->fh);
1111 sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
1112 if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM))
1113 lockd_up();
1114 return sb;
1115} 681}
1116 682
1117static int nfs_clone_nfs_sb(struct file_system_type *fs_type, 683/*
1118 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 684 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
685 */
686static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
687 const char *dev_name, void *raw_data,
688 struct vfsmount *mnt)
1119{ 689{
1120 struct nfs_clone_mount *data = raw_data; 690 struct nfs_clone_mount *data = raw_data;
1121 return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt); 691 struct super_block *s;
1122} 692 struct nfs_server *server;
693 struct dentry *mntroot;
694 int error;
1123 695
1124#ifdef CONFIG_NFS_V4 696 dprintk("--> nfs_xdev_get_sb()\n");
1125static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
1126 struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
1127{
1128 struct nfs4_client *clp;
1129 struct rpc_xprt *xprt = NULL;
1130 struct rpc_clnt *clnt = NULL;
1131 int err = -EIO;
1132
1133 clp = nfs4_get_client(&server->addr.sin_addr);
1134 if (!clp) {
1135 dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
1136 return ERR_PTR(err);
1137 }
1138 697
1139 /* Now create transport and client */ 698 /* create a new volume representation */
1140 down_write(&clp->cl_sem); 699 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
1141 if (IS_ERR(clp->cl_rpcclient)) { 700 if (IS_ERR(server)) {
1142 xprt = xprt_create_proto(proto, &server->addr, timeparms); 701 error = PTR_ERR(server);
1143 if (IS_ERR(xprt)) { 702 goto out_err_noserver;
1144 up_write(&clp->cl_sem);
1145 err = PTR_ERR(xprt);
1146 dprintk("%s: cannot create RPC transport. Error = %d\n",
1147 __FUNCTION__, err);
1148 goto out_fail;
1149 }
1150 /* Bind to a reserved port! */
1151 xprt->resvport = 1;
1152 clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
1153 server->rpc_ops->version, flavor);
1154 if (IS_ERR(clnt)) {
1155 up_write(&clp->cl_sem);
1156 err = PTR_ERR(clnt);
1157 dprintk("%s: cannot create RPC client. Error = %d\n",
1158 __FUNCTION__, err);
1159 goto out_fail;
1160 }
1161 clnt->cl_intr = 1;
1162 clnt->cl_softrtry = 1;
1163 clp->cl_rpcclient = clnt;
1164 memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
1165 nfs_idmap_new(clp);
1166 }
1167 list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
1168 clnt = rpc_clone_client(clp->cl_rpcclient);
1169 if (!IS_ERR(clnt))
1170 server->nfs4_state = clp;
1171 up_write(&clp->cl_sem);
1172 clp = NULL;
1173
1174 if (IS_ERR(clnt)) {
1175 dprintk("%s: cannot create RPC client. Error = %d\n",
1176 __FUNCTION__, err);
1177 return clnt;
1178 } 703 }
1179 704
1180 if (server->nfs4_state->cl_idmap == NULL) { 705 /* Get a superblock - note that we may end up sharing one that already exists */
1181 dprintk("%s: failed to create idmapper.\n", __FUNCTION__); 706 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
1182 return ERR_PTR(-ENOMEM); 707 if (IS_ERR(s)) {
708 error = PTR_ERR(s);
709 goto out_err_nosb;
1183 } 710 }
1184 711
1185 if (clnt->cl_auth->au_flavor != flavor) { 712 if (s->s_fs_info != server) {
1186 struct rpc_auth *auth; 713 nfs_free_server(server);
1187 714 server = NULL;
1188 auth = rpcauth_create(flavor, clnt);
1189 if (IS_ERR(auth)) {
1190 dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
1191 return (struct rpc_clnt *)auth;
1192 }
1193 } 715 }
1194 return clnt;
1195
1196 out_fail:
1197 if (clp)
1198 nfs4_put_client(clp);
1199 return ERR_PTR(err);
1200}
1201
1202/*
1203 * Set up an NFS4 superblock
1204 */
1205static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
1206{
1207 struct nfs_server *server;
1208 struct rpc_timeout timeparms;
1209 rpc_authflavor_t authflavour;
1210 int err = -EIO;
1211 716
1212 sb->s_blocksize_bits = 0; 717 if (!s->s_root) {
1213 sb->s_blocksize = 0; 718 /* initial superblock/root creation */
1214 server = NFS_SB(sb); 719 s->s_flags = flags;
1215 if (data->rsize != 0) 720 nfs_clone_super(s, data->sb);
1216 server->rsize = nfs_block_size(data->rsize, NULL); 721 }
1217 if (data->wsize != 0)
1218 server->wsize = nfs_block_size(data->wsize, NULL);
1219 server->flags = data->flags & NFS_MOUNT_FLAGMASK;
1220 server->caps = NFS_CAP_ATOMIC_OPEN;
1221 722
1222 server->acregmin = data->acregmin*HZ; 723 mntroot = nfs_get_root(s, data->fh);
1223 server->acregmax = data->acregmax*HZ; 724 if (IS_ERR(mntroot)) {
1224 server->acdirmin = data->acdirmin*HZ; 725 error = PTR_ERR(mntroot);
1225 server->acdirmax = data->acdirmax*HZ; 726 goto error_splat_super;
727 }
1226 728
1227 server->rpc_ops = &nfs_v4_clientops; 729 s->s_flags |= MS_ACTIVE;
730 mnt->mnt_sb = s;
731 mnt->mnt_root = mntroot;
1228 732
1229 nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); 733 dprintk("<-- nfs_xdev_get_sb() = 0\n");
734 return 0;
1230 735
1231 server->retrans_timeo = timeparms.to_initval; 736out_err_nosb:
1232 server->retrans_count = timeparms.to_retries; 737 nfs_free_server(server);
738out_err_noserver:
739 dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
740 return error;
1233 741
1234 /* Now create transport and client */ 742error_splat_super:
1235 authflavour = RPC_AUTH_UNIX; 743 up_write(&s->s_umount);
1236 if (data->auth_flavourlen != 0) { 744 deactivate_super(s);
1237 if (data->auth_flavourlen != 1) { 745 dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
1238 dprintk("%s: Invalid number of RPC auth flavours %d.\n", 746 return error;
1239 __FUNCTION__, data->auth_flavourlen); 747}
1240 err = -EINVAL;
1241 goto out_fail;
1242 }
1243 if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
1244 err = -EFAULT;
1245 goto out_fail;
1246 }
1247 }
1248 748
1249 server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour); 749#ifdef CONFIG_NFS_V4
1250 if (IS_ERR(server->client)) {
1251 err = PTR_ERR(server->client);
1252 dprintk("%s: cannot create RPC client. Error = %d\n",
1253 __FUNCTION__, err);
1254 goto out_fail;
1255 }
1256 750
751/*
752 * Finish setting up a cloned NFS4 superblock
753 */
754static void nfs4_clone_super(struct super_block *sb,
755 const struct super_block *old_sb)
756{
757 sb->s_blocksize_bits = old_sb->s_blocksize_bits;
758 sb->s_blocksize = old_sb->s_blocksize;
759 sb->s_maxbytes = old_sb->s_maxbytes;
1257 sb->s_time_gran = 1; 760 sb->s_time_gran = 1;
1258 761 sb->s_op = old_sb->s_op;
1259 sb->s_op = &nfs4_sops; 762 nfs_initialise_sb(sb);
1260 err = nfs_sb_init(sb, authflavour);
1261
1262 out_fail:
1263 return err;
1264} 763}
1265 764
1266static int nfs4_compare_super(struct super_block *sb, void *data) 765/*
766 * Set up an NFS4 superblock
767 */
768static void nfs4_fill_super(struct super_block *sb)
1267{ 769{
1268 struct nfs_server *server = data; 770 sb->s_time_gran = 1;
1269 struct nfs_server *old = NFS_SB(sb); 771 sb->s_op = &nfs4_sops;
1270 772 nfs_initialise_sb(sb);
1271 if (strcmp(server->hostname, old->hostname) != 0)
1272 return 0;
1273 if (strcmp(server->mnt_path, old->mnt_path) != 0)
1274 return 0;
1275 return 1;
1276} 773}
1277 774
1278static void * 775static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
1279nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
1280{ 776{
1281 void *p = NULL; 777 void *p = NULL;
1282 778
@@ -1297,14 +793,22 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
1297 return dst; 793 return dst;
1298} 794}
1299 795
796/*
797 * Get the superblock for an NFS4 mountpoint
798 */
1300static int nfs4_get_sb(struct file_system_type *fs_type, 799static int nfs4_get_sb(struct file_system_type *fs_type,
1301 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 800 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1302{ 801{
1303 int error;
1304 struct nfs_server *server;
1305 struct super_block *s;
1306 struct nfs4_mount_data *data = raw_data; 802 struct nfs4_mount_data *data = raw_data;
803 struct super_block *s;
804 struct nfs_server *server;
805 struct sockaddr_in addr;
806 rpc_authflavor_t authflavour;
807 struct nfs_fh mntfh;
808 struct dentry *mntroot;
809 char *mntpath = NULL, *hostname = NULL, ip_addr[16];
1307 void *p; 810 void *p;
811 int error;
1308 812
1309 if (data == NULL) { 813 if (data == NULL) {
1310 dprintk("%s: missing data argument\n", __FUNCTION__); 814 dprintk("%s: missing data argument\n", __FUNCTION__);
@@ -1315,84 +819,112 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1315 return -EINVAL; 819 return -EINVAL;
1316 } 820 }
1317 821
1318 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); 822 /* We now require that the mount process passes the remote address */
1319 if (!server) 823 if (data->host_addrlen != sizeof(addr))
1320 return -ENOMEM; 824 return -EINVAL;
1321 /* Zero out the NFS state stuff */ 825
1322 init_nfsv4_state(server); 826 if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
1323 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); 827 return -EFAULT;
828
829 if (addr.sin_family != AF_INET ||
830 addr.sin_addr.s_addr == INADDR_ANY
831 ) {
832 dprintk("%s: mount program didn't pass remote IP address!\n",
833 __FUNCTION__);
834 return -EINVAL;
835 }
836 /* RFC3530: The default port for NFS is 2049 */
837 if (addr.sin_port == 0)
838 addr.sin_port = NFS_PORT;
839
840 /* Grab the authentication type */
841 authflavour = RPC_AUTH_UNIX;
842 if (data->auth_flavourlen != 0) {
843 if (data->auth_flavourlen != 1) {
844 dprintk("%s: Invalid number of RPC auth flavours %d.\n",
845 __FUNCTION__, data->auth_flavourlen);
846 error = -EINVAL;
847 goto out_err_noserver;
848 }
849
850 if (copy_from_user(&authflavour, data->auth_flavours,
851 sizeof(authflavour))) {
852 error = -EFAULT;
853 goto out_err_noserver;
854 }
855 }
1324 856
1325 p = nfs_copy_user_string(NULL, &data->hostname, 256); 857 p = nfs_copy_user_string(NULL, &data->hostname, 256);
1326 if (IS_ERR(p)) 858 if (IS_ERR(p))
1327 goto out_err; 859 goto out_err;
1328 server->hostname = p; 860 hostname = p;
1329 861
1330 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024); 862 p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
1331 if (IS_ERR(p)) 863 if (IS_ERR(p))
1332 goto out_err; 864 goto out_err;
1333 server->mnt_path = p; 865 mntpath = p;
866
867 dprintk("MNTPATH: %s\n", mntpath);
1334 868
1335 p = nfs_copy_user_string(server->ip_addr, &data->client_addr, 869 p = nfs_copy_user_string(ip_addr, &data->client_addr,
1336 sizeof(server->ip_addr) - 1); 870 sizeof(ip_addr) - 1);
1337 if (IS_ERR(p)) 871 if (IS_ERR(p))
1338 goto out_err; 872 goto out_err;
1339 873
1340 /* We now require that the mount process passes the remote address */ 874 /* Get a volume representation */
1341 if (data->host_addrlen != sizeof(server->addr)) { 875 server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
1342 error = -EINVAL; 876 authflavour, &mntfh);
1343 goto out_free; 877 if (IS_ERR(server)) {
1344 } 878 error = PTR_ERR(server);
1345 if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { 879 goto out_err_noserver;
1346 error = -EFAULT;
1347 goto out_free;
1348 }
1349 if (server->addr.sin_family != AF_INET ||
1350 server->addr.sin_addr.s_addr == INADDR_ANY) {
1351 dprintk("%s: mount program didn't pass remote IP address!\n",
1352 __FUNCTION__);
1353 error = -EINVAL;
1354 goto out_free;
1355 }
1356
1357 /* Fire up rpciod if not yet running */
1358 error = rpciod_up();
1359 if (error < 0) {
1360 dprintk("%s: couldn't start rpciod! Error = %d\n",
1361 __FUNCTION__, error);
1362 goto out_free;
1363 } 880 }
1364 881
1365 s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); 882 /* Get a superblock - note that we may end up sharing one that already exists */
1366 883 s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
1367 if (IS_ERR(s)) { 884 if (IS_ERR(s)) {
1368 error = PTR_ERR(s); 885 error = PTR_ERR(s);
1369 goto out_free; 886 goto out_free;
1370 } 887 }
1371 888
1372 if (s->s_root) { 889 if (s->s_fs_info != server) {
1373 kfree(server->mnt_path); 890 nfs_free_server(server);
1374 kfree(server->hostname); 891 server = NULL;
1375 kfree(server);
1376 return simple_set_mnt(mnt, s);
1377 } 892 }
1378 893
1379 s->s_flags = flags; 894 if (!s->s_root) {
895 /* initial superblock/root creation */
896 s->s_flags = flags;
897 nfs4_fill_super(s);
898 }
1380 899
1381 error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0); 900 mntroot = nfs4_get_root(s, &mntfh);
1382 if (error) { 901 if (IS_ERR(mntroot)) {
1383 up_write(&s->s_umount); 902 error = PTR_ERR(mntroot);
1384 deactivate_super(s); 903 goto error_splat_super;
1385 return error;
1386 } 904 }
905
1387 s->s_flags |= MS_ACTIVE; 906 s->s_flags |= MS_ACTIVE;
1388 return simple_set_mnt(mnt, s); 907 mnt->mnt_sb = s;
908 mnt->mnt_root = mntroot;
909 kfree(mntpath);
910 kfree(hostname);
911 return 0;
912
1389out_err: 913out_err:
1390 error = PTR_ERR(p); 914 error = PTR_ERR(p);
915 goto out_err_noserver;
916
1391out_free: 917out_free:
1392 kfree(server->mnt_path); 918 nfs_free_server(server);
1393 kfree(server->hostname); 919out_err_noserver:
1394 kfree(server); 920 kfree(mntpath);
921 kfree(hostname);
1395 return error; 922 return error;
923
924error_splat_super:
925 up_write(&s->s_umount);
926 deactivate_super(s);
927 goto out_err_noserver;
1396} 928}
1397 929
1398static void nfs4_kill_super(struct super_block *sb) 930static void nfs4_kill_super(struct super_block *sb)
@@ -1403,135 +935,140 @@ static void nfs4_kill_super(struct super_block *sb)
1403 kill_anon_super(sb); 935 kill_anon_super(sb);
1404 936
1405 nfs4_renewd_prepare_shutdown(server); 937 nfs4_renewd_prepare_shutdown(server);
938 nfs_free_server(server);
939}
940
941/*
942 * Clone an NFS4 server record on xdev traversal (FSID-change)
943 */
944static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
945 const char *dev_name, void *raw_data,
946 struct vfsmount *mnt)
947{
948 struct nfs_clone_mount *data = raw_data;
949 struct super_block *s;
950 struct nfs_server *server;
951 struct dentry *mntroot;
952 int error;
953
954 dprintk("--> nfs4_xdev_get_sb()\n");
955
956 /* create a new volume representation */
957 server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
958 if (IS_ERR(server)) {
959 error = PTR_ERR(server);
960 goto out_err_noserver;
961 }
962
963 /* Get a superblock - note that we may end up sharing one that already exists */
964 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
965 if (IS_ERR(s)) {
966 error = PTR_ERR(s);
967 goto out_err_nosb;
968 }
1406 969
1407 if (server->client != NULL && !IS_ERR(server->client)) 970 if (s->s_fs_info != server) {
1408 rpc_shutdown_client(server->client); 971 nfs_free_server(server);
972 server = NULL;
973 }
1409 974
1410 destroy_nfsv4_state(server); 975 if (!s->s_root) {
976 /* initial superblock/root creation */
977 s->s_flags = flags;
978 nfs4_clone_super(s, data->sb);
979 }
980
981 mntroot = nfs4_get_root(s, data->fh);
982 if (IS_ERR(mntroot)) {
983 error = PTR_ERR(mntroot);
984 goto error_splat_super;
985 }
1411 986
1412 rpciod_down(); 987 s->s_flags |= MS_ACTIVE;
988 mnt->mnt_sb = s;
989 mnt->mnt_root = mntroot;
990
991 dprintk("<-- nfs4_xdev_get_sb() = 0\n");
992 return 0;
993
994out_err_nosb:
995 nfs_free_server(server);
996out_err_noserver:
997 dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
998 return error;
1413 999
1414 nfs_free_iostats(server->io_stats); 1000error_splat_super:
1415 kfree(server->hostname); 1001 up_write(&s->s_umount);
1416 kfree(server); 1002 deactivate_super(s);
1417 nfs_release_automount_timer(); 1003 dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
1004 return error;
1418} 1005}
1419 1006
1420/* 1007/*
1421 * Constructs the SERVER-side path 1008 * Create an NFS4 server record on referral traversal
1422 */ 1009 */
1423static inline char *nfs4_dup_path(const struct dentry *dentry) 1010static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
1011 const char *dev_name, void *raw_data,
1012 struct vfsmount *mnt)
1424{ 1013{
1425 char *page = (char *) __get_free_page(GFP_USER); 1014 struct nfs_clone_mount *data = raw_data;
1426 char *path; 1015 struct super_block *s;
1016 struct nfs_server *server;
1017 struct dentry *mntroot;
1018 struct nfs_fh mntfh;
1019 int error;
1427 1020
1428 path = nfs4_path(dentry, page, PAGE_SIZE); 1021 dprintk("--> nfs4_referral_get_sb()\n");
1429 if (!IS_ERR(path)) {
1430 int len = PAGE_SIZE + page - path;
1431 char *tmp = path;
1432 1022
1433 path = kmalloc(len, GFP_KERNEL); 1023 /* create a new volume representation */
1434 if (path) 1024 server = nfs4_create_referral_server(data, &mntfh);
1435 memcpy(path, tmp, len); 1025 if (IS_ERR(server)) {
1436 else 1026 error = PTR_ERR(server);
1437 path = ERR_PTR(-ENOMEM); 1027 goto out_err_noserver;
1438 } 1028 }
1439 free_page((unsigned long)page);
1440 return path;
1441}
1442 1029
1443static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data) 1030 /* Get a superblock - note that we may end up sharing one that already exists */
1444{ 1031 s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
1445 const struct dentry *dentry = data->dentry; 1032 if (IS_ERR(s)) {
1446 struct nfs4_client *clp = server->nfs4_state; 1033 error = PTR_ERR(s);
1447 struct super_block *sb; 1034 goto out_err_nosb;
1448
1449 server->fsid = data->fattr->fsid;
1450 nfs_copy_fh(&server->fh, data->fh);
1451 server->mnt_path = nfs4_dup_path(dentry);
1452 if (IS_ERR(server->mnt_path)) {
1453 sb = (struct super_block *)server->mnt_path;
1454 goto err;
1455 } 1035 }
1456 sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
1457 if (IS_ERR(sb) || sb->s_root)
1458 goto free_path;
1459 nfs4_server_capabilities(server, &server->fh);
1460
1461 down_write(&clp->cl_sem);
1462 atomic_inc(&clp->cl_count);
1463 list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
1464 up_write(&clp->cl_sem);
1465 return sb;
1466free_path:
1467 kfree(server->mnt_path);
1468err:
1469 server->mnt_path = NULL;
1470 return sb;
1471}
1472 1036
1473static int nfs_clone_nfs4_sb(struct file_system_type *fs_type, 1037 if (s->s_fs_info != server) {
1474 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1038 nfs_free_server(server);
1475{ 1039 server = NULL;
1476 struct nfs_clone_mount *data = raw_data; 1040 }
1477 return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt);
1478}
1479 1041
1480static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data) 1042 if (!s->s_root) {
1481{ 1043 /* initial superblock/root creation */
1482 struct super_block *sb = ERR_PTR(-ENOMEM); 1044 s->s_flags = flags;
1483 int len; 1045 nfs4_fill_super(s);
1484 1046 }
1485 len = strlen(data->mnt_path) + 1;
1486 server->mnt_path = kmalloc(len, GFP_KERNEL);
1487 if (server->mnt_path == NULL)
1488 goto err;
1489 memcpy(server->mnt_path, data->mnt_path, len);
1490 memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in));
1491
1492 sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
1493 if (IS_ERR(sb) || sb->s_root)
1494 goto free_path;
1495 return sb;
1496free_path:
1497 kfree(server->mnt_path);
1498err:
1499 server->mnt_path = NULL;
1500 return sb;
1501}
1502 1047
1503static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data) 1048 mntroot = nfs4_get_root(s, data->fh);
1504{ 1049 if (IS_ERR(mntroot)) {
1505 struct nfs_server *server = NFS_SB(sb); 1050 error = PTR_ERR(mntroot);
1506 struct rpc_timeout timeparms; 1051 goto error_splat_super;
1507 int proto, timeo, retrans; 1052 }
1508 void *err;
1509
1510 proto = IPPROTO_TCP;
1511 /* Since we are following a referral and there may be alternatives,
1512 set the timeouts and retries to low values */
1513 timeo = 2;
1514 retrans = 1;
1515 nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
1516
1517 server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
1518 if (IS_ERR((err = server->client)))
1519 goto out_err;
1520 1053
1521 sb->s_time_gran = 1; 1054 s->s_flags |= MS_ACTIVE;
1522 sb->s_op = &nfs4_sops; 1055 mnt->mnt_sb = s;
1523 err = ERR_PTR(nfs_sb_init(sb, data->authflavor)); 1056 mnt->mnt_root = mntroot;
1524 if (!IS_ERR(err))
1525 return server;
1526out_err:
1527 return (struct nfs_server *)err;
1528}
1529 1057
1530static int nfs_referral_nfs4_sb(struct file_system_type *fs_type, 1058 dprintk("<-- nfs4_referral_get_sb() = 0\n");
1531 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1059 return 0;
1532{ 1060
1533 struct nfs_clone_mount *data = raw_data; 1061out_err_nosb:
1534 return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt); 1062 nfs_free_server(server);
1063out_err_noserver:
1064 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
1065 return error;
1066
1067error_splat_super:
1068 up_write(&s->s_umount);
1069 deactivate_super(s);
1070 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
1071 return error;
1535} 1072}
1536 1073
1537#endif 1074#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7084ac9a6455..c12effb46fe5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -396,6 +396,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
396out: 396out:
397 clear_bit(BDI_write_congested, &bdi->state); 397 clear_bit(BDI_write_congested, &bdi->state);
398 wake_up_all(&nfs_write_congestion); 398 wake_up_all(&nfs_write_congestion);
399 writeback_congestion_end();
399 return err; 400 return err;
400} 401}
401 402
@@ -1252,7 +1253,13 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1252 dprintk("NFS: %4d nfs_writeback_done (status %d)\n", 1253 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1253 task->tk_pid, task->tk_status); 1254 task->tk_pid, task->tk_status);
1254 1255
1255 /* Call the NFS version-specific code */ 1256 /*
1257 * ->write_done will attempt to use post-op attributes to detect
1258 * conflicting writes by other clients. A strict interpretation
1259 * of close-to-open would allow us to continue caching even if
1260 * another writer had changed the file, but some applications
1261 * depend on tighter cache coherency when writing.
1262 */
1256 status = NFS_PROTO(data->inode)->write_done(task, data); 1263 status = NFS_PROTO(data->inode)->write_done(task, data);
1257 if (status != 0) 1264 if (status != 0)
1258 return status; 1265 return status;
@@ -1273,7 +1280,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1273 if (time_before(complain, jiffies)) { 1280 if (time_before(complain, jiffies)) {
1274 dprintk("NFS: faulty NFS server %s:" 1281 dprintk("NFS: faulty NFS server %s:"
1275 " (committed = %d) != (stable = %d)\n", 1282 " (committed = %d) != (stable = %d)\n",
1276 NFS_SERVER(data->inode)->hostname, 1283 NFS_SERVER(data->inode)->nfs_client->cl_hostname,
1277 resp->verf->committed, argp->stable); 1284 resp->verf->committed, argp->stable);
1278 complain = jiffies + 300 * HZ; 1285 complain = jiffies + 300 * HZ;
1279 } 1286 }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 54b37b1d2e3a..8583d99ee740 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -375,16 +375,28 @@ nfsd4_probe_callback(struct nfs4_client *clp)
375{ 375{
376 struct sockaddr_in addr; 376 struct sockaddr_in addr;
377 struct nfs4_callback *cb = &clp->cl_callback; 377 struct nfs4_callback *cb = &clp->cl_callback;
378 struct rpc_timeout timeparms; 378 struct rpc_timeout timeparms = {
379 struct rpc_xprt * xprt; 379 .to_initval = (NFSD_LEASE_TIME/4) * HZ,
380 .to_retries = 5,
381 .to_maxval = (NFSD_LEASE_TIME/2) * HZ,
382 .to_exponential = 1,
383 };
380 struct rpc_program * program = &cb->cb_program; 384 struct rpc_program * program = &cb->cb_program;
381 struct rpc_stat * stat = &cb->cb_stat; 385 struct rpc_create_args args = {
382 struct rpc_clnt * clnt; 386 .protocol = IPPROTO_TCP,
387 .address = (struct sockaddr *)&addr,
388 .addrsize = sizeof(addr),
389 .timeout = &timeparms,
390 .servername = clp->cl_name.data,
391 .program = program,
392 .version = nfs_cb_version[1]->number,
393 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
394 .flags = (RPC_CLNT_CREATE_NOPING),
395 };
383 struct rpc_message msg = { 396 struct rpc_message msg = {
384 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 397 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
385 .rpc_argp = clp, 398 .rpc_argp = clp,
386 }; 399 };
387 char hostname[32];
388 int status; 400 int status;
389 401
390 if (atomic_read(&cb->cb_set)) 402 if (atomic_read(&cb->cb_set))
@@ -396,51 +408,27 @@ nfsd4_probe_callback(struct nfs4_client *clp)
396 addr.sin_port = htons(cb->cb_port); 408 addr.sin_port = htons(cb->cb_port);
397 addr.sin_addr.s_addr = htonl(cb->cb_addr); 409 addr.sin_addr.s_addr = htonl(cb->cb_addr);
398 410
399 /* Initialize timeout */
400 timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
401 timeparms.to_retries = 0;
402 timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
403 timeparms.to_exponential = 1;
404
405 /* Create RPC transport */
406 xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms);
407 if (IS_ERR(xprt)) {
408 dprintk("NFSD: couldn't create callback transport!\n");
409 goto out_err;
410 }
411
412 /* Initialize rpc_program */ 411 /* Initialize rpc_program */
413 program->name = "nfs4_cb"; 412 program->name = "nfs4_cb";
414 program->number = cb->cb_prog; 413 program->number = cb->cb_prog;
415 program->nrvers = ARRAY_SIZE(nfs_cb_version); 414 program->nrvers = ARRAY_SIZE(nfs_cb_version);
416 program->version = nfs_cb_version; 415 program->version = nfs_cb_version;
417 program->stats = stat; 416 program->stats = &cb->cb_stat;
418 417
419 /* Initialize rpc_stat */ 418 /* Initialize rpc_stat */
420 memset(stat, 0, sizeof(struct rpc_stat)); 419 memset(program->stats, 0, sizeof(cb->cb_stat));
421 stat->program = program; 420 program->stats->program = program;
422 421
423 /* Create RPC client 422 /* Create RPC client */
424 * 423 cb->cb_client = rpc_create(&args);
425 * XXX AUTH_UNIX only - need AUTH_GSS.... 424 if (!cb->cb_client) {
426 */
427 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
428 clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
429 if (IS_ERR(clnt)) {
430 dprintk("NFSD: couldn't create callback client\n"); 425 dprintk("NFSD: couldn't create callback client\n");
431 goto out_err; 426 goto out_err;
432 } 427 }
433 clnt->cl_intr = 0;
434 clnt->cl_softrtry = 1;
435 428
436 /* Kick rpciod, put the call on the wire. */ 429 /* Kick rpciod, put the call on the wire. */
437 430 if (rpciod_up() != 0)
438 if (rpciod_up() != 0) {
439 dprintk("nfsd: couldn't start rpciod for callbacks!\n");
440 goto out_clnt; 431 goto out_clnt;
441 }
442
443 cb->cb_client = clnt;
444 432
445 /* the task holds a reference to the nfs4_client struct */ 433 /* the task holds a reference to the nfs4_client struct */
446 atomic_inc(&clp->cl_count); 434 atomic_inc(&clp->cl_count);
@@ -448,7 +436,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
448 msg.rpc_cred = nfsd4_lookupcred(clp,0); 436 msg.rpc_cred = nfsd4_lookupcred(clp,0);
449 if (IS_ERR(msg.rpc_cred)) 437 if (IS_ERR(msg.rpc_cred))
450 goto out_rpciod; 438 goto out_rpciod;
451 status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); 439 status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
452 put_rpccred(msg.rpc_cred); 440 put_rpccred(msg.rpc_cred);
453 441
454 if (status != 0) { 442 if (status != 0) {
@@ -462,7 +450,7 @@ out_rpciod:
462 rpciod_down(); 450 rpciod_down();
463 cb->cb_client = NULL; 451 cb->cb_client = NULL;
464out_clnt: 452out_clnt:
465 rpc_shutdown_client(clnt); 453 rpc_shutdown_client(cb->cb_client);
466out_err: 454out_err:
467 dprintk("NFSD: warning: no callback path to client %.*s\n", 455 dprintk("NFSD: warning: no callback path to client %.*s\n",
468 (int)clp->cl_name.len, clp->cl_name.data); 456 (int)clp->cl_name.len, clp->cl_name.data);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 06da7506363c..e35d7e52fdeb 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,7 +33,7 @@
33* 33*
34*/ 34*/
35 35
36 36#include <linux/err.h>
37#include <linux/sunrpc/svc.h> 37#include <linux/sunrpc/svc.h>
38#include <linux/nfsd/nfsd.h> 38#include <linux/nfsd/nfsd.h>
39#include <linux/nfs4.h> 39#include <linux/nfs4.h>
@@ -87,34 +87,35 @@ int
87nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) 87nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
88{ 88{
89 struct xdr_netobj cksum; 89 struct xdr_netobj cksum;
90 struct crypto_tfm *tfm; 90 struct hash_desc desc;
91 struct scatterlist sg[1]; 91 struct scatterlist sg[1];
92 int status = nfserr_resource; 92 int status = nfserr_resource;
93 93
94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", 94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
95 clname->len, clname->data); 95 clname->len, clname->data);
96 tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); 96 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
97 if (tfm == NULL) 97 desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
98 goto out; 98 if (IS_ERR(desc.tfm))
99 cksum.len = crypto_tfm_alg_digestsize(tfm); 99 goto out_no_tfm;
100 cksum.len = crypto_hash_digestsize(desc.tfm);
100 cksum.data = kmalloc(cksum.len, GFP_KERNEL); 101 cksum.data = kmalloc(cksum.len, GFP_KERNEL);
101 if (cksum.data == NULL) 102 if (cksum.data == NULL)
102 goto out; 103 goto out;
103 crypto_digest_init(tfm);
104 104
105 sg[0].page = virt_to_page(clname->data); 105 sg[0].page = virt_to_page(clname->data);
106 sg[0].offset = offset_in_page(clname->data); 106 sg[0].offset = offset_in_page(clname->data);
107 sg[0].length = clname->len; 107 sg[0].length = clname->len;
108 108
109 crypto_digest_update(tfm, sg, 1); 109 if (crypto_hash_digest(&desc, sg, sg->length, cksum.data))
110 crypto_digest_final(tfm, cksum.data); 110 goto out;
111 111
112 md5_to_hex(dname, cksum.data); 112 md5_to_hex(dname, cksum.data);
113 113
114 kfree(cksum.data); 114 kfree(cksum.data);
115 status = nfs_ok; 115 status = nfs_ok;
116out: 116out:
117 crypto_free_tfm(tfm); 117 crypto_free_hash(desc.tfm);
118out_no_tfm:
118 return status; 119 return status;
119} 120}
120 121
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 7d3be845a614..9fb8132f19b0 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -16,6 +16,7 @@ ocfs2-objs := \
16 file.o \ 16 file.o \
17 heartbeat.o \ 17 heartbeat.o \
18 inode.o \ 18 inode.o \
19 ioctl.o \
19 journal.o \ 20 journal.o \
20 localalloc.o \ 21 localalloc.o \
21 mmap.o \ 22 mmap.o \
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index edaab05a93e0..f43bc5f18a35 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1717,17 +1717,29 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
1717 1717
1718 ocfs2_remove_from_cache(inode, eb_bh); 1718 ocfs2_remove_from_cache(inode, eb_bh);
1719 1719
1720 BUG_ON(eb->h_suballoc_slot);
1721 BUG_ON(el->l_recs[0].e_clusters); 1720 BUG_ON(el->l_recs[0].e_clusters);
1722 BUG_ON(el->l_recs[0].e_cpos); 1721 BUG_ON(el->l_recs[0].e_cpos);
1723 BUG_ON(el->l_recs[0].e_blkno); 1722 BUG_ON(el->l_recs[0].e_blkno);
1724 status = ocfs2_free_extent_block(handle, 1723 if (eb->h_suballoc_slot == 0) {
1725 tc->tc_ext_alloc_inode, 1724 /*
1726 tc->tc_ext_alloc_bh, 1725 * This code only understands how to
1727 eb); 1726 * lock the suballocator in slot 0,
1728 if (status < 0) { 1727 * which is fine because allocation is
1729 mlog_errno(status); 1728 * only ever done out of that
1730 goto bail; 1729 * suballocator too. A future version
1730 * might change that however, so avoid
1731 * a free if we don't know how to
1732 * handle it. This way an fs incompat
1733 * bit will not be necessary.
1734 */
1735 status = ocfs2_free_extent_block(handle,
1736 tc->tc_ext_alloc_inode,
1737 tc->tc_ext_alloc_bh,
1738 eb);
1739 if (status < 0) {
1740 mlog_errno(status);
1741 goto bail;
1742 }
1731 } 1743 }
1732 } 1744 }
1733 brelse(eb_bh); 1745 brelse(eb_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1d1c342ce01..3d7c082a8f58 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -391,31 +391,28 @@ out:
391static int ocfs2_commit_write(struct file *file, struct page *page, 391static int ocfs2_commit_write(struct file *file, struct page *page,
392 unsigned from, unsigned to) 392 unsigned from, unsigned to)
393{ 393{
394 int ret, extending = 0, locklevel = 0; 394 int ret;
395 loff_t new_i_size;
396 struct buffer_head *di_bh = NULL; 395 struct buffer_head *di_bh = NULL;
397 struct inode *inode = page->mapping->host; 396 struct inode *inode = page->mapping->host;
398 struct ocfs2_journal_handle *handle = NULL; 397 struct ocfs2_journal_handle *handle = NULL;
398 struct ocfs2_dinode *di;
399 399
400 mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); 400 mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
401 401
402 /* NOTE: ocfs2_file_aio_write has ensured that it's safe for 402 /* NOTE: ocfs2_file_aio_write has ensured that it's safe for
403 * us to sample inode->i_size here without the metadata lock: 403 * us to continue here without rechecking the I/O against
404 * changed inode values.
404 * 405 *
405 * 1) We're currently holding the inode alloc lock, so no 406 * 1) We're currently holding the inode alloc lock, so no
406 * nodes can change it underneath us. 407 * nodes can change it underneath us.
407 * 408 *
408 * 2) We've had to take the metadata lock at least once 409 * 2) We've had to take the metadata lock at least once
409 * already to check for extending writes, hence insuring 410 * already to check for extending writes, suid removal, etc.
410 * that our current copy is also up to date. 411 * The meta data update code then ensures that we don't get a
412 * stale inode allocation image (i_size, i_clusters, etc).
411 */ 413 */
412 new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
413 if (new_i_size > i_size_read(inode)) {
414 extending = 1;
415 locklevel = 1;
416 }
417 414
418 ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page); 415 ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page);
419 if (ret != 0) { 416 if (ret != 0) {
420 mlog_errno(ret); 417 mlog_errno(ret);
421 goto out; 418 goto out;
@@ -427,23 +424,20 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
427 goto out_unlock_meta; 424 goto out_unlock_meta;
428 } 425 }
429 426
430 if (extending) { 427 handle = ocfs2_start_walk_page_trans(inode, page, from, to);
431 handle = ocfs2_start_walk_page_trans(inode, page, from, to); 428 if (IS_ERR(handle)) {
432 if (IS_ERR(handle)) { 429 ret = PTR_ERR(handle);
433 ret = PTR_ERR(handle); 430 goto out_unlock_data;
434 handle = NULL; 431 }
435 goto out_unlock_data;
436 }
437 432
438 /* Mark our buffer early. We'd rather catch this error up here 433 /* Mark our buffer early. We'd rather catch this error up here
439 * as opposed to after a successful commit_write which would 434 * as opposed to after a successful commit_write which would
440 * require us to set back inode->i_size. */ 435 * require us to set back inode->i_size. */
441 ret = ocfs2_journal_access(handle, inode, di_bh, 436 ret = ocfs2_journal_access(handle, inode, di_bh,
442 OCFS2_JOURNAL_ACCESS_WRITE); 437 OCFS2_JOURNAL_ACCESS_WRITE);
443 if (ret < 0) { 438 if (ret < 0) {
444 mlog_errno(ret); 439 mlog_errno(ret);
445 goto out_commit; 440 goto out_commit;
446 }
447 } 441 }
448 442
449 /* might update i_size */ 443 /* might update i_size */
@@ -453,37 +447,28 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
453 goto out_commit; 447 goto out_commit;
454 } 448 }
455 449
456 if (extending) { 450 di = (struct ocfs2_dinode *)di_bh->b_data;
457 loff_t size = (u64) i_size_read(inode);
458 struct ocfs2_dinode *di =
459 (struct ocfs2_dinode *)di_bh->b_data;
460 451
461 /* ocfs2_mark_inode_dirty is too heavy to use here. */ 452 /* ocfs2_mark_inode_dirty() is too heavy to use here. */
462 inode->i_blocks = ocfs2_align_bytes_to_sectors(size); 453 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
463 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 454 di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
455 di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
464 456
465 di->i_size = cpu_to_le64(size); 457 inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
466 di->i_ctime = di->i_mtime = 458 di->i_size = cpu_to_le64((u64)i_size_read(inode));
467 cpu_to_le64(inode->i_mtime.tv_sec);
468 di->i_ctime_nsec = di->i_mtime_nsec =
469 cpu_to_le32(inode->i_mtime.tv_nsec);
470 459
471 ret = ocfs2_journal_dirty(handle, di_bh); 460 ret = ocfs2_journal_dirty(handle, di_bh);
472 if (ret < 0) { 461 if (ret < 0) {
473 mlog_errno(ret); 462 mlog_errno(ret);
474 goto out_commit; 463 goto out_commit;
475 }
476 } 464 }
477 465
478 BUG_ON(extending && (i_size_read(inode) != new_i_size));
479
480out_commit: 466out_commit:
481 if (handle) 467 ocfs2_commit_trans(handle);
482 ocfs2_commit_trans(handle);
483out_unlock_data: 468out_unlock_data:
484 ocfs2_data_unlock(inode, 1); 469 ocfs2_data_unlock(inode, 1);
485out_unlock_meta: 470out_unlock_meta:
486 ocfs2_meta_unlock(inode, locklevel); 471 ocfs2_meta_unlock(inode, 1);
487out: 472out:
488 if (di_bh) 473 if (di_bh)
489 brelse(di_bh); 474 brelse(di_bh);
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 9a24adf9be6e..c9037414f4f6 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
100 mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", 100 mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
101 (unsigned long long)block, nr, flags, inode); 101 (unsigned long long)block, nr, flags, inode);
102 102
103 BUG_ON((flags & OCFS2_BH_READAHEAD) &&
104 (!inode || !(flags & OCFS2_BH_CACHED)));
105
103 if (osb == NULL || osb->sb == NULL || bhs == NULL) { 106 if (osb == NULL || osb->sb == NULL || bhs == NULL) {
104 status = -EINVAL; 107 status = -EINVAL;
105 mlog_errno(status); 108 mlog_errno(status);
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
140 bh = bhs[i]; 143 bh = bhs[i];
141 ignore_cache = 0; 144 ignore_cache = 0;
142 145
146 /* There are three read-ahead cases here which we need to
147 * be concerned with. All three assume a buffer has
148 * previously been submitted with OCFS2_BH_READAHEAD
149 * and it hasn't yet completed I/O.
150 *
151 * 1) The current request is sync to disk. This rarely
152 * happens these days, and never when performance
153 * matters - the code can just wait on the buffer
154 * lock and re-submit.
155 *
156 * 2) The current request is cached, but not
157 * readahead. ocfs2_buffer_uptodate() will return
158 * false anyway, so we'll wind up waiting on the
159 * buffer lock to do I/O. We re-check the request
160 * with after getting the lock to avoid a re-submit.
161 *
162 * 3) The current request is readahead (and so must
163 * also be a caching one). We short circuit if the
164 * buffer is locked (under I/O) and if it's in the
165 * uptodate cache. The re-check from #2 catches the
166 * case that the previous read-ahead completes just
167 * before our is-it-in-flight check.
168 */
169
143 if (flags & OCFS2_BH_CACHED && 170 if (flags & OCFS2_BH_CACHED &&
144 !ocfs2_buffer_uptodate(inode, bh)) { 171 !ocfs2_buffer_uptodate(inode, bh)) {
145 mlog(ML_UPTODATE, 172 mlog(ML_UPTODATE,
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
169 continue; 196 continue;
170 } 197 }
171 198
199 /* A read-ahead request was made - if the
200 * buffer is already under read-ahead from a
201 * previously submitted request then we are
202 * done here. */
203 if ((flags & OCFS2_BH_READAHEAD)
204 && ocfs2_buffer_read_ahead(inode, bh))
205 continue;
206
172 lock_buffer(bh); 207 lock_buffer(bh);
173 if (buffer_jbd(bh)) { 208 if (buffer_jbd(bh)) {
174#ifdef CATCH_BH_JBD_RACES 209#ifdef CATCH_BH_JBD_RACES
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
181 continue; 216 continue;
182#endif 217#endif
183 } 218 }
219
220 /* Re-check ocfs2_buffer_uptodate() as a
221 * previously read-ahead buffer may have
222 * completed I/O while we were waiting for the
223 * buffer lock. */
224 if ((flags & OCFS2_BH_CACHED)
225 && !(flags & OCFS2_BH_READAHEAD)
226 && ocfs2_buffer_uptodate(inode, bh)) {
227 unlock_buffer(bh);
228 continue;
229 }
230
184 clear_buffer_uptodate(bh); 231 clear_buffer_uptodate(bh);
185 get_bh(bh); /* for end_buffer_read_sync() */ 232 get_bh(bh); /* for end_buffer_read_sync() */
186 bh->b_end_io = end_buffer_read_sync; 233 bh->b_end_io = end_buffer_read_sync;
187 if (flags & OCFS2_BH_READAHEAD) 234 submit_bh(READ, bh);
188 submit_bh(READA, bh);
189 else
190 submit_bh(READ, bh);
191 continue; 235 continue;
192 } 236 }
193 } 237 }
@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
197 for (i = (nr - 1); i >= 0; i--) { 241 for (i = (nr - 1); i >= 0; i--) {
198 bh = bhs[i]; 242 bh = bhs[i];
199 243
200 /* We know this can't have changed as we hold the 244 if (!(flags & OCFS2_BH_READAHEAD)) {
201 * inode sem. Avoid doing any work on the bh if the 245 /* We know this can't have changed as we hold the
202 * journal has it. */ 246 * inode sem. Avoid doing any work on the bh if the
203 if (!buffer_jbd(bh)) 247 * journal has it. */
204 wait_on_buffer(bh); 248 if (!buffer_jbd(bh))
205 249 wait_on_buffer(bh);
206 if (!buffer_uptodate(bh)) { 250
207 /* Status won't be cleared from here on out, 251 if (!buffer_uptodate(bh)) {
208 * so we can safely record this and loop back 252 /* Status won't be cleared from here on out,
209 * to cleanup the other buffers. Don't need to 253 * so we can safely record this and loop back
210 * remove the clustered uptodate information 254 * to cleanup the other buffers. Don't need to
211 * for this bh as it's not marked locally 255 * remove the clustered uptodate information
212 * uptodate. */ 256 * for this bh as it's not marked locally
213 status = -EIO; 257 * uptodate. */
214 brelse(bh); 258 status = -EIO;
215 bhs[i] = NULL; 259 brelse(bh);
216 continue; 260 bhs[i] = NULL;
261 continue;
262 }
217 } 263 }
218 264
265 /* Always set the buffer in the cache, even if it was
266 * a forced read, or read-ahead which hasn't yet
267 * completed. */
219 if (inode) 268 if (inode)
220 ocfs2_set_buffer_uptodate(inode, bh); 269 ocfs2_set_buffer_uptodate(inode, bh);
221 } 270 }
222 if (inode) 271 if (inode)
223 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 272 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
224 273
225 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 274 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
226 (unsigned long long)block, nr, 275 (unsigned long long)block, nr,
227 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); 276 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
228 277
229bail: 278bail:
230 279
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 6ecb90937b68..6cc20930fac3 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb,
49 49
50 50
51#define OCFS2_BH_CACHED 1 51#define OCFS2_BH_CACHED 1
52#define OCFS2_BH_READAHEAD 8 /* use this to pass READA down to submit_bh */ 52#define OCFS2_BH_READAHEAD 8
53 53
54static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, 54static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
55 struct buffer_head **bh, int flags, 55 struct buffer_head **bh, int flags,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 504595d6cf65..305cba3681fe 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -320,8 +320,12 @@ static int compute_max_sectors(struct block_device *bdev)
320 max_pages = q->max_hw_segments; 320 max_pages = q->max_hw_segments;
321 max_pages--; /* Handle I/Os that straddle a page */ 321 max_pages--; /* Handle I/Os that straddle a page */
322 322
323 max_sectors = max_pages << (PAGE_SHIFT - 9); 323 if (max_pages) {
324 324 max_sectors = max_pages << (PAGE_SHIFT - 9);
325 } else {
326 /* If the BIO holds at most one page. */
327 max_sectors = q->max_sectors;
328 }
325 /* Why is fls() 1-based???? */ 329 /* Why is fls() 1-based???? */
326 pow_two_sectors = 1 << (fls(max_sectors) - 1); 330 pow_two_sectors = 1 << (fls(max_sectors) - 1);
327 331
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index ff9e2e2104c2..4b46aac7d243 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,11 +44,17 @@
44 * locking semantics of the file system using the protocol. It should 44 * locking semantics of the file system using the protocol. It should
45 * be somewhere else, I'm sure, but right now it isn't. 45 * be somewhere else, I'm sure, but right now it isn't.
46 * 46 *
47 * New in version 4:
48 * - Remove i_generation from lock names for better stat performance.
49 *
50 * New in version 3:
51 * - Replace dentry votes with a cluster lock
52 *
47 * New in version 2: 53 * New in version 2:
48 * - full 64 bit i_size in the metadata lock lvbs 54 * - full 64 bit i_size in the metadata lock lvbs
49 * - introduction of "rw" lock and pushing meta/data locking down 55 * - introduction of "rw" lock and pushing meta/data locking down
50 */ 56 */
51#define O2NET_PROTOCOL_VERSION 2ULL 57#define O2NET_PROTOCOL_VERSION 4ULL
52struct o2net_handshake { 58struct o2net_handshake {
53 __be64 protocol_version; 59 __be64 protocol_version;
54 __be64 connector_id; 60 __be64 connector_id;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 1a01380e3878..014e73978dac 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
35 35
36#include "alloc.h" 36#include "alloc.h"
37#include "dcache.h" 37#include "dcache.h"
38#include "dlmglue.h"
38#include "file.h" 39#include "file.h"
39#include "inode.h" 40#include "inode.h"
40 41
42
41static int ocfs2_dentry_revalidate(struct dentry *dentry, 43static int ocfs2_dentry_revalidate(struct dentry *dentry,
42 struct nameidata *nd) 44 struct nameidata *nd)
43{ 45{
44 struct inode *inode = dentry->d_inode; 46 struct inode *inode = dentry->d_inode;
45 int ret = 0; /* if all else fails, just return false */ 47 int ret = 0; /* if all else fails, just return false */
46 struct ocfs2_super *osb; 48 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
47 49
48 mlog_entry("(0x%p, '%.*s')\n", dentry, 50 mlog_entry("(0x%p, '%.*s')\n", dentry,
49 dentry->d_name.len, dentry->d_name.name); 51 dentry->d_name.len, dentry->d_name.name);
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
55 goto bail; 57 goto bail;
56 } 58 }
57 59
58 osb = OCFS2_SB(inode->i_sb);
59
60 BUG_ON(!osb); 60 BUG_ON(!osb);
61 61
62 if (inode != osb->root_inode) { 62 if (inode == osb->root_inode || is_bad_inode(inode))
63 spin_lock(&OCFS2_I(inode)->ip_lock); 63 goto bail;
64 /* did we or someone else delete this inode? */ 64
65 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { 65 spin_lock(&OCFS2_I(inode)->ip_lock);
66 spin_unlock(&OCFS2_I(inode)->ip_lock); 66 /* did we or someone else delete this inode? */
67 mlog(0, "inode (%llu) deleted, returning false\n", 67 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
68 (unsigned long long)OCFS2_I(inode)->ip_blkno);
69 goto bail;
70 }
71 spin_unlock(&OCFS2_I(inode)->ip_lock); 68 spin_unlock(&OCFS2_I(inode)->ip_lock);
69 mlog(0, "inode (%llu) deleted, returning false\n",
70 (unsigned long long)OCFS2_I(inode)->ip_blkno);
71 goto bail;
72 }
73 spin_unlock(&OCFS2_I(inode)->ip_lock);
72 74
73 if (!inode->i_nlink) { 75 /*
74 mlog(0, "Inode %llu orphaned, returning false " 76 * We don't need a cluster lock to test this because once an
75 "dir = %d\n", 77 * inode nlink hits zero, it never goes back.
76 (unsigned long long)OCFS2_I(inode)->ip_blkno, 78 */
77 S_ISDIR(inode->i_mode)); 79 if (inode->i_nlink == 0) {
78 goto bail; 80 mlog(0, "Inode %llu orphaned, returning false "
79 } 81 "dir = %d\n",
82 (unsigned long long)OCFS2_I(inode)->ip_blkno,
83 S_ISDIR(inode->i_mode));
84 goto bail;
80 } 85 }
81 86
82 ret = 1; 87 ret = 1;
@@ -87,6 +92,322 @@ bail:
87 return ret; 92 return ret;
88} 93}
89 94
95static int ocfs2_match_dentry(struct dentry *dentry,
96 u64 parent_blkno,
97 int skip_unhashed)
98{
99 struct inode *parent;
100
101 /*
102 * ocfs2_lookup() does a d_splice_alias() _before_ attaching
103 * to the lock data, so we skip those here, otherwise
104 * ocfs2_dentry_attach_lock() will get its original dentry
105 * back.
106 */
107 if (!dentry->d_fsdata)
108 return 0;
109
110 if (!dentry->d_parent)
111 return 0;
112
113 if (skip_unhashed && d_unhashed(dentry))
114 return 0;
115
116 parent = dentry->d_parent->d_inode;
117 /* Negative parent dentry? */
118 if (!parent)
119 return 0;
120
121 /* Name is in a different directory. */
122 if (OCFS2_I(parent)->ip_blkno != parent_blkno)
123 return 0;
124
125 return 1;
126}
127
128/*
129 * Walk the inode alias list, and find a dentry which has a given
130 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
131 * is looking for a dentry_lock reference. The vote thread is looking
132 * to unhash aliases, so we allow it to skip any that already have
133 * that property.
134 */
135struct dentry *ocfs2_find_local_alias(struct inode *inode,
136 u64 parent_blkno,
137 int skip_unhashed)
138{
139 struct list_head *p;
140 struct dentry *dentry = NULL;
141
142 spin_lock(&dcache_lock);
143
144 list_for_each(p, &inode->i_dentry) {
145 dentry = list_entry(p, struct dentry, d_alias);
146
147 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
148 mlog(0, "dentry found: %.*s\n",
149 dentry->d_name.len, dentry->d_name.name);
150
151 dget_locked(dentry);
152 break;
153 }
154
155 dentry = NULL;
156 }
157
158 spin_unlock(&dcache_lock);
159
160 return dentry;
161}
162
163DEFINE_SPINLOCK(dentry_attach_lock);
164
165/*
166 * Attach this dentry to a cluster lock.
167 *
168 * Dentry locks cover all links in a given directory to a particular
169 * inode. We do this so that ocfs2 can build a lock name which all
170 * nodes in the cluster can agree on at all times. Shoving full names
171 * in the cluster lock won't work due to size restrictions. Covering
172 * links inside of a directory is a good compromise because it still
173 * allows us to use the parent directory lock to synchronize
174 * operations.
175 *
176 * Call this function with the parent dir semaphore and the parent dir
177 * cluster lock held.
178 *
179 * The dir semaphore will protect us from having to worry about
180 * concurrent processes on our node trying to attach a lock at the
181 * same time.
182 *
183 * The dir cluster lock (held at either PR or EX mode) protects us
184 * from unlink and rename on other nodes.
185 *
186 * A dput() can happen asynchronously due to pruning, so we cover
187 * attaching and detaching the dentry lock with a
188 * dentry_attach_lock.
189 *
190 * A node which has done lookup on a name retains a protected read
191 * lock until final dput. If the user requests an unlink or rename,
192 * the protected read is upgraded to an exclusive lock. Other nodes
193 * who have seen the dentry will then be informed that they need to
194 * downgrade their lock, which will involve d_delete on the
195 * dentry. This happens in ocfs2_dentry_convert_worker().
196 */
197int ocfs2_dentry_attach_lock(struct dentry *dentry,
198 struct inode *inode,
199 u64 parent_blkno)
200{
201 int ret;
202 struct dentry *alias;
203 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
204
205 mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n",
206 dentry->d_name.len, dentry->d_name.name,
207 (unsigned long long)parent_blkno, dl);
208
209 /*
210 * Negative dentry. We ignore these for now.
211 *
212 * XXX: Could we improve ocfs2_dentry_revalidate() by
213 * tracking these?
214 */
215 if (!inode)
216 return 0;
217
218 if (dl) {
219 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
220 " \"%.*s\": old parent: %llu, new: %llu\n",
221 dentry->d_name.len, dentry->d_name.name,
222 (unsigned long long)parent_blkno,
223 (unsigned long long)dl->dl_parent_blkno);
224 return 0;
225 }
226
227 alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
228 if (alias) {
229 /*
230 * Great, an alias exists, which means we must have a
231 * dentry lock already. We can just grab the lock off
232 * the alias and add it to the list.
233 *
234 * We're depending here on the fact that this dentry
235 * was found and exists in the dcache and so must have
236 * a reference to the dentry_lock because we can't
237 * race creates. Final dput() cannot happen on it
238 * since we have it pinned, so our reference is safe.
239 */
240 dl = alias->d_fsdata;
241 mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
242 (unsigned long long)parent_blkno,
243 (unsigned long long)OCFS2_I(inode)->ip_blkno);
244
245 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
246 " \"%.*s\": old parent: %llu, new: %llu\n",
247 dentry->d_name.len, dentry->d_name.name,
248 (unsigned long long)parent_blkno,
249 (unsigned long long)dl->dl_parent_blkno);
250
251 mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
252
253 goto out_attach;
254 }
255
256 /*
257 * There are no other aliases
258 */
259 dl = kmalloc(sizeof(*dl), GFP_NOFS);
260 if (!dl) {
261 ret = -ENOMEM;
262 mlog_errno(ret);
263 return ret;
264 }
265
266 dl->dl_count = 0;
267 /*
268 * Does this have to happen below, for all attaches, in case
269 * the struct inode gets blown away by votes?
270 */
271 dl->dl_inode = igrab(inode);
272 dl->dl_parent_blkno = parent_blkno;
273 ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
274
275out_attach:
276 spin_lock(&dentry_attach_lock);
277 dentry->d_fsdata = dl;
278 dl->dl_count++;
279 spin_unlock(&dentry_attach_lock);
280
281 /*
282 * This actually gets us our PRMODE level lock. From now on,
283 * we'll have a notification if one of these names is
284 * destroyed on another node.
285 */
286 ret = ocfs2_dentry_lock(dentry, 0);
287 if (!ret)
288 ocfs2_dentry_unlock(dentry, 0);
289 else
290 mlog_errno(ret);
291
292 dput(alias);
293
294 return ret;
295}
296
297/*
298 * ocfs2_dentry_iput() and friends.
299 *
300 * At this point, our particular dentry is detached from the inodes
301 * alias list, so there's no way that the locking code can find it.
302 *
303 * The interesting stuff happens when we determine that our lock needs
304 * to go away because this is the last subdir alias in the
305 * system. This function needs to handle a couple things:
306 *
307 * 1) Synchronizing lock shutdown with the downconvert threads. This
308 * is already handled for us via the lockres release drop function
309 * called in ocfs2_release_dentry_lock()
310 *
311 * 2) A race may occur when we're doing our lock shutdown and
312 * another process wants to create a new dentry lock. Right now we
313 * let them race, which means that for a very short while, this
314 * node might have two locks on a lock resource. This should be a
315 * problem though because one of them is in the process of being
316 * thrown out.
317 */
318static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
319 struct ocfs2_dentry_lock *dl)
320{
321 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
322 ocfs2_lock_res_free(&dl->dl_lockres);
323 iput(dl->dl_inode);
324 kfree(dl);
325}
326
327void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
328 struct ocfs2_dentry_lock *dl)
329{
330 int unlock = 0;
331
332 BUG_ON(dl->dl_count == 0);
333
334 spin_lock(&dentry_attach_lock);
335 dl->dl_count--;
336 unlock = !dl->dl_count;
337 spin_unlock(&dentry_attach_lock);
338
339 if (unlock)
340 ocfs2_drop_dentry_lock(osb, dl);
341}
342
343static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
344{
345 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
346
347 mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
348 "dentry: %.*s\n", dentry->d_name.len,
349 dentry->d_name.name);
350
351 if (!dl)
352 goto out;
353
354 mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
355 dentry->d_name.len, dentry->d_name.name,
356 dl->dl_count);
357
358 ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
359
360out:
361 iput(inode);
362}
363
364/*
365 * d_move(), but keep the locks in sync.
366 *
367 * When we are done, "dentry" will have the parent dir and name of
368 * "target", which will be thrown away.
369 *
370 * We manually update the lock of "dentry" if need be.
371 *
372 * "target" doesn't have its dentry lock touched - we allow the later
373 * dput() to handle this for us.
374 *
375 * This is called during ocfs2_rename(), while holding parent
376 * directory locks. The dentries have already been deleted on other
377 * nodes via ocfs2_remote_dentry_delete().
378 *
379 * Normally, the VFS handles the d_move() for the file system, after
380 * the ->rename() callback. OCFS2 wants to handle this internally, so
381 * the new lock can be created atomically with respect to the cluster.
382 */
383void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
384 struct inode *old_dir, struct inode *new_dir)
385{
386 int ret;
387 struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
388 struct inode *inode = dentry->d_inode;
389
390 /*
391 * Move within the same directory, so the actual lock info won't
392 * change.
393 *
394 * XXX: Is there any advantage to dropping the lock here?
395 */
396 if (old_dir == new_dir)
397 goto out_move;
398
399 ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
400
401 dentry->d_fsdata = NULL;
402 ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno);
403 if (ret)
404 mlog_errno(ret);
405
406out_move:
407 d_move(dentry, target);
408}
409
90struct dentry_operations ocfs2_dentry_ops = { 410struct dentry_operations ocfs2_dentry_ops = {
91 .d_revalidate = ocfs2_dentry_revalidate, 411 .d_revalidate = ocfs2_dentry_revalidate,
412 .d_iput = ocfs2_dentry_iput,
92}; 413};
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index 90072771114b..c091c34d9883 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -28,4 +28,31 @@
28 28
29extern struct dentry_operations ocfs2_dentry_ops; 29extern struct dentry_operations ocfs2_dentry_ops;
30 30
31struct ocfs2_dentry_lock {
32 unsigned int dl_count;
33 u64 dl_parent_blkno;
34
35 /*
36 * The ocfs2_dentry_lock keeps an inode reference until
37 * dl_lockres has been destroyed. This is usually done in
38 * ->d_iput() anyway, so there should be minimal impact.
39 */
40 struct inode *dl_inode;
41 struct ocfs2_lock_res dl_lockres;
42};
43
44int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
45 u64 parent_blkno);
46
47void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
48 struct ocfs2_dentry_lock *dl);
49
50struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
51 int skip_unhashed);
52
53void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
54 struct inode *old_dir, struct inode *new_dir);
55
56extern spinlock_t dentry_attach_lock;
57
31#endif /* OCFS2_DCACHE_H */ 58#endif /* OCFS2_DCACHE_H */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 3d494d1a5f36..04e01915b86e 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
74int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) 74int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
75{ 75{
76 int error = 0; 76 int error = 0;
77 unsigned long offset, blk; 77 unsigned long offset, blk, last_ra_blk = 0;
78 int i, num, stored; 78 int i, stored;
79 struct buffer_head * bh, * tmp; 79 struct buffer_head * bh, * tmp;
80 struct ocfs2_dir_entry * de; 80 struct ocfs2_dir_entry * de;
81 int err; 81 int err;
82 struct inode *inode = filp->f_dentry->d_inode; 82 struct inode *inode = filp->f_dentry->d_inode;
83 struct super_block * sb = inode->i_sb; 83 struct super_block * sb = inode->i_sb;
84 int have_disk_lock = 0; 84 unsigned int ra_sectors = 16;
85 85
86 mlog_entry("dirino=%llu\n", 86 mlog_entry("dirino=%llu\n",
87 (unsigned long long)OCFS2_I(inode)->ip_blkno); 87 (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
95 mlog_errno(error); 95 mlog_errno(error);
96 /* we haven't got any yet, so propagate the error. */ 96 /* we haven't got any yet, so propagate the error. */
97 stored = error; 97 stored = error;
98 goto bail; 98 goto bail_nolock;
99 } 99 }
100 have_disk_lock = 1;
101 100
102 offset = filp->f_pos & (sb->s_blocksize - 1); 101 offset = filp->f_pos & (sb->s_blocksize - 1);
103 102
@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
113 continue; 112 continue;
114 } 113 }
115 114
116 /* 115 /* The idea here is to begin with 8k read-ahead and to stay
117 * Do the readahead (8k) 116 * 4k ahead of our current position.
118 */ 117 *
119 if (!offset) { 118 * TODO: Use the pagecache for this. We just need to
120 for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0; 119 * make sure it's cluster-safe... */
120 if (!last_ra_blk
121 || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
122 for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
121 i > 0; i--) { 123 i > 0; i--) {
122 tmp = ocfs2_bread(inode, ++blk, &err, 1); 124 tmp = ocfs2_bread(inode, ++blk, &err, 1);
123 if (tmp) 125 if (tmp)
124 brelse(tmp); 126 brelse(tmp);
125 } 127 }
128 last_ra_blk = blk;
129 ra_sectors = 8;
126 } 130 }
127 131
128revalidate: 132revalidate:
@@ -194,9 +198,9 @@ revalidate:
194 198
195 stored = 0; 199 stored = 0;
196bail: 200bail:
197 if (have_disk_lock) 201 ocfs2_meta_unlock(inode, 0);
198 ocfs2_meta_unlock(inode, 0);
199 202
203bail_nolock:
200 mlog_exit(stored); 204 mlog_exit(stored);
201 205
202 return stored; 206 return stored;
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index 53652f51c0e1..cfd5cb65cab0 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -182,6 +182,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm,
182 struct dlm_lockstatus *lksb, 182 struct dlm_lockstatus *lksb,
183 int flags, 183 int flags,
184 const char *name, 184 const char *name,
185 int namelen,
185 dlm_astlockfunc_t *ast, 186 dlm_astlockfunc_t *ast,
186 void *data, 187 void *data,
187 dlm_bastlockfunc_t *bast); 188 dlm_bastlockfunc_t *bast);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 42775e2bbe2c..681046d51393 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -320,8 +320,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
320 320
321 res = dlm_lookup_lockres(dlm, name, locklen); 321 res = dlm_lookup_lockres(dlm, name, locklen);
322 if (!res) { 322 if (!res) {
323 mlog(ML_ERROR, "got %sast for unknown lockres! " 323 mlog(0, "got %sast for unknown lockres! "
324 "cookie=%u:%llu, name=%.*s, namelen=%u\n", 324 "cookie=%u:%llu, name=%.*s, namelen=%u\n",
325 past->type == DLM_AST ? "" : "b", 325 past->type == DLM_AST ? "" : "b",
326 dlm_get_lock_cookie_node(cookie), 326 dlm_get_lock_cookie_node(cookie),
327 dlm_get_lock_cookie_seq(cookie), 327 dlm_get_lock_cookie_seq(cookie),
@@ -367,12 +367,10 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
367 goto do_ast; 367 goto do_ast;
368 } 368 }
369 369
370 mlog(ML_ERROR, "got %sast for unknown lock! cookie=%u:%llu, " 370 mlog(0, "got %sast for unknown lock! cookie=%u:%llu, "
371 "name=%.*s, namelen=%u\n", 371 "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b",
372 past->type == DLM_AST ? "" : "b", 372 dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie),
373 dlm_get_lock_cookie_node(cookie), 373 locklen, name, locklen);
374 dlm_get_lock_cookie_seq(cookie),
375 locklen, name, locklen);
376 374
377 ret = DLM_NORMAL; 375 ret = DLM_NORMAL;
378unlock_out: 376unlock_out:
@@ -464,7 +462,7 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
464 mlog(ML_ERROR, "sent AST to node %u, it returned " 462 mlog(ML_ERROR, "sent AST to node %u, it returned "
465 "DLM_MIGRATING!\n", lock->ml.node); 463 "DLM_MIGRATING!\n", lock->ml.node);
466 BUG(); 464 BUG();
467 } else if (status != DLM_NORMAL) { 465 } else if (status != DLM_NORMAL && status != DLM_IVLOCKID) {
468 mlog(ML_ERROR, "AST to node %u returned %d!\n", 466 mlog(ML_ERROR, "AST to node %u returned %d!\n",
469 lock->ml.node, status); 467 lock->ml.node, status);
470 /* ignore it */ 468 /* ignore it */
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 14530ee7e11d..fa968180b072 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -747,6 +747,7 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
747 u8 owner); 747 u8 owner);
748struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, 748struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
749 const char *lockid, 749 const char *lockid,
750 int namelen,
750 int flags); 751 int flags);
751struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, 752struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
752 const char *name, 753 const char *name,
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 5ca57ec650c7..42a1b91979b5 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -540,8 +540,8 @@ static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie)
540 540
541enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode, 541enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
542 struct dlm_lockstatus *lksb, int flags, 542 struct dlm_lockstatus *lksb, int flags,
543 const char *name, dlm_astlockfunc_t *ast, void *data, 543 const char *name, int namelen, dlm_astlockfunc_t *ast,
544 dlm_bastlockfunc_t *bast) 544 void *data, dlm_bastlockfunc_t *bast)
545{ 545{
546 enum dlm_status status; 546 enum dlm_status status;
547 struct dlm_lock_resource *res = NULL; 547 struct dlm_lock_resource *res = NULL;
@@ -571,7 +571,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
571 recovery = (flags & LKM_RECOVERY); 571 recovery = (flags & LKM_RECOVERY);
572 572
573 if (recovery && 573 if (recovery &&
574 (!dlm_is_recovery_lock(name, strlen(name)) || convert) ) { 574 (!dlm_is_recovery_lock(name, namelen) || convert) ) {
575 dlm_error(status); 575 dlm_error(status);
576 goto error; 576 goto error;
577 } 577 }
@@ -643,7 +643,7 @@ retry_convert:
643 } 643 }
644 644
645 status = DLM_IVBUFLEN; 645 status = DLM_IVBUFLEN;
646 if (strlen(name) > DLM_LOCKID_NAME_MAX || strlen(name) < 1) { 646 if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) {
647 dlm_error(status); 647 dlm_error(status);
648 goto error; 648 goto error;
649 } 649 }
@@ -659,7 +659,7 @@ retry_convert:
659 dlm_wait_for_recovery(dlm); 659 dlm_wait_for_recovery(dlm);
660 660
661 /* find or create the lock resource */ 661 /* find or create the lock resource */
662 res = dlm_get_lock_resource(dlm, name, flags); 662 res = dlm_get_lock_resource(dlm, name, namelen, flags);
663 if (!res) { 663 if (!res) {
664 status = DLM_IVLOCKID; 664 status = DLM_IVLOCKID;
665 dlm_error(status); 665 dlm_error(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9503240ef0e5..f784177b6241 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -740,6 +740,7 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
740 */ 740 */
741struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, 741struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
742 const char *lockid, 742 const char *lockid,
743 int namelen,
743 int flags) 744 int flags)
744{ 745{
745 struct dlm_lock_resource *tmpres=NULL, *res=NULL; 746 struct dlm_lock_resource *tmpres=NULL, *res=NULL;
@@ -748,13 +749,12 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
748 int blocked = 0; 749 int blocked = 0;
749 int ret, nodenum; 750 int ret, nodenum;
750 struct dlm_node_iter iter; 751 struct dlm_node_iter iter;
751 unsigned int namelen, hash; 752 unsigned int hash;
752 int tries = 0; 753 int tries = 0;
753 int bit, wait_on_recovery = 0; 754 int bit, wait_on_recovery = 0;
754 755
755 BUG_ON(!lockid); 756 BUG_ON(!lockid);
756 757
757 namelen = strlen(lockid);
758 hash = dlm_lockid_hash(lockid, namelen); 758 hash = dlm_lockid_hash(lockid, namelen);
759 759
760 mlog(0, "get lockres %s (len %d)\n", lockid, namelen); 760 mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 594745fab0b5..9d950d7cea38 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2285,7 +2285,8 @@ again:
2285 memset(&lksb, 0, sizeof(lksb)); 2285 memset(&lksb, 0, sizeof(lksb));
2286 2286
2287 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, 2287 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
2288 DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast); 2288 DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN,
2289 dlm_reco_ast, dlm, dlm_reco_bast);
2289 2290
2290 mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n", 2291 mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
2291 dlm->name, ret, lksb.status); 2292 dlm->name, ret, lksb.status);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index e641b084b343..eead48bbfac6 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -102,10 +102,10 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
102 spin_unlock(&lockres->l_lock); 102 spin_unlock(&lockres->l_lock);
103} 103}
104 104
105#define user_log_dlm_error(_func, _stat, _lockres) do { \ 105#define user_log_dlm_error(_func, _stat, _lockres) do { \
106 mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ 106 mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \
107 "resource %s: %s\n", dlm_errname(_stat), _func, \ 107 "resource %.*s: %s\n", dlm_errname(_stat), _func, \
108 _lockres->l_name, dlm_errmsg(_stat)); \ 108 _lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \
109} while (0) 109} while (0)
110 110
111/* WARNING: This function lives in a world where the only three lock 111/* WARNING: This function lives in a world where the only three lock
@@ -127,21 +127,22 @@ static void user_ast(void *opaque)
127 struct user_lock_res *lockres = opaque; 127 struct user_lock_res *lockres = opaque;
128 struct dlm_lockstatus *lksb; 128 struct dlm_lockstatus *lksb;
129 129
130 mlog(0, "AST fired for lockres %s\n", lockres->l_name); 130 mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen,
131 lockres->l_name);
131 132
132 spin_lock(&lockres->l_lock); 133 spin_lock(&lockres->l_lock);
133 134
134 lksb = &(lockres->l_lksb); 135 lksb = &(lockres->l_lksb);
135 if (lksb->status != DLM_NORMAL) { 136 if (lksb->status != DLM_NORMAL) {
136 mlog(ML_ERROR, "lksb status value of %u on lockres %s\n", 137 mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
137 lksb->status, lockres->l_name); 138 lksb->status, lockres->l_namelen, lockres->l_name);
138 spin_unlock(&lockres->l_lock); 139 spin_unlock(&lockres->l_lock);
139 return; 140 return;
140 } 141 }
141 142
142 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, 143 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
143 "Lockres %s, requested ivmode. flags 0x%x\n", 144 "Lockres %.*s, requested ivmode. flags 0x%x\n",
144 lockres->l_name, lockres->l_flags); 145 lockres->l_namelen, lockres->l_name, lockres->l_flags);
145 146
146 /* we're downconverting. */ 147 /* we're downconverting. */
147 if (lockres->l_requested < lockres->l_level) { 148 if (lockres->l_requested < lockres->l_level) {
@@ -213,8 +214,8 @@ static void user_bast(void *opaque, int level)
213{ 214{
214 struct user_lock_res *lockres = opaque; 215 struct user_lock_res *lockres = opaque;
215 216
216 mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n", 217 mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n",
217 lockres->l_name, level); 218 lockres->l_namelen, lockres->l_name, level);
218 219
219 spin_lock(&lockres->l_lock); 220 spin_lock(&lockres->l_lock);
220 lockres->l_flags |= USER_LOCK_BLOCKED; 221 lockres->l_flags |= USER_LOCK_BLOCKED;
@@ -231,7 +232,8 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
231{ 232{
232 struct user_lock_res *lockres = opaque; 233 struct user_lock_res *lockres = opaque;
233 234
234 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); 235 mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen,
236 lockres->l_name);
235 237
236 if (status != DLM_NORMAL && status != DLM_CANCELGRANT) 238 if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
237 mlog(ML_ERROR, "Dlm returns status %d\n", status); 239 mlog(ML_ERROR, "Dlm returns status %d\n", status);
@@ -244,8 +246,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
244 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { 246 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
245 lockres->l_level = LKM_IVMODE; 247 lockres->l_level = LKM_IVMODE;
246 } else if (status == DLM_CANCELGRANT) { 248 } else if (status == DLM_CANCELGRANT) {
247 mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
248 lockres->l_name, lockres->l_flags);
249 /* We tried to cancel a convert request, but it was 249 /* We tried to cancel a convert request, but it was
250 * already granted. Don't clear the busy flag - the 250 * already granted. Don't clear the busy flag - the
251 * ast should've done this already. */ 251 * ast should've done this already. */
@@ -255,8 +255,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
255 } else { 255 } else {
256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); 256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
257 /* Cancel succeeded, we want to re-queue */ 257 /* Cancel succeeded, we want to re-queue */
258 mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
259 lockres->l_name, lockres->l_flags);
260 lockres->l_requested = LKM_IVMODE; /* cancel an 258 lockres->l_requested = LKM_IVMODE; /* cancel an
261 * upconvert 259 * upconvert
262 * request. */ 260 * request. */
@@ -287,13 +285,14 @@ static void user_dlm_unblock_lock(void *opaque)
287 struct user_lock_res *lockres = (struct user_lock_res *) opaque; 285 struct user_lock_res *lockres = (struct user_lock_res *) opaque;
288 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); 286 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
289 287
290 mlog(0, "processing lockres %s\n", lockres->l_name); 288 mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
289 lockres->l_name);
291 290
292 spin_lock(&lockres->l_lock); 291 spin_lock(&lockres->l_lock);
293 292
294 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), 293 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
295 "Lockres %s, flags 0x%x\n", 294 "Lockres %.*s, flags 0x%x\n",
296 lockres->l_name, lockres->l_flags); 295 lockres->l_namelen, lockres->l_name, lockres->l_flags);
297 296
298 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's 297 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
299 * set, we want user_ast clear it. */ 298 * set, we want user_ast clear it. */
@@ -305,22 +304,16 @@ static void user_dlm_unblock_lock(void *opaque)
305 * flag, and finally we might get another bast which re-queues 304 * flag, and finally we might get another bast which re-queues
306 * us before our ast for the downconvert is called. */ 305 * us before our ast for the downconvert is called. */
307 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { 306 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
308 mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
309 lockres->l_name, lockres->l_flags);
310 spin_unlock(&lockres->l_lock); 307 spin_unlock(&lockres->l_lock);
311 goto drop_ref; 308 goto drop_ref;
312 } 309 }
313 310
314 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 311 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
315 mlog(0, "lock is in teardown so we do nothing\n");
316 spin_unlock(&lockres->l_lock); 312 spin_unlock(&lockres->l_lock);
317 goto drop_ref; 313 goto drop_ref;
318 } 314 }
319 315
320 if (lockres->l_flags & USER_LOCK_BUSY) { 316 if (lockres->l_flags & USER_LOCK_BUSY) {
321 mlog(0, "Cancel lock %s, flags 0x%x\n",
322 lockres->l_name, lockres->l_flags);
323
324 if (lockres->l_flags & USER_LOCK_IN_CANCEL) { 317 if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
325 spin_unlock(&lockres->l_lock); 318 spin_unlock(&lockres->l_lock);
326 goto drop_ref; 319 goto drop_ref;
@@ -372,6 +365,7 @@ static void user_dlm_unblock_lock(void *opaque)
372 &lockres->l_lksb, 365 &lockres->l_lksb,
373 LKM_CONVERT|LKM_VALBLK, 366 LKM_CONVERT|LKM_VALBLK,
374 lockres->l_name, 367 lockres->l_name,
368 lockres->l_namelen,
375 user_ast, 369 user_ast,
376 lockres, 370 lockres,
377 user_bast); 371 user_bast);
@@ -420,16 +414,16 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres,
420 414
421 if (level != LKM_EXMODE && 415 if (level != LKM_EXMODE &&
422 level != LKM_PRMODE) { 416 level != LKM_PRMODE) {
423 mlog(ML_ERROR, "lockres %s: invalid request!\n", 417 mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
424 lockres->l_name); 418 lockres->l_namelen, lockres->l_name);
425 status = -EINVAL; 419 status = -EINVAL;
426 goto bail; 420 goto bail;
427 } 421 }
428 422
429 mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n", 423 mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n",
430 lockres->l_name, 424 lockres->l_namelen, lockres->l_name,
431 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", 425 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
432 lkm_flags); 426 lkm_flags);
433 427
434again: 428again:
435 if (signal_pending(current)) { 429 if (signal_pending(current)) {
@@ -474,15 +468,13 @@ again:
474 BUG_ON(level == LKM_IVMODE); 468 BUG_ON(level == LKM_IVMODE);
475 BUG_ON(level == LKM_NLMODE); 469 BUG_ON(level == LKM_NLMODE);
476 470
477 mlog(0, "lock %s, get lock from %d to level = %d\n",
478 lockres->l_name, lockres->l_level, level);
479
480 /* call dlm_lock to upgrade lock now */ 471 /* call dlm_lock to upgrade lock now */
481 status = dlmlock(dlm, 472 status = dlmlock(dlm,
482 level, 473 level,
483 &lockres->l_lksb, 474 &lockres->l_lksb,
484 local_flags, 475 local_flags,
485 lockres->l_name, 476 lockres->l_name,
477 lockres->l_namelen,
486 user_ast, 478 user_ast,
487 lockres, 479 lockres,
488 user_bast); 480 user_bast);
@@ -498,9 +490,6 @@ again:
498 goto bail; 490 goto bail;
499 } 491 }
500 492
501 mlog(0, "lock %s, successfull return from dlmlock\n",
502 lockres->l_name);
503
504 user_wait_on_busy_lock(lockres); 493 user_wait_on_busy_lock(lockres);
505 goto again; 494 goto again;
506 } 495 }
@@ -508,9 +497,6 @@ again:
508 user_dlm_inc_holders(lockres, level); 497 user_dlm_inc_holders(lockres, level);
509 spin_unlock(&lockres->l_lock); 498 spin_unlock(&lockres->l_lock);
510 499
511 mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name,
512 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
513
514 status = 0; 500 status = 0;
515bail: 501bail:
516 return status; 502 return status;
@@ -538,13 +524,11 @@ void user_dlm_cluster_unlock(struct user_lock_res *lockres,
538{ 524{
539 if (level != LKM_EXMODE && 525 if (level != LKM_EXMODE &&
540 level != LKM_PRMODE) { 526 level != LKM_PRMODE) {
541 mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name); 527 mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
528 lockres->l_namelen, lockres->l_name);
542 return; 529 return;
543 } 530 }
544 531
545 mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name,
546 (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
547
548 spin_lock(&lockres->l_lock); 532 spin_lock(&lockres->l_lock);
549 user_dlm_dec_holders(lockres, level); 533 user_dlm_dec_holders(lockres, level);
550 __user_dlm_cond_queue_lockres(lockres); 534 __user_dlm_cond_queue_lockres(lockres);
@@ -602,6 +586,7 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres,
602 memcpy(lockres->l_name, 586 memcpy(lockres->l_name,
603 dentry->d_name.name, 587 dentry->d_name.name,
604 dentry->d_name.len); 588 dentry->d_name.len);
589 lockres->l_namelen = dentry->d_name.len;
605} 590}
606 591
607int user_dlm_destroy_lock(struct user_lock_res *lockres) 592int user_dlm_destroy_lock(struct user_lock_res *lockres)
@@ -609,11 +594,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
609 int status = -EBUSY; 594 int status = -EBUSY;
610 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); 595 struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
611 596
612 mlog(0, "asked to destroy %s\n", lockres->l_name); 597 mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name);
613 598
614 spin_lock(&lockres->l_lock); 599 spin_lock(&lockres->l_lock);
615 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 600 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
616 mlog(0, "Lock is already torn down\n");
617 spin_unlock(&lockres->l_lock); 601 spin_unlock(&lockres->l_lock);
618 return 0; 602 return 0;
619 } 603 }
@@ -623,8 +607,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
623 while (lockres->l_flags & USER_LOCK_BUSY) { 607 while (lockres->l_flags & USER_LOCK_BUSY) {
624 spin_unlock(&lockres->l_lock); 608 spin_unlock(&lockres->l_lock);
625 609
626 mlog(0, "lock %s is busy\n", lockres->l_name);
627
628 user_wait_on_busy_lock(lockres); 610 user_wait_on_busy_lock(lockres);
629 611
630 spin_lock(&lockres->l_lock); 612 spin_lock(&lockres->l_lock);
@@ -632,14 +614,12 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
632 614
633 if (lockres->l_ro_holders || lockres->l_ex_holders) { 615 if (lockres->l_ro_holders || lockres->l_ex_holders) {
634 spin_unlock(&lockres->l_lock); 616 spin_unlock(&lockres->l_lock);
635 mlog(0, "lock %s has holders\n", lockres->l_name);
636 goto bail; 617 goto bail;
637 } 618 }
638 619
639 status = 0; 620 status = 0;
640 if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { 621 if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
641 spin_unlock(&lockres->l_lock); 622 spin_unlock(&lockres->l_lock);
642 mlog(0, "lock %s is not attached\n", lockres->l_name);
643 goto bail; 623 goto bail;
644 } 624 }
645 625
@@ -647,7 +627,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
647 lockres->l_flags |= USER_LOCK_BUSY; 627 lockres->l_flags |= USER_LOCK_BUSY;
648 spin_unlock(&lockres->l_lock); 628 spin_unlock(&lockres->l_lock);
649 629
650 mlog(0, "unlocking lockres %s\n", lockres->l_name);
651 status = dlmunlock(dlm, 630 status = dlmunlock(dlm,
652 &lockres->l_lksb, 631 &lockres->l_lksb,
653 LKM_VALBLK, 632 LKM_VALBLK,
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
index 04178bc40b76..c400e93bbf79 100644
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -53,6 +53,7 @@ struct user_lock_res {
53 53
54#define USER_DLM_LOCK_ID_MAX_LEN 32 54#define USER_DLM_LOCK_ID_MAX_LEN 32
55 char l_name[USER_DLM_LOCK_ID_MAX_LEN]; 55 char l_name[USER_DLM_LOCK_ID_MAX_LEN];
56 int l_namelen;
56 int l_level; 57 int l_level;
57 unsigned int l_ro_holders; 58 unsigned int l_ro_holders;
58 unsigned int l_ex_holders; 59 unsigned int l_ex_holders;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 762eb1fbb34d..de887063dcfc 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -46,6 +46,7 @@
46#include "ocfs2.h" 46#include "ocfs2.h"
47 47
48#include "alloc.h" 48#include "alloc.h"
49#include "dcache.h"
49#include "dlmglue.h" 50#include "dlmglue.h"
50#include "extent_map.h" 51#include "extent_map.h"
51#include "heartbeat.h" 52#include "heartbeat.h"
@@ -66,78 +67,161 @@ struct ocfs2_mask_waiter {
66 unsigned long mw_goal; 67 unsigned long mw_goal;
67}; 68};
68 69
69static void ocfs2_inode_ast_func(void *opaque); 70static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
70static void ocfs2_inode_bast_func(void *opaque, 71static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
71 int level);
72static void ocfs2_super_ast_func(void *opaque);
73static void ocfs2_super_bast_func(void *opaque,
74 int level);
75static void ocfs2_rename_ast_func(void *opaque);
76static void ocfs2_rename_bast_func(void *opaque,
77 int level);
78
79/* so far, all locks have gotten along with the same unlock ast */
80static void ocfs2_unlock_ast_func(void *opaque,
81 enum dlm_status status);
82static int ocfs2_do_unblock_meta(struct inode *inode,
83 int *requeue);
84static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
85 int *requeue);
86static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
87 int *requeue);
88static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
89 int *requeue);
90static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
91 int *requeue);
92typedef void (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
93static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
94 struct ocfs2_lock_res *lockres,
95 int *requeue,
96 ocfs2_convert_worker_t *worker);
97 72
73/*
74 * Return value from ->downconvert_worker functions.
75 *
76 * These control the precise actions of ocfs2_unblock_lock()
77 * and ocfs2_process_blocked_lock()
78 *
79 */
80enum ocfs2_unblock_action {
81 UNBLOCK_CONTINUE = 0, /* Continue downconvert */
82 UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire
83 * ->post_unlock callback */
84 UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire
85 * ->post_unlock() callback. */
86};
87
88struct ocfs2_unblock_ctl {
89 int requeue;
90 enum ocfs2_unblock_action unblock_action;
91};
92
93static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
94 int new_level);
95static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
96
97static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
98 int blocking);
99
100static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
101 int blocking);
102
103static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
104 struct ocfs2_lock_res *lockres);
105
106/*
107 * OCFS2 Lock Resource Operations
108 *
109 * These fine tune the behavior of the generic dlmglue locking infrastructure.
110 *
111 * The most basic of lock types can point ->l_priv to their respective
112 * struct ocfs2_super and allow the default actions to manage things.
113 *
114 * Right now, each lock type also needs to implement an init function,
115 * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
116 * should be called when the lock is no longer needed (i.e., object
117 * destruction time).
118 */
98struct ocfs2_lock_res_ops { 119struct ocfs2_lock_res_ops {
99 void (*ast)(void *); 120 /*
100 void (*bast)(void *, int); 121 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
101 void (*unlock_ast)(void *, enum dlm_status); 122 * this callback if ->l_priv is not an ocfs2_super pointer
102 int (*unblock)(struct ocfs2_lock_res *, int *); 123 */
124 struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
125
126 /*
127 * Optionally called in the downconvert (or "vote") thread
128 * after a successful downconvert. The lockres will not be
129 * referenced after this callback is called, so it is safe to
130 * free memory, etc.
131 *
132 * The exact semantics of when this is called are controlled
133 * by ->downconvert_worker()
134 */
135 void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
136
137 /*
138 * Allow a lock type to add checks to determine whether it is
139 * safe to downconvert a lock. Return 0 to re-queue the
140 * downconvert at a later time, nonzero to continue.
141 *
142 * For most locks, the default checks that there are no
143 * incompatible holders are sufficient.
144 *
145 * Called with the lockres spinlock held.
146 */
147 int (*check_downconvert)(struct ocfs2_lock_res *, int);
148
149 /*
150 * Allows a lock type to populate the lock value block. This
151 * is called on downconvert, and when we drop a lock.
152 *
153 * Locks that want to use this should set LOCK_TYPE_USES_LVB
154 * in the flags field.
155 *
156 * Called with the lockres spinlock held.
157 */
158 void (*set_lvb)(struct ocfs2_lock_res *);
159
160 /*
161 * Called from the downconvert thread when it is determined
162 * that a lock will be downconverted. This is called without
163 * any locks held so the function can do work that might
164 * schedule (syncing out data, etc).
165 *
166 * This should return any one of the ocfs2_unblock_action
167 * values, depending on what it wants the thread to do.
168 */
169 int (*downconvert_worker)(struct ocfs2_lock_res *, int);
170
171 /*
172 * LOCK_TYPE_* flags which describe the specific requirements
173 * of a lock type. Descriptions of each individual flag follow.
174 */
175 int flags;
103}; 176};
104 177
178/*
179 * Some locks want to "refresh" potentially stale data when a
180 * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
181 * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
182 * individual lockres l_flags member from the ast function. It is
183 * expected that the locking wrapper will clear the
184 * OCFS2_LOCK_NEEDS_REFRESH flag when done.
185 */
186#define LOCK_TYPE_REQUIRES_REFRESH 0x1
187
188/*
189 * Indicate that a lock type makes use of the lock value block. The
190 * ->set_lvb lock type callback must be defined.
191 */
192#define LOCK_TYPE_USES_LVB 0x2
193
105static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { 194static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
106 .ast = ocfs2_inode_ast_func, 195 .get_osb = ocfs2_get_inode_osb,
107 .bast = ocfs2_inode_bast_func, 196 .flags = 0,
108 .unlock_ast = ocfs2_unlock_ast_func,
109 .unblock = ocfs2_unblock_inode_lock,
110}; 197};
111 198
112static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { 199static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
113 .ast = ocfs2_inode_ast_func, 200 .get_osb = ocfs2_get_inode_osb,
114 .bast = ocfs2_inode_bast_func, 201 .check_downconvert = ocfs2_check_meta_downconvert,
115 .unlock_ast = ocfs2_unlock_ast_func, 202 .set_lvb = ocfs2_set_meta_lvb,
116 .unblock = ocfs2_unblock_meta, 203 .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
117}; 204};
118 205
119static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
120 int blocking);
121
122static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { 206static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
123 .ast = ocfs2_inode_ast_func, 207 .get_osb = ocfs2_get_inode_osb,
124 .bast = ocfs2_inode_bast_func, 208 .downconvert_worker = ocfs2_data_convert_worker,
125 .unlock_ast = ocfs2_unlock_ast_func, 209 .flags = 0,
126 .unblock = ocfs2_unblock_data,
127}; 210};
128 211
129static struct ocfs2_lock_res_ops ocfs2_super_lops = { 212static struct ocfs2_lock_res_ops ocfs2_super_lops = {
130 .ast = ocfs2_super_ast_func, 213 .flags = LOCK_TYPE_REQUIRES_REFRESH,
131 .bast = ocfs2_super_bast_func,
132 .unlock_ast = ocfs2_unlock_ast_func,
133 .unblock = ocfs2_unblock_osb_lock,
134}; 214};
135 215
136static struct ocfs2_lock_res_ops ocfs2_rename_lops = { 216static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
137 .ast = ocfs2_rename_ast_func, 217 .flags = 0,
138 .bast = ocfs2_rename_bast_func, 218};
139 .unlock_ast = ocfs2_unlock_ast_func, 219
140 .unblock = ocfs2_unblock_osb_lock, 220static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
221 .get_osb = ocfs2_get_dentry_osb,
222 .post_unlock = ocfs2_dentry_post_unlock,
223 .downconvert_worker = ocfs2_dentry_convert_worker,
224 .flags = 0,
141}; 225};
142 226
143static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 227static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
@@ -147,29 +231,26 @@ static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
147 lockres->l_type == OCFS2_LOCK_TYPE_RW; 231 lockres->l_type == OCFS2_LOCK_TYPE_RW;
148} 232}
149 233
150static inline int ocfs2_is_super_lock(struct ocfs2_lock_res *lockres) 234static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
151{ 235{
152 return lockres->l_type == OCFS2_LOCK_TYPE_SUPER; 236 BUG_ON(!ocfs2_is_inode_lock(lockres));
153}
154 237
155static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres) 238 return (struct inode *) lockres->l_priv;
156{
157 return lockres->l_type == OCFS2_LOCK_TYPE_RENAME;
158} 239}
159 240
160static inline struct ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres) 241static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
161{ 242{
162 BUG_ON(!ocfs2_is_super_lock(lockres) 243 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
163 && !ocfs2_is_rename_lock(lockres));
164 244
165 return (struct ocfs2_super *) lockres->l_priv; 245 return (struct ocfs2_dentry_lock *)lockres->l_priv;
166} 246}
167 247
168static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 248static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
169{ 249{
170 BUG_ON(!ocfs2_is_inode_lock(lockres)); 250 if (lockres->l_ops->get_osb)
251 return lockres->l_ops->get_osb(lockres);
171 252
172 return (struct inode *) lockres->l_priv; 253 return (struct ocfs2_super *)lockres->l_priv;
173} 254}
174 255
175static int ocfs2_lock_create(struct ocfs2_super *osb, 256static int ocfs2_lock_create(struct ocfs2_super *osb,
@@ -200,25 +281,6 @@ static int ocfs2_meta_lock_update(struct inode *inode,
200 struct buffer_head **bh); 281 struct buffer_head **bh);
201static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); 282static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
202static inline int ocfs2_highest_compat_lock_level(int level); 283static inline int ocfs2_highest_compat_lock_level(int level);
203static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
204 struct ocfs2_lock_res *lockres,
205 int new_level);
206
207static char *ocfs2_lock_type_strings[] = {
208 [OCFS2_LOCK_TYPE_META] = "Meta",
209 [OCFS2_LOCK_TYPE_DATA] = "Data",
210 [OCFS2_LOCK_TYPE_SUPER] = "Super",
211 [OCFS2_LOCK_TYPE_RENAME] = "Rename",
212 /* Need to differntiate from [R]ename.. serializing writes is the
213 * important job it does, anyway. */
214 [OCFS2_LOCK_TYPE_RW] = "Write/Read",
215};
216
217static char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
218{
219 mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
220 return ocfs2_lock_type_strings[type];
221}
222 284
223static void ocfs2_build_lock_name(enum ocfs2_lock_type type, 285static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
224 u64 blkno, 286 u64 blkno,
@@ -265,13 +327,9 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
265static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, 327static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
266 struct ocfs2_lock_res *res, 328 struct ocfs2_lock_res *res,
267 enum ocfs2_lock_type type, 329 enum ocfs2_lock_type type,
268 u64 blkno,
269 u32 generation,
270 struct ocfs2_lock_res_ops *ops, 330 struct ocfs2_lock_res_ops *ops,
271 void *priv) 331 void *priv)
272{ 332{
273 ocfs2_build_lock_name(type, blkno, generation, res->l_name);
274
275 res->l_type = type; 333 res->l_type = type;
276 res->l_ops = ops; 334 res->l_ops = ops;
277 res->l_priv = priv; 335 res->l_priv = priv;
@@ -299,6 +357,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
299 357
300void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 358void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
301 enum ocfs2_lock_type type, 359 enum ocfs2_lock_type type,
360 unsigned int generation,
302 struct inode *inode) 361 struct inode *inode)
303{ 362{
304 struct ocfs2_lock_res_ops *ops; 363 struct ocfs2_lock_res_ops *ops;
@@ -319,9 +378,73 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
319 break; 378 break;
320 }; 379 };
321 380
322 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, 381 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
323 OCFS2_I(inode)->ip_blkno, 382 generation, res->l_name);
324 inode->i_generation, ops, inode); 383 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
384}
385
386static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
387{
388 struct inode *inode = ocfs2_lock_res_inode(lockres);
389
390 return OCFS2_SB(inode->i_sb);
391}
392
393static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
394{
395 __be64 inode_blkno_be;
396
397 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
398 sizeof(__be64));
399
400 return be64_to_cpu(inode_blkno_be);
401}
402
403static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
404{
405 struct ocfs2_dentry_lock *dl = lockres->l_priv;
406
407 return OCFS2_SB(dl->dl_inode->i_sb);
408}
409
410void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
411 u64 parent, struct inode *inode)
412{
413 int len;
414 u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
415 __be64 inode_blkno_be = cpu_to_be64(inode_blkno);
416 struct ocfs2_lock_res *lockres = &dl->dl_lockres;
417
418 ocfs2_lock_res_init_once(lockres);
419
420 /*
421 * Unfortunately, the standard lock naming scheme won't work
422 * here because we have two 16 byte values to use. Instead,
423 * we'll stuff the inode number as a binary value. We still
424 * want error prints to show something without garbling the
425 * display, so drop a null byte in there before the inode
426 * number. A future version of OCFS2 will likely use all
427 * binary lock names. The stringified names have been a
428 * tremendous aid in debugging, but now that the debugfs
429 * interface exists, we can mangle things there if need be.
430 *
431 * NOTE: We also drop the standard "pad" value (the total lock
432 * name size stays the same though - the last part is all
433 * zeros due to the memset in ocfs2_lock_res_init_once()
434 */
435 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
436 "%c%016llx",
437 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
438 (long long)parent);
439
440 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
441
442 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
443 sizeof(__be64));
444
445 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
446 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
447 dl);
325} 448}
326 449
327static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res, 450static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
@@ -330,8 +453,9 @@ static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
330 /* Superblock lockres doesn't come from a slab so we call init 453 /* Superblock lockres doesn't come from a slab so we call init
331 * once on it manually. */ 454 * once on it manually. */
332 ocfs2_lock_res_init_once(res); 455 ocfs2_lock_res_init_once(res);
456 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
457 0, res->l_name);
333 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER, 458 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
334 OCFS2_SUPER_BLOCK_BLKNO, 0,
335 &ocfs2_super_lops, osb); 459 &ocfs2_super_lops, osb);
336} 460}
337 461
@@ -341,7 +465,8 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
341 /* Rename lockres doesn't come from a slab so we call init 465 /* Rename lockres doesn't come from a slab so we call init
342 * once on it manually. */ 466 * once on it manually. */
343 ocfs2_lock_res_init_once(res); 467 ocfs2_lock_res_init_once(res);
344 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 0, 0, 468 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
469 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
345 &ocfs2_rename_lops, osb); 470 &ocfs2_rename_lops, osb);
346} 471}
347 472
@@ -495,7 +620,8 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
495 * information is already up to data. Convert from NL to 620 * information is already up to data. Convert from NL to
496 * *anything* however should mark ourselves as needing an 621 * *anything* however should mark ourselves as needing an
497 * update */ 622 * update */
498 if (lockres->l_level == LKM_NLMODE) 623 if (lockres->l_level == LKM_NLMODE &&
624 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
499 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 625 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
500 626
501 lockres->l_level = lockres->l_requested; 627 lockres->l_level = lockres->l_requested;
@@ -512,7 +638,8 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
512 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 638 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
513 639
514 if (lockres->l_requested > LKM_NLMODE && 640 if (lockres->l_requested > LKM_NLMODE &&
515 !(lockres->l_flags & OCFS2_LOCK_LOCAL)) 641 !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
642 lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
516 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); 643 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
517 644
518 lockres->l_level = lockres->l_requested; 645 lockres->l_level = lockres->l_requested;
@@ -522,68 +649,6 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
522 mlog_exit_void(); 649 mlog_exit_void();
523} 650}
524 651
525static void ocfs2_inode_ast_func(void *opaque)
526{
527 struct ocfs2_lock_res *lockres = opaque;
528 struct inode *inode;
529 struct dlm_lockstatus *lksb;
530 unsigned long flags;
531
532 mlog_entry_void();
533
534 inode = ocfs2_lock_res_inode(lockres);
535
536 mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n",
537 (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action,
538 ocfs2_lock_type_string(lockres->l_type));
539
540 BUG_ON(!ocfs2_is_inode_lock(lockres));
541
542 spin_lock_irqsave(&lockres->l_lock, flags);
543
544 lksb = &(lockres->l_lksb);
545 if (lksb->status != DLM_NORMAL) {
546 mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u "
547 "on inode %llu\n", lksb->status,
548 (unsigned long long)OCFS2_I(inode)->ip_blkno);
549 spin_unlock_irqrestore(&lockres->l_lock, flags);
550 mlog_exit_void();
551 return;
552 }
553
554 switch(lockres->l_action) {
555 case OCFS2_AST_ATTACH:
556 ocfs2_generic_handle_attach_action(lockres);
557 lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
558 break;
559 case OCFS2_AST_CONVERT:
560 ocfs2_generic_handle_convert_action(lockres);
561 break;
562 case OCFS2_AST_DOWNCONVERT:
563 ocfs2_generic_handle_downconvert_action(lockres);
564 break;
565 default:
566 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
567 "lockres flags = 0x%lx, unlock action: %u\n",
568 lockres->l_name, lockres->l_action, lockres->l_flags,
569 lockres->l_unlock_action);
570
571 BUG();
572 }
573
574 /* data and rw locking ignores refresh flag for now. */
575 if (lockres->l_type != OCFS2_LOCK_TYPE_META)
576 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
577
578 /* set it to something invalid so if we get called again we
579 * can catch it. */
580 lockres->l_action = OCFS2_AST_INVALID;
581 spin_unlock_irqrestore(&lockres->l_lock, flags);
582 wake_up(&lockres->l_event);
583
584 mlog_exit_void();
585}
586
587static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, 652static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
588 int level) 653 int level)
589{ 654{
@@ -610,54 +675,33 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
610 return needs_downconvert; 675 return needs_downconvert;
611} 676}
612 677
613static void ocfs2_generic_bast_func(struct ocfs2_super *osb, 678static void ocfs2_blocking_ast(void *opaque, int level)
614 struct ocfs2_lock_res *lockres,
615 int level)
616{ 679{
680 struct ocfs2_lock_res *lockres = opaque;
681 struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
617 int needs_downconvert; 682 int needs_downconvert;
618 unsigned long flags; 683 unsigned long flags;
619 684
620 mlog_entry_void();
621
622 BUG_ON(level <= LKM_NLMODE); 685 BUG_ON(level <= LKM_NLMODE);
623 686
687 mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
688 lockres->l_name, level, lockres->l_level,
689 ocfs2_lock_type_string(lockres->l_type));
690
624 spin_lock_irqsave(&lockres->l_lock, flags); 691 spin_lock_irqsave(&lockres->l_lock, flags);
625 needs_downconvert = ocfs2_generic_handle_bast(lockres, level); 692 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
626 if (needs_downconvert) 693 if (needs_downconvert)
627 ocfs2_schedule_blocked_lock(osb, lockres); 694 ocfs2_schedule_blocked_lock(osb, lockres);
628 spin_unlock_irqrestore(&lockres->l_lock, flags); 695 spin_unlock_irqrestore(&lockres->l_lock, flags);
629 696
630 ocfs2_kick_vote_thread(osb);
631
632 wake_up(&lockres->l_event); 697 wake_up(&lockres->l_event);
633 mlog_exit_void();
634}
635
636static void ocfs2_inode_bast_func(void *opaque, int level)
637{
638 struct ocfs2_lock_res *lockres = opaque;
639 struct inode *inode;
640 struct ocfs2_super *osb;
641
642 mlog_entry_void();
643
644 BUG_ON(!ocfs2_is_inode_lock(lockres));
645 698
646 inode = ocfs2_lock_res_inode(lockres); 699 ocfs2_kick_vote_thread(osb);
647 osb = OCFS2_SB(inode->i_sb);
648
649 mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n",
650 (unsigned long long)OCFS2_I(inode)->ip_blkno, level,
651 lockres->l_level, ocfs2_lock_type_string(lockres->l_type));
652
653 ocfs2_generic_bast_func(osb, lockres, level);
654
655 mlog_exit_void();
656} 700}
657 701
658static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres, 702static void ocfs2_locking_ast(void *opaque)
659 int ignore_refresh)
660{ 703{
704 struct ocfs2_lock_res *lockres = opaque;
661 struct dlm_lockstatus *lksb = &lockres->l_lksb; 705 struct dlm_lockstatus *lksb = &lockres->l_lksb;
662 unsigned long flags; 706 unsigned long flags;
663 707
@@ -673,6 +717,7 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
673 switch(lockres->l_action) { 717 switch(lockres->l_action) {
674 case OCFS2_AST_ATTACH: 718 case OCFS2_AST_ATTACH:
675 ocfs2_generic_handle_attach_action(lockres); 719 ocfs2_generic_handle_attach_action(lockres);
720 lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
676 break; 721 break;
677 case OCFS2_AST_CONVERT: 722 case OCFS2_AST_CONVERT:
678 ocfs2_generic_handle_convert_action(lockres); 723 ocfs2_generic_handle_convert_action(lockres);
@@ -681,80 +726,19 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
681 ocfs2_generic_handle_downconvert_action(lockres); 726 ocfs2_generic_handle_downconvert_action(lockres);
682 break; 727 break;
683 default: 728 default:
729 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
730 "lockres flags = 0x%lx, unlock action: %u\n",
731 lockres->l_name, lockres->l_action, lockres->l_flags,
732 lockres->l_unlock_action);
684 BUG(); 733 BUG();
685 } 734 }
686 735
687 if (ignore_refresh)
688 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
689
690 /* set it to something invalid so if we get called again we 736 /* set it to something invalid so if we get called again we
691 * can catch it. */ 737 * can catch it. */
692 lockres->l_action = OCFS2_AST_INVALID; 738 lockres->l_action = OCFS2_AST_INVALID;
693 spin_unlock_irqrestore(&lockres->l_lock, flags);
694 739
695 wake_up(&lockres->l_event); 740 wake_up(&lockres->l_event);
696} 741 spin_unlock_irqrestore(&lockres->l_lock, flags);
697
698static void ocfs2_super_ast_func(void *opaque)
699{
700 struct ocfs2_lock_res *lockres = opaque;
701
702 mlog_entry_void();
703 mlog(0, "Superblock AST fired\n");
704
705 BUG_ON(!ocfs2_is_super_lock(lockres));
706 ocfs2_generic_ast_func(lockres, 0);
707
708 mlog_exit_void();
709}
710
711static void ocfs2_super_bast_func(void *opaque,
712 int level)
713{
714 struct ocfs2_lock_res *lockres = opaque;
715 struct ocfs2_super *osb;
716
717 mlog_entry_void();
718 mlog(0, "Superblock BAST fired\n");
719
720 BUG_ON(!ocfs2_is_super_lock(lockres));
721 osb = ocfs2_lock_res_super(lockres);
722 ocfs2_generic_bast_func(osb, lockres, level);
723
724 mlog_exit_void();
725}
726
727static void ocfs2_rename_ast_func(void *opaque)
728{
729 struct ocfs2_lock_res *lockres = opaque;
730
731 mlog_entry_void();
732
733 mlog(0, "Rename AST fired\n");
734
735 BUG_ON(!ocfs2_is_rename_lock(lockres));
736
737 ocfs2_generic_ast_func(lockres, 1);
738
739 mlog_exit_void();
740}
741
742static void ocfs2_rename_bast_func(void *opaque,
743 int level)
744{
745 struct ocfs2_lock_res *lockres = opaque;
746 struct ocfs2_super *osb;
747
748 mlog_entry_void();
749
750 mlog(0, "Rename BAST fired\n");
751
752 BUG_ON(!ocfs2_is_rename_lock(lockres));
753
754 osb = ocfs2_lock_res_super(lockres);
755 ocfs2_generic_bast_func(osb, lockres, level);
756
757 mlog_exit_void();
758} 742}
759 743
760static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 744static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
@@ -810,9 +794,10 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
810 &lockres->l_lksb, 794 &lockres->l_lksb,
811 dlm_flags, 795 dlm_flags,
812 lockres->l_name, 796 lockres->l_name,
813 lockres->l_ops->ast, 797 OCFS2_LOCK_ID_MAX_LEN - 1,
798 ocfs2_locking_ast,
814 lockres, 799 lockres,
815 lockres->l_ops->bast); 800 ocfs2_blocking_ast);
816 if (status != DLM_NORMAL) { 801 if (status != DLM_NORMAL) {
817 ocfs2_log_dlm_error("dlmlock", status, lockres); 802 ocfs2_log_dlm_error("dlmlock", status, lockres);
818 ret = -EINVAL; 803 ret = -EINVAL;
@@ -930,6 +915,9 @@ static int ocfs2_cluster_lock(struct ocfs2_super *osb,
930 915
931 ocfs2_init_mask_waiter(&mw); 916 ocfs2_init_mask_waiter(&mw);
932 917
918 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
919 lkm_flags |= LKM_VALBLK;
920
933again: 921again:
934 wait = 0; 922 wait = 0;
935 923
@@ -997,11 +985,12 @@ again:
997 status = dlmlock(osb->dlm, 985 status = dlmlock(osb->dlm,
998 level, 986 level,
999 &lockres->l_lksb, 987 &lockres->l_lksb,
1000 lkm_flags|LKM_CONVERT|LKM_VALBLK, 988 lkm_flags|LKM_CONVERT,
1001 lockres->l_name, 989 lockres->l_name,
1002 lockres->l_ops->ast, 990 OCFS2_LOCK_ID_MAX_LEN - 1,
991 ocfs2_locking_ast,
1003 lockres, 992 lockres,
1004 lockres->l_ops->bast); 993 ocfs2_blocking_ast);
1005 if (status != DLM_NORMAL) { 994 if (status != DLM_NORMAL) {
1006 if ((lkm_flags & LKM_NOQUEUE) && 995 if ((lkm_flags & LKM_NOQUEUE) &&
1007 (status == DLM_NOTQUEUED)) 996 (status == DLM_NOTQUEUED))
@@ -1074,18 +1063,21 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1074 mlog_exit_void(); 1063 mlog_exit_void();
1075} 1064}
1076 1065
1077static int ocfs2_create_new_inode_lock(struct inode *inode, 1066int ocfs2_create_new_lock(struct ocfs2_super *osb,
1078 struct ocfs2_lock_res *lockres) 1067 struct ocfs2_lock_res *lockres,
1068 int ex,
1069 int local)
1079{ 1070{
1080 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1071 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1081 unsigned long flags; 1072 unsigned long flags;
1073 int lkm_flags = local ? LKM_LOCAL : 0;
1082 1074
1083 spin_lock_irqsave(&lockres->l_lock, flags); 1075 spin_lock_irqsave(&lockres->l_lock, flags);
1084 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1076 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1085 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1077 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1086 spin_unlock_irqrestore(&lockres->l_lock, flags); 1078 spin_unlock_irqrestore(&lockres->l_lock, flags);
1087 1079
1088 return ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL); 1080 return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1089} 1081}
1090 1082
1091/* Grants us an EX lock on the data and metadata resources, skipping 1083/* Grants us an EX lock on the data and metadata resources, skipping
@@ -1097,6 +1089,7 @@ static int ocfs2_create_new_inode_lock(struct inode *inode,
1097int ocfs2_create_new_inode_locks(struct inode *inode) 1089int ocfs2_create_new_inode_locks(struct inode *inode)
1098{ 1090{
1099 int ret; 1091 int ret;
1092 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1100 1093
1101 BUG_ON(!inode); 1094 BUG_ON(!inode);
1102 BUG_ON(!ocfs2_inode_is_new(inode)); 1095 BUG_ON(!ocfs2_inode_is_new(inode));
@@ -1113,22 +1106,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1113 * on a resource which has an invalid one -- we'll set it 1106 * on a resource which has an invalid one -- we'll set it
1114 * valid when we release the EX. */ 1107 * valid when we release the EX. */
1115 1108
1116 ret = ocfs2_create_new_inode_lock(inode, 1109 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1117 &OCFS2_I(inode)->ip_rw_lockres);
1118 if (ret) { 1110 if (ret) {
1119 mlog_errno(ret); 1111 mlog_errno(ret);
1120 goto bail; 1112 goto bail;
1121 } 1113 }
1122 1114
1123 ret = ocfs2_create_new_inode_lock(inode, 1115 /*
1124 &OCFS2_I(inode)->ip_meta_lockres); 1116 * We don't want to use LKM_LOCAL on a meta data lock as they
1117 * don't use a generation in their lock names.
1118 */
1119 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0);
1125 if (ret) { 1120 if (ret) {
1126 mlog_errno(ret); 1121 mlog_errno(ret);
1127 goto bail; 1122 goto bail;
1128 } 1123 }
1129 1124
1130 ret = ocfs2_create_new_inode_lock(inode, 1125 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
1131 &OCFS2_I(inode)->ip_data_lockres);
1132 if (ret) { 1126 if (ret) {
1133 mlog_errno(ret); 1127 mlog_errno(ret);
1134 goto bail; 1128 goto bail;
@@ -1317,7 +1311,17 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1317 1311
1318 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1312 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1319 1313
1320 lvb->lvb_version = cpu_to_be32(OCFS2_LVB_VERSION); 1314 /*
1315 * Invalidate the LVB of a deleted inode - this way other
1316 * nodes are forced to go to disk and discover the new inode
1317 * status.
1318 */
1319 if (oi->ip_flags & OCFS2_INODE_DELETED) {
1320 lvb->lvb_version = 0;
1321 goto out;
1322 }
1323
1324 lvb->lvb_version = OCFS2_LVB_VERSION;
1321 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1325 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
1322 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1326 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
1323 lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 1327 lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
@@ -1330,7 +1334,10 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1330 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime)); 1334 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
1331 lvb->lvb_imtime_packed = 1335 lvb->lvb_imtime_packed =
1332 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1336 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
1337 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
1338 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
1333 1339
1340out:
1334 mlog_meta_lvb(0, lockres); 1341 mlog_meta_lvb(0, lockres);
1335 1342
1336 mlog_exit_void(); 1343 mlog_exit_void();
@@ -1360,6 +1367,9 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1360 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters); 1367 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
1361 i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1368 i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
1362 1369
1370 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
1371 ocfs2_set_inode_flags(inode);
1372
1363 /* fast-symlinks are a special case */ 1373 /* fast-symlinks are a special case */
1364 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters) 1374 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
1365 inode->i_blocks = 0; 1375 inode->i_blocks = 0;
@@ -1382,11 +1392,13 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1382 mlog_exit_void(); 1392 mlog_exit_void();
1383} 1393}
1384 1394
1385static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres) 1395static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1396 struct ocfs2_lock_res *lockres)
1386{ 1397{
1387 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1398 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1388 1399
1389 if (be32_to_cpu(lvb->lvb_version) == OCFS2_LVB_VERSION) 1400 if (lvb->lvb_version == OCFS2_LVB_VERSION
1401 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
1390 return 1; 1402 return 1;
1391 return 0; 1403 return 0;
1392} 1404}
@@ -1483,7 +1495,7 @@ static int ocfs2_meta_lock_update(struct inode *inode,
1483 * map (directories, bitmap files, etc) */ 1495 * map (directories, bitmap files, etc) */
1484 ocfs2_extent_map_trunc(inode, 0); 1496 ocfs2_extent_map_trunc(inode, 0);
1485 1497
1486 if (ocfs2_meta_lvb_is_trustable(lockres)) { 1498 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
1487 mlog(0, "Trusting LVB on inode %llu\n", 1499 mlog(0, "Trusting LVB on inode %llu\n",
1488 (unsigned long long)oi->ip_blkno); 1500 (unsigned long long)oi->ip_blkno);
1489 ocfs2_refresh_inode_from_lvb(inode); 1501 ocfs2_refresh_inode_from_lvb(inode);
@@ -1624,6 +1636,18 @@ int ocfs2_meta_lock_full(struct inode *inode,
1624 wait_event(osb->recovery_event, 1636 wait_event(osb->recovery_event,
1625 ocfs2_node_map_is_empty(osb, &osb->recovery_map)); 1637 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
1626 1638
1639 /*
1640 * We only see this flag if we're being called from
1641 * ocfs2_read_locked_inode(). It means we're locking an inode
1642 * which hasn't been populated yet, so clear the refresh flag
1643 * and let the caller handle it.
1644 */
1645 if (inode->i_state & I_NEW) {
1646 status = 0;
1647 ocfs2_complete_lock_res_refresh(lockres, 0);
1648 goto bail;
1649 }
1650
1627 /* This is fun. The caller may want a bh back, or it may 1651 /* This is fun. The caller may want a bh back, or it may
1628 * not. ocfs2_meta_lock_update definitely wants one in, but 1652 * not. ocfs2_meta_lock_update definitely wants one in, but
1629 * may or may not read one, depending on what's in the 1653 * may or may not read one, depending on what's in the
@@ -1803,6 +1827,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
1803 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); 1827 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
1804} 1828}
1805 1829
1830int ocfs2_dentry_lock(struct dentry *dentry, int ex)
1831{
1832 int ret;
1833 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1834 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
1835 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
1836
1837 BUG_ON(!dl);
1838
1839 if (ocfs2_is_hard_readonly(osb))
1840 return -EROFS;
1841
1842 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
1843 if (ret < 0)
1844 mlog_errno(ret);
1845
1846 return ret;
1847}
1848
1849void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
1850{
1851 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1852 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
1853 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
1854
1855 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
1856}
1857
1806/* Reference counting of the dlm debug structure. We want this because 1858/* Reference counting of the dlm debug structure. We want this because
1807 * open references on the debug inodes can live on after a mount, so 1859 * open references on the debug inodes can live on after a mount, so
1808 * we can't rely on the ocfs2_super to always exist. */ 1860 * we can't rely on the ocfs2_super to always exist. */
@@ -1933,9 +1985,16 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
1933 if (!lockres) 1985 if (!lockres)
1934 return -EINVAL; 1986 return -EINVAL;
1935 1987
1936 seq_printf(m, "0x%x\t" 1988 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
1937 "%.*s\t" 1989
1938 "%d\t" 1990 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
1991 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
1992 lockres->l_name,
1993 (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
1994 else
1995 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
1996
1997 seq_printf(m, "%d\t"
1939 "0x%lx\t" 1998 "0x%lx\t"
1940 "0x%x\t" 1999 "0x%x\t"
1941 "0x%x\t" 2000 "0x%x\t"
@@ -1943,8 +2002,6 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
1943 "%u\t" 2002 "%u\t"
1944 "%d\t" 2003 "%d\t"
1945 "%d\t", 2004 "%d\t",
1946 OCFS2_DLM_DEBUG_STR_VERSION,
1947 OCFS2_LOCK_ID_MAX_LEN, lockres->l_name,
1948 lockres->l_level, 2005 lockres->l_level,
1949 lockres->l_flags, 2006 lockres->l_flags,
1950 lockres->l_action, 2007 lockres->l_action,
@@ -2134,7 +2191,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb)
2134 mlog_exit_void(); 2191 mlog_exit_void();
2135} 2192}
2136 2193
2137static void ocfs2_unlock_ast_func(void *opaque, enum dlm_status status) 2194static void ocfs2_unlock_ast(void *opaque, enum dlm_status status)
2138{ 2195{
2139 struct ocfs2_lock_res *lockres = opaque; 2196 struct ocfs2_lock_res *lockres = opaque;
2140 unsigned long flags; 2197 unsigned long flags;
@@ -2190,24 +2247,20 @@ complete_unlock:
2190 mlog_exit_void(); 2247 mlog_exit_void();
2191} 2248}
2192 2249
2193typedef void (ocfs2_pre_drop_cb_t)(struct ocfs2_lock_res *, void *);
2194
2195struct drop_lock_cb {
2196 ocfs2_pre_drop_cb_t *drop_func;
2197 void *drop_data;
2198};
2199
2200static int ocfs2_drop_lock(struct ocfs2_super *osb, 2250static int ocfs2_drop_lock(struct ocfs2_super *osb,
2201 struct ocfs2_lock_res *lockres, 2251 struct ocfs2_lock_res *lockres)
2202 struct drop_lock_cb *dcb)
2203{ 2252{
2204 enum dlm_status status; 2253 enum dlm_status status;
2205 unsigned long flags; 2254 unsigned long flags;
2255 int lkm_flags = 0;
2206 2256
2207 /* We didn't get anywhere near actually using this lockres. */ 2257 /* We didn't get anywhere near actually using this lockres. */
2208 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) 2258 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
2209 goto out; 2259 goto out;
2210 2260
2261 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
2262 lkm_flags |= LKM_VALBLK;
2263
2211 spin_lock_irqsave(&lockres->l_lock, flags); 2264 spin_lock_irqsave(&lockres->l_lock, flags);
2212 2265
2213 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING), 2266 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
@@ -2230,8 +2283,12 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2230 spin_lock_irqsave(&lockres->l_lock, flags); 2283 spin_lock_irqsave(&lockres->l_lock, flags);
2231 } 2284 }
2232 2285
2233 if (dcb) 2286 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
2234 dcb->drop_func(lockres, dcb->drop_data); 2287 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
2288 lockres->l_level == LKM_EXMODE &&
2289 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2290 lockres->l_ops->set_lvb(lockres);
2291 }
2235 2292
2236 if (lockres->l_flags & OCFS2_LOCK_BUSY) 2293 if (lockres->l_flags & OCFS2_LOCK_BUSY)
2237 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n", 2294 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
@@ -2257,8 +2314,8 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2257 2314
2258 mlog(0, "lock %s\n", lockres->l_name); 2315 mlog(0, "lock %s\n", lockres->l_name);
2259 2316
2260 status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK, 2317 status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags,
2261 lockres->l_ops->unlock_ast, lockres); 2318 ocfs2_unlock_ast, lockres);
2262 if (status != DLM_NORMAL) { 2319 if (status != DLM_NORMAL) {
2263 ocfs2_log_dlm_error("dlmunlock", status, lockres); 2320 ocfs2_log_dlm_error("dlmunlock", status, lockres);
2264 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); 2321 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
@@ -2305,43 +2362,26 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
2305 spin_unlock_irqrestore(&lockres->l_lock, flags); 2362 spin_unlock_irqrestore(&lockres->l_lock, flags);
2306} 2363}
2307 2364
2308static void ocfs2_drop_osb_locks(struct ocfs2_super *osb) 2365void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
2366 struct ocfs2_lock_res *lockres)
2309{ 2367{
2310 int status; 2368 int ret;
2311
2312 mlog_entry_void();
2313
2314 ocfs2_mark_lockres_freeing(&osb->osb_super_lockres);
2315
2316 status = ocfs2_drop_lock(osb, &osb->osb_super_lockres, NULL);
2317 if (status < 0)
2318 mlog_errno(status);
2319
2320 ocfs2_mark_lockres_freeing(&osb->osb_rename_lockres);
2321
2322 status = ocfs2_drop_lock(osb, &osb->osb_rename_lockres, NULL);
2323 if (status < 0)
2324 mlog_errno(status);
2325 2369
2326 mlog_exit(status); 2370 ocfs2_mark_lockres_freeing(lockres);
2371 ret = ocfs2_drop_lock(osb, lockres);
2372 if (ret)
2373 mlog_errno(ret);
2327} 2374}
2328 2375
2329static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data) 2376static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
2330{ 2377{
2331 struct inode *inode = data; 2378 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
2332 2379 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
2333 /* the metadata lock requires a bit more work as we have an
2334 * LVB to worry about. */
2335 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
2336 lockres->l_level == LKM_EXMODE &&
2337 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2338 __ocfs2_stuff_meta_lvb(inode);
2339} 2380}
2340 2381
2341int ocfs2_drop_inode_locks(struct inode *inode) 2382int ocfs2_drop_inode_locks(struct inode *inode)
2342{ 2383{
2343 int status, err; 2384 int status, err;
2344 struct drop_lock_cb meta_dcb = { ocfs2_meta_pre_drop, inode, };
2345 2385
2346 mlog_entry_void(); 2386 mlog_entry_void();
2347 2387
@@ -2349,24 +2389,21 @@ int ocfs2_drop_inode_locks(struct inode *inode)
2349 * ocfs2_clear_inode has done it for us. */ 2389 * ocfs2_clear_inode has done it for us. */
2350 2390
2351 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2391 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2352 &OCFS2_I(inode)->ip_data_lockres, 2392 &OCFS2_I(inode)->ip_data_lockres);
2353 NULL);
2354 if (err < 0) 2393 if (err < 0)
2355 mlog_errno(err); 2394 mlog_errno(err);
2356 2395
2357 status = err; 2396 status = err;
2358 2397
2359 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2398 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2360 &OCFS2_I(inode)->ip_meta_lockres, 2399 &OCFS2_I(inode)->ip_meta_lockres);
2361 &meta_dcb);
2362 if (err < 0) 2400 if (err < 0)
2363 mlog_errno(err); 2401 mlog_errno(err);
2364 if (err < 0 && !status) 2402 if (err < 0 && !status)
2365 status = err; 2403 status = err;
2366 2404
2367 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2405 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2368 &OCFS2_I(inode)->ip_rw_lockres, 2406 &OCFS2_I(inode)->ip_rw_lockres);
2369 NULL);
2370 if (err < 0) 2407 if (err < 0)
2371 mlog_errno(err); 2408 mlog_errno(err);
2372 if (err < 0 && !status) 2409 if (err < 0 && !status)
@@ -2415,9 +2452,10 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
2415 &lockres->l_lksb, 2452 &lockres->l_lksb,
2416 dlm_flags, 2453 dlm_flags,
2417 lockres->l_name, 2454 lockres->l_name,
2418 lockres->l_ops->ast, 2455 OCFS2_LOCK_ID_MAX_LEN - 1,
2456 ocfs2_locking_ast,
2419 lockres, 2457 lockres,
2420 lockres->l_ops->bast); 2458 ocfs2_blocking_ast);
2421 if (status != DLM_NORMAL) { 2459 if (status != DLM_NORMAL) {
2422 ocfs2_log_dlm_error("dlmlock", status, lockres); 2460 ocfs2_log_dlm_error("dlmlock", status, lockres);
2423 ret = -EINVAL; 2461 ret = -EINVAL;
@@ -2476,7 +2514,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
2476 status = dlmunlock(osb->dlm, 2514 status = dlmunlock(osb->dlm,
2477 &lockres->l_lksb, 2515 &lockres->l_lksb,
2478 LKM_CANCEL, 2516 LKM_CANCEL,
2479 lockres->l_ops->unlock_ast, 2517 ocfs2_unlock_ast,
2480 lockres); 2518 lockres);
2481 if (status != DLM_NORMAL) { 2519 if (status != DLM_NORMAL) {
2482 ocfs2_log_dlm_error("dlmunlock", status, lockres); 2520 ocfs2_log_dlm_error("dlmunlock", status, lockres);
@@ -2490,115 +2528,15 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
2490 return ret; 2528 return ret;
2491} 2529}
2492 2530
2493static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode, 2531static int ocfs2_unblock_lock(struct ocfs2_super *osb,
2494 struct ocfs2_lock_res *lockres, 2532 struct ocfs2_lock_res *lockres,
2495 int new_level) 2533 struct ocfs2_unblock_ctl *ctl)
2496{
2497 int ret;
2498
2499 mlog_entry_void();
2500
2501 BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
2502
2503 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2504 ret = 0;
2505 mlog(0, "lockres %s currently being refreshed -- backing "
2506 "off!\n", lockres->l_name);
2507 } else if (new_level == LKM_PRMODE)
2508 ret = !lockres->l_ex_holders &&
2509 ocfs2_inode_fully_checkpointed(inode);
2510 else /* Must be NLMODE we're converting to. */
2511 ret = !lockres->l_ro_holders && !lockres->l_ex_holders &&
2512 ocfs2_inode_fully_checkpointed(inode);
2513
2514 mlog_exit(ret);
2515 return ret;
2516}
2517
2518static int ocfs2_do_unblock_meta(struct inode *inode,
2519 int *requeue)
2520{
2521 int new_level;
2522 int set_lvb = 0;
2523 int ret = 0;
2524 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
2525 unsigned long flags;
2526
2527 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2528
2529 mlog_entry_void();
2530
2531 spin_lock_irqsave(&lockres->l_lock, flags);
2532
2533 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
2534
2535 mlog(0, "l_level=%d, l_blocking=%d\n", lockres->l_level,
2536 lockres->l_blocking);
2537
2538 BUG_ON(lockres->l_level != LKM_EXMODE &&
2539 lockres->l_level != LKM_PRMODE);
2540
2541 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
2542 *requeue = 1;
2543 ret = ocfs2_prepare_cancel_convert(osb, lockres);
2544 spin_unlock_irqrestore(&lockres->l_lock, flags);
2545 if (ret) {
2546 ret = ocfs2_cancel_convert(osb, lockres);
2547 if (ret < 0)
2548 mlog_errno(ret);
2549 }
2550 goto leave;
2551 }
2552
2553 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
2554
2555 mlog(0, "l_level=%d, l_blocking=%d, new_level=%d\n",
2556 lockres->l_level, lockres->l_blocking, new_level);
2557
2558 if (ocfs2_can_downconvert_meta_lock(inode, lockres, new_level)) {
2559 if (lockres->l_level == LKM_EXMODE)
2560 set_lvb = 1;
2561
2562 /* If the lock hasn't been refreshed yet (rare), then
2563 * our memory inode values are old and we skip
2564 * stuffing the lvb. There's no need to actually clear
2565 * out the lvb here as it's value is still valid. */
2566 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2567 if (set_lvb)
2568 __ocfs2_stuff_meta_lvb(inode);
2569 } else
2570 mlog(0, "lockres %s: downconverting stale lock!\n",
2571 lockres->l_name);
2572
2573 mlog(0, "calling ocfs2_downconvert_lock with l_level=%d, "
2574 "l_blocking=%d, new_level=%d\n",
2575 lockres->l_level, lockres->l_blocking, new_level);
2576
2577 ocfs2_prepare_downconvert(lockres, new_level);
2578 spin_unlock_irqrestore(&lockres->l_lock, flags);
2579 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
2580 goto leave;
2581 }
2582 if (!ocfs2_inode_fully_checkpointed(inode))
2583 ocfs2_start_checkpoint(osb);
2584
2585 *requeue = 1;
2586 spin_unlock_irqrestore(&lockres->l_lock, flags);
2587 ret = 0;
2588leave:
2589 mlog_exit(ret);
2590 return ret;
2591}
2592
2593static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
2594 struct ocfs2_lock_res *lockres,
2595 int *requeue,
2596 ocfs2_convert_worker_t *worker)
2597{ 2534{
2598 unsigned long flags; 2535 unsigned long flags;
2599 int blocking; 2536 int blocking;
2600 int new_level; 2537 int new_level;
2601 int ret = 0; 2538 int ret = 0;
2539 int set_lvb = 0;
2602 2540
2603 mlog_entry_void(); 2541 mlog_entry_void();
2604 2542
@@ -2608,7 +2546,7 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
2608 2546
2609recheck: 2547recheck:
2610 if (lockres->l_flags & OCFS2_LOCK_BUSY) { 2548 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
2611 *requeue = 1; 2549 ctl->requeue = 1;
2612 ret = ocfs2_prepare_cancel_convert(osb, lockres); 2550 ret = ocfs2_prepare_cancel_convert(osb, lockres);
2613 spin_unlock_irqrestore(&lockres->l_lock, flags); 2551 spin_unlock_irqrestore(&lockres->l_lock, flags);
2614 if (ret) { 2552 if (ret) {
@@ -2622,27 +2560,33 @@ recheck:
2622 /* if we're blocking an exclusive and we have *any* holders, 2560 /* if we're blocking an exclusive and we have *any* holders,
2623 * then requeue. */ 2561 * then requeue. */
2624 if ((lockres->l_blocking == LKM_EXMODE) 2562 if ((lockres->l_blocking == LKM_EXMODE)
2625 && (lockres->l_ex_holders || lockres->l_ro_holders)) { 2563 && (lockres->l_ex_holders || lockres->l_ro_holders))
2626 spin_unlock_irqrestore(&lockres->l_lock, flags); 2564 goto leave_requeue;
2627 *requeue = 1;
2628 ret = 0;
2629 goto leave;
2630 }
2631 2565
2632 /* If it's a PR we're blocking, then only 2566 /* If it's a PR we're blocking, then only
2633 * requeue if we've got any EX holders */ 2567 * requeue if we've got any EX holders */
2634 if (lockres->l_blocking == LKM_PRMODE && 2568 if (lockres->l_blocking == LKM_PRMODE &&
2635 lockres->l_ex_holders) { 2569 lockres->l_ex_holders)
2636 spin_unlock_irqrestore(&lockres->l_lock, flags); 2570 goto leave_requeue;
2637 *requeue = 1; 2571
2638 ret = 0; 2572 /*
2639 goto leave; 2573 * Can we get a lock in this state if the holder counts are
2640 } 2574 * zero? The meta data unblock code used to check this.
2575 */
2576 if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
2577 && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
2578 goto leave_requeue;
2579
2580 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
2581
2582 if (lockres->l_ops->check_downconvert
2583 && !lockres->l_ops->check_downconvert(lockres, new_level))
2584 goto leave_requeue;
2641 2585
2642 /* If we get here, then we know that there are no more 2586 /* If we get here, then we know that there are no more
2643 * incompatible holders (and anyone asking for an incompatible 2587 * incompatible holders (and anyone asking for an incompatible
2644 * lock is blocked). We can now downconvert the lock */ 2588 * lock is blocked). We can now downconvert the lock */
2645 if (!worker) 2589 if (!lockres->l_ops->downconvert_worker)
2646 goto downconvert; 2590 goto downconvert;
2647 2591
2648 /* Some lockres types want to do a bit of work before 2592 /* Some lockres types want to do a bit of work before
@@ -2652,7 +2596,10 @@ recheck:
2652 blocking = lockres->l_blocking; 2596 blocking = lockres->l_blocking;
2653 spin_unlock_irqrestore(&lockres->l_lock, flags); 2597 spin_unlock_irqrestore(&lockres->l_lock, flags);
2654 2598
2655 worker(lockres, blocking); 2599 ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
2600
2601 if (ctl->unblock_action == UNBLOCK_STOP_POST)
2602 goto leave;
2656 2603
2657 spin_lock_irqsave(&lockres->l_lock, flags); 2604 spin_lock_irqsave(&lockres->l_lock, flags);
2658 if (blocking != lockres->l_blocking) { 2605 if (blocking != lockres->l_blocking) {
@@ -2662,25 +2609,43 @@ recheck:
2662 } 2609 }
2663 2610
2664downconvert: 2611downconvert:
2665 *requeue = 0; 2612 ctl->requeue = 0;
2666 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking); 2613
2614 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
2615 if (lockres->l_level == LKM_EXMODE)
2616 set_lvb = 1;
2617
2618 /*
2619 * We only set the lvb if the lock has been fully
2620 * refreshed - otherwise we risk setting stale
2621 * data. Otherwise, there's no need to actually clear
2622 * out the lvb here as it's value is still valid.
2623 */
2624 if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2625 lockres->l_ops->set_lvb(lockres);
2626 }
2667 2627
2668 ocfs2_prepare_downconvert(lockres, new_level); 2628 ocfs2_prepare_downconvert(lockres, new_level);
2669 spin_unlock_irqrestore(&lockres->l_lock, flags); 2629 spin_unlock_irqrestore(&lockres->l_lock, flags);
2670 ret = ocfs2_downconvert_lock(osb, lockres, new_level, 0); 2630 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
2671leave: 2631leave:
2672 mlog_exit(ret); 2632 mlog_exit(ret);
2673 return ret; 2633 return ret;
2634
2635leave_requeue:
2636 spin_unlock_irqrestore(&lockres->l_lock, flags);
2637 ctl->requeue = 1;
2638
2639 mlog_exit(0);
2640 return 0;
2674} 2641}
2675 2642
2676static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, 2643static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
2677 int blocking) 2644 int blocking)
2678{ 2645{
2679 struct inode *inode; 2646 struct inode *inode;
2680 struct address_space *mapping; 2647 struct address_space *mapping;
2681 2648
2682 mlog_entry_void();
2683
2684 inode = ocfs2_lock_res_inode(lockres); 2649 inode = ocfs2_lock_res_inode(lockres);
2685 mapping = inode->i_mapping; 2650 mapping = inode->i_mapping;
2686 2651
@@ -2701,116 +2666,159 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
2701 filemap_fdatawait(mapping); 2666 filemap_fdatawait(mapping);
2702 } 2667 }
2703 2668
2704 mlog_exit_void(); 2669 return UNBLOCK_CONTINUE;
2705} 2670}
2706 2671
2707int ocfs2_unblock_data(struct ocfs2_lock_res *lockres, 2672static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
2708 int *requeue) 2673 int new_level)
2709{ 2674{
2710 int status; 2675 struct inode *inode = ocfs2_lock_res_inode(lockres);
2711 struct inode *inode; 2676 int checkpointed = ocfs2_inode_fully_checkpointed(inode);
2712 struct ocfs2_super *osb;
2713
2714 mlog_entry_void();
2715
2716 inode = ocfs2_lock_res_inode(lockres);
2717 osb = OCFS2_SB(inode->i_sb);
2718
2719 mlog(0, "unblock inode %llu\n",
2720 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2721 2677
2722 status = ocfs2_generic_unblock_lock(osb, 2678 BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
2723 lockres, 2679 BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed);
2724 requeue,
2725 ocfs2_data_convert_worker);
2726 if (status < 0)
2727 mlog_errno(status);
2728 2680
2729 mlog(0, "inode %llu, requeue = %d\n", 2681 if (checkpointed)
2730 (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); 2682 return 1;
2731 2683
2732 mlog_exit(status); 2684 ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb));
2733 return status; 2685 return 0;
2734} 2686}
2735 2687
2736static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres, 2688static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
2737 int *requeue)
2738{ 2689{
2739 int status; 2690 struct inode *inode = ocfs2_lock_res_inode(lockres);
2740 struct inode *inode;
2741
2742 mlog_entry_void();
2743
2744 mlog(0, "Unblock lockres %s\n", lockres->l_name);
2745
2746 inode = ocfs2_lock_res_inode(lockres);
2747 2691
2748 status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb), 2692 __ocfs2_stuff_meta_lvb(inode);
2749 lockres,
2750 requeue,
2751 NULL);
2752 if (status < 0)
2753 mlog_errno(status);
2754
2755 mlog_exit(status);
2756 return status;
2757} 2693}
2758 2694
2759 2695/*
2760int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres, 2696 * Does the final reference drop on our dentry lock. Right now this
2761 int *requeue) 2697 * happens in the vote thread, but we could choose to simplify the
2698 * dlmglue API and push these off to the ocfs2_wq in the future.
2699 */
2700static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
2701 struct ocfs2_lock_res *lockres)
2762{ 2702{
2763 int status; 2703 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
2764 struct inode *inode; 2704 ocfs2_dentry_lock_put(osb, dl);
2765 2705}
2766 mlog_entry_void();
2767 2706
2768 inode = ocfs2_lock_res_inode(lockres); 2707/*
2708 * d_delete() matching dentries before the lock downconvert.
2709 *
2710 * At this point, any process waiting to destroy the
2711 * dentry_lock due to last ref count is stopped by the
2712 * OCFS2_LOCK_QUEUED flag.
2713 *
2714 * We have two potential problems
2715 *
2716 * 1) If we do the last reference drop on our dentry_lock (via dput)
2717 * we'll wind up in ocfs2_release_dentry_lock(), waiting on
2718 * the downconvert to finish. Instead we take an elevated
2719 * reference and push the drop until after we've completed our
2720 * unblock processing.
2721 *
2722 * 2) There might be another process with a final reference,
2723 * waiting on us to finish processing. If this is the case, we
2724 * detect it and exit out - there's no more dentries anyway.
2725 */
2726static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
2727 int blocking)
2728{
2729 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
2730 struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
2731 struct dentry *dentry;
2732 unsigned long flags;
2733 int extra_ref = 0;
2769 2734
2770 mlog(0, "unblock inode %llu\n", 2735 /*
2771 (unsigned long long)OCFS2_I(inode)->ip_blkno); 2736 * This node is blocking another node from getting a read
2737 * lock. This happens when we've renamed within a
2738 * directory. We've forced the other nodes to d_delete(), but
2739 * we never actually dropped our lock because it's still
2740 * valid. The downconvert code will retain a PR for this node,
2741 * so there's no further work to do.
2742 */
2743 if (blocking == LKM_PRMODE)
2744 return UNBLOCK_CONTINUE;
2772 2745
2773 status = ocfs2_do_unblock_meta(inode, requeue); 2746 /*
2774 if (status < 0) 2747 * Mark this inode as potentially orphaned. The code in
2775 mlog_errno(status); 2748 * ocfs2_delete_inode() will figure out whether it actually
2749 * needs to be freed or not.
2750 */
2751 spin_lock(&oi->ip_lock);
2752 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2753 spin_unlock(&oi->ip_lock);
2776 2754
2777 mlog(0, "inode %llu, requeue = %d\n", 2755 /*
2778 (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue); 2756 * Yuck. We need to make sure however that the check of
2757 * OCFS2_LOCK_FREEING and the extra reference are atomic with
2758 * respect to a reference decrement or the setting of that
2759 * flag.
2760 */
2761 spin_lock_irqsave(&lockres->l_lock, flags);
2762 spin_lock(&dentry_attach_lock);
2763 if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
2764 && dl->dl_count) {
2765 dl->dl_count++;
2766 extra_ref = 1;
2767 }
2768 spin_unlock(&dentry_attach_lock);
2769 spin_unlock_irqrestore(&lockres->l_lock, flags);
2779 2770
2780 mlog_exit(status); 2771 mlog(0, "extra_ref = %d\n", extra_ref);
2781 return status;
2782}
2783 2772
2784/* Generic unblock function for any lockres whose private data is an 2773 /*
2785 * ocfs2_super pointer. */ 2774 * We have a process waiting on us in ocfs2_dentry_iput(),
2786static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres, 2775 * which means we can't have any more outstanding
2787 int *requeue) 2776 * aliases. There's no need to do any more work.
2788{ 2777 */
2789 int status; 2778 if (!extra_ref)
2790 struct ocfs2_super *osb; 2779 return UNBLOCK_CONTINUE;
2780
2781 spin_lock(&dentry_attach_lock);
2782 while (1) {
2783 dentry = ocfs2_find_local_alias(dl->dl_inode,
2784 dl->dl_parent_blkno, 1);
2785 if (!dentry)
2786 break;
2787 spin_unlock(&dentry_attach_lock);
2791 2788
2792 mlog_entry_void(); 2789 mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
2790 dentry->d_name.name);
2793 2791
2794 mlog(0, "Unblock lockres %s\n", lockres->l_name); 2792 /*
2793 * The following dcache calls may do an
2794 * iput(). Normally we don't want that from the
2795 * downconverting thread, but in this case it's ok
2796 * because the requesting node already has an
2797 * exclusive lock on the inode, so it can't be queued
2798 * for a downconvert.
2799 */
2800 d_delete(dentry);
2801 dput(dentry);
2795 2802
2796 osb = ocfs2_lock_res_super(lockres); 2803 spin_lock(&dentry_attach_lock);
2804 }
2805 spin_unlock(&dentry_attach_lock);
2797 2806
2798 status = ocfs2_generic_unblock_lock(osb, 2807 /*
2799 lockres, 2808 * If we are the last holder of this dentry lock, there is no
2800 requeue, 2809 * reason to downconvert so skip straight to the unlock.
2801 NULL); 2810 */
2802 if (status < 0) 2811 if (dl->dl_count == 1)
2803 mlog_errno(status); 2812 return UNBLOCK_STOP_POST;
2804 2813
2805 mlog_exit(status); 2814 return UNBLOCK_CONTINUE_POST;
2806 return status;
2807} 2815}
2808 2816
2809void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 2817void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
2810 struct ocfs2_lock_res *lockres) 2818 struct ocfs2_lock_res *lockres)
2811{ 2819{
2812 int status; 2820 int status;
2813 int requeue = 0; 2821 struct ocfs2_unblock_ctl ctl = {0, 0,};
2814 unsigned long flags; 2822 unsigned long flags;
2815 2823
2816 /* Our reference to the lockres in this function can be 2824 /* Our reference to the lockres in this function can be
@@ -2821,7 +2829,6 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
2821 2829
2822 BUG_ON(!lockres); 2830 BUG_ON(!lockres);
2823 BUG_ON(!lockres->l_ops); 2831 BUG_ON(!lockres->l_ops);
2824 BUG_ON(!lockres->l_ops->unblock);
2825 2832
2826 mlog(0, "lockres %s blocked.\n", lockres->l_name); 2833 mlog(0, "lockres %s blocked.\n", lockres->l_name);
2827 2834
@@ -2835,21 +2842,25 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
2835 goto unqueue; 2842 goto unqueue;
2836 spin_unlock_irqrestore(&lockres->l_lock, flags); 2843 spin_unlock_irqrestore(&lockres->l_lock, flags);
2837 2844
2838 status = lockres->l_ops->unblock(lockres, &requeue); 2845 status = ocfs2_unblock_lock(osb, lockres, &ctl);
2839 if (status < 0) 2846 if (status < 0)
2840 mlog_errno(status); 2847 mlog_errno(status);
2841 2848
2842 spin_lock_irqsave(&lockres->l_lock, flags); 2849 spin_lock_irqsave(&lockres->l_lock, flags);
2843unqueue: 2850unqueue:
2844 if (lockres->l_flags & OCFS2_LOCK_FREEING || !requeue) { 2851 if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
2845 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED); 2852 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
2846 } else 2853 } else
2847 ocfs2_schedule_blocked_lock(osb, lockres); 2854 ocfs2_schedule_blocked_lock(osb, lockres);
2848 2855
2849 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name, 2856 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
2850 requeue ? "yes" : "no"); 2857 ctl.requeue ? "yes" : "no");
2851 spin_unlock_irqrestore(&lockres->l_lock, flags); 2858 spin_unlock_irqrestore(&lockres->l_lock, flags);
2852 2859
2860 if (ctl.unblock_action != UNBLOCK_CONTINUE
2861 && lockres->l_ops->post_unlock)
2862 lockres->l_ops->post_unlock(osb, lockres);
2863
2853 mlog_exit_void(); 2864 mlog_exit_void();
2854} 2865}
2855 2866
@@ -2892,15 +2903,17 @@ void ocfs2_dump_meta_lvb_info(u64 level,
2892 2903
2893 mlog(level, "LVB information for %s (called from %s:%u):\n", 2904 mlog(level, "LVB information for %s (called from %s:%u):\n",
2894 lockres->l_name, function, line); 2905 lockres->l_name, function, line);
2895 mlog(level, "version: %u, clusters: %u\n", 2906 mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
2896 be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters)); 2907 lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
2908 be32_to_cpu(lvb->lvb_igeneration));
2897 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n", 2909 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
2898 (unsigned long long)be64_to_cpu(lvb->lvb_isize), 2910 (unsigned long long)be64_to_cpu(lvb->lvb_isize),
2899 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid), 2911 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
2900 be16_to_cpu(lvb->lvb_imode)); 2912 be16_to_cpu(lvb->lvb_imode));
2901 mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, " 2913 mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
2902 "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink), 2914 "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
2903 (long long)be64_to_cpu(lvb->lvb_iatime_packed), 2915 (long long)be64_to_cpu(lvb->lvb_iatime_packed),
2904 (long long)be64_to_cpu(lvb->lvb_ictime_packed), 2916 (long long)be64_to_cpu(lvb->lvb_ictime_packed),
2905 (long long)be64_to_cpu(lvb->lvb_imtime_packed)); 2917 (long long)be64_to_cpu(lvb->lvb_imtime_packed),
2918 be32_to_cpu(lvb->lvb_iattr));
2906} 2919}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 8f2d1db2d9ea..4a2769387229 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,10 +27,14 @@
27#ifndef DLMGLUE_H 27#ifndef DLMGLUE_H
28#define DLMGLUE_H 28#define DLMGLUE_H
29 29
30#define OCFS2_LVB_VERSION 2 30#include "dcache.h"
31
32#define OCFS2_LVB_VERSION 4
31 33
32struct ocfs2_meta_lvb { 34struct ocfs2_meta_lvb {
33 __be32 lvb_version; 35 __u8 lvb_version;
36 __u8 lvb_reserved0;
37 __be16 lvb_reserved1;
34 __be32 lvb_iclusters; 38 __be32 lvb_iclusters;
35 __be32 lvb_iuid; 39 __be32 lvb_iuid;
36 __be32 lvb_igid; 40 __be32 lvb_igid;
@@ -40,7 +44,9 @@ struct ocfs2_meta_lvb {
40 __be64 lvb_isize; 44 __be64 lvb_isize;
41 __be16 lvb_imode; 45 __be16 lvb_imode;
42 __be16 lvb_inlink; 46 __be16 lvb_inlink;
43 __be32 lvb_reserved[3]; 47 __be32 lvb_iattr;
48 __be32 lvb_igeneration;
49 __be32 lvb_reserved2;
44}; 50};
45 51
46/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */ 52/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
@@ -56,9 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
56void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); 62void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
57void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 63void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
58 enum ocfs2_lock_type type, 64 enum ocfs2_lock_type type,
65 unsigned int generation,
59 struct inode *inode); 66 struct inode *inode);
67void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
68 u64 parent, struct inode *inode);
60void ocfs2_lock_res_free(struct ocfs2_lock_res *res); 69void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
61int ocfs2_create_new_inode_locks(struct inode *inode); 70int ocfs2_create_new_inode_locks(struct inode *inode);
71int ocfs2_create_new_lock(struct ocfs2_super *osb,
72 struct ocfs2_lock_res *lockres, int ex, int local);
62int ocfs2_drop_inode_locks(struct inode *inode); 73int ocfs2_drop_inode_locks(struct inode *inode);
63int ocfs2_data_lock_full(struct inode *inode, 74int ocfs2_data_lock_full(struct inode *inode,
64 int write, 75 int write,
@@ -92,7 +103,12 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
92 int ex); 103 int ex);
93int ocfs2_rename_lock(struct ocfs2_super *osb); 104int ocfs2_rename_lock(struct ocfs2_super *osb);
94void ocfs2_rename_unlock(struct ocfs2_super *osb); 105void ocfs2_rename_unlock(struct ocfs2_super *osb);
106int ocfs2_dentry_lock(struct dentry *dentry, int ex);
107void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
108
95void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); 109void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
110void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
111 struct ocfs2_lock_res *lockres);
96 112
97/* for the vote thread */ 113/* for the vote thread */
98void ocfs2_process_blocked_lock(struct ocfs2_super *osb, 114void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index ec55ab3c1214..fb91089a60a7 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -33,6 +33,7 @@
33 33
34#include "dir.h" 34#include "dir.h"
35#include "dlmglue.h" 35#include "dlmglue.h"
36#include "dcache.h"
36#include "export.h" 37#include "export.h"
37#include "inode.h" 38#include "inode.h"
38 39
@@ -57,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
57 return ERR_PTR(-ESTALE); 58 return ERR_PTR(-ESTALE);
58 } 59 }
59 60
60 inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno); 61 inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
61 62
62 if (IS_ERR(inode)) { 63 if (IS_ERR(inode)) {
63 mlog_errno(PTR_ERR(inode)); 64 mlog_errno(PTR_ERR(inode));
@@ -77,6 +78,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
77 mlog_errno(-ENOMEM); 78 mlog_errno(-ENOMEM);
78 return ERR_PTR(-ENOMEM); 79 return ERR_PTR(-ENOMEM);
79 } 80 }
81 result->d_op = &ocfs2_dentry_ops;
80 82
81 mlog_exit_ptr(result); 83 mlog_exit_ptr(result);
82 return result; 84 return result;
@@ -113,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
113 goto bail_unlock; 115 goto bail_unlock;
114 } 116 }
115 117
116 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); 118 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
117 if (IS_ERR(inode)) { 119 if (IS_ERR(inode)) {
118 mlog(ML_ERROR, "Unable to create inode %llu\n", 120 mlog(ML_ERROR, "Unable to create inode %llu\n",
119 (unsigned long long)blkno); 121 (unsigned long long)blkno);
@@ -127,6 +129,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
127 parent = ERR_PTR(-ENOMEM); 129 parent = ERR_PTR(-ENOMEM);
128 } 130 }
129 131
132 parent->d_op = &ocfs2_dentry_ops;
133
130bail_unlock: 134bail_unlock:
131 ocfs2_meta_unlock(dir, 0); 135 ocfs2_meta_unlock(dir, 0);
132 136
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a9559c874530..2bbfa17090cf 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -44,6 +44,7 @@
44#include "file.h" 44#include "file.h"
45#include "sysfile.h" 45#include "sysfile.h"
46#include "inode.h" 46#include "inode.h"
47#include "ioctl.h"
47#include "journal.h" 48#include "journal.h"
48#include "mmap.h" 49#include "mmap.h"
49#include "suballoc.h" 50#include "suballoc.h"
@@ -1227,10 +1228,12 @@ const struct file_operations ocfs2_fops = {
1227 .open = ocfs2_file_open, 1228 .open = ocfs2_file_open,
1228 .aio_read = ocfs2_file_aio_read, 1229 .aio_read = ocfs2_file_aio_read,
1229 .aio_write = ocfs2_file_aio_write, 1230 .aio_write = ocfs2_file_aio_write,
1231 .ioctl = ocfs2_ioctl,
1230}; 1232};
1231 1233
1232const struct file_operations ocfs2_dops = { 1234const struct file_operations ocfs2_dops = {
1233 .read = generic_read_dir, 1235 .read = generic_read_dir,
1234 .readdir = ocfs2_readdir, 1236 .readdir = ocfs2_readdir,
1235 .fsync = ocfs2_sync_file, 1237 .fsync = ocfs2_sync_file,
1238 .ioctl = ocfs2_ioctl,
1236}; 1239};
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 327a5b7b86ed..69d3db569166 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -54,8 +54,6 @@
54 54
55#include "buffer_head_io.h" 55#include "buffer_head_io.h"
56 56
57#define OCFS2_FI_FLAG_NOWAIT 0x1
58#define OCFS2_FI_FLAG_DELETE 0x2
59struct ocfs2_find_inode_args 57struct ocfs2_find_inode_args
60{ 58{
61 u64 fi_blkno; 59 u64 fi_blkno;
@@ -71,6 +69,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
71 struct inode *inode, 69 struct inode *inode,
72 struct buffer_head *fe_bh); 70 struct buffer_head *fe_bh);
73 71
72void ocfs2_set_inode_flags(struct inode *inode)
73{
74 unsigned int flags = OCFS2_I(inode)->ip_attr;
75
76 inode->i_flags &= ~(S_IMMUTABLE |
77 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
78
79 if (flags & OCFS2_IMMUTABLE_FL)
80 inode->i_flags |= S_IMMUTABLE;
81
82 if (flags & OCFS2_SYNC_FL)
83 inode->i_flags |= S_SYNC;
84 if (flags & OCFS2_APPEND_FL)
85 inode->i_flags |= S_APPEND;
86 if (flags & OCFS2_NOATIME_FL)
87 inode->i_flags |= S_NOATIME;
88 if (flags & OCFS2_DIRSYNC_FL)
89 inode->i_flags |= S_DIRSYNC;
90}
91
74struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, 92struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
75 u64 blkno, 93 u64 blkno,
76 int delete_vote) 94 int delete_vote)
@@ -89,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
89 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); 107 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
90} 108}
91 109
92struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) 110struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
93{ 111{
94 struct inode *inode = NULL; 112 struct inode *inode = NULL;
95 struct super_block *sb = osb->sb; 113 struct super_block *sb = osb->sb;
@@ -107,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
107 } 125 }
108 126
109 args.fi_blkno = blkno; 127 args.fi_blkno = blkno;
110 args.fi_flags = 0; 128 args.fi_flags = flags;
111 args.fi_ino = ino_from_blkno(sb, blkno); 129 args.fi_ino = ino_from_blkno(sb, blkno);
112 130
113 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, 131 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
@@ -260,7 +278,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
260 inode->i_blocks = 278 inode->i_blocks =
261 ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size)); 279 ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size));
262 inode->i_mapping->a_ops = &ocfs2_aops; 280 inode->i_mapping->a_ops = &ocfs2_aops;
263 inode->i_flags |= S_NOATIME;
264 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); 281 inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
265 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); 282 inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
266 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); 283 inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
@@ -276,16 +293,13 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
276 293
277 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 294 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
278 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; 295 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
279 296 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
280 if (create_ino)
281 inode->i_ino = ino_from_blkno(inode->i_sb,
282 le64_to_cpu(fe->i_blkno));
283
284 mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
285 (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
286 297
287 inode->i_nlink = le16_to_cpu(fe->i_links_count); 298 inode->i_nlink = le16_to_cpu(fe->i_links_count);
288 299
300 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
301 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
302
289 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { 303 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
290 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 304 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
291 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); 305 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
@@ -323,12 +337,31 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
323 break; 337 break;
324 } 338 }
325 339
340 if (create_ino) {
341 inode->i_ino = ino_from_blkno(inode->i_sb,
342 le64_to_cpu(fe->i_blkno));
343
344 /*
345 * If we ever want to create system files from kernel,
346 * the generation argument to
347 * ocfs2_inode_lock_res_init() will have to change.
348 */
349 BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL));
350
351 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
352 OCFS2_LOCK_TYPE_META, 0, inode);
353 }
354
326 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, 355 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
327 OCFS2_LOCK_TYPE_RW, inode); 356 OCFS2_LOCK_TYPE_RW, inode->i_generation,
328 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, 357 inode);
329 OCFS2_LOCK_TYPE_META, inode); 358
330 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, 359 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
331 OCFS2_LOCK_TYPE_DATA, inode); 360 OCFS2_LOCK_TYPE_DATA, inode->i_generation,
361 inode);
362
363 ocfs2_set_inode_flags(inode);
364 inode->i_flags |= S_NOATIME;
332 365
333 status = 0; 366 status = 0;
334bail: 367bail:
@@ -343,15 +376,15 @@ static int ocfs2_read_locked_inode(struct inode *inode,
343 struct ocfs2_super *osb; 376 struct ocfs2_super *osb;
344 struct ocfs2_dinode *fe; 377 struct ocfs2_dinode *fe;
345 struct buffer_head *bh = NULL; 378 struct buffer_head *bh = NULL;
346 int status; 379 int status, can_lock;
347 int sysfile = 0; 380 u32 generation = 0;
348 381
349 mlog_entry("(0x%p, 0x%p)\n", inode, args); 382 mlog_entry("(0x%p, 0x%p)\n", inode, args);
350 383
351 status = -EINVAL; 384 status = -EINVAL;
352 if (inode == NULL || inode->i_sb == NULL) { 385 if (inode == NULL || inode->i_sb == NULL) {
353 mlog(ML_ERROR, "bad inode\n"); 386 mlog(ML_ERROR, "bad inode\n");
354 goto bail; 387 return status;
355 } 388 }
356 sb = inode->i_sb; 389 sb = inode->i_sb;
357 osb = OCFS2_SB(sb); 390 osb = OCFS2_SB(sb);
@@ -359,50 +392,110 @@ static int ocfs2_read_locked_inode(struct inode *inode,
359 if (!args) { 392 if (!args) {
360 mlog(ML_ERROR, "bad inode args\n"); 393 mlog(ML_ERROR, "bad inode args\n");
361 make_bad_inode(inode); 394 make_bad_inode(inode);
362 goto bail; 395 return status;
396 }
397
398 /*
399 * To improve performance of cold-cache inode stats, we take
400 * the cluster lock here if possible.
401 *
402 * Generally, OCFS2 never trusts the contents of an inode
403 * unless it's holding a cluster lock, so taking it here isn't
404 * a correctness issue as much as it is a performance
405 * improvement.
406 *
407 * There are three times when taking the lock is not a good idea:
408 *
409 * 1) During startup, before we have initialized the DLM.
410 *
411 * 2) If we are reading certain system files which never get
412 * cluster locks (local alloc, truncate log).
413 *
414 * 3) If the process doing the iget() is responsible for
415 * orphan dir recovery. We're holding the orphan dir lock and
416 * can get into a deadlock with another process on another
417 * node in ->delete_inode().
418 *
419 * #1 and #2 can be simply solved by never taking the lock
420 * here for system files (which are the only type we read
421 * during mount). It's a heavier approach, but our main
422 * concern is user-accesible files anyway.
423 *
424 * #3 works itself out because we'll eventually take the
425 * cluster lock before trusting anything anyway.
426 */
427 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
428 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
429
430 /*
431 * To maintain backwards compatibility with older versions of
432 * ocfs2-tools, we still store the generation value for system
433 * files. The only ones that actually matter to userspace are
434 * the journals, but it's easier and inexpensive to just flag
435 * all system files similarly.
436 */
437 if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
438 generation = osb->fs_generation;
439
440 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
441 OCFS2_LOCK_TYPE_META,
442 generation, inode);
443
444 if (can_lock) {
445 status = ocfs2_meta_lock(inode, NULL, NULL, 0);
446 if (status) {
447 make_bad_inode(inode);
448 mlog_errno(status);
449 return status;
450 }
363 } 451 }
364 452
365 /* Read the FE off disk. This is safe because the kernel only 453 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
366 * does one read_inode2 for a new inode, and if it doesn't 454 can_lock ? inode : NULL);
367 * exist yet then nobody can be working on it! */
368 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL);
369 if (status < 0) { 455 if (status < 0) {
370 mlog_errno(status); 456 mlog_errno(status);
371 make_bad_inode(inode);
372 goto bail; 457 goto bail;
373 } 458 }
374 459
460 status = -EINVAL;
375 fe = (struct ocfs2_dinode *) bh->b_data; 461 fe = (struct ocfs2_dinode *) bh->b_data;
376 if (!OCFS2_IS_VALID_DINODE(fe)) { 462 if (!OCFS2_IS_VALID_DINODE(fe)) {
377 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", 463 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
378 (unsigned long long)fe->i_blkno, 7, fe->i_signature); 464 (unsigned long long)fe->i_blkno, 7, fe->i_signature);
379 make_bad_inode(inode);
380 goto bail; 465 goto bail;
381 } 466 }
382 467
383 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) 468 /*
384 sysfile = 1; 469 * This is a code bug. Right now the caller needs to
470 * understand whether it is asking for a system file inode or
471 * not so the proper lock names can be built.
472 */
473 mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
474 !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
475 "Inode %llu: system file state is ambigous\n",
476 (unsigned long long)args->fi_blkno);
385 477
386 if (S_ISCHR(le16_to_cpu(fe->i_mode)) || 478 if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
387 S_ISBLK(le16_to_cpu(fe->i_mode))) 479 S_ISBLK(le16_to_cpu(fe->i_mode)))
388 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 480 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
389 481
390 status = -EINVAL;
391 if (ocfs2_populate_inode(inode, fe, 0) < 0) { 482 if (ocfs2_populate_inode(inode, fe, 0) < 0) {
392 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", 483 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
393 (unsigned long long)fe->i_blkno, inode->i_ino); 484 (unsigned long long)fe->i_blkno, inode->i_ino);
394 make_bad_inode(inode);
395 goto bail; 485 goto bail;
396 } 486 }
397 487
398 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); 488 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
399 489
400 if (sysfile)
401 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
402
403 status = 0; 490 status = 0;
404 491
405bail: 492bail:
493 if (can_lock)
494 ocfs2_meta_unlock(inode, 0);
495
496 if (status < 0)
497 make_bad_inode(inode);
498
406 if (args && bh) 499 if (args && bh)
407 brelse(bh); 500 brelse(bh);
408 501
@@ -875,9 +968,15 @@ void ocfs2_delete_inode(struct inode *inode)
875 goto bail_unlock_inode; 968 goto bail_unlock_inode;
876 } 969 }
877 970
878 /* Mark the inode as successfully deleted. This is important 971 /*
879 * for ocfs2_clear_inode as it will check this flag and skip 972 * Mark the inode as successfully deleted.
880 * any checkpointing work */ 973 *
974 * This is important for ocfs2_clear_inode() as it will check
975 * this flag and skip any checkpointing work
976 *
977 * ocfs2_stuff_meta_lvb() also uses this flag to invalidate
978 * the LVB for other nodes.
979 */
881 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; 980 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
882 981
883bail_unlock_inode: 982bail_unlock_inode:
@@ -1002,12 +1101,10 @@ void ocfs2_drop_inode(struct inode *inode)
1002 /* Testing ip_orphaned_slot here wouldn't work because we may 1101 /* Testing ip_orphaned_slot here wouldn't work because we may
1003 * not have gotten a delete_inode vote from any other nodes 1102 * not have gotten a delete_inode vote from any other nodes
1004 * yet. */ 1103 * yet. */
1005 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) { 1104 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1006 mlog(0, "Inode was orphaned on another node, clearing nlink.\n"); 1105 generic_delete_inode(inode);
1007 inode->i_nlink = 0; 1106 else
1008 } 1107 generic_drop_inode(inode);
1009
1010 generic_drop_inode(inode);
1011 1108
1012 mlog_exit_void(); 1109 mlog_exit_void();
1013} 1110}
@@ -1027,12 +1124,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
1027 u64 p_blkno; 1124 u64 p_blkno;
1028 int readflags = OCFS2_BH_CACHED; 1125 int readflags = OCFS2_BH_CACHED;
1029 1126
1030#if 0
1031 /* only turn this on if we know we can deal with read_block
1032 * returning nothing */
1033 if (reada) 1127 if (reada)
1034 readflags |= OCFS2_BH_READAHEAD; 1128 readflags |= OCFS2_BH_READAHEAD;
1035#endif
1036 1129
1037 if (((u64)block << inode->i_sb->s_blocksize_bits) >= 1130 if (((u64)block << inode->i_sb->s_blocksize_bits) >=
1038 i_size_read(inode)) { 1131 i_size_read(inode)) {
@@ -1131,6 +1224,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
1131 1224
1132 spin_lock(&OCFS2_I(inode)->ip_lock); 1225 spin_lock(&OCFS2_I(inode)->ip_lock);
1133 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); 1226 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1227 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
1134 spin_unlock(&OCFS2_I(inode)->ip_lock); 1228 spin_unlock(&OCFS2_I(inode)->ip_lock);
1135 1229
1136 fe->i_size = cpu_to_le64(i_size_read(inode)); 1230 fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1169,6 +1263,8 @@ void ocfs2_refresh_inode(struct inode *inode,
1169 spin_lock(&OCFS2_I(inode)->ip_lock); 1263 spin_lock(&OCFS2_I(inode)->ip_lock);
1170 1264
1171 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 1265 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
1266 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
1267 ocfs2_set_inode_flags(inode);
1172 i_size_write(inode, le64_to_cpu(fe->i_size)); 1268 i_size_write(inode, le64_to_cpu(fe->i_size));
1173 inode->i_nlink = le16_to_cpu(fe->i_links_count); 1269 inode->i_nlink = le16_to_cpu(fe->i_links_count);
1174 inode->i_uid = le32_to_cpu(fe->i_uid); 1270 inode->i_uid = le32_to_cpu(fe->i_uid);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 35140f6cf840..9957810fdf85 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -56,6 +56,7 @@ struct ocfs2_inode_info
56 struct ocfs2_journal_handle *ip_handle; 56 struct ocfs2_journal_handle *ip_handle;
57 57
58 u32 ip_flags; /* see below */ 58 u32 ip_flags; /* see below */
59 u32 ip_attr; /* inode attributes */
59 60
60 /* protected by recovery_lock. */ 61 /* protected by recovery_lock. */
61 struct inode *ip_next_orphan; 62 struct inode *ip_next_orphan;
@@ -121,7 +122,13 @@ struct buffer_head *ocfs2_bread(struct inode *inode, int block,
121void ocfs2_clear_inode(struct inode *inode); 122void ocfs2_clear_inode(struct inode *inode);
122void ocfs2_delete_inode(struct inode *inode); 123void ocfs2_delete_inode(struct inode *inode);
123void ocfs2_drop_inode(struct inode *inode); 124void ocfs2_drop_inode(struct inode *inode);
124struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff); 125
126/* Flags for ocfs2_iget() */
127#define OCFS2_FI_FLAG_NOWAIT 0x1
128#define OCFS2_FI_FLAG_DELETE 0x2
129#define OCFS2_FI_FLAG_SYSFILE 0x4
130#define OCFS2_FI_FLAG_NOLOCK 0x8
131struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
125struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, 132struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
126 u64 blkno, 133 u64 blkno,
127 int delete_vote); 134 int delete_vote);
@@ -142,4 +149,6 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
142int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb); 149int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
143int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); 150int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
144 151
152void ocfs2_set_inode_flags(struct inode *inode);
153
145#endif /* OCFS2_INODE_H */ 154#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
new file mode 100644
index 000000000000..3663cef80689
--- /dev/null
+++ b/fs/ocfs2/ioctl.c
@@ -0,0 +1,136 @@
1/*
2 * linux/fs/ocfs2/ioctl.c
3 *
4 * Copyright (C) 2006 Herbert Poetzl
5 * adapted from Remy Card's ext2/ioctl.c
6 */
7
8#include <linux/fs.h>
9#include <linux/mount.h>
10
11#define MLOG_MASK_PREFIX ML_INODE
12#include <cluster/masklog.h>
13
14#include "ocfs2.h"
15#include "alloc.h"
16#include "dlmglue.h"
17#include "inode.h"
18#include "journal.h"
19
20#include "ocfs2_fs.h"
21#include "ioctl.h"
22
23#include <linux/ext2_fs.h>
24
25static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
26{
27 int status;
28
29 status = ocfs2_meta_lock(inode, NULL, NULL, 0);
30 if (status < 0) {
31 mlog_errno(status);
32 return status;
33 }
34 *flags = OCFS2_I(inode)->ip_attr;
35 ocfs2_meta_unlock(inode, 0);
36
37 mlog_exit(status);
38 return status;
39}
40
41static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
42 unsigned mask)
43{
44 struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
45 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
46 struct ocfs2_journal_handle *handle = NULL;
47 struct buffer_head *bh = NULL;
48 unsigned oldflags;
49 int status;
50
51 mutex_lock(&inode->i_mutex);
52
53 status = ocfs2_meta_lock(inode, NULL, &bh, 1);
54 if (status < 0) {
55 mlog_errno(status);
56 goto bail;
57 }
58
59 status = -EROFS;
60 if (IS_RDONLY(inode))
61 goto bail_unlock;
62
63 status = -EACCES;
64 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
65 goto bail_unlock;
66
67 if (!S_ISDIR(inode->i_mode))
68 flags &= ~OCFS2_DIRSYNC_FL;
69
70 handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
71 if (IS_ERR(handle)) {
72 status = PTR_ERR(handle);
73 mlog_errno(status);
74 goto bail_unlock;
75 }
76
77 oldflags = ocfs2_inode->ip_attr;
78 flags = flags & mask;
79 flags |= oldflags & ~mask;
80
81 /*
82 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
83 * the relevant capability.
84 */
85 status = -EPERM;
86 if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
87 (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
88 if (!capable(CAP_LINUX_IMMUTABLE))
89 goto bail_unlock;
90 }
91
92 ocfs2_inode->ip_attr = flags;
93 ocfs2_set_inode_flags(inode);
94
95 status = ocfs2_mark_inode_dirty(handle, inode, bh);
96 if (status < 0)
97 mlog_errno(status);
98
99 ocfs2_commit_trans(handle);
100bail_unlock:
101 ocfs2_meta_unlock(inode, 1);
102bail:
103 mutex_unlock(&inode->i_mutex);
104
105 if (bh)
106 brelse(bh);
107
108 mlog_exit(status);
109 return status;
110}
111
112int ocfs2_ioctl(struct inode * inode, struct file * filp,
113 unsigned int cmd, unsigned long arg)
114{
115 unsigned int flags;
116 int status;
117
118 switch (cmd) {
119 case OCFS2_IOC_GETFLAGS:
120 status = ocfs2_get_inode_attr(inode, &flags);
121 if (status < 0)
122 return status;
123
124 flags &= OCFS2_FL_VISIBLE;
125 return put_user(flags, (int __user *) arg);
126 case OCFS2_IOC_SETFLAGS:
127 if (get_user(flags, (int __user *) arg))
128 return -EFAULT;
129
130 return ocfs2_set_inode_attr(inode, flags,
131 OCFS2_FL_MODIFIABLE);
132 default:
133 return -ENOTTY;
134 }
135}
136
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
new file mode 100644
index 000000000000..4a7c82931dba
--- /dev/null
+++ b/fs/ocfs2/ioctl.h
@@ -0,0 +1,16 @@
1/*
2 * ioctl.h
3 *
4 * Function prototypes
5 *
6 * Copyright (C) 2006 Herbert Poetzl
7 *
8 */
9
10#ifndef OCFS2_IOCTL_H
11#define OCFS2_IOCTL_H
12
13int ocfs2_ioctl(struct inode * inode, struct file * filp,
14 unsigned int cmd, unsigned long arg);
15
16#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f92bf1dd379a..fd9734def551 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1493,7 +1493,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
1493 if (de->name_len == 2 && !strncmp("..", de->name, 2)) 1493 if (de->name_len == 2 && !strncmp("..", de->name, 2))
1494 continue; 1494 continue;
1495 1495
1496 iter = ocfs2_iget(osb, le64_to_cpu(de->inode)); 1496 iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
1497 OCFS2_FI_FLAG_NOLOCK);
1497 if (IS_ERR(iter)) 1498 if (IS_ERR(iter))
1498 continue; 1499 continue;
1499 1500
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0673862c8bdd..849c3b4bb94a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -56,6 +56,7 @@
56#include "journal.h" 56#include "journal.h"
57#include "namei.h" 57#include "namei.h"
58#include "suballoc.h" 58#include "suballoc.h"
59#include "super.h"
59#include "symlink.h" 60#include "symlink.h"
60#include "sysfile.h" 61#include "sysfile.h"
61#include "uptodate.h" 62#include "uptodate.h"
@@ -178,7 +179,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
178 if (status < 0) 179 if (status < 0)
179 goto bail_add; 180 goto bail_add;
180 181
181 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); 182 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
182 if (IS_ERR(inode)) { 183 if (IS_ERR(inode)) {
183 mlog(ML_ERROR, "Unable to create inode %llu\n", 184 mlog(ML_ERROR, "Unable to create inode %llu\n",
184 (unsigned long long)blkno); 185 (unsigned long long)blkno);
@@ -198,10 +199,32 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
198 spin_unlock(&oi->ip_lock); 199 spin_unlock(&oi->ip_lock);
199 200
200bail_add: 201bail_add:
201
202 dentry->d_op = &ocfs2_dentry_ops; 202 dentry->d_op = &ocfs2_dentry_ops;
203 ret = d_splice_alias(inode, dentry); 203 ret = d_splice_alias(inode, dentry);
204 204
205 if (inode) {
206 /*
207 * If d_splice_alias() finds a DCACHE_DISCONNECTED
208 * dentry, it will d_move() it on top of ours. The
209 * return value will indicate this however, so in
210 * those cases, we switch them around for the locking
211 * code.
212 *
213 * NOTE: This dentry already has ->d_op set from
214 * ocfs2_get_parent() and ocfs2_get_dentry()
215 */
216 if (ret)
217 dentry = ret;
218
219 status = ocfs2_dentry_attach_lock(dentry, inode,
220 OCFS2_I(dir)->ip_blkno);
221 if (status) {
222 mlog_errno(status);
223 ret = ERR_PTR(status);
224 goto bail_unlock;
225 }
226 }
227
205bail_unlock: 228bail_unlock:
206 /* Don't drop the cluster lock until *after* the d_add -- 229 /* Don't drop the cluster lock until *after* the d_add --
207 * unlink on another node will message us to remove that 230 * unlink on another node will message us to remove that
@@ -310,13 +333,6 @@ static int ocfs2_mknod(struct inode *dir,
310 /* get our super block */ 333 /* get our super block */
311 osb = OCFS2_SB(dir->i_sb); 334 osb = OCFS2_SB(dir->i_sb);
312 335
313 if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
314 mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
315 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
316 status = -EMLINK;
317 goto leave;
318 }
319
320 handle = ocfs2_alloc_handle(osb); 336 handle = ocfs2_alloc_handle(osb);
321 if (handle == NULL) { 337 if (handle == NULL) {
322 status = -ENOMEM; 338 status = -ENOMEM;
@@ -331,6 +347,11 @@ static int ocfs2_mknod(struct inode *dir,
331 goto leave; 347 goto leave;
332 } 348 }
333 349
350 if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
351 status = -EMLINK;
352 goto leave;
353 }
354
334 dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data; 355 dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
335 if (!dirfe->i_links_count) { 356 if (!dirfe->i_links_count) {
336 /* can't make a file in a deleted directory. */ 357 /* can't make a file in a deleted directory. */
@@ -419,6 +440,13 @@ static int ocfs2_mknod(struct inode *dir,
419 goto leave; 440 goto leave;
420 } 441 }
421 442
443 status = ocfs2_dentry_attach_lock(dentry, inode,
444 OCFS2_I(dir)->ip_blkno);
445 if (status) {
446 mlog_errno(status);
447 goto leave;
448 }
449
422 insert_inode_hash(inode); 450 insert_inode_hash(inode);
423 dentry->d_op = &ocfs2_dentry_ops; 451 dentry->d_op = &ocfs2_dentry_ops;
424 d_instantiate(dentry, inode); 452 d_instantiate(dentry, inode);
@@ -643,11 +671,6 @@ static int ocfs2_link(struct dentry *old_dentry,
643 goto bail; 671 goto bail;
644 } 672 }
645 673
646 if (inode->i_nlink >= OCFS2_LINK_MAX) {
647 err = -EMLINK;
648 goto bail;
649 }
650
651 handle = ocfs2_alloc_handle(osb); 674 handle = ocfs2_alloc_handle(osb);
652 if (handle == NULL) { 675 if (handle == NULL) {
653 err = -ENOMEM; 676 err = -ENOMEM;
@@ -661,6 +684,11 @@ static int ocfs2_link(struct dentry *old_dentry,
661 goto bail; 684 goto bail;
662 } 685 }
663 686
687 if (!dir->i_nlink) {
688 err = -ENOENT;
689 goto bail;
690 }
691
664 err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name, 692 err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
665 dentry->d_name.len); 693 dentry->d_name.len);
666 if (err) 694 if (err)
@@ -726,6 +754,12 @@ static int ocfs2_link(struct dentry *old_dentry,
726 goto bail; 754 goto bail;
727 } 755 }
728 756
757 err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
758 if (err) {
759 mlog_errno(err);
760 goto bail;
761 }
762
729 atomic_inc(&inode->i_count); 763 atomic_inc(&inode->i_count);
730 dentry->d_op = &ocfs2_dentry_ops; 764 dentry->d_op = &ocfs2_dentry_ops;
731 d_instantiate(dentry, inode); 765 d_instantiate(dentry, inode);
@@ -744,6 +778,23 @@ bail:
744 return err; 778 return err;
745} 779}
746 780
/*
 * Acquire the dentry lock exclusively and release it again right away.
 * Holding it exclusively, even briefly, forces other nodes to drop it.
 *
 * Returns 0 on success, or the error from ocfs2_dentry_lock() (which is
 * also logged); the lock is only released when it was actually taken.
 */
static int ocfs2_remote_dentry_delete(struct dentry *dentry)
{
	int status = ocfs2_dentry_lock(dentry, 1);

	if (status) {
		mlog_errno(status);
		return status;
	}

	ocfs2_dentry_unlock(dentry, 1);
	return status;
}
797
747static int ocfs2_unlink(struct inode *dir, 798static int ocfs2_unlink(struct inode *dir,
748 struct dentry *dentry) 799 struct dentry *dentry)
749{ 800{
@@ -833,8 +884,7 @@ static int ocfs2_unlink(struct inode *dir,
833 else 884 else
834 inode->i_nlink--; 885 inode->i_nlink--;
835 886
836 status = ocfs2_request_unlink_vote(inode, dentry, 887 status = ocfs2_remote_dentry_delete(dentry);
837 (unsigned int) inode->i_nlink);
838 if (status < 0) { 888 if (status < 0) {
839 /* This vote should succeed under all normal 889 /* This vote should succeed under all normal
840 * circumstances. */ 890 * circumstances. */
@@ -1020,7 +1070,6 @@ static int ocfs2_rename(struct inode *old_dir,
1020 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, 1070 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
1021 // this is the 1st dirent bh 1071 // this is the 1st dirent bh
1022 nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink; 1072 nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
1023 unsigned int links_count;
1024 1073
1025 /* At some point it might be nice to break this function up a 1074 /* At some point it might be nice to break this function up a
1026 * bit. */ 1075 * bit. */
@@ -1094,23 +1143,26 @@ static int ocfs2_rename(struct inode *old_dir,
1094 } 1143 }
1095 } 1144 }
1096 1145
1097 if (S_ISDIR(old_inode->i_mode)) { 1146 /*
1098 /* Directories actually require metadata updates to 1147 * Though we don't require an inode meta data update if
1099 * the directory info so we can't get away with not 1148 * old_inode is not a directory, we lock anyway here to ensure
1100 * doing node locking on it. */ 1149 * the vote thread on other nodes won't have to concurrently
1101 status = ocfs2_meta_lock(old_inode, handle, NULL, 1); 1150 * downconvert the inode and the dentry locks.
1102 if (status < 0) { 1151 */
1103 if (status != -ENOENT) 1152 status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
1104 mlog_errno(status); 1153 if (status < 0) {
1105 goto bail; 1154 if (status != -ENOENT)
1106 }
1107
1108 status = ocfs2_request_rename_vote(old_inode, old_dentry);
1109 if (status < 0) {
1110 mlog_errno(status); 1155 mlog_errno(status);
1111 goto bail; 1156 goto bail;
1112 } 1157 }
1158
1159 status = ocfs2_remote_dentry_delete(old_dentry);
1160 if (status < 0) {
1161 mlog_errno(status);
1162 goto bail;
1163 }
1113 1164
1165 if (S_ISDIR(old_inode->i_mode)) {
1114 status = -EIO; 1166 status = -EIO;
1115 old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); 1167 old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
1116 if (!old_inode_de_bh) 1168 if (!old_inode_de_bh)
@@ -1124,14 +1176,6 @@ static int ocfs2_rename(struct inode *old_dir,
1124 if (!new_inode && new_dir!=old_dir && 1176 if (!new_inode && new_dir!=old_dir &&
1125 new_dir->i_nlink >= OCFS2_LINK_MAX) 1177 new_dir->i_nlink >= OCFS2_LINK_MAX)
1126 goto bail; 1178 goto bail;
1127 } else {
1128 /* Ah, the simple case - we're a file so just send a
1129 * message. */
1130 status = ocfs2_request_rename_vote(old_inode, old_dentry);
1131 if (status < 0) {
1132 mlog_errno(status);
1133 goto bail;
1134 }
1135 } 1179 }
1136 1180
1137 status = -ENOENT; 1181 status = -ENOENT;
@@ -1203,13 +1247,7 @@ static int ocfs2_rename(struct inode *old_dir,
1203 goto bail; 1247 goto bail;
1204 } 1248 }
1205 1249
1206 if (S_ISDIR(new_inode->i_mode)) 1250 status = ocfs2_remote_dentry_delete(new_dentry);
1207 links_count = 0;
1208 else
1209 links_count = (unsigned int) (new_inode->i_nlink - 1);
1210
1211 status = ocfs2_request_unlink_vote(new_inode, new_dentry,
1212 links_count);
1213 if (status < 0) { 1251 if (status < 0) {
1214 mlog_errno(status); 1252 mlog_errno(status);
1215 goto bail; 1253 goto bail;
@@ -1388,6 +1426,7 @@ static int ocfs2_rename(struct inode *old_dir,
1388 } 1426 }
1389 } 1427 }
1390 1428
1429 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
1391 status = 0; 1430 status = 0;
1392bail: 1431bail:
1393 if (rename_lock) 1432 if (rename_lock)
@@ -1676,6 +1715,12 @@ static int ocfs2_symlink(struct inode *dir,
1676 goto bail; 1715 goto bail;
1677 } 1716 }
1678 1717
1718 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1719 if (status) {
1720 mlog_errno(status);
1721 goto bail;
1722 }
1723
1679 insert_inode_hash(inode); 1724 insert_inode_hash(inode);
1680 dentry->d_op = &ocfs2_dentry_ops; 1725 dentry->d_op = &ocfs2_dentry_ops;
1681 d_instantiate(dentry, inode); 1726 d_instantiate(dentry, inode);
@@ -1964,13 +2009,8 @@ restart:
1964 } 2009 }
1965 num++; 2010 num++;
1966 2011
1967 /* XXX: questionable readahead stuff here */
1968 bh = ocfs2_bread(dir, b++, &err, 1); 2012 bh = ocfs2_bread(dir, b++, &err, 1);
1969 bh_use[ra_max] = bh; 2013 bh_use[ra_max] = bh;
1970#if 0 // ???
1971 if (bh)
1972 ll_rw_block(READ, 1, &bh);
1973#endif
1974 } 2014 }
1975 } 2015 }
1976 if ((bh = bh_use[ra_ptr++]) == NULL) 2016 if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1978,6 +2018,10 @@ restart:
1978 wait_on_buffer(bh); 2018 wait_on_buffer(bh);
1979 if (!buffer_uptodate(bh)) { 2019 if (!buffer_uptodate(bh)) {
1980 /* read error, skip block & hope for the best */ 2020 /* read error, skip block & hope for the best */
2021 ocfs2_error(dir->i_sb, "reading directory %llu, "
2022 "offset %lu\n",
2023 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2024 block);
1981 brelse(bh); 2025 brelse(bh);
1982 goto next; 2026 goto next;
1983 } 2027 }
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c5b1ac547c15..3330a5dc6be2 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -114,6 +114,26 @@
114#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ 114#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
115#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ 115#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
116 116
117/* Inode attributes, keep in sync with EXT2 */
118#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
119#define OCFS2_UNRM_FL (0x00000002) /* Undelete */
120#define OCFS2_COMPR_FL (0x00000004) /* Compress file */
121#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */
122#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */
123#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */
124#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */
125#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */
126#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */
127
128#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */
129#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */
130
131/*
132 * ioctl commands
133 */
134#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long)
135#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long)
136
117/* 137/*
118 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) 138 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
119 */ 139 */
@@ -399,7 +419,9 @@ struct ocfs2_dinode {
399 __le32 i_atime_nsec; 419 __le32 i_atime_nsec;
400 __le32 i_ctime_nsec; 420 __le32 i_ctime_nsec;
401 __le32 i_mtime_nsec; 421 __le32 i_mtime_nsec;
402/*70*/ __le64 i_reserved1[9]; 422 __le32 i_attr;
423 __le32 i_reserved1;
424/*70*/ __le64 i_reserved2[8];
403/*B8*/ union { 425/*B8*/ union {
404 __le64 i_pad1; /* Generic way to refer to this 426 __le64 i_pad1; /* Generic way to refer to this
405 64bit union */ 427 64bit union */
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 7dd9e1e705b0..4d5d5655c185 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -35,12 +35,15 @@
35#define OCFS2_LOCK_ID_MAX_LEN 32 35#define OCFS2_LOCK_ID_MAX_LEN 32
36#define OCFS2_LOCK_ID_PAD "000000" 36#define OCFS2_LOCK_ID_PAD "000000"
37 37
38#define OCFS2_DENTRY_LOCK_INO_START 18
39
38enum ocfs2_lock_type { 40enum ocfs2_lock_type {
39 OCFS2_LOCK_TYPE_META = 0, 41 OCFS2_LOCK_TYPE_META = 0,
40 OCFS2_LOCK_TYPE_DATA, 42 OCFS2_LOCK_TYPE_DATA,
41 OCFS2_LOCK_TYPE_SUPER, 43 OCFS2_LOCK_TYPE_SUPER,
42 OCFS2_LOCK_TYPE_RENAME, 44 OCFS2_LOCK_TYPE_RENAME,
43 OCFS2_LOCK_TYPE_RW, 45 OCFS2_LOCK_TYPE_RW,
46 OCFS2_LOCK_TYPE_DENTRY,
44 OCFS2_NUM_LOCK_TYPES 47 OCFS2_NUM_LOCK_TYPES
45}; 48};
46 49
@@ -63,6 +66,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
63 case OCFS2_LOCK_TYPE_RW: 66 case OCFS2_LOCK_TYPE_RW:
64 c = 'W'; 67 c = 'W';
65 break; 68 break;
69 case OCFS2_LOCK_TYPE_DENTRY:
70 c = 'N';
71 break;
66 default: 72 default:
67 c = '\0'; 73 c = '\0';
68 } 74 }
@@ -70,4 +76,23 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
70 return c; 76 return c;
71} 77}
72 78
79static char *ocfs2_lock_type_strings[] = {
80 [OCFS2_LOCK_TYPE_META] = "Meta",
81 [OCFS2_LOCK_TYPE_DATA] = "Data",
82 [OCFS2_LOCK_TYPE_SUPER] = "Super",
83 [OCFS2_LOCK_TYPE_RENAME] = "Rename",
84 /* Need to differntiate from [R]ename.. serializing writes is the
85 * important job it does, anyway. */
86 [OCFS2_LOCK_TYPE_RW] = "Write/Read",
87 [OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
88};
89
90static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
91{
92#ifdef __KERNEL__
93 mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
94#endif
95 return ocfs2_lock_type_strings[type];
96}
97
73#endif /* OCFS2_LOCKID_H */ 98#endif /* OCFS2_LOCKID_H */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d17e33e66a1e..4c29cd7cc8e6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -202,7 +202,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
202 202
203 mlog_entry_void(); 203 mlog_entry_void();
204 204
205 new = ocfs2_iget(osb, osb->root_blkno); 205 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
206 if (IS_ERR(new)) { 206 if (IS_ERR(new)) {
207 status = PTR_ERR(new); 207 status = PTR_ERR(new);
208 mlog_errno(status); 208 mlog_errno(status);
@@ -210,7 +210,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
210 } 210 }
211 osb->root_inode = new; 211 osb->root_inode = new;
212 212
213 new = ocfs2_iget(osb, osb->system_dir_blkno); 213 new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
214 if (IS_ERR(new)) { 214 if (IS_ERR(new)) {
215 status = PTR_ERR(new); 215 status = PTR_ERR(new);
216 mlog_errno(status); 216 mlog_errno(status);
@@ -682,7 +682,7 @@ static struct file_system_type ocfs2_fs_type = {
682 .kill_sb = kill_block_super, /* set to the generic one 682 .kill_sb = kill_block_super, /* set to the generic one
683 * right now, but do we 683 * right now, but do we
684 * need to change that? */ 684 * need to change that? */
685 .fs_flags = FS_REQUIRES_DEV, 685 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
686 .next = NULL 686 .next = NULL
687}; 687};
688 688
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index fc29cb7a437d..5df6e35d09b1 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -28,11 +28,11 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30 30
31#include "ocfs2.h"
32
33#define MLOG_MASK_PREFIX ML_INODE 31#define MLOG_MASK_PREFIX ML_INODE
34#include <cluster/masklog.h> 32#include <cluster/masklog.h>
35 33
34#include "ocfs2.h"
35
36#include "alloc.h" 36#include "alloc.h"
37#include "dir.h" 37#include "dir.h"
38#include "inode.h" 38#include "inode.h"
@@ -115,7 +115,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
115 goto bail; 115 goto bail;
116 } 116 }
117 117
118 inode = ocfs2_iget(osb, blkno); 118 inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE);
119 if (IS_ERR(inode)) { 119 if (IS_ERR(inode)) {
120 mlog_errno(PTR_ERR(inode)); 120 mlog_errno(PTR_ERR(inode));
121 inode = NULL; 121 inode = NULL;
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index b8a00a793326..9707ed7a3206 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
206} 206}
207 207
208/* Warning: even if it returns true, this does *not* guarantee that 208/* Warning: even if it returns true, this does *not* guarantee that
209 * the block is stored in our inode metadata cache. */ 209 * the block is stored in our inode metadata cache.
210 *
211 * This can be called under lock_buffer()
212 */
210int ocfs2_buffer_uptodate(struct inode *inode, 213int ocfs2_buffer_uptodate(struct inode *inode,
211 struct buffer_head *bh) 214 struct buffer_head *bh)
212{ 215{
@@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode,
226 return ocfs2_buffer_cached(OCFS2_I(inode), bh); 229 return ocfs2_buffer_cached(OCFS2_I(inode), bh);
227} 230}
228 231
/*
 * Report whether @bh appears to be out on a read-ahead request: the
 * buffer is locked and ocfs2_buffer_cached() reports it for this inode.
 * ip_io_sem should be held so that submitters are serialized against
 * this check.
 *
 * Returns 1 when both conditions hold, 0 otherwise.
 */
int ocfs2_buffer_read_ahead(struct inode *inode,
			    struct buffer_head *bh)
{
	/* An unlocked buffer has no I/O in flight; skip the cache lookup. */
	if (!buffer_locked(bh))
		return 0;

	return ocfs2_buffer_cached(OCFS2_I(inode), bh) != 0;
}
241
229/* Requires ip_lock */ 242/* Requires ip_lock */
230static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, 243static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
231 sector_t block) 244 sector_t block)
@@ -403,7 +416,11 @@ out_free:
403 * 416 *
404 * Note that this function may actually fail to insert the block if 417 * Note that this function may actually fail to insert the block if
405 * memory cannot be allocated. This is not fatal however (but may 418 * memory cannot be allocated. This is not fatal however (but may
406 * result in a performance penalty) */ 419 * result in a performance penalty)
420 *
421 * Readahead buffers can be passed in here before the I/O request is
422 * completed.
423 */
407void ocfs2_set_buffer_uptodate(struct inode *inode, 424void ocfs2_set_buffer_uptodate(struct inode *inode,
408 struct buffer_head *bh) 425 struct buffer_head *bh)
409{ 426{
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 01cd32d26b06..2e73206059a8 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
40 struct buffer_head *bh); 40 struct buffer_head *bh);
41void ocfs2_remove_from_cache(struct inode *inode, 41void ocfs2_remove_from_cache(struct inode *inode,
42 struct buffer_head *bh); 42 struct buffer_head *bh);
43int ocfs2_buffer_read_ahead(struct inode *inode,
44 struct buffer_head *bh);
43 45
44#endif /* OCFS2_UPTODATE_H */ 46#endif /* OCFS2_UPTODATE_H */
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index cf70fe2075b8..5b4dca79990b 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -74,9 +74,6 @@ struct ocfs2_vote_msg
74 __be32 v_orphaned_slot; /* Used during delete votes */ 74 __be32 v_orphaned_slot; /* Used during delete votes */
75 __be32 v_nlink; /* Used during unlink votes */ 75 __be32 v_nlink; /* Used during unlink votes */
76 } md1; /* Message type dependant 1 */ 76 } md1; /* Message type dependant 1 */
77 __be32 v_unlink_namelen;
78 __be64 v_unlink_parent;
79 u8 v_unlink_dirent[OCFS2_VOTE_FILENAME_LEN];
80}; 77};
81 78
82/* Responses are given these values to maintain backwards 79/* Responses are given these values to maintain backwards
@@ -100,8 +97,6 @@ struct ocfs2_vote_work {
100enum ocfs2_vote_request { 97enum ocfs2_vote_request {
101 OCFS2_VOTE_REQ_INVALID = 0, 98 OCFS2_VOTE_REQ_INVALID = 0,
102 OCFS2_VOTE_REQ_DELETE, 99 OCFS2_VOTE_REQ_DELETE,
103 OCFS2_VOTE_REQ_UNLINK,
104 OCFS2_VOTE_REQ_RENAME,
105 OCFS2_VOTE_REQ_MOUNT, 100 OCFS2_VOTE_REQ_MOUNT,
106 OCFS2_VOTE_REQ_UMOUNT, 101 OCFS2_VOTE_REQ_UMOUNT,
107 OCFS2_VOTE_REQ_LAST 102 OCFS2_VOTE_REQ_LAST
@@ -261,103 +256,13 @@ done:
261 return response; 256 return response;
262} 257}
263 258
264static int ocfs2_match_dentry(struct dentry *dentry,
265 u64 parent_blkno,
266 unsigned int namelen,
267 const char *name)
268{
269 struct inode *parent;
270
271 if (!dentry->d_parent) {
272 mlog(0, "Detached from parent.\n");
273 return 0;
274 }
275
276 parent = dentry->d_parent->d_inode;
277 /* Negative parent dentry? */
278 if (!parent)
279 return 0;
280
281 /* Name is in a different directory. */
282 if (OCFS2_I(parent)->ip_blkno != parent_blkno)
283 return 0;
284
285 if (dentry->d_name.len != namelen)
286 return 0;
287
288 /* comparison above guarantees this is safe. */
289 if (memcmp(dentry->d_name.name, name, namelen))
290 return 0;
291
292 return 1;
293}
294
295static void ocfs2_process_dentry_request(struct inode *inode,
296 int rename,
297 unsigned int new_nlink,
298 u64 parent_blkno,
299 unsigned int namelen,
300 const char *name)
301{
302 struct dentry *dentry = NULL;
303 struct list_head *p;
304 struct ocfs2_inode_info *oi = OCFS2_I(inode);
305
306 mlog(0, "parent %llu, namelen = %u, name = %.*s\n",
307 (unsigned long long)parent_blkno, namelen, namelen, name);
308
309 spin_lock(&dcache_lock);
310
311 /* Another node is removing this name from the system. It is
312 * up to us to find the corresponding dentry and if it exists,
313 * unhash it from the dcache. */
314 list_for_each(p, &inode->i_dentry) {
315 dentry = list_entry(p, struct dentry, d_alias);
316
317 if (ocfs2_match_dentry(dentry, parent_blkno, namelen, name)) {
318 mlog(0, "dentry found: %.*s\n",
319 dentry->d_name.len, dentry->d_name.name);
320
321 dget_locked(dentry);
322 break;
323 }
324
325 dentry = NULL;
326 }
327
328 spin_unlock(&dcache_lock);
329
330 if (dentry) {
331 d_delete(dentry);
332 dput(dentry);
333 }
334
335 /* rename votes don't send link counts */
336 if (!rename) {
337 mlog(0, "new_nlink = %u\n", new_nlink);
338
339 /* We don't have the proper locks here to directly
340 * change i_nlink and besides, the vote is sent
341 * *before* the operation so it may have failed on the
342 * other node. This passes a hint to ocfs2_drop_inode
343 * to force ocfs2_delete_inode, who will take the
344 * proper cluster locks to sort things out. */
345 if (new_nlink == 0) {
346 spin_lock(&oi->ip_lock);
347 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
348 spin_unlock(&OCFS2_I(inode)->ip_lock);
349 }
350 }
351}
352
353static void ocfs2_process_vote(struct ocfs2_super *osb, 259static void ocfs2_process_vote(struct ocfs2_super *osb,
354 struct ocfs2_vote_msg *msg) 260 struct ocfs2_vote_msg *msg)
355{ 261{
356 int net_status, vote_response; 262 int net_status, vote_response;
357 int orphaned_slot = 0; 263 int orphaned_slot = 0;
358 int rename = 0; 264 unsigned int node_num, generation;
359 unsigned int node_num, generation, new_nlink, namelen; 265 u64 blkno;
360 u64 blkno, parent_blkno;
361 enum ocfs2_vote_request request; 266 enum ocfs2_vote_request request;
362 struct inode *inode = NULL; 267 struct inode *inode = NULL;
363 struct ocfs2_msg_hdr *hdr = &msg->v_hdr; 268 struct ocfs2_msg_hdr *hdr = &msg->v_hdr;
@@ -437,18 +342,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
437 vote_response = ocfs2_process_delete_request(inode, 342 vote_response = ocfs2_process_delete_request(inode,
438 &orphaned_slot); 343 &orphaned_slot);
439 break; 344 break;
440 case OCFS2_VOTE_REQ_RENAME:
441 rename = 1;
442 /* fall through */
443 case OCFS2_VOTE_REQ_UNLINK:
444 parent_blkno = be64_to_cpu(msg->v_unlink_parent);
445 namelen = be32_to_cpu(msg->v_unlink_namelen);
446 /* new_nlink will be ignored in case of a rename vote */
447 new_nlink = be32_to_cpu(msg->md1.v_nlink);
448 ocfs2_process_dentry_request(inode, rename, new_nlink,
449 parent_blkno, namelen,
450 msg->v_unlink_dirent);
451 break;
452 default: 345 default:
453 mlog(ML_ERROR, "node %u, invalid request: %u\n", 346 mlog(ML_ERROR, "node %u, invalid request: %u\n",
454 node_num, request); 347 node_num, request);
@@ -889,75 +782,6 @@ int ocfs2_request_delete_vote(struct inode *inode)
889 return status; 782 return status;
890} 783}
891 784
892static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request,
893 struct dentry *dentry)
894{
895 struct inode *parent = dentry->d_parent->d_inode;
896
897 /* We need some values which will uniquely identify a dentry
898 * on the other nodes so that they can find it and run
899 * d_delete against it. Parent directory block and full name
900 * should suffice. */
901
902 mlog(0, "unlink/rename request: parent: %llu name: %.*s\n",
903 (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
904 dentry->d_name.name);
905
906 request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno);
907 request->v_unlink_namelen = cpu_to_be32(dentry->d_name.len);
908 memcpy(request->v_unlink_dirent, dentry->d_name.name,
909 dentry->d_name.len);
910}
911
912int ocfs2_request_unlink_vote(struct inode *inode,
913 struct dentry *dentry,
914 unsigned int nlink)
915{
916 int status;
917 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
918 struct ocfs2_vote_msg *request;
919
920 if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
921 return -ENAMETOOLONG;
922
923 status = -ENOMEM;
924 request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
925 inode->i_generation,
926 OCFS2_VOTE_REQ_UNLINK, nlink);
927 if (request) {
928 ocfs2_setup_unlink_vote(request, dentry);
929
930 status = ocfs2_request_vote(inode, request, NULL);
931
932 kfree(request);
933 }
934 return status;
935}
936
937int ocfs2_request_rename_vote(struct inode *inode,
938 struct dentry *dentry)
939{
940 int status;
941 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
942 struct ocfs2_vote_msg *request;
943
944 if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
945 return -ENAMETOOLONG;
946
947 status = -ENOMEM;
948 request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
949 inode->i_generation,
950 OCFS2_VOTE_REQ_RENAME, 0);
951 if (request) {
952 ocfs2_setup_unlink_vote(request, dentry);
953
954 status = ocfs2_request_vote(inode, request, NULL);
955
956 kfree(request);
957 }
958 return status;
959}
960
961int ocfs2_request_mount_vote(struct ocfs2_super *osb) 785int ocfs2_request_mount_vote(struct ocfs2_super *osb)
962{ 786{
963 int status; 787 int status;
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/vote.h
index 9cce60703466..53ebc1c69e56 100644
--- a/fs/ocfs2/vote.h
+++ b/fs/ocfs2/vote.h
@@ -39,11 +39,6 @@ static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
39} 39}
40 40
41int ocfs2_request_delete_vote(struct inode *inode); 41int ocfs2_request_delete_vote(struct inode *inode);
42int ocfs2_request_unlink_vote(struct inode *inode,
43 struct dentry *dentry,
44 unsigned int nlink);
45int ocfs2_request_rename_vote(struct inode *inode,
46 struct dentry *dentry);
47int ocfs2_request_mount_vote(struct ocfs2_super *osb); 42int ocfs2_request_mount_vote(struct ocfs2_super *osb);
48int ocfs2_request_umount_vote(struct ocfs2_super *osb); 43int ocfs2_request_umount_vote(struct ocfs2_super *osb);
49int ocfs2_register_net_handlers(struct ocfs2_super *osb); 44int ocfs2_register_net_handlers(struct ocfs2_super *osb);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 93a56bd4a2b7..592a6402e851 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -8,10 +8,10 @@
8#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/openprom_fs.h>
12#include <linux/init.h> 11#include <linux/init.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <linux/magic.h>
15 15
16#include <asm/openprom.h> 16#include <asm/openprom.h>
17#include <asm/oplib.h> 17#include <asm/oplib.h>